1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# top-like utility for displaying kvm statistics
5#
6# Copyright 2006-2008 Qumranet Technologies
7# Copyright 2008-2011 Red Hat, Inc.
8#
9# Authors:
10#  Avi Kivity <avi@redhat.com>
11#
12"""The kvm_stat module outputs statistics about running KVM VMs
13
14Three different ways of output formatting are available:
15- as a top-like text ui
16- in a key -> value format
17- in an all keys, all values format
18
19The data is sampled from the KVM's debugfs entries and its perf events.
20"""
21from __future__ import print_function
22
23import curses
24import sys
25import locale
26import os
27import time
28import argparse
29import ctypes
30import fcntl
31import resource
32import struct
33import re
34import subprocess
35import signal
36from collections import defaultdict, namedtuple
37from functools import reduce
38from datetime import datetime
39
# Exit reason name -> exit reason number.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists the
# 'vmx' flag; TracepointProvider then uses the numbers as filter values for
# the kvm_exit tracepoint.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'INIT_SIGNAL':          3,
    'SIPI_SIGNAL':          4,
    'INTERRUPT_WINDOW':     7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVD':                 13,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MSR_LOAD_FAIL':        34,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_TRAP_FLAG':    37,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EOI_INDUCED':          45,
    'GDTR_IDTR':            46,
    'LDTR_TR':              47,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'INVEPT':               50,
    'RDTSCP':               51,
    'PREEMPTION_TIMER':     52,
    'INVVPID':              53,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'RDRAND':               57,
    'INVPCID':              58,
    'VMFUNC':               59,
    'ENCLS':                60,
    'RDSEED':               61,
    'PML_FULL':             62,
    'XSAVES':               63,
    'XRSTORS':              64,
    'UMWAIT':               67,
    'TPAUSE':               68,
    'BUS_LOCK':             74,
    'NOTIFY':               75,
}
104
# Exit reason name -> exit reason number.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists the
# 'svm' flag (and not 'vmx'); TracepointProvider then uses the numbers as
# filter values for the kvm_exit tracepoint.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR2':       0x002,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR2':      0x012,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'LAST_EXCP':      0x05f,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'RDPRU':          0x08e,
    'EFER_WRITE_TRAP':           0x08f,
    'CR0_WRITE_TRAP':            0x090,
    'CR1_WRITE_TRAP':            0x091,
    'CR2_WRITE_TRAP':            0x092,
    'CR3_WRITE_TRAP':            0x093,
    'CR4_WRITE_TRAP':            0x094,
    'CR5_WRITE_TRAP':            0x095,
    'CR6_WRITE_TRAP':            0x096,
    'CR7_WRITE_TRAP':            0x097,
    'CR8_WRITE_TRAP':            0x098,
    'CR9_WRITE_TRAP':            0x099,
    'CR10_WRITE_TRAP':           0x09a,
    'CR11_WRITE_TRAP':           0x09b,
    'CR12_WRITE_TRAP':           0x09c,
    'CR13_WRITE_TRAP':           0x09d,
    'CR14_WRITE_TRAP':           0x09e,
    'CR15_WRITE_TRAP':           0x09f,
    'INVPCID':        0x0a2,
    'NPF':            0x400,
    'AVIC_INCOMPLETE_IPI':       0x401,
    'AVIC_UNACCELERATED_ACCESS': 0x402,
    'VMGEXIT':        0x403,
}
204
# EC definition of HSR (from arch/arm64/include/asm/esr.h)
# Exception class name -> exception class number. ArchA64 uses this table as
# its exit reasons, filtered on the 'esr_ec' field of the kvm_exit tracepoint.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFx':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'PAC':          0x09,
    'CP14_64':      0x0C,
    'BTI':          0x0D,
    'ILL':          0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'SVE':          0x19,
    'ERET':         0x1A,
    'FPAC':         0x1C,
    'SME':          0x1D,
    'IMP_DEF':      0x1F,
    'IABT_LOW':     0x20,
    'IABT_CUR':     0x21,
    'PC_ALIGN':     0x22,
    'DABT_LOW':     0x24,
    'DABT_CUR':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT_LOW':  0x30,
    'BREAKPT_CUR':  0x31,
    'SOFTSTP_LOW':  0x32,
    'SOFTSTP_CUR':  0x33,
    'WATCHPT_LOW':  0x34,
    'WATCHPT_CUR':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
250
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Exit reason name -> number. TracepointProvider uses these as values for the
# 'reason' filter of the kvm_userspace_exit tracepoint on every architecture.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
    'S390_STSI':        25,
    'IOAPIC_EOI':       26,
    'HYPERV':           27,
    'ARM_NISV':         28,
    'X86_RDMSR':        29,
    'X86_WRMSR':        30,
    'DIRTY_RING_FULL':  31,
    'AP_RESET_HOLD':    32,
    'X86_BUS_LOCK':     33,
    'XEN':              34,
    'RISCV_SBI':        35,
    'RISCV_CSR':        36,
    'NOTIFY':           37,
}
292
# ioctl request numbers issued on perf event file descriptors by the Event
# class (looked up through ARCH.ioctl_numbers). ArchPPC installs different
# values for all four requests.
# NOTE(review): presumably these mirror the kernel's PERF_EVENT_IOC_*
# requests - confirm against include/uapi/linux/perf_event.h.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
299
# NOTE(review): nothing in this part of the file changes this flag;
# presumably a signal handler sets it to True - confirm against the rest of
# the file.
signal_received = False

# Preferred encoding of the current locale; passing False asks
# getpreferredencoding() not to call setlocale() itself.
ENCODING = locale.getpreferredencoding(False)
# Matches strings that contain no '(' at all, i.e. tracepoint field names
# without an 'event(filter)' suffix (used by Arch.tracepoint_is_child()).
TRACE_FILTER = re.compile(r'^[^\(]*$')
304
305
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the host machine.

        The machine name reported by os.uname() selects ArchPPC, ArchA64
        or ArchS390. Every other machine is treated as x86, where the
        first 'flags' line of /proc/cpuinfo decides between the VMX and
        the SVM exit reasons.

        Returns None if that line lists neither 'vmx' nor 'svm', or if
        /proc/cpuinfo contains no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes the file on every return path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is looked at.
                return None
        return None

    def tracepoint_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        A child is a tracepoint field of the form 'parent(filter)'; the
        parent is everything in front of the first '('.

        """
        if TRACE_FILTER.match(field):
            return None
        return field.split('(', 1)[0]
340
341
class ArchX86(Arch):
    """x86 flavour of Arch; the caller supplies the exit reason table."""
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.exit_reason_field = 'exit_reason'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 298

    def debugfs_is_child(self, field):
        """Returns the parent's name if 'field' is a child, else None.

        This implementation always returns None.
        """
        return None
352
353
class ArchPPC(Arch):
    """PowerPC flavour of Arch with its own ioctl request numbers."""
    def __init__(self):
        self.sc_perf_evt_open = 319
        # Work on a private copy: the module-level IOCTL_NUMBERS table has
        # to keep its generic values instead of being overwritten in place.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        # The shift binds tighter than '|': the pointer size is placed at
        # bit 16 and upwards of the request number.
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | (char_ptr_size << 16)
        self.exit_reason_field = 'exit_nr'
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise

        This implementation always returns None.
        """
        return None
372
373
class ArchA64(Arch):
    """arm64 flavour of Arch, using the exception class as exit reason."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.exit_reason_field = 'esr_ec'
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 241

    def debugfs_is_child(self, field):
        """Returns the parent's name if 'field' is a child, else None.

        This implementation always returns None.
        """
        return None
384
385
class ArchS390(Arch):
    """s390 flavour of Arch; it defines no exit reason field or table."""
    def __init__(self):
        self.exit_reasons = None
        self.exit_reason_field = None
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 331

    def debugfs_is_child(self, field):
        """Returns the parent's name if 'field' is a child, else None.

        Fields starting with 'instruction_' are children of
        'exit_instruction'.
        """
        is_child = field.startswith('instruction_')
        return 'exit_instruction' if is_child else None
397
398
# Architecture description of the host, resolved once when the module is
# loaded. Arch.get_arch() may return None (x86 without 'vmx'/'svm' flags).
ARCH = Arch.get_arch()
400
401
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Presets type, size and read_format for a grouped tracepoint."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this struct is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP


# Values stored in perf_event_attr.type and perf_event_attr.read_format.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3
433
434
class Group(object):
    """An ordered collection of perf events that is read in one go."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends 'event' to the end of the group's event list."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        The data comes from a single read on the file descriptor of the
        first event, which is the group leader. See perf_event_open(2)
        for details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        names = [event.name for event in self.events]
        count = len(self.events)
        leader_fd = self.events[0].fd
        raw = os.read(leader_fd, 8 * (1 + count))
        # Skip the leading 'nr' word, then one u64 per event.
        values = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        return dict(zip(names, values))
466
467
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Opens the perf event for 'trace_point' and joins it to 'group'.

        'name' is the key under which Group.read() reports the value.
        'trace_filter', if set, is installed as the event's filter.
        Raises OSError if the event cannot be opened.

        """
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self.name = name
        self.fd = None
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # getattr() keeps this safe for objects whose constructor failed
        # before 'fd' was assigned; 'is not None' also closes descriptor 0.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            # Forget the descriptor first so it is never closed twice.
            self.fd = None
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The tracepoint's id is read from its 'id' file below the tracing
        directory and stored in the struct's config field.

        """

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # Close the id file right away instead of leaving it to the
        # garbage collector.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.

        """

        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Record the descriptor before touching it any further, so that
        # __del__ closes it even if setting the filter below raises.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
571
572
class Provider(object):
    """Common base of the data providers."""
    def __init__(self, pid):
        self.child_events = False
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Tells whether 'field' passes 'fields_filter'.

        An empty or missing filter accepts every field; otherwise the
        filter is a regular expression matched at the start of 'field'.
        """
        if fields_filter:
            matched = re.match(fields_filter, field)
            return matched is not None
        return True

    @staticmethod
    def walkdir(path):
        """Returns the first os.walk() entry for the given directory.

        That is the usual 3-tuple of (dirpath, dirnames, filenames) for
        'path' itself.
        """
        walker = os.walk(path)
        return next(walker)
594
595
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        """Determines the wanted fields, then sets up the traces for 'pid'.

        The base class constructor assigns self.pid, which runs the pid
        setter below; that setter creates the perf events and re-applies
        the field list, so the fields have to exist before it is called.

        """
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        # Architectures without an exit reason field or table get no
        # kvm_exit filter.
        if ARCH.exit_reason_field and ARCH.exit_reasons:
            filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_values = self.filters[field][1]
                extra.extend(field + '(' + name + ')'
                             for name in filter_values)
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as the list grows while parents are added.
        for field in list(fields):
            parent = ARCH.tracepoint_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Hand over the complete list in one go: the setter enables exactly
        # the listed events, so the parents added above are enabled as well.
        self.fields = fields

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            members = list_string.split(',')

            for member in members:
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))

            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data.

        One group is created per thread of the monitored pid, or per
        online cpu if no pid is monitored. Exits the program if the
        open file limit cannot be raised far enough.

        """
        fields = self._get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self._get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                # Fields of the form 'event(value)' become a filtered event
                # on the tracepoint 'event'.
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                trace_cpu = groupid if self._pid == 0 else -1
                trace_pid = int(groupid) if self._pid != 0 else -1

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        """The list of field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self._setup_traces()
        # Re-apply the field list so the new events get enabled/disabled.
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events.

        Values of the same event are summed up over all groups. Child
        events are reported under the key 'child name' + ' ' + 'parent
        name'. 'by_guest' is accepted but not used by this provider.

        """
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in self._fields:
                    continue
                parent = ARCH.tracepoint_is_child(name)
                if parent:
                    name += ' ' + parent
                ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
793
794
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        """Sets up the fields and paths for 'pid'.

        If 'include_past' is true, the baseline is set to zero so that
        the first read reports the full counter values.

        """
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files, except for the
        names on the exempt list below.

        """
        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns']
        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
                  if field not in exempt_list]

        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as the list grows while parents are added.
        for field in list(fields):
            parent = ARCH.debugfs_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Written to _fields directly: the 'fields' setter would call
        # reset(), which needs state the constructor sets up only later.
        self._fields = fields

    @property
    def fields(self):
        """The list of field names that are currently reported."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Replaces the field list and resets the counters."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """The monitored pid; 0 means all VMs."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the VM directories that belong to 'pid'."""
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if len(vms) == 0:
                self.do_read = False

            # VM directory names start with '<pid>-'. Anchoring the match
            # at the start keeps e.g. pid 23 from selecting '123-4'.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Build a new list: removing entries from the list that is being
        # iterated would skip the entry after every removed one.
        self.paths = [path for path in self.paths
                      if os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        Parameter 'by_guest':
          if true, values of non-child fields are summed up under the
          part of the VM directory name in front of the first '-' (the
          pid) instead of under the field name.

        Child fields are reported under the key 'child name' + ' ' +
        'parent name'.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            # The VM directories sit directly below the kvm debugfs folder.
            # Names of directories nested deeper cannot be resolved by
            # _read_field(), so only the top level is listed.
            top_level = next(os.walk(PATH_DEBUGFS_KVM), None)
            paths = list(top_level[1]) if top_level else []
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First time this key is seen: start counting from now.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    field = field + ' ' + parent
                else:
                    if by_guest:
                        field = key.split('-')[0]    # set 'field' to 'pid'
                increment = value - self._baseline.get(key, 0)
                if field in results:
                    results[field] += increment
                else:
                    results[field] = increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        field_path = os.path.join(PATH_DEBUGFS_KVM, path, field)
        try:
            # Close the file right away instead of leaving it to the
            # garbage collector.
            with open(field_path) as field_file:
                return int(field_file.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counters by zeroing the baseline"""
        self._baseline = {}
        self.read(2)
928
929
# Record stored per key by Stats.get(): 'value' is the most recently read
# total, 'delta' is the difference to the total read before that.
EventStat = namedtuple('EventStat', ['value', 'delta'])
931
932
class Stats(object):
    """Front end to the data providers.

    Owns the list of providers, forwards pid/fields/child-event settings to
    them, and merges what they report into a single dict of EventStat
    records (see get()).

    """
    def __init__(self, options):
        self.providers = self._get_providers(options)
        self._pid_filter = options.pid
        self._fields_filter = options.fields
        self.values = {}
        self._child_events = False

    def _get_providers(self, options):
        """Build the provider list selected by the passed options.

        A DebugfsProvider is added if options.debugfs is set. A
        TracepointProvider is added if options.tracepoints is set, and also
        as fallback when no other provider was selected.

        """
        selected = []

        if options.debugfs:
            debugfs = DebugfsProvider(options.pid, options.fields,
                                      options.debugfs_include_past)
            selected.append(debugfs)
        if options.tracepoints or len(selected) == 0:
            tracepoints = TracepointProvider(options.pid, options.fields)
            selected.append(tracepoints)

        return selected

    def _update_provider_filters(self):
        """Hand the current fields filter to every provider."""
        # The counters are reset when the fields get updated, hence the
        # cache of old values can be dropped as well.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drop all cached values and reset every provider."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        """The fields filter currently in effect."""
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        # nothing to do unless the filter actually changes
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self._update_provider_filters()

    @property
    def pid_filter(self):
        """The pid filter currently in effect."""
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        # nothing to do unless the pid actually changes
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def child_events(self):
        """Whether child events are reported."""
        return self._child_events

    @child_events.setter
    def child_events(self, val):
        self._child_events = val
        for provider in self.providers:
            provider.child_events = val

    def get(self, by_guest=0):
        """Read all providers and return the updated dict of EventStat
        records, i.e. key -> (value, delta to last value).
        Key formats:
          * plain: 'key' is event name
          * child-parent: 'key' is in format '<child> <parent>'
          * pid: 'key' is the pid of the guest, and the record contains the
               aggregated event data
        These formats are generated by the providers, and handled in class TUI.
        """
        for provider in self.providers:
            fresh = provider.read(by_guest=by_guest)
            for key, current in fresh.items():
                if key in self.values:
                    previous = self.values[key].value
                else:
                    previous = 0
                self.values[key] = EventStat(current, current - previous)
        return self.values

    def toggle_display_guests(self, to_pid):
        """Switch between collecting stats per event and per guest pid.

        The keys reported by DebugfsProvider differ between the two modes,
        so the keys of the mode being left are removed from self.values
        before the values are read again.

        Returns 1 (and changes nothing) if a TracepointProvider is in use,
        0 otherwise.

        """
        for provider in self.providers:
            if isinstance(provider, TracepointProvider):
                return 1
        if to_pid:
            debugfs_providers = [provider for provider in self.providers
                                 if isinstance(provider, DebugfsProvider)]
            for provider in debugfs_providers:
                for key in provider.fields:
                    self.values.pop(key, None)
        else:
            pid_keys = [key for key in self.values if key.isdigit()]
            for key in pid_keys:
                del self.values[key]
        # Update oldval (see get())
        self.get(to_pid)
        return 0
1048
1049
# Update interval in seconds that the interactive delay prompt falls back to
# when it is answered with an empty input.
DELAY_DEFAULT = 3.0
# Guest names longer than this are cut (and '...' appended) in the TUI header.
MAX_GUEST_NAME_LEN = 48
# Field regexes longer than this are cut (and '...' appended) in the TUI
# header.
MAX_REGEX_LEN = 44
# Initial sorting mode of the TUI: sort by (delta, value). In any other mode
# the TUI sorts by value only.
SORT_DEFAULT = 0
# Accepted range for the update interval in seconds, see is_delay_valid().
MIN_DELAY = 0.1
MAX_DELAY = 25.5
1056
1057
class Tui(object):
    """Instruments curses to draw a nice text ui.

    Meant to be used as a context manager: __enter__ sets up curses,
    __exit__ restores the terminal. show_stats() runs the main loop.

    """
    def __init__(self, stats, opts):
        self.stats = stats
        self.screen = None
        self._delay_initial = 0.25
        self._delay_regular = opts.set_delay
        self._sorting = SORT_DEFAULT
        self._display_guests = 0
        # Both are (re-)computed by _refresh_header(); initialise them so
        # that methods reading them never hit a missing attribute.
        self._gname = ''
        self._headline = ''

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    @staticmethod
    def get_all_gnames():
        """Returns a list of (pid, gname) tuples of all running guests

        The candidates are taken from the output of 'ps': every process
        with ' -name ' in its arguments is considered a guest. The pid is
        returned as the string printed by 'ps'.

        Raises Exception if 'ps' cannot be started.

        """
        res = []
        try:
            child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                     stdout=subprocess.PIPE)
        except (OSError, ValueError, subprocess.SubprocessError) as err:
            raise Exception('Failed to run ps: {}'.format(err)) from err
        # The context manager closes the pipe and waits for the child, so
        # that no unreaped 'ps' process is left behind - even on errors.
        with child:
            for line in child.stdout:
                line = line.decode(ENCODING).lstrip().split(' ', 1)
                if len(line) < 2:
                    # no arguments column, cannot be a guest
                    continue
                # perform a sanity check before calling the more expensive
                # function to possibly extract the guest name
                if ' -name ' in line[1]:
                    res.append((line[0], Tui.get_gname_from_pid(line[0])))

        return res

    def _print_all_gnames(self, row):
        """Print a list of all running guests along with their pids.

        The list starts at screen line 'row' and is cut off at the bottom
        of the screen. 'Not available' is printed if the list cannot be
        obtained.

        """
        self.screen.addstr(row, 2, '%8s  %-60s' %
                           ('Pid', 'Guest Name (fuzzy list, might be '
                            'inaccurate!)'),
                           curses.A_UNDERLINE)
        row += 1
        try:
            for line in self.get_all_gnames():
                self.screen.addstr(row, 2, '%8s  %-60s' % (line[0], line[1]))
                row += 1
                if row >= self.screen.getmaxyx()[0]:
                    break
        except Exception:
            self.screen.addstr(row + 1, 2, 'Not available')

    @staticmethod
    def get_pid_from_gname(gname):
        """Fuzzy function to convert guest name to QEMU process pid.

        Returns a list of potential pids, can be empty if no match found.
        Throws an exception on processing errors.

        """
        return [int(pid) for pid, name in Tui.get_all_gnames()
                if gname == name]

    @staticmethod
    def get_gname_from_pid(pid):
        """Returns the guest name for a QEMU process pid.

        Extracts the guest name from the QEMU comma line by processing the
        '-name' option. Will also handle names specified out of sequence.

        Returns '' if the command line cannot be read or holds no usable
        '-name' option.

        """
        name = ''
        try:
            # close the file right away rather than leaving that to the
            # garbage collector - this is called for every guest on every
            # refresh when displaying by guest
            with open('/proc/{}/cmdline'.format(pid), 'r') as cmdline:
                line = cmdline.read().split('\0')
            parms = line[line.index('-name') + 1].split(',')
            while '' in parms:
                # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
                # in # ['foo', '', 'bar'], which we revert here
                idx = parms.index('')
                parms[idx - 1] += ',' + parms[idx + 1]
                del parms[idx:idx+2]
            # the '-name' switch allows for two ways to specify the guest name,
            # where the plain name overrides the name specified via 'guest='
            for arg in parms:
                if '=' not in arg:
                    name = arg
                    break
                if arg[:6] == 'guest=':
                    name = arg[6:]
        except (ValueError, IOError, IndexError):
            pass

        return name

    def _update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.screen.addstr(4, 1, 'Updating pid filter...')
        self.screen.refresh()
        self.stats.pid_filter = pid

    def _refresh_header(self, pid=None):
        """Refreshes the header.

        Erases the screen and draws the headline, the regex filter (if
        any) and the column titles. Uses the pid filter of the stats
        object if no pid is passed.

        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        gname = self.get_gname_from_pid(pid)
        # remembered to be able to find the guest again by name, see
        # _refresh_body()
        self._gname = gname
        if gname:
            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                   if len(gname) > MAX_GUEST_NAME_LEN
                                   else gname))
        if pid > 0:
            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
        else:
            self._headline = 'kvm statistics - summary'
        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
        if self.stats.fields_filter:
            regex = self.stats.fields_filter
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        if self._display_guests:
            col_name = 'Guest Name'
        else:
            col_name = 'Event'
        self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
                           (col_name, 'Total', '%Total', 'CurAvg/s'),
                           curses.A_STANDOUT)
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def _refresh_body(self, sleeptime):
        """Fetches the current stats and redraws the table below the header.

        'sleeptime' is the number of seconds the deltas are divided by to
        get the 'CurAvg/s' column.

        If the filtered pid is gone, the guest is looked up by name; if
        exactly one match is found the pid filter is switched to it,
        otherwise the guest is flagged as dead and the old data is kept.

        """
        def insert_child(sorted_items, child, values, parent):
            num = len(sorted_items)
            for i in range(0, num):
                # only add child if parent is present
                if parent.startswith(sorted_items[i][0]):
                    sorted_items.insert(i + 1, ('  ' + child, values))

        def get_sorted_events(self, stats):
            """ separate parent and child events """
            if self._sorting == SORT_DEFAULT:
                def sortkey(pair):
                    # sort by (delta value, overall value)
                    v = pair[1]
                    return (v.delta, v.value)
            else:
                def sortkey(pair):
                    # sort by overall value
                    v = pair[1]
                    return v.value

            childs = []
            sorted_items = []
            # we can't rule out child events to appear prior to parents even
            # when sorted - separate out all children first, and add in later
            for key, values in sorted(stats.items(), key=sortkey,
                                      reverse=True):
                if values == (0, 0):
                    continue
                if ' ' in key:
                    if not self.stats.child_events:
                        continue
                    childs.insert(0, (key, values))
                else:
                    sorted_items.append((key, values))
            if self.stats.child_events:
                for key, values in childs:
                    (child, parent) = key.split(' ')
                    insert_child(sorted_items, child, values, parent)

            return sorted_items

        if not self._is_running_guest(self.stats.pid_filter):
            if self._gname:
                try:  # ...to identify the guest by name in case it's back
                    pids = self.get_pid_from_gname(self._gname)
                    if len(pids) == 1:
                        self._refresh_header(pids[0])
                        self._update_pid(pids[0])
                        return
                except Exception:
                    # best effort only - but do not swallow
                    # KeyboardInterrupt/SystemExit
                    pass
            self._display_guest_dead()
            # leave final data on screen
            return
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get(self._display_guests)
        total = 0.
        ctotal = 0.
        for key, values in stats.items():
            if self._display_guests:
                if self.get_gname_from_pid(key):
                    total += values.value
                continue
            if ' ' not in key:
                total += values.value
            else:
                ctotal += values.value
        if total == 0.:
            # we don't have any fields, or all non-child events are filtered
            total = ctotal

        # print events
        tavg = 0
        tcur = 0
        # total of the most recently printed parent; preset in case the
        # parent row of a child got skipped below
        ptotal = 0.
        guest_removed = False
        for key, values in get_sorted_events(self, stats):
            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                break
            if self._display_guests:
                key = self.get_gname_from_pid(key)
                if not key:
                    continue
            cur = int(round(values.delta / sleeptime)) if values.delta else 0
            if cur < 0:
                guest_removed = True
                continue
            if key[0] != ' ':
                if values.delta:
                    tcur += values.delta
                ptotal = values.value
                ltotal = total
            else:
                ltotal = ptotal
            # The reference total can be 0 (e.g. negative and positive
            # values cancelling out after a guest was removed): report 0%
            # rather than dying with a ZeroDivisionError.
            percent = values.value * 100 / float(ltotal) if ltotal else 0.
            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                               (key, values.value, percent, cur))
            row += 1
        if row == 3:
            if guest_removed:
                self.screen.addstr(4, 1, 'Guest removed, updating...')
            else:
                self.screen.addstr(4, 1, 'No matching events reported yet')
        if row > 4:
            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
                               ('Total', total, tavg), curses.A_BOLD)
        self.screen.refresh()

    def _display_guest_dead(self):
        """Flags the guest as dead in the headline.

        The marker is put right behind the headline, but no further right
        than what fits into 80 columns.

        """
        marker = '   Guest is DEAD   '
        y = min(len(self._headline), 80 - len(marker))
        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)

    def _show_msg(self, text):
        """Display message centered text and exit on key press

        'text' is a sequence of lines, each is centered separately.

        """
        hint = 'Press any key to continue'
        curses.cbreak()
        self.screen.erase()
        term_width = self.screen.getmaxyx()[1]
        row = 2
        for line in text:
            start = (term_width - len(line)) // 2
            self.screen.addstr(row, start, line)
            row += 1
        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                           curses.A_STANDOUT)
        self.screen.getkey()

    def _show_help_interactive(self):
        """Display help with list of interactive commands"""
        msg = ('   b     toggle events by guests (debugfs only, honors'
               ' filters)',
               '   c     clear filter',
               '   f     filter by regular expression',
               '   g     filter by guest name/PID',
               '   h     display interactive commands reference',
               '   o     toggle sorting order (Total vs CurAvg/s)',
               '   p     filter by guest name/PID',
               '   q     quit',
               '   r     reset stats',
               '   s     set delay between refreshs (value range: '
               '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
               '   x     toggle reporting of stats for individual child trace'
               ' events',
               'Any other key refreshes statistics immediately')
        curses.cbreak()
        self.screen.erase()
        self.screen.addstr(0, 0, "Interactive commands reference",
                           curses.A_BOLD)
        self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
        row = 4
        for line in msg:
            self.screen.addstr(row, 0, line)
            row += 1
        self.screen.getkey()
        self._refresh_header()

    def _show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.
        An empty input clears the filter.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(5, 0, msg)
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr().decode(ENCODING)
            curses.noecho()
            if len(regex) == 0:
                self.stats.fields_filter = ''
                self._refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self._refresh_header()
                return
            except re.error:
                msg = '"' + regex + '": Not a valid regular expression'
                continue

    def _show_set_update_interval(self):
        """Draws update interval selection mask.

        Asks until a valid delay is entered; an empty input selects
        DELAY_DEFAULT.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
                               % DELAY_DEFAULT, curses.A_BOLD)
            self.screen.addstr(4, 0, msg)
            self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
                               self._delay_regular)
            curses.echo()
            val = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            try:
                if len(val) > 0:
                    delay = float(val)
                    err = is_delay_valid(delay)
                    if err is not None:
                        msg = err
                        continue
                else:
                    delay = DELAY_DEFAULT
                self._delay_regular = delay
                break

            except ValueError:
                msg = '"' + str(val) + '": Invalid value'
        self._refresh_header()

    def _is_running_guest(self, pid):
        """Check if pid is still a running process.

        A false pid (0, '', None) means no pid filter is set and counts
        as running.

        """
        if not pid:
            return True
        return os.path.isdir(os.path.join('/proc/', str(pid)))

    def _show_vm_selection_by_guest(self):
        """Draws guest selection mask.

        Asks for a guest name or pid until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest or pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            self._print_all_gnames(7)
            curses.echo()
            curses.curs_set(1)
            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
            guest = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            pid = 0
            if not guest or guest == '0':
                break
            if guest.isdigit():
                if not self._is_running_guest(guest):
                    msg = '"' + guest + '": Not a running process'
                    continue
                pid = int(guest)
                break
            pids = []
            try:
                pids = self.get_pid_from_gname(guest)
            except Exception:
                # do not catch KeyboardInterrupt/SystemExit here, they
                # are not search errors
                msg = '"' + guest + '": Internal error while searching, ' \
                      'use pid filter instead'
                continue
            if len(pids) == 0:
                msg = '"' + guest + '": Not an active guest'
                continue
            if len(pids) > 1:
                msg = '"' + guest + '": Multiple matches found, use pid ' \
                      'filter instead'
                continue
            pid = pids[0]
            break
        curses.curs_set(0)
        self._refresh_header(pid)
        self._update_pid(pid)

    def show_stats(self):
        """Refreshes the screen and processes user input.

        Runs until 'q' is pressed or a KeyboardInterrupt is received while
        waiting for a key.

        """
        sleeptime = self._delay_initial
        self._refresh_header()
        start = 0.0  # result based on init value never appears on screen
        while True:
            self._refresh_body(time.time() - start)
            # halfdelay() takes tenths of a second
            curses.halfdelay(int(sleeptime * 10))
            start = time.time()
            sleeptime = self._delay_regular
            try:
                char = self.screen.getkey()
                if char == 'b':
                    self._display_guests = not self._display_guests
                    if self.stats.toggle_display_guests(self._display_guests):
                        self._show_msg(['Command not available with '
                                        'tracepoints enabled', 'Restart with '
                                        'debugfs only (see option \'-d\') and '
                                        'try again!'])
                        self._display_guests = not self._display_guests
                    self._refresh_header()
                elif char == 'c':
                    self.stats.fields_filter = ''
                    self._refresh_header(0)
                    self._update_pid(0)
                elif char == 'f':
                    curses.curs_set(1)
                    self._show_filter_selection()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                elif char == 'g' or char == 'p':
                    self._show_vm_selection_by_guest()
                    sleeptime = self._delay_initial
                elif char == 'h':
                    self._show_help_interactive()
                elif char == 'o':
                    self._sorting = not self._sorting
                elif char == 'q':
                    break
                elif char == 'r':
                    self.stats.reset()
                elif char == 's':
                    curses.curs_set(1)
                    self._show_set_update_interval()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                elif char == 'x':
                    self.stats.child_events = not self.stats.child_events
            except KeyboardInterrupt:
                break
            except curses.error:
                # raised by getkey() when no key was pressed in time
                continue
1554
1555
def batch(stats):
    """Prints statistics in a key, value format.

    The stats are read twice, one second apart, and the second reading is
    printed sorted by key - one line per key with its value and delta. Of a
    key in '<child> <parent>' format only the part in front of the blank is
    printed. A KeyboardInterrupt ends the function silently.

    """
    try:
        # the first reading only provides the reference for the deltas
        stats.get()
        time.sleep(1)
        snapshot = stats.get()
        for key in sorted(snapshot):
            record = snapshot[key]
            name = key.split(' ')[0]
            print('%-42s%10d%10d' % (name, record.value, record.delta))
    except KeyboardInterrupt:
        pass
1567
1568
class StdFormat(object):
    """Formats log output as blank separated columns."""
    def __init__(self, keys):
        # of a key only the part in front of the first blank is used, each
        # name is followed by a blank
        self._banner = ''.join(key.split(' ')[0] + ' ' for key in keys)

    def get_banner(self):
        """Returns the line holding the names of all keys."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of all keys, taken from the records in 's',
        each right-aligned in 9 characters with a leading blank."""
        return ''.join(' %9d' % s[key].delta for key in keys)
1583
1584
class CSVFormat(object):
    """Formats log output as comma separated values."""
    def __init__(self, keys):
        # of a key only the part in front of the first blank is used
        columns = ''.join(',{!s}'.format(key.split(' ')[0]) for key in keys)
        self._banner = 'timestamp' + columns

    def get_banner(self):
        """Returns the header line: 'timestamp' plus the names of all keys."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of all keys, taken from the records in 's',
        each preceded by a comma."""
        return ''.join(',{!s}'.format(s[key].delta) for key in keys)
1597
1598
def log(stats, opts, frmt, keys):
    """Prints statistics as reiterating key block, multiple value blocks.

    Every opts.set_delay seconds the stats are read and one line with a
    timestamp and the deltas of 'keys' is written - to the file
    opts.log_to_file if set, to stdout otherwise. 'frmt' provides the banner
    and the formatting of the lines. The banner is repeated every 20 lines,
    except when logging in CSV format to a file.

    When the module-level flag 'signal_received' is set, the log file is
    closed and opened again and the banner is written anew.

    Runs until interrupted by a KeyboardInterrupt. Exits the program if the
    log file cannot be opened.

    """
    global signal_received
    line = 0
    banner_repeat = 20
    f = None

    def do_banner(opts):
        nonlocal f
        if opts.log_to_file:
            if not f:
                try:
                    f = open(opts.log_to_file, 'a')
                except (IOError, OSError):
                    sys.exit("Error: Could not open file: %s" %
                             opts.log_to_file)
                # do not repeat the header when appending to existing
                # CSV data
                if isinstance(frmt, CSVFormat) and f.tell() != 0:
                    return
        print(frmt.get_banner(), file=f or sys.stdout)

    def do_statline(opts, values):
        statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
                   frmt.get_statline(keys, values)
        print(statline, file=f or sys.stdout)

    do_banner(opts)
    banner_printed = True
    while True:
        try:
            time.sleep(opts.set_delay)
            if signal_received:
                banner_printed = True
                line = 0
                if f:
                    f.close()
                    # A closed file object is still true, so it has to be
                    # dropped explicitly for do_banner() to open the file
                    # again. Otherwise all further output would go to the
                    # closed file and fail.
                    f = None
                do_banner(opts)
                signal_received = False
            if (line % banner_repeat == 0 and not banner_printed and
                not (opts.log_to_file and isinstance(frmt, CSVFormat))):
                do_banner(opts)
                banner_printed = True
            values = stats.get()
            if (not opts.skip_zero_records or
                any(values[k].delta != 0 for k in keys)):
                do_statline(opts, values)
                line += 1
                banner_printed = False
        except KeyboardInterrupt:
            break

    if f:
        f.close()
1650
1651
def handle_signal(sig, frame):
    """Signal handler, merely takes note that a signal arrived.

    Sets the module-level flag 'signal_received', which is evaluated (and
    cleared again) in log(). Both arguments are ignored.

    """
    global signal_received
    signal_received = True
1658
1659
def is_delay_valid(delay):
    """Check whether delay lies within MIN_DELAY and MAX_DELAY (inclusive).

    Returns None if it does, and a message describing the violated limit
    otherwise.

    """
    quoted = '"' + str(delay) + '"'
    if delay > MAX_DELAY:
        return quoted + ': Delay must be <=%s' % MAX_DELAY
    if delay < MIN_DELAY:
        return quoted + ': Delay must be >=%s' % MIN_DELAY
    return None
1668
1669
1670def get_options():
1671    """Returns processed program arguments."""
1672    description_text = """
1673This script displays various statistics about VMs running under KVM.
1674The statistics are gathered from the KVM debugfs entries and / or the
1675currently available perf traces.
1676
1677The monitoring takes additional cpu cycles and might affect the VM's
1678performance.
1679
1680Requirements:
1681- Access to:
1682    %s
1683    %s/events/*
1684    /proc/pid/task
1685- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
1686  CAP_SYS_ADMIN and perf events are used.
1687- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
1688  the large number of files that are possibly opened.
1689
1690Interactive Commands:
1691   b     toggle events by guests (debugfs only, honors filters)
1692   c     clear filter
1693   f     filter by regular expression
1694   g     filter by guest name
1695   h     display interactive commands reference
1696   o     toggle sorting order (Total vs CurAvg/s)
1697   p     filter by PID
1698   q     quit
1699   r     reset stats
1700   s     set update interval (value range: 0.1-25.5 secs)
1701   x     toggle reporting of stats for individual child trace events
1702Press any other key to refresh statistics immediately.
1703""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
1704
1705    class Guest_to_pid(argparse.Action):
1706        def __call__(self, parser, namespace, values, option_string=None):
1707            try:
1708                pids = Tui.get_pid_from_gname(values)
1709            except:
1710                sys.exit('Error while searching for guest "{}". Use "-p" to '
1711                         'specify a pid instead?'.format(values))
1712            if len(pids) == 0:
1713                sys.exit('Error: No guest by the name "{}" found'
1714                         .format(values))
1715            if len(pids) > 1:
1716                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
1717                         ' to specify the desired pid'
1718                         .format(" ".join(map(str, pids))))
1719            namespace.pid = pids[0]
1720
1721    argparser = argparse.ArgumentParser(description=description_text,
1722                                        formatter_class=argparse
1723                                        .RawTextHelpFormatter)
1724    argparser.add_argument('-1', '--once', '--batch',
1725                           action='store_true',
1726                           default=False,
1727                           help='run in batch mode for one second',
1728                           )
1729    argparser.add_argument('-c', '--csv',
1730                           action='store_true',
1731                           default=False,
1732                           help='log in csv format - requires option -l/-L',
1733                           )
1734    argparser.add_argument('-d', '--debugfs',
1735                           action='store_true',
1736                           default=False,
1737                           help='retrieve statistics from debugfs',
1738                           )
1739    argparser.add_argument('-f', '--fields',
1740                           default='',
1741                           help='''fields to display (regex)
1742"-f help" for a list of available events''',
1743                           )
1744    argparser.add_argument('-g', '--guest',
1745                           type=str,
1746                           help='restrict statistics to guest by name',
1747                           action=Guest_to_pid,
1748                           )
1749    argparser.add_argument('-i', '--debugfs-include-past',
1750                           action='store_true',
1751                           default=False,
1752                           help='include all available data on past events for'
1753                                ' debugfs',
1754                           )
1755    argparser.add_argument('-l', '--log',
1756                           action='store_true',
1757                           default=False,
1758                           help='run in logging mode (like vmstat)',
1759                           )
1760    argparser.add_argument('-L', '--log-to-file',
1761                           type=str,
1762                           metavar='FILE',
1763                           help="like '--log', but logging to a file"
1764                           )
1765    argparser.add_argument('-p', '--pid',
1766                           type=int,
1767                           default=0,
1768                           help='restrict statistics to pid',
1769                           )
1770    argparser.add_argument('-s', '--set-delay',
1771                           type=float,
1772                           default=DELAY_DEFAULT,
1773                           metavar='DELAY',
1774                           help='set delay between refreshs (value range: '
1775                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
1776                           )
1777    argparser.add_argument('-t', '--tracepoints',
1778                           action='store_true',
1779                           default=False,
1780                           help='retrieve statistics from tracepoints',
1781                           )
1782    argparser.add_argument('-z', '--skip-zero-records',
1783                           action='store_true',
1784                           default=False,
1785                           help='omit records with all zeros in logging mode',
1786                           )
1787    options = argparser.parse_args()
1788    if options.csv and not (options.log or options.log_to_file):
1789        sys.exit('Error: Option -c/--csv requires -l/--log')
1790    if options.skip_zero_records and not (options.log or options.log_to_file):
1791        sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
1792    try:
1793        # verify that we were passed a valid regex up front
1794        re.compile(options.fields)
1795    except re.error:
1796        sys.exit('Error: "' + options.fields + '" is not a valid regular '
1797                 'expression')
1798
1799    return options
1800
1801
def check_access(options):
    """Verify that the tracing directory is usable for the chosen mode.

    If PATH_DEBUGFS_TRACING is missing and tracepoints are needed (either
    requested explicitly or implied because debugfs was not selected), a
    hint is written to stderr. The program exits with status 1 when
    tracepoints were requested explicitly; otherwise options.debugfs is
    switched on as a fallback after a short pause.

    Returns the (possibly modified) options object.
    """
    if os.path.exists(PATH_DEBUGFS_TRACING):
        return options

    needs_tracing = options.tracepoints or not options.debugfs
    if not needs_tracing:
        return options

    hint = ("Please enable CONFIG_TRACING in your kernel "
            "when using the option -t (default).\n"
            "If it is enabled, make {0} readable by the "
            "current user.\n").format(PATH_DEBUGFS_TRACING)
    sys.stderr.write(hint)

    # Tracepoints were asked for explicitly: there is nothing to fall back to.
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    # Pause before continuing, after the fallback notice has been written.
    time.sleep(5)
    return options
1819
1820
def assign_globals():
    """Locate the debugfs mount point and derive the global paths from it.

    Scans /proc/mounts for the first entry whose filesystem type is
    'debugfs' and sets the module globals PATH_DEBUGFS_KVM and
    PATH_DEBUGFS_TRACING to its 'kvm' and 'tracing' subdirectories.

    Exits with status 1 (after writing a hint to stderr) if no debugfs
    mount is found or if the kvm directory does not exist below it.
    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # Use a context manager so the file handle is closed deterministically.
    with open('/proc/mounts') as mounts:
        for line in mounts:
            # Fields used here: [1] is the mount point, [2] the fs type.
            fields = line.split(' ')
            if fields[2] == 'debugfs':
                debugfs = fields[1]
                break
    if debugfs == '':
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
1844
1845
def main():
    """Script entry point.

    Sets up the global debugfs paths, parses and validates the command
    line options, creates the Stats object and then dispatches to one of
    the output paths: the event list for '-f help', logging, the Tui, or
    batch output.
    """
    assign_globals()
    options = check_access(get_options())

    if options.pid > 0:
        proc_dir = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_dir):
            sys.stderr.write('Did you use a (unsupported) tid instead of a '
                             'pid?\n')
            sys.exit('Specified pid does not exist.')

    delay_error = is_delay_valid(options.set_delay)
    if delay_error is not None:
        sys.exit('Error: ' + delay_error)

    stats = Stats(options)

    if options.fields == 'help':
        # Clear the fields filter, then list the unique event names with
        # anything from the first '(' onwards stripped.
        stats.fields_filter = None
        event_names = {key.split('(', 1)[0] for key in stats.get().keys()}
        sys.stdout.write('  ' + '\n  '.join(sorted(event_names)) + '\n')
        sys.exit(0)

    if options.log or options.log_to_file:
        if options.log_to_file:
            signal.signal(signal.SIGHUP, handle_signal)
        keys = sorted(stats.get().keys())
        formatter = CSVFormat if options.csv else StdFormat
        log(stats, options, formatter(keys), keys)
        return

    if options.once:
        batch(stats)
        return

    with Tui(stats, options) as tui:
        tui.show_stats()
1885
1886
# Run main() only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
1889