]>
Commit | Line | Data |
---|---|---|
f79e8fa3 AB |
1 | /* |
2 | * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org> | |
3 | * | |
4 | * How vectorised is this code? | |
5 | * | |
6 | * Attempt to measure the amount of vectorisation that has been done | |
7 | * on some code by counting classes of instruction. | |
8 | * | |
9 | * License: GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | #include <inttypes.h> | |
13 | #include <assert.h> | |
14 | #include <stdlib.h> | |
15 | #include <inttypes.h> | |
16 | #include <string.h> | |
17 | #include <unistd.h> | |
18 | #include <stdio.h> | |
19 | #include <glib.h> | |
20 | ||
21 | #include <qemu-plugin.h> | |
22 | ||
3fb356cc AB |
23 | QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; |
24 | ||
f79e8fa3 AB |
25 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) |
26 | ||
/* How executions of instructions matching a class entry are accounted. */
typedef enum CountType {
    COUNT_CLASS = 0,    /* one shared counter for the whole class */
    COUNT_INDIVIDUAL,   /* one counter per distinct opcode value */
    COUNT_NONE          /* matching instructions are not instrumented */
} CountType;
32 | ||
33 | static int limit = 50; | |
34 | static bool do_inline; | |
35 | static bool verbose; | |
36 | ||
37 | static GMutex lock; | |
38 | static GHashTable *insns; | |
39 | ||
40 | typedef struct { | |
41 | const char *class; | |
42 | const char *opt; | |
43 | uint32_t mask; | |
44 | uint32_t pattern; | |
45 | CountType what; | |
46 | uint64_t count; | |
47 | } InsnClassExecCount; | |
48 | ||
49 | typedef struct { | |
50 | char *insn; | |
51 | uint32_t opcode; | |
52 | uint64_t count; | |
53 | InsnClassExecCount *class; | |
54 | } InsnExecCount; | |
55 | ||
56 | /* | |
57 | * Matchers for classes of instructions, order is important. | |
58 | * | |
59 | * Your most precise match must be before looser matches. If no match | |
60 | * is found in the table we can create an individual entry. | |
61 | * | |
62 | * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0 | |
63 | */ | |
64 | static InsnClassExecCount aarch64_insn_classes[] = { | |
65 | /* "Reserved"" */ | |
66 | { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE}, | |
67 | { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS}, | |
68 | { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, | |
69 | /* Data Processing Immediate */ | |
70 | { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, | |
71 | { " Add/Sub (imm,tags)","asit", 0x1f800000, 0x11800000, COUNT_CLASS}, | |
72 | { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, | |
73 | { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, | |
74 | { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, | |
75 | { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS}, | |
76 | { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS}, | |
77 | { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS}, | |
78 | /* Branches */ | |
79 | { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS}, | |
80 | { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS}, | |
81 | { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE}, | |
82 | { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS}, | |
83 | { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS}, | |
84 | { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS}, | |
85 | { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS}, | |
86 | { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS}, | |
87 | { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS}, | |
88 | { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS}, | |
89 | { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS}, | |
90 | { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS}, | |
91 | { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, | |
92 | /* Loads and Stores */ | |
93 | { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, | |
94 | { " AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS}, | |
95 | { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, | |
96 | { " AdvSimd ldst++", "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS}, | |
97 | { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, | |
98 | { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, | |
99 | { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, | |
100 | { " ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS}, | |
101 | { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, | |
102 | { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, | |
103 | { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, | |
104 | { " ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, | |
105 | { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, | |
106 | { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, | |
107 | { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, | |
108 | /* Data Processing Register */ | |
109 | { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS}, | |
110 | /* Scalar FP */ | |
111 | { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS}, | |
112 | /* Unclassified */ | |
113 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS}, | |
114 | }; | |
115 | ||
116 | static InsnClassExecCount sparc32_insn_classes[] = { | |
117 | { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS}, | |
118 | { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS}, | |
119 | { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS}, | |
120 | { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS}, | |
121 | { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS}, | |
122 | { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS}, | |
123 | { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS}, | |
124 | /* Unclassified */ | |
125 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
126 | }; | |
127 | ||
128 | static InsnClassExecCount sparc64_insn_classes[] = { | |
129 | { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS}, | |
130 | { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS}, | |
131 | { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS}, | |
132 | { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS}, | |
133 | /* Unclassified */ | |
134 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
135 | }; | |
136 | ||
137 | /* Default matcher for currently unclassified architectures */ | |
138 | static InsnClassExecCount default_insn_classes[] = { | |
139 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
140 | }; | |
141 | ||
142 | typedef struct { | |
143 | const char *qemu_target; | |
144 | InsnClassExecCount *table; | |
145 | int table_sz; | |
146 | } ClassSelector; | |
147 | ||
148 | static ClassSelector class_tables[] = | |
149 | { | |
150 | { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, | |
151 | { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, | |
152 | { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, | |
153 | { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) }, | |
154 | }; | |
155 | ||
156 | static InsnClassExecCount *class_table; | |
157 | static int class_table_sz; | |
158 | ||
159 | static gint cmp_exec_count(gconstpointer a, gconstpointer b) | |
160 | { | |
161 | InsnExecCount *ea = (InsnExecCount *) a; | |
162 | InsnExecCount *eb = (InsnExecCount *) b; | |
163 | return ea->count > eb->count ? -1 : 1; | |
164 | } | |
165 | ||
ec11c4a8 AB |
166 | static void free_record(gpointer data) |
167 | { | |
168 | InsnExecCount *rec = (InsnExecCount *) data; | |
169 | g_free(rec->insn); | |
170 | g_free(rec); | |
171 | } | |
172 | ||
f79e8fa3 AB |
173 | static void plugin_exit(qemu_plugin_id_t id, void *p) |
174 | { | |
175 | g_autoptr(GString) report = g_string_new("Instruction Classes:\n"); | |
176 | int i; | |
177 | GList *counts; | |
178 | InsnClassExecCount *class = NULL; | |
179 | ||
180 | for (i = 0; i < class_table_sz; i++) { | |
181 | class = &class_table[i]; | |
182 | switch (class->what) { | |
183 | case COUNT_CLASS: | |
184 | if (class->count || verbose) { | |
185 | g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n", | |
186 | class->class, | |
187 | class->count); | |
188 | } | |
189 | break; | |
190 | case COUNT_INDIVIDUAL: | |
191 | g_string_append_printf(report, "Class: %-24s\tcounted individually\n", | |
192 | class->class); | |
193 | break; | |
194 | case COUNT_NONE: | |
195 | g_string_append_printf(report, "Class: %-24s\tnot counted\n", | |
196 | class->class); | |
197 | break; | |
198 | default: | |
199 | break; | |
200 | } | |
201 | } | |
202 | ||
203 | counts = g_hash_table_get_values(insns); | |
204 | if (counts && g_list_next(counts)) { | |
f79e8fa3 | 205 | g_string_append_printf(report,"Individual Instructions:\n"); |
ec11c4a8 | 206 | counts = g_list_sort(counts, cmp_exec_count); |
f79e8fa3 | 207 | |
ec11c4a8 AB |
208 | for (i = 0; i < limit && g_list_next(counts); |
209 | i++, counts = g_list_next(counts)) { | |
210 | InsnExecCount *rec = (InsnExecCount *) counts->data; | |
211 | g_string_append_printf(report, | |
212 | "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n", | |
f79e8fa3 AB |
213 | rec->insn, |
214 | rec->count, | |
215 | rec->opcode, | |
216 | rec->class ? | |
217 | rec->class->class : "un-categorised"); | |
218 | } | |
ec11c4a8 | 219 | g_list_free(counts); |
f79e8fa3 AB |
220 | } |
221 | ||
ec11c4a8 AB |
222 | g_hash_table_destroy(insns); |
223 | ||
f79e8fa3 AB |
224 | qemu_plugin_outs(report->str); |
225 | } | |
226 | ||
227 | static void plugin_init(void) | |
228 | { | |
ec11c4a8 | 229 | insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record); |
f79e8fa3 AB |
230 | } |
231 | ||
/*
 * Per-instruction execution callback: bump the 64 bit counter that was
 * registered as user data for the instruction.
 *
 * @cpu_index: index of the executing vCPU (unused)
 * @udata: pointer to the uint64_t counter to increment
 */
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
{
    uint64_t *hits = udata;

    *hits += 1;
}
237 | ||
238 | static uint64_t * find_counter(struct qemu_plugin_insn *insn) | |
239 | { | |
240 | int i; | |
241 | uint64_t *cnt = NULL; | |
242 | uint32_t opcode; | |
243 | InsnClassExecCount *class = NULL; | |
244 | ||
245 | /* | |
246 | * We only match the first 32 bits of the instruction which is | |
247 | * fine for most RISCs but a bit limiting for CISC architectures. | |
248 | * They would probably benefit from a more tailored plugin. | |
249 | * However we can fall back to individual instruction counting. | |
250 | */ | |
251 | opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); | |
252 | ||
253 | for (i = 0; !cnt && i < class_table_sz; i++) { | |
254 | class = &class_table[i]; | |
255 | uint32_t masked_bits = opcode & class->mask; | |
256 | if (masked_bits == class->pattern) { | |
257 | break; | |
258 | } | |
259 | } | |
260 | ||
261 | g_assert(class); | |
262 | ||
263 | switch (class->what) { | |
264 | case COUNT_NONE: | |
265 | return NULL; | |
266 | case COUNT_CLASS: | |
267 | return &class->count; | |
268 | case COUNT_INDIVIDUAL: | |
269 | { | |
270 | InsnExecCount *icount; | |
271 | ||
272 | g_mutex_lock(&lock); | |
273 | icount = (InsnExecCount *) g_hash_table_lookup(insns, | |
274 | GUINT_TO_POINTER(opcode)); | |
275 | ||
276 | if (!icount) { | |
277 | icount = g_new0(InsnExecCount, 1); | |
278 | icount->opcode = opcode; | |
279 | icount->insn = qemu_plugin_insn_disas(insn); | |
280 | icount->class = class; | |
281 | ||
282 | g_hash_table_insert(insns, GUINT_TO_POINTER(opcode), | |
283 | (gpointer) icount); | |
284 | } | |
285 | g_mutex_unlock(&lock); | |
286 | ||
287 | return &icount->count; | |
288 | } | |
289 | default: | |
290 | g_assert_not_reached(); | |
291 | } | |
292 | ||
293 | return NULL; | |
294 | } | |
295 | ||
296 | static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) | |
297 | { | |
298 | size_t n = qemu_plugin_tb_n_insns(tb); | |
299 | size_t i; | |
300 | ||
301 | for (i = 0; i < n; i++) { | |
302 | uint64_t *cnt; | |
303 | struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); | |
304 | cnt = find_counter(insn); | |
305 | ||
306 | if (cnt) { | |
307 | if (do_inline) { | |
308 | qemu_plugin_register_vcpu_insn_exec_inline( | |
309 | insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1); | |
310 | } else { | |
311 | qemu_plugin_register_vcpu_insn_exec_cb( | |
312 | insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt); | |
313 | } | |
314 | } | |
315 | } | |
316 | } | |
317 | ||
318 | QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, | |
319 | const qemu_info_t *info, | |
320 | int argc, char **argv) | |
321 | { | |
322 | int i; | |
323 | ||
324 | /* Select a class table appropriate to the guest architecture */ | |
325 | for (i = 0; i < ARRAY_SIZE(class_tables); i++) { | |
326 | ClassSelector *entry = &class_tables[i]; | |
327 | if (!entry->qemu_target || | |
328 | strcmp(entry->qemu_target, info->target_name) == 0) { | |
329 | class_table = entry->table; | |
330 | class_table_sz = entry->table_sz; | |
331 | break; | |
332 | } | |
333 | } | |
334 | ||
335 | for (i = 0; i < argc; i++) { | |
336 | char *p = argv[i]; | |
337 | if (strcmp(p, "inline") == 0) { | |
338 | do_inline = true; | |
339 | } else if (strcmp(p, "verbose") == 0) { | |
340 | verbose = true; | |
341 | } else { | |
342 | int j; | |
343 | CountType type = COUNT_INDIVIDUAL; | |
344 | if (*p == '!') { | |
345 | type = COUNT_NONE; | |
346 | p++; | |
347 | } | |
348 | for (j = 0; j < class_table_sz; j++) { | |
349 | if (strcmp(p, class_table[j].opt) == 0) { | |
350 | class_table[j].what = type; | |
351 | break; | |
352 | } | |
353 | } | |
354 | } | |
355 | } | |
356 | ||
357 | plugin_init(); | |
358 | ||
359 | qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); | |
360 | qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); | |
361 | return 0; | |
362 | } |