]>
Commit | Line | Data |
---|---|---|
f79e8fa3 AB |
1 | /* |
2 | * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org> | |
3 | * | |
4 | * How vectorised is this code? | |
5 | * | |
6 | * Attempt to measure the amount of vectorisation that has been done | |
7 | * on some code by counting classes of instruction. | |
8 | * | |
9 | * License: GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | #include <inttypes.h> | |
13 | #include <assert.h> | |
14 | #include <stdlib.h> | |
15 | #include <inttypes.h> | |
16 | #include <string.h> | |
17 | #include <unistd.h> | |
18 | #include <stdio.h> | |
19 | #include <glib.h> | |
20 | ||
21 | #include <qemu-plugin.h> | |
22 | ||
3fb356cc AB |
/* Exported version symbol, initialised from QEMU_PLUGIN_VERSION. */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
24 | ||
f79e8fa3 AB |
/* Element count of a true array; not valid when x has decayed to a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
26 | ||
/*
 * How executions of instructions matching a class row are accounted:
 * summed into the class counter, tracked per distinct opcode, or not
 * counted at all.
 */
typedef enum {
    COUNT_CLASS = 0,
    COUNT_INDIVIDUAL = 1,
    COUNT_NONE = 2
} CountType;
32 | ||
/* Upper bound on the rows printed under "Individual Instructions". */
static int limit = 50;
/* Set by the "inline" option: use inline per-vCPU adds instead of callbacks. */
static bool do_inline;
/* Set by the "verbose" option: also report classes that have zero hits. */
static bool verbose;

/* Held around lookups and insertions on insns during translation. */
static GMutex lock;
/* Maps an opcode to its InsnExecCount for individually counted instructions. */
static GHashTable *insns;
39 | ||
/* One row of a class table: a mask/pattern matcher plus its counter. */
typedef struct {
    const char *class;      /* name shown in the report */
    const char *opt;        /* name accepted by the "count" option */
    uint32_t mask;          /* opcode bits that take part in the match */
    uint32_t pattern;       /* required value of (opcode & mask) */
    CountType what;         /* how matching instructions are counted */
    qemu_plugin_u64 count;  /* scoreboard-backed hit counter for the class */
} InsnClassExecCount;
48 | ||
/* Record for one individually counted opcode; stored as a value in insns. */
typedef struct {
    char *insn;                 /* disassembly text, released with g_free() */
    uint32_t opcode;            /* first 32 bits of the insn, also the hash key */
    qemu_plugin_u64 count;      /* scoreboard-backed hit counter */
    InsnClassExecCount *class;  /* class row that matched this opcode */
} InsnExecCount;
55 | ||
56 | /* | |
57 | * Matchers for classes of instructions, order is important. | |
58 | * | |
59 | * Your most precise match must be before looser matches. If no match | |
60 | * is found in the table we can create an individual entry. | |
61 | * | |
62 | * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0 | |
63 | */ | |
64 | static InsnClassExecCount aarch64_insn_classes[] = { | |
65 | /* "Reserved"" */ | |
66 | { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE}, | |
67 | { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS}, | |
68 | { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, | |
69 | /* Data Processing Immediate */ | |
70 | { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, | |
edd4a85d | 71 | { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS}, |
f79e8fa3 AB |
72 | { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, |
73 | { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, | |
74 | { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, | |
75 | { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS}, | |
76 | { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS}, | |
77 | { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS}, | |
78 | /* Branches */ | |
79 | { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS}, | |
80 | { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS}, | |
81 | { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE}, | |
82 | { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS}, | |
83 | { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS}, | |
84 | { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS}, | |
85 | { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS}, | |
86 | { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS}, | |
87 | { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS}, | |
88 | { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS}, | |
89 | { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS}, | |
90 | { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS}, | |
91 | { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, | |
92 | /* Loads and Stores */ | |
93 | { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, | |
edd4a85d | 94 | { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS}, |
f79e8fa3 | 95 | { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, |
edd4a85d | 96 | { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS}, |
f79e8fa3 AB |
97 | { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, |
98 | { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, | |
99 | { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, | |
edd4a85d | 100 | { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS}, |
f79e8fa3 AB |
101 | { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, |
102 | { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, | |
103 | { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, | |
edd4a85d | 104 | { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, |
f79e8fa3 AB |
105 | { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, |
106 | { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, | |
107 | { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, | |
108 | /* Data Processing Register */ | |
109 | { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS}, | |
110 | /* Scalar FP */ | |
111 | { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS}, | |
112 | /* Unclassified */ | |
113 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS}, | |
114 | }; | |
115 | ||
116 | static InsnClassExecCount sparc32_insn_classes[] = { | |
117 | { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS}, | |
118 | { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS}, | |
119 | { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS}, | |
120 | { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS}, | |
121 | { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS}, | |
122 | { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS}, | |
123 | { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS}, | |
124 | /* Unclassified */ | |
125 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
126 | }; | |
127 | ||
128 | static InsnClassExecCount sparc64_insn_classes[] = { | |
129 | { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS}, | |
130 | { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS}, | |
131 | { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS}, | |
132 | { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS}, | |
133 | /* Unclassified */ | |
134 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
135 | }; | |
136 | ||
137 | /* Default matcher for currently unclassified architectures */ | |
138 | static InsnClassExecCount default_insn_classes[] = { | |
139 | { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, | |
140 | }; | |
141 | ||
/* Associates a QEMU target name with the class table used for it. */
typedef struct {
    const char *qemu_target;    /* target name to compare; NULL matches any */
    InsnClassExecCount *table;  /* first row of the class table */
    int table_sz;               /* number of rows in table */
} ClassSelector;
147 | ||
24fa5d66 | 148 | static ClassSelector class_tables[] = { |
f79e8fa3 AB |
149 | { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, |
150 | { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, | |
151 | { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, | |
152 | { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) }, | |
153 | }; | |
154 | ||
/* Class table selected for the running guest in qemu_plugin_install(). */
static InsnClassExecCount *class_table;
/* Number of rows in class_table. */
static int class_table_sz;
157 | ||
158 | static gint cmp_exec_count(gconstpointer a, gconstpointer b) | |
159 | { | |
160 | InsnExecCount *ea = (InsnExecCount *) a; | |
161 | InsnExecCount *eb = (InsnExecCount *) b; | |
c125a8ab PB |
162 | uint64_t count_a = qemu_plugin_u64_sum(ea->count); |
163 | uint64_t count_b = qemu_plugin_u64_sum(eb->count); | |
164 | return count_a > count_b ? -1 : 1; | |
f79e8fa3 AB |
165 | } |
166 | ||
ec11c4a8 AB |
167 | static void free_record(gpointer data) |
168 | { | |
169 | InsnExecCount *rec = (InsnExecCount *) data; | |
73281023 | 170 | qemu_plugin_scoreboard_free(rec->count.score); |
ec11c4a8 AB |
171 | g_free(rec->insn); |
172 | g_free(rec); | |
173 | } | |
174 | ||
f79e8fa3 AB |
175 | static void plugin_exit(qemu_plugin_id_t id, void *p) |
176 | { | |
177 | g_autoptr(GString) report = g_string_new("Instruction Classes:\n"); | |
178 | int i; | |
c125a8ab | 179 | uint64_t total_count; |
f79e8fa3 AB |
180 | GList *counts; |
181 | InsnClassExecCount *class = NULL; | |
182 | ||
183 | for (i = 0; i < class_table_sz; i++) { | |
184 | class = &class_table[i]; | |
185 | switch (class->what) { | |
186 | case COUNT_CLASS: | |
c125a8ab PB |
187 | total_count = qemu_plugin_u64_sum(class->count); |
188 | if (total_count || verbose) { | |
9b60d6a1 PMD |
189 | g_string_append_printf(report, |
190 | "Class: %-24s\t(%" PRId64 " hits)\n", | |
f79e8fa3 | 191 | class->class, |
c125a8ab | 192 | total_count); |
f79e8fa3 AB |
193 | } |
194 | break; | |
195 | case COUNT_INDIVIDUAL: | |
196 | g_string_append_printf(report, "Class: %-24s\tcounted individually\n", | |
197 | class->class); | |
198 | break; | |
199 | case COUNT_NONE: | |
200 | g_string_append_printf(report, "Class: %-24s\tnot counted\n", | |
201 | class->class); | |
202 | break; | |
203 | default: | |
204 | break; | |
205 | } | |
206 | } | |
207 | ||
208 | counts = g_hash_table_get_values(insns); | |
209 | if (counts && g_list_next(counts)) { | |
edd4a85d | 210 | g_string_append_printf(report, "Individual Instructions:\n"); |
ec11c4a8 | 211 | counts = g_list_sort(counts, cmp_exec_count); |
f79e8fa3 | 212 | |
ec11c4a8 AB |
213 | for (i = 0; i < limit && g_list_next(counts); |
214 | i++, counts = g_list_next(counts)) { | |
215 | InsnExecCount *rec = (InsnExecCount *) counts->data; | |
216 | g_string_append_printf(report, | |
9b60d6a1 PMD |
217 | "Instr: %-24s\t(%" PRId64 " hits)" |
218 | "\t(op=0x%08x/%s)\n", | |
f79e8fa3 | 219 | rec->insn, |
c125a8ab | 220 | qemu_plugin_u64_sum(rec->count), |
f79e8fa3 AB |
221 | rec->opcode, |
222 | rec->class ? | |
223 | rec->class->class : "un-categorised"); | |
224 | } | |
ec11c4a8 | 225 | g_list_free(counts); |
f79e8fa3 AB |
226 | } |
227 | ||
ec11c4a8 | 228 | g_hash_table_destroy(insns); |
c125a8ab PB |
229 | for (i = 0; i < ARRAY_SIZE(class_tables); i++) { |
230 | for (int j = 0; j < class_tables[i].table_sz; ++j) { | |
231 | qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score); | |
232 | } | |
233 | } | |
234 | ||
ec11c4a8 | 235 | |
f79e8fa3 AB |
236 | qemu_plugin_outs(report->str); |
237 | } | |
238 | ||
239 | static void plugin_init(void) | |
240 | { | |
ec11c4a8 | 241 | insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record); |
f79e8fa3 AB |
242 | } |
243 | ||
244 | static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) | |
245 | { | |
c125a8ab PB |
246 | struct qemu_plugin_scoreboard *score = udata; |
247 | qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1); | |
f79e8fa3 AB |
248 | } |
249 | ||
c125a8ab PB |
250 | static struct qemu_plugin_scoreboard *find_counter( |
251 | struct qemu_plugin_insn *insn) | |
f79e8fa3 AB |
252 | { |
253 | int i; | |
254 | uint64_t *cnt = NULL; | |
4abc8923 | 255 | uint32_t opcode = 0; |
f79e8fa3 AB |
256 | InsnClassExecCount *class = NULL; |
257 | ||
258 | /* | |
259 | * We only match the first 32 bits of the instruction which is | |
260 | * fine for most RISCs but a bit limiting for CISC architectures. | |
261 | * They would probably benefit from a more tailored plugin. | |
262 | * However we can fall back to individual instruction counting. | |
263 | */ | |
4abc8923 | 264 | qemu_plugin_insn_data(insn, &opcode, sizeof(opcode)); |
f79e8fa3 AB |
265 | |
266 | for (i = 0; !cnt && i < class_table_sz; i++) { | |
267 | class = &class_table[i]; | |
268 | uint32_t masked_bits = opcode & class->mask; | |
269 | if (masked_bits == class->pattern) { | |
270 | break; | |
271 | } | |
272 | } | |
273 | ||
274 | g_assert(class); | |
275 | ||
276 | switch (class->what) { | |
277 | case COUNT_NONE: | |
278 | return NULL; | |
279 | case COUNT_CLASS: | |
c125a8ab | 280 | return class->count.score; |
f79e8fa3 AB |
281 | case COUNT_INDIVIDUAL: |
282 | { | |
283 | InsnExecCount *icount; | |
284 | ||
285 | g_mutex_lock(&lock); | |
286 | icount = (InsnExecCount *) g_hash_table_lookup(insns, | |
287 | GUINT_TO_POINTER(opcode)); | |
288 | ||
289 | if (!icount) { | |
290 | icount = g_new0(InsnExecCount, 1); | |
291 | icount->opcode = opcode; | |
292 | icount->insn = qemu_plugin_insn_disas(insn); | |
293 | icount->class = class; | |
c125a8ab PB |
294 | struct qemu_plugin_scoreboard *score = |
295 | qemu_plugin_scoreboard_new(sizeof(uint64_t)); | |
296 | icount->count = qemu_plugin_scoreboard_u64(score); | |
f79e8fa3 AB |
297 | |
298 | g_hash_table_insert(insns, GUINT_TO_POINTER(opcode), | |
299 | (gpointer) icount); | |
300 | } | |
301 | g_mutex_unlock(&lock); | |
302 | ||
c125a8ab | 303 | return icount->count.score; |
f79e8fa3 AB |
304 | } |
305 | default: | |
306 | g_assert_not_reached(); | |
307 | } | |
308 | ||
309 | return NULL; | |
310 | } | |
311 | ||
312 | static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) | |
313 | { | |
314 | size_t n = qemu_plugin_tb_n_insns(tb); | |
315 | size_t i; | |
316 | ||
317 | for (i = 0; i < n; i++) { | |
f79e8fa3 | 318 | struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); |
c125a8ab | 319 | struct qemu_plugin_scoreboard *cnt = find_counter(insn); |
f79e8fa3 AB |
320 | |
321 | if (cnt) { | |
322 | if (do_inline) { | |
c125a8ab PB |
323 | qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu( |
324 | insn, QEMU_PLUGIN_INLINE_ADD_U64, | |
325 | qemu_plugin_scoreboard_u64(cnt), 1); | |
f79e8fa3 AB |
326 | } else { |
327 | qemu_plugin_register_vcpu_insn_exec_cb( | |
328 | insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt); | |
329 | } | |
330 | } | |
331 | } | |
332 | } | |
333 | ||
334 | QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, | |
335 | const qemu_info_t *info, | |
336 | int argc, char **argv) | |
337 | { | |
338 | int i; | |
339 | ||
c125a8ab PB |
340 | for (i = 0; i < ARRAY_SIZE(class_tables); i++) { |
341 | for (int j = 0; j < class_tables[i].table_sz; ++j) { | |
342 | struct qemu_plugin_scoreboard *score = | |
343 | qemu_plugin_scoreboard_new(sizeof(uint64_t)); | |
344 | class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score); | |
345 | } | |
346 | } | |
347 | ||
f79e8fa3 AB |
348 | /* Select a class table appropriate to the guest architecture */ |
349 | for (i = 0; i < ARRAY_SIZE(class_tables); i++) { | |
350 | ClassSelector *entry = &class_tables[i]; | |
351 | if (!entry->qemu_target || | |
352 | strcmp(entry->qemu_target, info->target_name) == 0) { | |
353 | class_table = entry->table; | |
354 | class_table_sz = entry->table_sz; | |
355 | break; | |
356 | } | |
357 | } | |
358 | ||
359 | for (i = 0; i < argc; i++) { | |
360 | char *p = argv[i]; | |
40258741 | 361 | g_auto(GStrv) tokens = g_strsplit(p, "=", -1); |
d8525358 MM |
362 | if (g_strcmp0(tokens[0], "inline") == 0) { |
363 | if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { | |
364 | fprintf(stderr, "boolean argument parsing failed: %s\n", p); | |
365 | return -1; | |
366 | } | |
367 | } else if (g_strcmp0(tokens[0], "verbose") == 0) { | |
368 | if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { | |
369 | fprintf(stderr, "boolean argument parsing failed: %s\n", p); | |
370 | return -1; | |
371 | } | |
372 | } else if (g_strcmp0(tokens[0], "count") == 0) { | |
373 | char *value = tokens[1]; | |
f79e8fa3 AB |
374 | int j; |
375 | CountType type = COUNT_INDIVIDUAL; | |
d8525358 | 376 | if (*value == '!') { |
f79e8fa3 | 377 | type = COUNT_NONE; |
d8525358 | 378 | value++; |
f79e8fa3 AB |
379 | } |
380 | for (j = 0; j < class_table_sz; j++) { | |
d8525358 | 381 | if (strcmp(value, class_table[j].opt) == 0) { |
f79e8fa3 AB |
382 | class_table[j].what = type; |
383 | break; | |
384 | } | |
385 | } | |
d8525358 MM |
386 | } else { |
387 | fprintf(stderr, "option parsing failed: %s\n", p); | |
388 | return -1; | |
f79e8fa3 AB |
389 | } |
390 | } | |
391 | ||
392 | plugin_init(); | |
393 | ||
394 | qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); | |
395 | qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); | |
396 | return 0; | |
397 | } |