contrib/plugins/cache.c

   1 /*
   2  * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
   3  *
   4  * License: GNU GPL, version 2 or later.
   5  *   See the COPYING file in the top-level directory.
   6  */
   7
   8 #include <inttypes.h>
   9 #include <stdio.h>
  10 #include <glib.h>
  11
  12 #include <qemu-plugin.h>
  13
  14 #define STRTOLL(x) g_ascii_strtoll(x, NULL, 10)
  15
  16 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
  17
  18 static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
  19
  20 static GHashTable *miss_ht;
  21
  22 static GMutex hashtable_lock;
  23 static GRand *rng;
  24
  25 static int limit;
  26 static bool sys;
  27
  28 enum EvictionPolicy {
  29     LRU,
  30     FIFO,
  31     RAND,
  32 };
  33
  34 enum EvictionPolicy policy;
  35
/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
 * put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not
 * a functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: The block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match. If a
 * match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */
  58
/* One cache line: only the tag and a valid bit are tracked, never the data. */
typedef struct {
    uint64_t tag;
    bool valid;
} CacheBlock;

/* One associative set together with the metadata used by the eviction policy. */
typedef struct {
    CacheBlock *blocks;           /* array of `assoc` blocks */
    uint64_t *lru_priorities;     /* LRU: generation stamp of each block */
    uint64_t lru_gen_counter;     /* LRU: next generation stamp to hand out */
    GQueue *fifo_queue;           /* FIFO: block indices, newest at the head */
} CacheSet;

/* A simulated cache: its geometry, address masks and hit/miss counters. */
typedef struct {
    CacheSet *sets;               /* array of `num_sets` sets */
    int num_sets;                 /* cachesize / (blksize * assoc) */
    int cachesize;
    int assoc;                    /* number of blocks per set */
    int blksize_shift;            /* log2 of the block size */
    uint64_t set_mask;            /* address bits that select the set */
    uint64_t tag_mask;            /* address bits above offset and set bits */
    uint64_t accesses;            /* bumped by the vCPU callbacks */
    uint64_t misses;              /* bumped by the vCPU callbacks */
} Cache;

/* Per-instruction record stored in miss_ht and passed to the callbacks. */
typedef struct {
    char *disas_str;              /* disassembly text; released by insn_free() */
    const char *symbol;           /* symbol name or NULL; not freed here */
    uint64_t addr;                /* effective address used as the hash key */
    uint64_t l1_dmisses;
    uint64_t l1_imisses;
    uint64_t l2_misses;
} InsnData;
  91
  92 void (*update_hit)(Cache *cache, int set, int blk);
  93 void (*update_miss)(Cache *cache, int set, int blk);
  94
  95 void (*metadata_init)(Cache *cache);
  96 void (*metadata_destroy)(Cache *cache);
  97
  98 static int cores;
  99 static Cache **l1_dcaches, **l1_icaches;
 100
 101 static bool use_l2;
 102 static Cache **l2_ucaches;
 103
 104 static GMutex *l1_dcache_locks;
 105 static GMutex *l1_icache_locks;
 106 static GMutex *l2_ucache_locks;
 107
 108 static uint64_t l1_dmem_accesses;
 109 static uint64_t l1_imem_accesses;
 110 static uint64_t l1_imisses;
 111 static uint64_t l1_dmisses;
 112
 113 static uint64_t l2_mem_accesses;
 114 static uint64_t l2_misses;
 115
 116 static int pow_of_two(int num)
 117 {
 118     g_assert((num & (num - 1)) == 0);
 119     int ret = 0;
 120     while (num /= 2) {
 121         ret++;
 122     }
 123     return ret;
 124 }
 125
/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit-block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the least priority is searched and replaced
 * with the newly-cached block, of which the priority is set to the current
 * generation number.
 */
 139
 140 static void lru_priorities_init(Cache *cache)
 141 {
 142     int i;
 143
 144     for (i = 0; i < cache->num_sets; i++) {
 145         cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
 146         cache->sets[i].lru_gen_counter = 0;
 147     }
 148 }
 149
 150 static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
 151 {
 152     CacheSet *set = &cache->sets[set_idx];
 153     set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
 154     set->lru_gen_counter++;
 155 }
 156
 157 static int lru_get_lru_block(Cache *cache, int set_idx)
 158 {
 159     int i, min_idx, min_priority;
 160
 161     min_priority = cache->sets[set_idx].lru_priorities[0];
 162     min_idx = 0;
 163
 164     for (i = 1; i < cache->assoc; i++) {
 165         if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
 166             min_priority = cache->sets[set_idx].lru_priorities[i];
 167             min_idx = i;
 168         }
 169     }
 170     return min_idx;
 171 }
 172
 173 static void lru_priorities_destroy(Cache *cache)
 174 {
 175     int i;
 176
 177     for (i = 0; i < cache->num_sets; i++) {
 178         g_free(cache->sets[i].lru_priorities);
 179     }
 180 }
 181
/*
 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
 * records the indices of its cached blocks in the order they were filled.
 *
 * On a compulsory miss: The block index is enqueued to the fifo_queue to
 * indicate that it's the latest cached block.
 *
 * On a conflict miss: The first-in block is removed from the cache and the new
 * block is put in its place and enqueued to the FIFO queue.
 */
 192
 193 static void fifo_init(Cache *cache)
 194 {
 195     int i;
 196
 197     for (i = 0; i < cache->num_sets; i++) {
 198         cache->sets[i].fifo_queue = g_queue_new();
 199     }
 200 }
 201
 202 static int fifo_get_first_block(Cache *cache, int set)
 203 {
 204     GQueue *q = cache->sets[set].fifo_queue;
 205     return GPOINTER_TO_INT(g_queue_pop_tail(q));
 206 }
 207
 208 static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
 209 {
 210     GQueue *q = cache->sets[set].fifo_queue;
 211     g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
 212 }
 213
 214 static void fifo_destroy(Cache *cache)
 215 {
 216     int i;
 217
 218     for (i = 0; i < cache->num_sets; i++) {
 219         g_queue_free(cache->sets[i].fifo_queue);
 220     }
 221 }
 222
 223 static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
 224 {
 225     return addr & cache->tag_mask;
 226 }
 227
 228 static inline uint64_t extract_set(Cache *cache, uint64_t addr)
 229 {
 230     return (addr & cache->set_mask) >> cache->blksize_shift;
 231 }
 232
 233 static const char *cache_config_error(int blksize, int assoc, int cachesize)
 234 {
 235     if (cachesize % blksize != 0) {
 236         return "cache size must be divisible by block size";
 237     } else if (cachesize % (blksize * assoc) != 0) {
 238         return "cache size must be divisible by set size (assoc * block size)";
 239     } else {
 240         return NULL;
 241     }
 242 }
 243
 244 static bool bad_cache_params(int blksize, int assoc, int cachesize)
 245 {
 246     return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
 247 }
 248
 249 static Cache *cache_init(int blksize, int assoc, int cachesize)
 250 {
 251     Cache *cache;
 252     int i;
 253     uint64_t blk_mask;
 254
 255     /*
 256      * This function shall not be called directly, and hence expects suitable
 257      * parameters.
 258      */
 259     g_assert(!bad_cache_params(blksize, assoc, cachesize));
 260
 261     cache = g_new(Cache, 1);
 262     cache->assoc = assoc;
 263     cache->cachesize = cachesize;
 264     cache->num_sets = cachesize / (blksize * assoc);
 265     cache->sets = g_new(CacheSet, cache->num_sets);
 266     cache->blksize_shift = pow_of_two(blksize);
 267     cache->accesses = 0;
 268     cache->misses = 0;
 269
 270     for (i = 0; i < cache->num_sets; i++) {
 271         cache->sets[i].blocks = g_new0(CacheBlock, assoc);
 272     }
 273
 274     blk_mask = blksize - 1;
 275     cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
 276     cache->tag_mask = ~(cache->set_mask | blk_mask);
 277
 278     if (metadata_init) {
 279         metadata_init(cache);
 280     }
 281
 282     return cache;
 283 }
 284
 285 static Cache **caches_init(int blksize, int assoc, int cachesize)
 286 {
 287     Cache **caches;
 288     int i;
 289
 290     if (bad_cache_params(blksize, assoc, cachesize)) {
 291         return NULL;
 292     }
 293
 294     caches = g_new(Cache *, cores);
 295
 296     for (i = 0; i < cores; i++) {
 297         caches[i] = cache_init(blksize, assoc, cachesize);
 298     }
 299
 300     return caches;
 301 }
 302
 303 static int get_invalid_block(Cache *cache, uint64_t set)
 304 {
 305     int i;
 306
 307     for (i = 0; i < cache->assoc; i++) {
 308         if (!cache->sets[set].blocks[i].valid) {
 309             return i;
 310         }
 311     }
 312
 313     return -1;
 314 }
 315
 316 static int get_replaced_block(Cache *cache, int set)
 317 {
 318     switch (policy) {
 319     case RAND:
 320         return g_rand_int_range(rng, 0, cache->assoc);
 321     case LRU:
 322         return lru_get_lru_block(cache, set);
 323     case FIFO:
 324         return fifo_get_first_block(cache, set);
 325     default:
 326         g_assert_not_reached();
 327     }
 328 }
 329
 330 static int in_cache(Cache *cache, uint64_t addr)
 331 {
 332     int i;
 333     uint64_t tag, set;
 334
 335     tag = extract_tag(cache, addr);
 336     set = extract_set(cache, addr);
 337
 338     for (i = 0; i < cache->assoc; i++) {
 339         if (cache->sets[set].blocks[i].tag == tag &&
 340                 cache->sets[set].blocks[i].valid) {
 341             return i;
 342         }
 343     }
 344
 345     return -1;
 346 }
 347
 348 /**
 349  * access_cache(): Simulate a cache access
 350  * @cache: The cache under simulation
 351  * @addr: The address of the requested memory location
 352  *
 353  * Returns true if the requested data is hit in the cache and false when missed.
 354  * The cache is updated on miss for the next access.
 355  */
 356 static bool access_cache(Cache *cache, uint64_t addr)
 357 {
 358     int hit_blk, replaced_blk;
 359     uint64_t tag, set;
 360
 361     tag = extract_tag(cache, addr);
 362     set = extract_set(cache, addr);
 363
 364     hit_blk = in_cache(cache, addr);
 365     if (hit_blk != -1) {
 366         if (update_hit) {
 367             update_hit(cache, set, hit_blk);
 368         }
 369         return true;
 370     }
 371
 372     replaced_blk = get_invalid_block(cache, set);
 373
 374     if (replaced_blk == -1) {
 375         replaced_blk = get_replaced_block(cache, set);
 376     }
 377
 378     if (update_miss) {
 379         update_miss(cache, set, replaced_blk);
 380     }
 381
 382     cache->sets[set].blocks[replaced_blk].tag = tag;
 383     cache->sets[set].blocks[replaced_blk].valid = true;
 384
 385     return false;
 386 }
 387
 388 static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
 389                             uint64_t vaddr, void *userdata)
 390 {
 391     uint64_t effective_addr;
 392     struct qemu_plugin_hwaddr *hwaddr;
 393     int cache_idx;
 394     InsnData *insn;
 395     bool hit_in_l1;
 396
 397     hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
 398     if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
 399         return;
 400     }
 401
 402     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
 403     cache_idx = vcpu_index % cores;
 404
 405     g_mutex_lock(&l1_dcache_locks[cache_idx]);
 406     hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr);
 407     if (!hit_in_l1) {
 408         insn = userdata;
 409         __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST);
 410         l1_dcaches[cache_idx]->misses++;
 411     }
 412     l1_dcaches[cache_idx]->accesses++;
 413     g_mutex_unlock(&l1_dcache_locks[cache_idx]);
 414
 415     if (hit_in_l1 || !use_l2) {
 416         /* No need to access L2 */
 417         return;
 418     }
 419
 420     g_mutex_lock(&l2_ucache_locks[cache_idx]);
 421     if (!access_cache(l2_ucaches[cache_idx], effective_addr)) {
 422         insn = userdata;
 423         __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
 424         l2_ucaches[cache_idx]->misses++;
 425     }
 426     l2_ucaches[cache_idx]->accesses++;
 427     g_mutex_unlock(&l2_ucache_locks[cache_idx]);
 428 }
 429
 430 static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
 431 {
 432     uint64_t insn_addr;
 433     InsnData *insn;
 434     int cache_idx;
 435     bool hit_in_l1;
 436
 437     insn_addr = ((InsnData *) userdata)->addr;
 438
 439     cache_idx = vcpu_index % cores;
 440     g_mutex_lock(&l1_icache_locks[cache_idx]);
 441     hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr);
 442     if (!hit_in_l1) {
 443         insn = userdata;
 444         __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST);
 445         l1_icaches[cache_idx]->misses++;
 446     }
 447     l1_icaches[cache_idx]->accesses++;
 448     g_mutex_unlock(&l1_icache_locks[cache_idx]);
 449
 450     if (hit_in_l1 || !use_l2) {
 451         /* No need to access L2 */
 452         return;
 453     }
 454
 455     g_mutex_lock(&l2_ucache_locks[cache_idx]);
 456     if (!access_cache(l2_ucaches[cache_idx], insn_addr)) {
 457         insn = userdata;
 458         __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
 459         l2_ucaches[cache_idx]->misses++;
 460     }
 461     l2_ucaches[cache_idx]->accesses++;
 462     g_mutex_unlock(&l2_ucache_locks[cache_idx]);
 463 }
 464
 465 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 466 {
 467     size_t n_insns;
 468     size_t i;
 469     InsnData *data;
 470
 471     n_insns = qemu_plugin_tb_n_insns(tb);
 472     for (i = 0; i < n_insns; i++) {
 473         struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
 474         uint64_t effective_addr;
 475
 476         if (sys) {
 477             effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
 478         } else {
 479             effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
 480         }
 481
 482         /*
 483          * Instructions might get translated multiple times, we do not create
 484          * new entries for those instructions. Instead, we fetch the same
 485          * entry from the hash table and register it for the callback again.
 486          */
 487         g_mutex_lock(&hashtable_lock);
 488         data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
 489         if (data == NULL) {
 490             data = g_new0(InsnData, 1);
 491             data->disas_str = qemu_plugin_insn_disas(insn);
 492             data->symbol = qemu_plugin_insn_symbol(insn);
 493             data->addr = effective_addr;
 494             g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
 495                                (gpointer) data);
 496         }
 497         g_mutex_unlock(&hashtable_lock);
 498
 499         qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
 500                                          QEMU_PLUGIN_CB_NO_REGS,
 501                                          rw, data);
 502
 503         qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
 504                                                QEMU_PLUGIN_CB_NO_REGS, data);
 505     }
 506 }
 507
 508 static void insn_free(gpointer data)
 509 {
 510     InsnData *insn = (InsnData *) data;
 511     g_free(insn->disas_str);
 512     g_free(insn);
 513 }
 514
 515 static void cache_free(Cache *cache)
 516 {
 517     for (int i = 0; i < cache->num_sets; i++) {
 518         g_free(cache->sets[i].blocks);
 519     }
 520
 521     if (metadata_destroy) {
 522         metadata_destroy(cache);
 523     }
 524
 525     g_free(cache->sets);
 526     g_free(cache);
 527 }
 528
 529 static void caches_free(Cache **caches)
 530 {
 531     int i;
 532
 533     for (i = 0; i < cores; i++) {
 534         cache_free(caches[i]);
 535     }
 536 }
 537
 538 static void append_stats_line(GString *line, uint64_t l1_daccess,
 539                               uint64_t l1_dmisses, uint64_t l1_iaccess,
 540                               uint64_t l1_imisses,  uint64_t l2_access,
 541                               uint64_t l2_misses)
 542 {
 543     double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate;
 544
 545     l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0;
 546     l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0;
 547
 548     g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
 549                            "  %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%",
 550                            l1_daccess,
 551                            l1_dmisses,
 552                            l1_daccess ? l1_dmiss_rate : 0.0,
 553                            l1_iaccess,
 554                            l1_imisses,
 555                            l1_iaccess ? l1_imiss_rate : 0.0);
 556
 557     if (use_l2) {
 558         l2_miss_rate =  ((double) l2_misses) / (l2_access) * 100.0;
 559         g_string_append_printf(line,
 560                                "  %-12" PRIu64 " %-11" PRIu64 " %10.4lf%%",
 561                                l2_access,
 562                                l2_misses,
 563                                l2_access ? l2_miss_rate : 0.0);
 564     }
 565
 566     g_string_append(line, "\n");
 567 }
 568
 569 static void sum_stats(void)
 570 {
 571     int i;
 572
 573     g_assert(cores > 1);
 574     for (i = 0; i < cores; i++) {
 575         l1_imisses += l1_icaches[i]->misses;
 576         l1_dmisses += l1_dcaches[i]->misses;
 577         l1_imem_accesses += l1_icaches[i]->accesses;
 578         l1_dmem_accesses += l1_dcaches[i]->accesses;
 579
 580         if (use_l2) {
 581             l2_misses += l2_ucaches[i]->misses;
 582             l2_mem_accesses += l2_ucaches[i]->accesses;
 583         }
 584     }
 585 }
 586
 587 static int dcmp(gconstpointer a, gconstpointer b)
 588 {
 589     InsnData *insn_a = (InsnData *) a;
 590     InsnData *insn_b = (InsnData *) b;
 591
 592     return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1;
 593 }
 594
 595 static int icmp(gconstpointer a, gconstpointer b)
 596 {
 597     InsnData *insn_a = (InsnData *) a;
 598     InsnData *insn_b = (InsnData *) b;
 599
 600     return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1;
 601 }
 602
 603 static int l2_cmp(gconstpointer a, gconstpointer b)
 604 {
 605     InsnData *insn_a = (InsnData *) a;
 606     InsnData *insn_b = (InsnData *) b;
 607
 608     return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1;
 609 }
 610
 611 static void log_stats(void)
 612 {
 613     int i;
 614     Cache *icache, *dcache, *l2_cache;
 615
 616     g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
 617                                           " dmiss rate, insn accesses,"
 618                                           " insn misses, imiss rate");
 619
 620     if (use_l2) {
 621         g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate");
 622     }
 623
 624     g_string_append(rep, "\n");
 625
 626     for (i = 0; i < cores; i++) {
 627         g_string_append_printf(rep, "%-8d", i);
 628         dcache = l1_dcaches[i];
 629         icache = l1_icaches[i];
 630         l2_cache = use_l2 ? l2_ucaches[i] : NULL;
 631         append_stats_line(rep, dcache->accesses, dcache->misses,
 632                 icache->accesses, icache->misses,
 633                 l2_cache ? l2_cache->accesses : 0,
 634                 l2_cache ? l2_cache->misses : 0);
 635     }
 636
 637     if (cores > 1) {
 638         sum_stats();
 639         g_string_append_printf(rep, "%-8s", "sum");
 640         append_stats_line(rep, l1_dmem_accesses, l1_dmisses,
 641                 l1_imem_accesses, l1_imisses,
 642                 l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0);
 643     }
 644
 645     g_string_append(rep, "\n");
 646     qemu_plugin_outs(rep->str);
 647 }
 648
 649 static void log_top_insns(void)
 650 {
 651     int i;
 652     GList *curr, *miss_insns;
 653     InsnData *insn;
 654
 655     miss_insns = g_hash_table_get_values(miss_ht);
 656     miss_insns = g_list_sort(miss_insns, dcmp);
 657     g_autoptr(GString) rep = g_string_new("");
 658     g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
 659
 660     for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
 661         insn = (InsnData *) curr->data;
 662         g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
 663         if (insn->symbol) {
 664             g_string_append_printf(rep, " (%s)", insn->symbol);
 665         }
 666         g_string_append_printf(rep, ", %" PRId64 ", %s\n",
 667                                insn->l1_dmisses, insn->disas_str);
 668     }
 669
 670     miss_insns = g_list_sort(miss_insns, icmp);
 671     g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
 672
 673     for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
 674         insn = (InsnData *) curr->data;
 675         g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
 676         if (insn->symbol) {
 677             g_string_append_printf(rep, " (%s)", insn->symbol);
 678         }
 679         g_string_append_printf(rep, ", %" PRId64 ", %s\n",
 680                                insn->l1_imisses, insn->disas_str);
 681     }
 682
 683     if (!use_l2) {
 684         goto finish;
 685     }
 686
 687     miss_insns = g_list_sort(miss_insns, l2_cmp);
 688     g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n");
 689
 690     for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
 691         insn = (InsnData *) curr->data;
 692         g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
 693         if (insn->symbol) {
 694             g_string_append_printf(rep, " (%s)", insn->symbol);
 695         }
 696         g_string_append_printf(rep, ", %" PRId64 ", %s\n",
 697                                insn->l2_misses, insn->disas_str);
 698     }
 699
 700 finish:
 701     qemu_plugin_outs(rep->str);
 702     g_list_free(miss_insns);
 703 }
 704
 705 static void plugin_exit(qemu_plugin_id_t id, void *p)
 706 {
 707     log_stats();
 708     log_top_insns();
 709
 710     caches_free(l1_dcaches);
 711     caches_free(l1_icaches);
 712
 713     g_free(l1_dcache_locks);
 714     g_free(l1_icache_locks);
 715
 716     if (use_l2) {
 717         caches_free(l2_ucaches);
 718         g_free(l2_ucache_locks);
 719     }
 720
 721     g_hash_table_destroy(miss_ht);
 722 }
 723
 724 static void policy_init(void)
 725 {
 726     switch (policy) {
 727     case LRU:
 728         update_hit = lru_update_blk;
 729         update_miss = lru_update_blk;
 730         metadata_init = lru_priorities_init;
 731         metadata_destroy = lru_priorities_destroy;
 732         break;
 733     case FIFO:
 734         update_miss = fifo_update_on_miss;
 735         metadata_init = fifo_init;
 736         metadata_destroy = fifo_destroy;
 737         break;
 738     case RAND:
 739         rng = g_rand_new();
 740         break;
 741     default:
 742         g_assert_not_reached();
 743     }
 744 }
 745
 746 QEMU_PLUGIN_EXPORT
 747 int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
 748                         int argc, char **argv)
 749 {
 750     int i;
 751     int l1_iassoc, l1_iblksize, l1_icachesize;
 752     int l1_dassoc, l1_dblksize, l1_dcachesize;
 753     int l2_assoc, l2_blksize, l2_cachesize;
 754
 755     limit = 32;
 756     sys = info->system_emulation;
 757
 758     l1_dassoc = 8;
 759     l1_dblksize = 64;
 760     l1_dcachesize = l1_dblksize * l1_dassoc * 32;
 761
 762     l1_iassoc = 8;
 763     l1_iblksize = 64;
 764     l1_icachesize = l1_iblksize * l1_iassoc * 32;
 765
 766     l2_assoc = 16;
 767     l2_blksize = 64;
 768     l2_cachesize = l2_assoc * l2_blksize * 2048;
 769
 770     policy = LRU;
 771
 772     cores = sys ? qemu_plugin_n_vcpus() : 1;
 773
 774     for (i = 0; i < argc; i++) {
 775         char *opt = argv[i];
 776         g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
 777
 778         if (g_strcmp0(tokens[0], "iblksize") == 0) {
 779             l1_iblksize = STRTOLL(tokens[1]);
 780         } else if (g_strcmp0(tokens[0], "iassoc") == 0) {
 781             l1_iassoc = STRTOLL(tokens[1]);
 782         } else if (g_strcmp0(tokens[0], "icachesize") == 0) {
 783             l1_icachesize = STRTOLL(tokens[1]);
 784         } else if (g_strcmp0(tokens[0], "dblksize") == 0) {
 785             l1_dblksize = STRTOLL(tokens[1]);
 786         } else if (g_strcmp0(tokens[0], "dassoc") == 0) {
 787             l1_dassoc = STRTOLL(tokens[1]);
 788         } else if (g_strcmp0(tokens[0], "dcachesize") == 0) {
 789             l1_dcachesize = STRTOLL(tokens[1]);
 790         } else if (g_strcmp0(tokens[0], "limit") == 0) {
 791             limit = STRTOLL(tokens[1]);
 792         } else if (g_strcmp0(tokens[0], "cores") == 0) {
 793             cores = STRTOLL(tokens[1]);
 794         } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) {
 795             use_l2 = true;
 796             l2_cachesize = STRTOLL(tokens[1]);
 797         } else if (g_strcmp0(tokens[0], "l2blksize") == 0) {
 798             use_l2 = true;
 799             l2_blksize = STRTOLL(tokens[1]);
 800         } else if (g_strcmp0(tokens[0], "l2assoc") == 0) {
 801             use_l2 = true;
 802             l2_assoc = STRTOLL(tokens[1]);
 803         } else if (g_strcmp0(tokens[0], "l2") == 0) {
 804             if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) {
 805                 fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
 806                 return -1;
 807             }
 808         } else if (g_strcmp0(tokens[0], "evict") == 0) {
 809             if (g_strcmp0(tokens[1], "rand") == 0) {
 810                 policy = RAND;
 811             } else if (g_strcmp0(tokens[1], "lru") == 0) {
 812                 policy = LRU;
 813             } else if (g_strcmp0(tokens[1], "fifo") == 0) {
 814                 policy = FIFO;
 815             } else {
 816                 fprintf(stderr, "invalid eviction policy: %s\n", opt);
 817                 return -1;
 818             }
 819         } else {
 820             fprintf(stderr, "option parsing failed: %s\n", opt);
 821             return -1;
 822         }
 823     }
 824
 825     policy_init();
 826
 827     l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize);
 828     if (!l1_dcaches) {
 829         const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize);
 830         fprintf(stderr, "dcache cannot be constructed from given parameters\n");
 831         fprintf(stderr, "%s\n", err);
 832         return -1;
 833     }
 834
 835     l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize);
 836     if (!l1_icaches) {
 837         const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize);
 838         fprintf(stderr, "icache cannot be constructed from given parameters\n");
 839         fprintf(stderr, "%s\n", err);
 840         return -1;
 841     }
 842
 843     l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL;
 844     if (!l2_ucaches && use_l2) {
 845         const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize);
 846         fprintf(stderr, "L2 cache cannot be constructed from given parameters\n");
 847         fprintf(stderr, "%s\n", err);
 848         return -1;
 849     }
 850
 851     l1_dcache_locks = g_new0(GMutex, cores);
 852     l1_icache_locks = g_new0(GMutex, cores);
 853     l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL;
 854
 855     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
 856     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
 857
 858     miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);
 859
 860     return 0;
 861 }