/*
 * linux/mm/vmstat.c
 *
 * Manages VM statistics
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * zoned VM statistics
 * Copyright (C) 2006 Silicon Graphics, Inc.,
 *	Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

/*
 * Accumulate the page_state information across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
DEFINE_PER_CPU(struct page_state, page_states) = {0};

atomic_t nr_pagecache = ATOMIC_INIT(0);
EXPORT_SYMBOL(nr_pagecache);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
#endif

static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
        unsigned cpu;

        memset(ret, 0, nr * sizeof(unsigned long));
        cpus_and(*cpumask, *cpumask, cpu_online_map);

        for_each_cpu_mask(cpu, *cpumask) {
                unsigned long *in;
                unsigned long *out;
                unsigned off;
                unsigned next_cpu;

                in = (unsigned long *)&per_cpu(page_states, cpu);

                next_cpu = next_cpu(cpu, *cpumask);
                if (likely(next_cpu < NR_CPUS))
                        prefetch(&per_cpu(page_states, next_cpu));

                out = (unsigned long *)ret;
                for (off = 0; off < nr; off++)
                        *out++ += *in++;
        }
}
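
/*
 * Illustrative sketch (not part of the upstream file; member names are
 * assumptions): __get_page_state() only works because struct page_state is
 * laid out as a plain sequence of unsigned longs, so the whole structure can
 * be treated as a flat array and summed member by member.  Schematically:
 *
 *	struct page_state {
 *		unsigned long nr_dirty;
 *		unsigned long nr_writeback;
 *		...
 *	};
 *
 *	unsigned long *in = (unsigned long *)&per_cpu(page_states, cpu);
 *	unsigned long *out = (unsigned long *)ret;
 *	for (off = 0; off < nr; off++)
 *		out[off] += in[off];
 *
 * "nr" is the number of leading members the caller wants summed; passing
 * sizeof(struct page_state) / sizeof(unsigned long) sums everything.
 */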

void get_page_state_node(struct page_state *ret, int node)
{
        int nr;
        cpumask_t mask = node_to_cpumask(node);

        nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
        nr /= sizeof(unsigned long);

        __get_page_state(ret, nr+1, &mask);
}

void get_page_state(struct page_state *ret)
{
        int nr;
        cpumask_t mask = CPU_MASK_ALL;

        nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
        nr /= sizeof(unsigned long);

        __get_page_state(ret, nr + 1, &mask);
}

void get_full_page_state(struct page_state *ret)
{
        cpumask_t mask = CPU_MASK_ALL;

        __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
}

unsigned long read_page_state_offset(unsigned long offset)
{
        unsigned long ret = 0;
        int cpu;

        for_each_online_cpu(cpu) {
                unsigned long in;

                in = (unsigned long)&per_cpu(page_states, cpu) + offset;
                ret += *((unsigned long *)in);
        }
        return ret;
}

void __mod_page_state_offset(unsigned long offset, unsigned long delta)
{
        void *ptr;

        ptr = &__get_cpu_var(page_states);
        *(unsigned long *)(ptr + offset) += delta;
}
EXPORT_SYMBOL(__mod_page_state_offset);

void mod_page_state_offset(unsigned long offset, unsigned long delta)
{
        unsigned long flags;
        void *ptr;

        local_irq_save(flags);
        ptr = &__get_cpu_var(page_states);
        *(unsigned long *)(ptr + offset) += delta;
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_page_state_offset);
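
/*
 * Hedged sketch (not part of this file; macro names and their home header
 * are assumptions based on contemporary kernels): callers normally do not
 * pass raw offsets here.  The page state accessors are wrapper macros along
 * the lines of
 *
 *	#define mod_page_state(member, delta)				\
 *		mod_page_state_offset(offsetof(struct page_state, member), \
 *				      (delta))
 *	#define inc_page_state(member)	mod_page_state(member, 1UL)
 *
 * so that e.g. inc_page_state(pgfault) compiles down to an add at a fixed
 * offset into the per-cpu struct page_state.
 */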

void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                        unsigned long *free, struct pglist_data *pgdat)
{
        struct zone *zones = pgdat->node_zones;
        int i;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                *active += zones[i].nr_active;
                *inactive += zones[i].nr_inactive;
                *free += zones[i].free_pages;
        }
}

void get_zone_counts(unsigned long *active,
                unsigned long *inactive, unsigned long *free)
{
        struct pglist_data *pgdat;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for_each_online_pgdat(pgdat) {
                unsigned long l, m, n;
                __get_zone_counts(&l, &m, &n, pgdat);
                *active += l;
                *inactive += m;
                *free += n;
        }
}

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);
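
/*
 * Hedged sketch (for reference, not part of this file): zone_page_state_add(),
 * used by the functions below, is the inline helper from the vmstat header
 * that folds a delta into both the per-zone and the global counter, roughly:
 *
 *	static inline void zone_page_state_add(long x, struct zone *zone,
 *					       enum zone_stat_item item)
 *	{
 *		atomic_long_add(x, &zone->vm_stat[item]);
 *		atomic_long_add(x, &vm_stat[item]);
 *	}
 *
 * Every fold therefore updates both views at once, which is presumably why
 * vm_stat[] above has to be exported alongside the per-zone counters.
 */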

#ifdef CONFIG_SMP

#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        s8 *p;
        long x;

        p = diff_pointer(zone, item);
        x = delta + *p;

        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }

        *p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);
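
/*
 * Worked example (illustrative): with STAT_THRESHOLD at 32, a cpu that calls
 *
 *	__mod_zone_page_state(zone, item, 1);
 *
 * thirty-three times in a row touches only its private vm_stat_diff[] byte
 * for the first thirty-two calls.  On the thirty-third call x becomes 33,
 * exceeds the threshold, the whole batch is folded into the zone and global
 * atomics via zone_page_state_add() and the differential is reset to zero.
 * Readers of the summed counters can therefore lag by up to roughly
 * STAT_THRESHOLD per cpu and per item at any instant.
 */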

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_zone_page_state(zone, item, delta);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compiler to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic with respect to
 * an interrupt. However, the code must first determine the differential
 * location in a zone based on the processor number and then inc/dec the
 * counter. There is no guarantee without disabling preemption that the
 * processor will not change in between, so that atomicity cannot be
 * exploited in a useful way here.
 */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;
        s8 *p;

        zone = page_zone(page);
        local_irq_save(flags);
        p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;
        s8 *p;

        zone = page_zone(page);
        local_irq_save(flags);
        p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
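
/*
 * Usage sketch (illustrative; the item name is an assumption, the real enum
 * lives in the mmzone header): at this point in the series the only zoned
 * counter is the mapped-pages count, reported as "nr_mapped" in /proc/vmstat
 * below and named NR_FILE_MAPPED in later kernels.  A caller that already
 * runs with preemption disabled, e.g. rmap code holding the pte spinlock,
 * and that only modifies the item from process context can use the cheap
 * variants:
 *
 *	__inc_zone_page_state(page, NR_FILE_MAPPED);
 *	__dec_zone_page_state(page, NR_FILE_MAPPED);
 *
 * A caller without those guarantees uses inc_zone_page_state() /
 * dec_zone_page_state(), which bracket the update with local_irq_save() and
 * local_irq_restore().
 */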

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
        struct zone *zone;
        int i;
        unsigned long flags;

        for_each_zone(zone) {
                struct per_cpu_pageset *pcp;

                pcp = zone_pcp(zone, cpu);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (pcp->vm_stat_diff[i]) {
                                local_irq_save(flags);
                                zone_page_state_add(pcp->vm_stat_diff[i],
                                        zone, i);
                                pcp->vm_stat_diff[i] = 0;
                                local_irq_restore(flags);
                        }
        }
}

static void __refresh_cpu_vm_stats(void *dummy)
{
        refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that consolidating reduces the error, but the result is still
 * approximate while concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);

#endif

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

static void *frag_start(struct seq_file *m, loff_t *pos)
{
        pg_data_t *pgdat;
        loff_t node = *pos;
        for (pgdat = first_online_pgdat();
             pgdat && node;
             pgdat = next_online_pgdat(pgdat))
                --node;

        return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        (*pos)++;
        return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;
        int order;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
                for (order = 0; order < MAX_ORDER; ++order)
                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations fragmentation_op = {
        .start = frag_start,
        .next = frag_next,
        .stop = frag_stop,
        .show = frag_show,
};
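
/*
 * Illustrative note (wiring happens in the proc code, not here):
 * fragmentation_op is the seq_file backend behind /proc/buddyinfo, so
 * frag_show() above produces lines of the form (values made up):
 *
 *	Node 0, zone      DMA      3      5      5      2      1      2      1      1      2      2      0
 *	Node 0, zone   Normal    186     74     46     37     27     13      8      4      1      1      0
 *
 * i.e. one free-block count per free_area order, MAX_ORDER columns per
 * populated zone.
 */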

static char *vmstat_text[] = {
        /* Zoned VM counters */
        "nr_mapped",

        /* Page state */
        "nr_dirty",
        "nr_writeback",
        "nr_unstable",
        "nr_page_table_pages",
        "nr_slab",

        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",

        "pgalloc_high",
        "pgalloc_normal",
        "pgalloc_dma32",
        "pgalloc_dma",

        "pgfree",
        "pgactivate",
        "pgdeactivate",

        "pgfault",
        "pgmajfault",

        "pgrefill_high",
        "pgrefill_normal",
        "pgrefill_dma32",
        "pgrefill_dma",

        "pgsteal_high",
        "pgsteal_normal",
        "pgsteal_dma32",
        "pgsteal_dma",

        "pgscan_kswapd_high",
        "pgscan_kswapd_normal",
        "pgscan_kswapd_dma32",
        "pgscan_kswapd_dma",

        "pgscan_direct_high",
        "pgscan_direct_normal",
        "pgscan_direct_dma32",
        "pgscan_direct_dma",

        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "pageoutrun",
        "allocstall",

        "pgrotated",
        "nr_bounce",
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
                int i;

                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
                seq_printf(m,
                           "\n pages free %lu"
                           "\n min %lu"
                           "\n low %lu"
                           "\n high %lu"
                           "\n active %lu"
                           "\n inactive %lu"
                           "\n scanned %lu (a: %lu i: %lu)"
                           "\n spanned %lu"
                           "\n present %lu",
                           zone->free_pages,
                           zone->pages_min,
                           zone->pages_low,
                           zone->pages_high,
                           zone->nr_active,
                           zone->nr_inactive,
                           zone->pages_scanned,
                           zone->nr_scan_active, zone->nr_scan_inactive,
                           zone->spanned_pages,
                           zone->present_pages);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        seq_printf(m, "\n %-12s %lu", vmstat_text[i],
                                        zone_page_state(zone, i));

                seq_printf(m,
                           "\n protection: (%lu",
                           zone->lowmem_reserve[0]);
                for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
                        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
                seq_printf(m,
                           ")"
                           "\n pagesets");
                for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;

                        pageset = zone_pcp(zone, i);
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                if (pageset->pcp[j].count)
                                        break;
                        }
                        if (j == ARRAY_SIZE(pageset->pcp))
                                continue;
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                seq_printf(m,
                                           "\n cpu: %i pcp: %i"
                                           "\n count: %i"
                                           "\n high: %i"
                                           "\n batch: %i",
                                           i, j,
                                           pageset->pcp[j].count,
                                           pageset->pcp[j].high,
                                           pageset->pcp[j].batch);
                        }
#ifdef CONFIG_NUMA
                        seq_printf(m,
                                   "\n numa_hit: %lu"
                                   "\n numa_miss: %lu"
                                   "\n numa_foreign: %lu"
                                   "\n interleave_hit: %lu"
                                   "\n local_node: %lu"
                                   "\n other_node: %lu",
                                   pageset->numa_hit,
                                   pageset->numa_miss,
                                   pageset->numa_foreign,
                                   pageset->interleave_hit,
                                   pageset->local_node,
                                   pageset->other_node);
#endif
                }
                seq_printf(m,
                           "\n all_unreclaimable: %u"
                           "\n prev_priority: %i"
                           "\n temp_priority: %i"
                           "\n start_pfn: %lu",
                           zone->all_unreclaimable,
                           zone->prev_priority,
                           zone->temp_priority,
                           zone->zone_start_pfn);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations zoneinfo_op = {
        .start = frag_start, /* iterate over all zones. The same as in
                              * fragmentation. */
        .next = frag_next,
        .stop = frag_stop,
        .show = zoneinfo_show,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
        unsigned long *v;
        struct page_state *ps;
        int i;

        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;

        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
                        + sizeof(*ps), GFP_KERNEL);
        m->private = v;
        if (!v)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                v[i] = global_page_state(i);
        ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
        get_full_page_state(ps);
        ps->pgpgin /= 2;        /* sectors -> kbytes */
        ps->pgpgout /= 2;
        return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
        (*pos)++;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;

        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
        return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}

struct seq_operations vmstat_op = {
        .start = vmstat_start,
        .next = vmstat_next,
        .stop = vmstat_stop,
        .show = vmstat_show,
};
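
/*
 * Illustrative note: vmstat_op backs /proc/vmstat.  vmstat_start() builds one
 * flat array, first the NR_VM_ZONE_STAT_ITEMS global zone counters and then
 * the accumulated struct page_state, and vmstat_show() emits one "name value"
 * pair per line in vmstat_text[] order, e.g. (values made up):
 *
 *	nr_mapped 126081
 *	nr_dirty 145
 *	nr_writeback 0
 *	...
 *	pgpgin 2934243
 *	pgpgout 4851232
 *
 * pgpgin/pgpgout are halved first because they are accumulated in 512-byte
 * sectors while /proc/vmstat reports kilobytes.
 */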

#endif /* CONFIG_PROC_FS */