]>
Commit | Line | Data |
---|---|---|
f6ac2354 CL |
1 | /* |
2 | * linux/mm/vmstat.c | |
3 | * | |
4 | * Manages VM statistics | |
5 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
2244b95a CL |
6 | * |
7 | * zoned VM statistics | |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | |
9 | * Christoph Lameter <christoph@lameter.com> | |
f6ac2354 CL |
10 | */ |
11 | ||
12 | #include <linux/config.h> | |
13 | #include <linux/mm.h> | |
2244b95a | 14 | #include <linux/module.h> |
f6ac2354 CL |
15 | |
16 | /* | |
17 | * Accumulate the page_state information across all CPUs. | |
18 | * The result is unavoidably approximate - it can change | |
19 | * during and after execution of this function. | |
20 | */ | |
21 | DEFINE_PER_CPU(struct page_state, page_states) = {0}; | |
22 | ||
f6ac2354 CL |
23 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
24 | { | |
25 | unsigned cpu; | |
26 | ||
27 | memset(ret, 0, nr * sizeof(unsigned long)); | |
28 | cpus_and(*cpumask, *cpumask, cpu_online_map); | |
29 | ||
30 | for_each_cpu_mask(cpu, *cpumask) { | |
31 | unsigned long *in; | |
32 | unsigned long *out; | |
33 | unsigned off; | |
34 | unsigned next_cpu; | |
35 | ||
36 | in = (unsigned long *)&per_cpu(page_states, cpu); | |
37 | ||
38 | next_cpu = next_cpu(cpu, *cpumask); | |
39 | if (likely(next_cpu < NR_CPUS)) | |
40 | prefetch(&per_cpu(page_states, next_cpu)); | |
41 | ||
42 | out = (unsigned long *)ret; | |
43 | for (off = 0; off < nr; off++) | |
44 | *out++ += *in++; | |
45 | } | |
46 | } | |
47 | ||
f6ac2354 CL |
48 | void get_full_page_state(struct page_state *ret) |
49 | { | |
50 | cpumask_t mask = CPU_MASK_ALL; | |
51 | ||
52 | __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); | |
53 | } | |
54 | ||
55 | unsigned long read_page_state_offset(unsigned long offset) | |
56 | { | |
57 | unsigned long ret = 0; | |
58 | int cpu; | |
59 | ||
60 | for_each_online_cpu(cpu) { | |
61 | unsigned long in; | |
62 | ||
63 | in = (unsigned long)&per_cpu(page_states, cpu) + offset; | |
64 | ret += *((unsigned long *)in); | |
65 | } | |
66 | return ret; | |
67 | } | |
68 | ||
69 | void __mod_page_state_offset(unsigned long offset, unsigned long delta) | |
70 | { | |
71 | void *ptr; | |
72 | ||
73 | ptr = &__get_cpu_var(page_states); | |
74 | *(unsigned long *)(ptr + offset) += delta; | |
75 | } | |
76 | EXPORT_SYMBOL(__mod_page_state_offset); | |
77 | ||
78 | void mod_page_state_offset(unsigned long offset, unsigned long delta) | |
79 | { | |
80 | unsigned long flags; | |
81 | void *ptr; | |
82 | ||
83 | local_irq_save(flags); | |
84 | ptr = &__get_cpu_var(page_states); | |
85 | *(unsigned long *)(ptr + offset) += delta; | |
86 | local_irq_restore(flags); | |
87 | } | |
88 | EXPORT_SYMBOL(mod_page_state_offset); | |
89 | ||
90 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | |
91 | unsigned long *free, struct pglist_data *pgdat) | |
92 | { | |
93 | struct zone *zones = pgdat->node_zones; | |
94 | int i; | |
95 | ||
96 | *active = 0; | |
97 | *inactive = 0; | |
98 | *free = 0; | |
99 | for (i = 0; i < MAX_NR_ZONES; i++) { | |
100 | *active += zones[i].nr_active; | |
101 | *inactive += zones[i].nr_inactive; | |
102 | *free += zones[i].free_pages; | |
103 | } | |
104 | } | |
105 | ||
106 | void get_zone_counts(unsigned long *active, | |
107 | unsigned long *inactive, unsigned long *free) | |
108 | { | |
109 | struct pglist_data *pgdat; | |
110 | ||
111 | *active = 0; | |
112 | *inactive = 0; | |
113 | *free = 0; | |
114 | for_each_online_pgdat(pgdat) { | |
115 | unsigned long l, m, n; | |
116 | __get_zone_counts(&l, &m, &n, pgdat); | |
117 | *active += l; | |
118 | *inactive += m; | |
119 | *free += n; | |
120 | } | |
121 | } | |
122 | ||
2244b95a CL |
123 | /* |
124 | * Manage combined zone based / global counters | |
125 | * | |
126 | * vm_stat contains the global counters | |
127 | */ | |
128 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | |
129 | EXPORT_SYMBOL(vm_stat); | |
130 | ||
131 | #ifdef CONFIG_SMP | |
132 | ||
133 | #define STAT_THRESHOLD 32 | |
134 | ||
135 | /* | |
136 | * Determine pointer to currently valid differential byte given a zone and | |
137 | * the item number. | |
138 | * | |
139 | * Preemption must be off | |
140 | */ | |
141 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |
142 | { | |
143 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | |
144 | } | |
145 | ||
146 | /* | |
147 | * For use when we know that interrupts are disabled. | |
148 | */ | |
149 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
150 | int delta) | |
151 | { | |
152 | s8 *p; | |
153 | long x; | |
154 | ||
155 | p = diff_pointer(zone, item); | |
156 | x = delta + *p; | |
157 | ||
158 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | |
159 | zone_page_state_add(x, zone, item); | |
160 | x = 0; | |
161 | } | |
162 | ||
163 | *p = x; | |
164 | } | |
165 | EXPORT_SYMBOL(__mod_zone_page_state); | |
166 | ||
167 | /* | |
168 | * For an unknown interrupt state | |
169 | */ | |
170 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
171 | int delta) | |
172 | { | |
173 | unsigned long flags; | |
174 | ||
175 | local_irq_save(flags); | |
176 | __mod_zone_page_state(zone, item, delta); | |
177 | local_irq_restore(flags); | |
178 | } | |
179 | EXPORT_SYMBOL(mod_zone_page_state); | |
180 | ||
181 | /* | |
182 | * Optimized increment and decrement functions. | |
183 | * | |
184 | * These are only for a single page and therefore can take a struct page * | |
185 | * argument instead of struct zone *. This allows the inclusion of the code | |
186 | * generated for page_zone(page) into the optimized functions. | |
187 | * | |
188 | * No overflow check is necessary and therefore the differential can be | |
189 | * incremented or decremented in place which may allow the compilers to | |
190 | * generate better code. | |
191 | * | |
192 | * The increment or decrement is known and therefore one boundary check can | |
193 | * be omitted. | |
194 | * | |
195 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | |
196 | * However, the code must first determine the differential location in a zone | |
197 | * based on the processor number and then inc/dec the counter. There is no | |
198 | * guarantee without disabling preemption that the processor will not change | |
199 | * in between and therefore the atomicity vs. interrupt cannot be exploited | |
200 | * in a useful way here. | |
201 | */ | |
202 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
203 | { | |
204 | struct zone *zone = page_zone(page); | |
205 | s8 *p = diff_pointer(zone, item); | |
206 | ||
207 | (*p)++; | |
208 | ||
209 | if (unlikely(*p > STAT_THRESHOLD)) { | |
210 | zone_page_state_add(*p, zone, item); | |
211 | *p = 0; | |
212 | } | |
213 | } | |
214 | EXPORT_SYMBOL(__inc_zone_page_state); | |
215 | ||
216 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
217 | { | |
218 | struct zone *zone = page_zone(page); | |
219 | s8 *p = diff_pointer(zone, item); | |
220 | ||
221 | (*p)--; | |
222 | ||
223 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
224 | zone_page_state_add(*p, zone, item); | |
225 | *p = 0; | |
226 | } | |
227 | } | |
228 | EXPORT_SYMBOL(__dec_zone_page_state); | |
229 | ||
230 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
231 | { | |
232 | unsigned long flags; | |
233 | struct zone *zone; | |
234 | s8 *p; | |
235 | ||
236 | zone = page_zone(page); | |
237 | local_irq_save(flags); | |
238 | p = diff_pointer(zone, item); | |
239 | ||
240 | (*p)++; | |
241 | ||
242 | if (unlikely(*p > STAT_THRESHOLD)) { | |
243 | zone_page_state_add(*p, zone, item); | |
244 | *p = 0; | |
245 | } | |
246 | local_irq_restore(flags); | |
247 | } | |
248 | EXPORT_SYMBOL(inc_zone_page_state); | |
249 | ||
250 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
251 | { | |
252 | unsigned long flags; | |
253 | struct zone *zone; | |
254 | s8 *p; | |
255 | ||
256 | zone = page_zone(page); | |
257 | local_irq_save(flags); | |
258 | p = diff_pointer(zone, item); | |
259 | ||
260 | (*p)--; | |
261 | ||
262 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
263 | zone_page_state_add(*p, zone, item); | |
264 | *p = 0; | |
265 | } | |
266 | local_irq_restore(flags); | |
267 | } | |
268 | EXPORT_SYMBOL(dec_zone_page_state); | |
269 | ||
270 | /* | |
271 | * Update the zone counters for one cpu. | |
272 | */ | |
273 | void refresh_cpu_vm_stats(int cpu) | |
274 | { | |
275 | struct zone *zone; | |
276 | int i; | |
277 | unsigned long flags; | |
278 | ||
279 | for_each_zone(zone) { | |
280 | struct per_cpu_pageset *pcp; | |
281 | ||
282 | pcp = zone_pcp(zone, cpu); | |
283 | ||
284 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
285 | if (pcp->vm_stat_diff[i]) { | |
286 | local_irq_save(flags); | |
287 | zone_page_state_add(pcp->vm_stat_diff[i], | |
288 | zone, i); | |
289 | pcp->vm_stat_diff[i] = 0; | |
290 | local_irq_restore(flags); | |
291 | } | |
292 | } | |
293 | } | |
294 | ||
/*
 * on_each_cpu() callback: fold the differentials of the CPU this runs
 * on.  @dummy is unused.
 */
static void __refresh_cpu_vm_stats(void *dummy)
{
	int this_cpu = smp_processor_id();

	refresh_cpu_vm_stats(this_cpu);
}
299 | ||
300 | /* | |
301 | * Consolidate all counters. | |
302 | * | |
303 | * Note that the result is less inaccurate but still inaccurate | |
304 | * if concurrent processes are allowed to run. | |
305 | */ | |
306 | void refresh_vm_stats(void) | |
307 | { | |
308 | on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1); | |
309 | } | |
310 | EXPORT_SYMBOL(refresh_vm_stats); | |
311 | ||
312 | #endif | |
313 | ||
f6ac2354 CL |
314 | #ifdef CONFIG_PROC_FS |
315 | ||
316 | #include <linux/seq_file.h> | |
317 | ||
318 | static void *frag_start(struct seq_file *m, loff_t *pos) | |
319 | { | |
320 | pg_data_t *pgdat; | |
321 | loff_t node = *pos; | |
322 | for (pgdat = first_online_pgdat(); | |
323 | pgdat && node; | |
324 | pgdat = next_online_pgdat(pgdat)) | |
325 | --node; | |
326 | ||
327 | return pgdat; | |
328 | } | |
329 | ||
330 | static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) | |
331 | { | |
332 | pg_data_t *pgdat = (pg_data_t *)arg; | |
333 | ||
334 | (*pos)++; | |
335 | return next_online_pgdat(pgdat); | |
336 | } | |
337 | ||
/*
 * seq_file ->stop: intentionally empty, this function releases nothing.
 */
static void frag_stop(struct seq_file *m, void *arg)
{
}
341 | ||
342 | /* | |
343 | * This walks the free areas for each zone. | |
344 | */ | |
345 | static int frag_show(struct seq_file *m, void *arg) | |
346 | { | |
347 | pg_data_t *pgdat = (pg_data_t *)arg; | |
348 | struct zone *zone; | |
349 | struct zone *node_zones = pgdat->node_zones; | |
350 | unsigned long flags; | |
351 | int order; | |
352 | ||
353 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | |
354 | if (!populated_zone(zone)) | |
355 | continue; | |
356 | ||
357 | spin_lock_irqsave(&zone->lock, flags); | |
358 | seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); | |
359 | for (order = 0; order < MAX_ORDER; ++order) | |
360 | seq_printf(m, "%6lu ", zone->free_area[order].nr_free); | |
361 | spin_unlock_irqrestore(&zone->lock, flags); | |
362 | seq_putc(m, '\n'); | |
363 | } | |
364 | return 0; | |
365 | } | |
366 | ||
367 | struct seq_operations fragmentation_op = { | |
368 | .start = frag_start, | |
369 | .next = frag_next, | |
370 | .stop = frag_stop, | |
371 | .show = frag_show, | |
372 | }; | |
373 | ||
/*
 * Names printed for each counter, in output order.
 *
 * The first NR_VM_ZONE_STAT_ITEMS entries label the zoned counters
 * (zoneinfo_show() indexes this table with the item number); the
 * remaining entries label the event counters.  The table is only ever
 * read, so both the pointers and the strings are const.
 */
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",

	/* Event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
	"nr_bounce",
};
433 | ||
434 | /* | |
435 | * Output information about zones in @pgdat. | |
436 | */ | |
437 | static int zoneinfo_show(struct seq_file *m, void *arg) | |
438 | { | |
439 | pg_data_t *pgdat = arg; | |
440 | struct zone *zone; | |
441 | struct zone *node_zones = pgdat->node_zones; | |
442 | unsigned long flags; | |
443 | ||
444 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { | |
445 | int i; | |
446 | ||
447 | if (!populated_zone(zone)) | |
448 | continue; | |
449 | ||
450 | spin_lock_irqsave(&zone->lock, flags); | |
451 | seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); | |
452 | seq_printf(m, | |
453 | "\n pages free %lu" | |
454 | "\n min %lu" | |
455 | "\n low %lu" | |
456 | "\n high %lu" | |
457 | "\n active %lu" | |
458 | "\n inactive %lu" | |
459 | "\n scanned %lu (a: %lu i: %lu)" | |
460 | "\n spanned %lu" | |
461 | "\n present %lu", | |
462 | zone->free_pages, | |
463 | zone->pages_min, | |
464 | zone->pages_low, | |
465 | zone->pages_high, | |
466 | zone->nr_active, | |
467 | zone->nr_inactive, | |
468 | zone->pages_scanned, | |
469 | zone->nr_scan_active, zone->nr_scan_inactive, | |
470 | zone->spanned_pages, | |
471 | zone->present_pages); | |
2244b95a CL |
472 | |
473 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
474 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | |
475 | zone_page_state(zone, i)); | |
476 | ||
f6ac2354 CL |
477 | seq_printf(m, |
478 | "\n protection: (%lu", | |
479 | zone->lowmem_reserve[0]); | |
480 | for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) | |
481 | seq_printf(m, ", %lu", zone->lowmem_reserve[i]); | |
482 | seq_printf(m, | |
483 | ")" | |
484 | "\n pagesets"); | |
485 | for_each_online_cpu(i) { | |
486 | struct per_cpu_pageset *pageset; | |
487 | int j; | |
488 | ||
489 | pageset = zone_pcp(zone, i); | |
490 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
491 | if (pageset->pcp[j].count) | |
492 | break; | |
493 | } | |
494 | if (j == ARRAY_SIZE(pageset->pcp)) | |
495 | continue; | |
496 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
497 | seq_printf(m, | |
498 | "\n cpu: %i pcp: %i" | |
499 | "\n count: %i" | |
500 | "\n high: %i" | |
501 | "\n batch: %i", | |
502 | i, j, | |
503 | pageset->pcp[j].count, | |
504 | pageset->pcp[j].high, | |
505 | pageset->pcp[j].batch); | |
506 | } | |
507 | #ifdef CONFIG_NUMA | |
508 | seq_printf(m, | |
509 | "\n numa_hit: %lu" | |
510 | "\n numa_miss: %lu" | |
511 | "\n numa_foreign: %lu" | |
512 | "\n interleave_hit: %lu" | |
513 | "\n local_node: %lu" | |
514 | "\n other_node: %lu", | |
515 | pageset->numa_hit, | |
516 | pageset->numa_miss, | |
517 | pageset->numa_foreign, | |
518 | pageset->interleave_hit, | |
519 | pageset->local_node, | |
520 | pageset->other_node); | |
521 | #endif | |
522 | } | |
523 | seq_printf(m, | |
524 | "\n all_unreclaimable: %u" | |
525 | "\n prev_priority: %i" | |
526 | "\n temp_priority: %i" | |
527 | "\n start_pfn: %lu", | |
528 | zone->all_unreclaimable, | |
529 | zone->prev_priority, | |
530 | zone->temp_priority, | |
531 | zone->zone_start_pfn); | |
532 | spin_unlock_irqrestore(&zone->lock, flags); | |
533 | seq_putc(m, '\n'); | |
534 | } | |
535 | return 0; | |
536 | } | |
537 | ||
538 | struct seq_operations zoneinfo_op = { | |
539 | .start = frag_start, /* iterate over all zones. The same as in | |
540 | * fragmentation. */ | |
541 | .next = frag_next, | |
542 | .stop = frag_stop, | |
543 | .show = zoneinfo_show, | |
544 | }; | |
545 | ||
546 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | |
547 | { | |
2244b95a | 548 | unsigned long *v; |
f6ac2354 | 549 | struct page_state *ps; |
2244b95a | 550 | int i; |
f6ac2354 CL |
551 | |
552 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
553 | return NULL; | |
554 | ||
2244b95a CL |
555 | v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) |
556 | + sizeof(*ps), GFP_KERNEL); | |
557 | m->private = v; | |
558 | if (!v) | |
f6ac2354 | 559 | return ERR_PTR(-ENOMEM); |
2244b95a CL |
560 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
561 | v[i] = global_page_state(i); | |
562 | ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS); | |
f6ac2354 CL |
563 | get_full_page_state(ps); |
564 | ps->pgpgin /= 2; /* sectors -> kbytes */ | |
565 | ps->pgpgout /= 2; | |
2244b95a | 566 | return v + *pos; |
f6ac2354 CL |
567 | } |
568 | ||
569 | static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) | |
570 | { | |
571 | (*pos)++; | |
572 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
573 | return NULL; | |
574 | return (unsigned long *)m->private + *pos; | |
575 | } | |
576 | ||
577 | static int vmstat_show(struct seq_file *m, void *arg) | |
578 | { | |
579 | unsigned long *l = arg; | |
580 | unsigned long off = l - (unsigned long *)m->private; | |
581 | ||
582 | seq_printf(m, "%s %lu\n", vmstat_text[off], *l); | |
583 | return 0; | |
584 | } | |
585 | ||
586 | static void vmstat_stop(struct seq_file *m, void *arg) | |
587 | { | |
588 | kfree(m->private); | |
589 | m->private = NULL; | |
590 | } | |
591 | ||
592 | struct seq_operations vmstat_op = { | |
593 | .start = vmstat_start, | |
594 | .next = vmstat_next, | |
595 | .stop = vmstat_stop, | |
596 | .show = vmstat_show, | |
597 | }; | |
598 | ||
599 | #endif /* CONFIG_PROC_FS */ | |
600 |