]>
Commit | Line | Data |
---|---|---|
f6ac2354 CL |
1 | /* |
2 | * linux/mm/vmstat.c | |
3 | * | |
4 | * Manages VM statistics | |
5 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
2244b95a CL |
6 | * |
7 | * zoned VM statistics | |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | |
9 | * Christoph Lameter <christoph@lameter.com> | |
f6ac2354 CL |
10 | */ |
11 | ||
12 | #include <linux/config.h> | |
13 | #include <linux/mm.h> | |
2244b95a | 14 | #include <linux/module.h> |
f6ac2354 CL |
15 | |
16 | /* | |
17 | * Accumulate the page_state information across all CPUs. | |
18 | * The result is unavoidably approximate - it can change | |
19 | * during and after execution of this function. | |
20 | */ | |
21 | DEFINE_PER_CPU(struct page_state, page_states) = {0}; | |
22 | ||
f6ac2354 CL |
23 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
24 | { | |
25 | unsigned cpu; | |
26 | ||
27 | memset(ret, 0, nr * sizeof(unsigned long)); | |
28 | cpus_and(*cpumask, *cpumask, cpu_online_map); | |
29 | ||
30 | for_each_cpu_mask(cpu, *cpumask) { | |
31 | unsigned long *in; | |
32 | unsigned long *out; | |
33 | unsigned off; | |
34 | unsigned next_cpu; | |
35 | ||
36 | in = (unsigned long *)&per_cpu(page_states, cpu); | |
37 | ||
38 | next_cpu = next_cpu(cpu, *cpumask); | |
39 | if (likely(next_cpu < NR_CPUS)) | |
40 | prefetch(&per_cpu(page_states, next_cpu)); | |
41 | ||
42 | out = (unsigned long *)ret; | |
43 | for (off = 0; off < nr; off++) | |
44 | *out++ += *in++; | |
45 | } | |
46 | } | |
47 | ||
f6ac2354 CL |
48 | void get_full_page_state(struct page_state *ret) |
49 | { | |
50 | cpumask_t mask = CPU_MASK_ALL; | |
51 | ||
52 | __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); | |
53 | } | |
54 | ||
f6ac2354 CL |
55 | void __mod_page_state_offset(unsigned long offset, unsigned long delta) |
56 | { | |
57 | void *ptr; | |
58 | ||
59 | ptr = &__get_cpu_var(page_states); | |
60 | *(unsigned long *)(ptr + offset) += delta; | |
61 | } | |
62 | EXPORT_SYMBOL(__mod_page_state_offset); | |
63 | ||
64 | void mod_page_state_offset(unsigned long offset, unsigned long delta) | |
65 | { | |
66 | unsigned long flags; | |
67 | void *ptr; | |
68 | ||
69 | local_irq_save(flags); | |
70 | ptr = &__get_cpu_var(page_states); | |
71 | *(unsigned long *)(ptr + offset) += delta; | |
72 | local_irq_restore(flags); | |
73 | } | |
74 | EXPORT_SYMBOL(mod_page_state_offset); | |
75 | ||
76 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | |
77 | unsigned long *free, struct pglist_data *pgdat) | |
78 | { | |
79 | struct zone *zones = pgdat->node_zones; | |
80 | int i; | |
81 | ||
82 | *active = 0; | |
83 | *inactive = 0; | |
84 | *free = 0; | |
85 | for (i = 0; i < MAX_NR_ZONES; i++) { | |
86 | *active += zones[i].nr_active; | |
87 | *inactive += zones[i].nr_inactive; | |
88 | *free += zones[i].free_pages; | |
89 | } | |
90 | } | |
91 | ||
92 | void get_zone_counts(unsigned long *active, | |
93 | unsigned long *inactive, unsigned long *free) | |
94 | { | |
95 | struct pglist_data *pgdat; | |
96 | ||
97 | *active = 0; | |
98 | *inactive = 0; | |
99 | *free = 0; | |
100 | for_each_online_pgdat(pgdat) { | |
101 | unsigned long l, m, n; | |
102 | __get_zone_counts(&l, &m, &n, pgdat); | |
103 | *active += l; | |
104 | *inactive += m; | |
105 | *free += n; | |
106 | } | |
107 | } | |
108 | ||
2244b95a CL |
109 | /* |
110 | * Manage combined zone based / global counters | |
111 | * | |
112 | * vm_stat contains the global counters | |
113 | */ | |
114 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | |
115 | EXPORT_SYMBOL(vm_stat); | |
116 | ||
117 | #ifdef CONFIG_SMP | |
118 | ||
119 | #define STAT_THRESHOLD 32 | |
120 | ||
121 | /* | |
122 | * Determine pointer to currently valid differential byte given a zone and | |
123 | * the item number. | |
124 | * | |
125 | * Preemption must be off | |
126 | */ | |
127 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |
128 | { | |
129 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | |
130 | } | |
131 | ||
132 | /* | |
133 | * For use when we know that interrupts are disabled. | |
134 | */ | |
135 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
136 | int delta) | |
137 | { | |
138 | s8 *p; | |
139 | long x; | |
140 | ||
141 | p = diff_pointer(zone, item); | |
142 | x = delta + *p; | |
143 | ||
144 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | |
145 | zone_page_state_add(x, zone, item); | |
146 | x = 0; | |
147 | } | |
148 | ||
149 | *p = x; | |
150 | } | |
151 | EXPORT_SYMBOL(__mod_zone_page_state); | |
152 | ||
153 | /* | |
154 | * For an unknown interrupt state | |
155 | */ | |
156 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
157 | int delta) | |
158 | { | |
159 | unsigned long flags; | |
160 | ||
161 | local_irq_save(flags); | |
162 | __mod_zone_page_state(zone, item, delta); | |
163 | local_irq_restore(flags); | |
164 | } | |
165 | EXPORT_SYMBOL(mod_zone_page_state); | |
166 | ||
167 | /* | |
168 | * Optimized increment and decrement functions. | |
169 | * | |
170 | * These are only for a single page and therefore can take a struct page * | |
171 | * argument instead of struct zone *. This allows the inclusion of the code | |
172 | * generated for page_zone(page) into the optimized functions. | |
173 | * | |
174 | * No overflow check is necessary and therefore the differential can be | |
175 | * incremented or decremented in place which may allow the compilers to | |
176 | * generate better code. | |
177 | * | |
178 | * The increment or decrement is known and therefore one boundary check can | |
179 | * be omitted. | |
180 | * | |
181 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | |
182 | * However, the code must first determine the differential location in a zone | |
183 | * based on the processor number and then inc/dec the counter. There is no | |
184 | * guarantee without disabling preemption that the processor will not change | |
185 | * in between and therefore the atomicity vs. interrupt cannot be exploited | |
186 | * in a useful way here. | |
187 | */ | |
188 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
189 | { | |
190 | struct zone *zone = page_zone(page); | |
191 | s8 *p = diff_pointer(zone, item); | |
192 | ||
193 | (*p)++; | |
194 | ||
195 | if (unlikely(*p > STAT_THRESHOLD)) { | |
196 | zone_page_state_add(*p, zone, item); | |
197 | *p = 0; | |
198 | } | |
199 | } | |
200 | EXPORT_SYMBOL(__inc_zone_page_state); | |
201 | ||
202 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
203 | { | |
204 | struct zone *zone = page_zone(page); | |
205 | s8 *p = diff_pointer(zone, item); | |
206 | ||
207 | (*p)--; | |
208 | ||
209 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
210 | zone_page_state_add(*p, zone, item); | |
211 | *p = 0; | |
212 | } | |
213 | } | |
214 | EXPORT_SYMBOL(__dec_zone_page_state); | |
215 | ||
216 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
217 | { | |
218 | unsigned long flags; | |
219 | struct zone *zone; | |
220 | s8 *p; | |
221 | ||
222 | zone = page_zone(page); | |
223 | local_irq_save(flags); | |
224 | p = diff_pointer(zone, item); | |
225 | ||
226 | (*p)++; | |
227 | ||
228 | if (unlikely(*p > STAT_THRESHOLD)) { | |
229 | zone_page_state_add(*p, zone, item); | |
230 | *p = 0; | |
231 | } | |
232 | local_irq_restore(flags); | |
233 | } | |
234 | EXPORT_SYMBOL(inc_zone_page_state); | |
235 | ||
236 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
237 | { | |
238 | unsigned long flags; | |
239 | struct zone *zone; | |
240 | s8 *p; | |
241 | ||
242 | zone = page_zone(page); | |
243 | local_irq_save(flags); | |
244 | p = diff_pointer(zone, item); | |
245 | ||
246 | (*p)--; | |
247 | ||
248 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
249 | zone_page_state_add(*p, zone, item); | |
250 | *p = 0; | |
251 | } | |
252 | local_irq_restore(flags); | |
253 | } | |
254 | EXPORT_SYMBOL(dec_zone_page_state); | |
255 | ||
256 | /* | |
257 | * Update the zone counters for one cpu. | |
258 | */ | |
259 | void refresh_cpu_vm_stats(int cpu) | |
260 | { | |
261 | struct zone *zone; | |
262 | int i; | |
263 | unsigned long flags; | |
264 | ||
265 | for_each_zone(zone) { | |
266 | struct per_cpu_pageset *pcp; | |
267 | ||
268 | pcp = zone_pcp(zone, cpu); | |
269 | ||
270 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
271 | if (pcp->vm_stat_diff[i]) { | |
272 | local_irq_save(flags); | |
273 | zone_page_state_add(pcp->vm_stat_diff[i], | |
274 | zone, i); | |
275 | pcp->vm_stat_diff[i] = 0; | |
276 | local_irq_restore(flags); | |
277 | } | |
278 | } | |
279 | } | |
280 | ||
/* on_each_cpu() callback: refresh the stats of the CPU it runs on. */
static void __refresh_cpu_vm_stats(void *dummy)
{
	int this_cpu = smp_processor_id();

	refresh_cpu_vm_stats(this_cpu);
}
285 | ||
286 | /* | |
287 | * Consolidate all counters. | |
288 | * | |
289 | * Note that the result is less inaccurate but still inaccurate | |
290 | * if concurrent processes are allowed to run. | |
291 | */ | |
292 | void refresh_vm_stats(void) | |
293 | { | |
294 | on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1); | |
295 | } | |
296 | EXPORT_SYMBOL(refresh_vm_stats); | |
297 | ||
298 | #endif | |
299 | ||
f6ac2354 CL |
300 | #ifdef CONFIG_PROC_FS |
301 | ||
302 | #include <linux/seq_file.h> | |
303 | ||
304 | static void *frag_start(struct seq_file *m, loff_t *pos) | |
305 | { | |
306 | pg_data_t *pgdat; | |
307 | loff_t node = *pos; | |
308 | for (pgdat = first_online_pgdat(); | |
309 | pgdat && node; | |
310 | pgdat = next_online_pgdat(pgdat)) | |
311 | --node; | |
312 | ||
313 | return pgdat; | |
314 | } | |
315 | ||
316 | static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) | |
317 | { | |
318 | pg_data_t *pgdat = (pg_data_t *)arg; | |
319 | ||
320 | (*pos)++; | |
321 | return next_online_pgdat(pgdat); | |
322 | } | |
323 | ||
/* seq_file ->stop: nothing to release for the node iterator. */
static void frag_stop(struct seq_file *m, void *arg)
{
}
327 | ||
328 | /* | |
329 | * This walks the free areas for each zone. | |
330 | */ | |
331 | static int frag_show(struct seq_file *m, void *arg) | |
332 | { | |
333 | pg_data_t *pgdat = (pg_data_t *)arg; | |
334 | struct zone *zone; | |
335 | struct zone *node_zones = pgdat->node_zones; | |
336 | unsigned long flags; | |
337 | int order; | |
338 | ||
339 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | |
340 | if (!populated_zone(zone)) | |
341 | continue; | |
342 | ||
343 | spin_lock_irqsave(&zone->lock, flags); | |
344 | seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); | |
345 | for (order = 0; order < MAX_ORDER; ++order) | |
346 | seq_printf(m, "%6lu ", zone->free_area[order].nr_free); | |
347 | spin_unlock_irqrestore(&zone->lock, flags); | |
348 | seq_putc(m, '\n'); | |
349 | } | |
350 | return 0; | |
351 | } | |
352 | ||
353 | struct seq_operations fragmentation_op = { | |
354 | .start = frag_start, | |
355 | .next = frag_next, | |
356 | .stop = frag_stop, | |
357 | .show = frag_show, | |
358 | }; | |
359 | ||
/*
 * Names printed next to each statistic.
 *
 * Order matters: zoneinfo_show() indexes the first NR_VM_ZONE_STAT_ITEMS
 * entries by zone stat item, and vmstat_show() indexes the whole table by
 * the position of a value in the buffer built by vmstat_start() (zoned
 * counters first, then the unsigned long slots of struct page_state).
 *
 * The table is never written, so both the pointers and the strings are
 * declared const.
 */
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_bounce",

	/* Event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
};
419 | ||
420 | /* | |
421 | * Output information about zones in @pgdat. | |
422 | */ | |
423 | static int zoneinfo_show(struct seq_file *m, void *arg) | |
424 | { | |
425 | pg_data_t *pgdat = arg; | |
426 | struct zone *zone; | |
427 | struct zone *node_zones = pgdat->node_zones; | |
428 | unsigned long flags; | |
429 | ||
430 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { | |
431 | int i; | |
432 | ||
433 | if (!populated_zone(zone)) | |
434 | continue; | |
435 | ||
436 | spin_lock_irqsave(&zone->lock, flags); | |
437 | seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); | |
438 | seq_printf(m, | |
439 | "\n pages free %lu" | |
440 | "\n min %lu" | |
441 | "\n low %lu" | |
442 | "\n high %lu" | |
443 | "\n active %lu" | |
444 | "\n inactive %lu" | |
445 | "\n scanned %lu (a: %lu i: %lu)" | |
446 | "\n spanned %lu" | |
447 | "\n present %lu", | |
448 | zone->free_pages, | |
449 | zone->pages_min, | |
450 | zone->pages_low, | |
451 | zone->pages_high, | |
452 | zone->nr_active, | |
453 | zone->nr_inactive, | |
454 | zone->pages_scanned, | |
455 | zone->nr_scan_active, zone->nr_scan_inactive, | |
456 | zone->spanned_pages, | |
457 | zone->present_pages); | |
2244b95a CL |
458 | |
459 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
460 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | |
461 | zone_page_state(zone, i)); | |
462 | ||
f6ac2354 CL |
463 | seq_printf(m, |
464 | "\n protection: (%lu", | |
465 | zone->lowmem_reserve[0]); | |
466 | for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) | |
467 | seq_printf(m, ", %lu", zone->lowmem_reserve[i]); | |
468 | seq_printf(m, | |
469 | ")" | |
470 | "\n pagesets"); | |
471 | for_each_online_cpu(i) { | |
472 | struct per_cpu_pageset *pageset; | |
473 | int j; | |
474 | ||
475 | pageset = zone_pcp(zone, i); | |
476 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
477 | if (pageset->pcp[j].count) | |
478 | break; | |
479 | } | |
480 | if (j == ARRAY_SIZE(pageset->pcp)) | |
481 | continue; | |
482 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
483 | seq_printf(m, | |
484 | "\n cpu: %i pcp: %i" | |
485 | "\n count: %i" | |
486 | "\n high: %i" | |
487 | "\n batch: %i", | |
488 | i, j, | |
489 | pageset->pcp[j].count, | |
490 | pageset->pcp[j].high, | |
491 | pageset->pcp[j].batch); | |
492 | } | |
493 | #ifdef CONFIG_NUMA | |
494 | seq_printf(m, | |
495 | "\n numa_hit: %lu" | |
496 | "\n numa_miss: %lu" | |
497 | "\n numa_foreign: %lu" | |
498 | "\n interleave_hit: %lu" | |
499 | "\n local_node: %lu" | |
500 | "\n other_node: %lu", | |
501 | pageset->numa_hit, | |
502 | pageset->numa_miss, | |
503 | pageset->numa_foreign, | |
504 | pageset->interleave_hit, | |
505 | pageset->local_node, | |
506 | pageset->other_node); | |
507 | #endif | |
508 | } | |
509 | seq_printf(m, | |
510 | "\n all_unreclaimable: %u" | |
511 | "\n prev_priority: %i" | |
512 | "\n temp_priority: %i" | |
513 | "\n start_pfn: %lu", | |
514 | zone->all_unreclaimable, | |
515 | zone->prev_priority, | |
516 | zone->temp_priority, | |
517 | zone->zone_start_pfn); | |
518 | spin_unlock_irqrestore(&zone->lock, flags); | |
519 | seq_putc(m, '\n'); | |
520 | } | |
521 | return 0; | |
522 | } | |
523 | ||
524 | struct seq_operations zoneinfo_op = { | |
525 | .start = frag_start, /* iterate over all zones. The same as in | |
526 | * fragmentation. */ | |
527 | .next = frag_next, | |
528 | .stop = frag_stop, | |
529 | .show = zoneinfo_show, | |
530 | }; | |
531 | ||
532 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | |
533 | { | |
2244b95a | 534 | unsigned long *v; |
f6ac2354 | 535 | struct page_state *ps; |
2244b95a | 536 | int i; |
f6ac2354 CL |
537 | |
538 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
539 | return NULL; | |
540 | ||
2244b95a CL |
541 | v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) |
542 | + sizeof(*ps), GFP_KERNEL); | |
543 | m->private = v; | |
544 | if (!v) | |
f6ac2354 | 545 | return ERR_PTR(-ENOMEM); |
2244b95a CL |
546 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
547 | v[i] = global_page_state(i); | |
548 | ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS); | |
f6ac2354 CL |
549 | get_full_page_state(ps); |
550 | ps->pgpgin /= 2; /* sectors -> kbytes */ | |
551 | ps->pgpgout /= 2; | |
2244b95a | 552 | return v + *pos; |
f6ac2354 CL |
553 | } |
554 | ||
555 | static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) | |
556 | { | |
557 | (*pos)++; | |
558 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
559 | return NULL; | |
560 | return (unsigned long *)m->private + *pos; | |
561 | } | |
562 | ||
563 | static int vmstat_show(struct seq_file *m, void *arg) | |
564 | { | |
565 | unsigned long *l = arg; | |
566 | unsigned long off = l - (unsigned long *)m->private; | |
567 | ||
568 | seq_printf(m, "%s %lu\n", vmstat_text[off], *l); | |
569 | return 0; | |
570 | } | |
571 | ||
572 | static void vmstat_stop(struct seq_file *m, void *arg) | |
573 | { | |
574 | kfree(m->private); | |
575 | m->private = NULL; | |
576 | } | |
577 | ||
578 | struct seq_operations vmstat_op = { | |
579 | .start = vmstat_start, | |
580 | .next = vmstat_next, | |
581 | .stop = vmstat_stop, | |
582 | .show = vmstat_show, | |
583 | }; | |
584 | ||
585 | #endif /* CONFIG_PROC_FS */ | |
586 |