]>
Commit | Line | Data |
---|---|---|
f6ac2354 CL |
1 | /* |
2 | * linux/mm/vmstat.c | |
3 | * | |
4 | * Manages VM statistics | |
5 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
2244b95a CL |
6 | * |
7 | * zoned VM statistics | |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | |
9 | * Christoph Lameter <christoph@lameter.com> | |
f6ac2354 CL |
10 | */ |
11 | ||
12 | #include <linux/config.h> | |
13 | #include <linux/mm.h> | |
2244b95a | 14 | #include <linux/module.h> |
f6ac2354 CL |
15 | |
16 | /* | |
17 | * Per-CPU page_state counters. Summing them across CPUs gives a | |
18 | * result that is unavoidably approximate - the counters can change | |
19 | * during and after the summation, which is done without any locking. | |
20 | */ | |
21 | DEFINE_PER_CPU(struct page_state, page_states) = {0}; | |
22 | ||
23 | atomic_t nr_pagecache = ATOMIC_INIT(0); | |
24 | EXPORT_SYMBOL(nr_pagecache); | |
25 | #ifdef CONFIG_SMP | |
26 | DEFINE_PER_CPU(long, nr_pagecache_local) = 0; | |
27 | #endif | |
28 | ||
29 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) | |
30 | { | |
31 | unsigned cpu; | |
32 | ||
33 | memset(ret, 0, nr * sizeof(unsigned long)); | |
34 | cpus_and(*cpumask, *cpumask, cpu_online_map); | |
35 | ||
36 | for_each_cpu_mask(cpu, *cpumask) { | |
37 | unsigned long *in; | |
38 | unsigned long *out; | |
39 | unsigned off; | |
40 | unsigned next_cpu; | |
41 | ||
42 | in = (unsigned long *)&per_cpu(page_states, cpu); | |
43 | ||
44 | next_cpu = next_cpu(cpu, *cpumask); | |
45 | if (likely(next_cpu < NR_CPUS)) | |
46 | prefetch(&per_cpu(page_states, next_cpu)); | |
47 | ||
48 | out = (unsigned long *)ret; | |
49 | for (off = 0; off < nr; off++) | |
50 | *out++ += *in++; | |
51 | } | |
52 | } | |
53 | ||
54 | void get_page_state_node(struct page_state *ret, int node) | |
55 | { | |
56 | int nr; | |
57 | cpumask_t mask = node_to_cpumask(node); | |
58 | ||
59 | nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); | |
60 | nr /= sizeof(unsigned long); | |
61 | ||
62 | __get_page_state(ret, nr+1, &mask); | |
63 | } | |
64 | ||
65 | void get_page_state(struct page_state *ret) | |
66 | { | |
67 | int nr; | |
68 | cpumask_t mask = CPU_MASK_ALL; | |
69 | ||
70 | nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); | |
71 | nr /= sizeof(unsigned long); | |
72 | ||
73 | __get_page_state(ret, nr + 1, &mask); | |
74 | } | |
75 | ||
76 | void get_full_page_state(struct page_state *ret) | |
77 | { | |
78 | cpumask_t mask = CPU_MASK_ALL; | |
79 | ||
80 | __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); | |
81 | } | |
82 | ||
83 | unsigned long read_page_state_offset(unsigned long offset) | |
84 | { | |
85 | unsigned long ret = 0; | |
86 | int cpu; | |
87 | ||
88 | for_each_online_cpu(cpu) { | |
89 | unsigned long in; | |
90 | ||
91 | in = (unsigned long)&per_cpu(page_states, cpu) + offset; | |
92 | ret += *((unsigned long *)in); | |
93 | } | |
94 | return ret; | |
95 | } | |
96 | ||
97 | void __mod_page_state_offset(unsigned long offset, unsigned long delta) | |
98 | { | |
99 | void *ptr; | |
100 | ||
101 | ptr = &__get_cpu_var(page_states); | |
102 | *(unsigned long *)(ptr + offset) += delta; | |
103 | } | |
104 | EXPORT_SYMBOL(__mod_page_state_offset); | |
105 | ||
/*
 * Add @delta to the unsigned long located @offset bytes into the current
 * CPU's page_states, with local interrupts disabled around the update.
 */
void mod_page_state_offset(unsigned long offset, unsigned long delta)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__mod_page_state_offset(offset, delta);
	local_irq_restore(irq_flags);
}
EXPORT_SYMBOL(mod_page_state_offset);
117 | ||
118 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | |
119 | unsigned long *free, struct pglist_data *pgdat) | |
120 | { | |
121 | struct zone *zones = pgdat->node_zones; | |
122 | int i; | |
123 | ||
124 | *active = 0; | |
125 | *inactive = 0; | |
126 | *free = 0; | |
127 | for (i = 0; i < MAX_NR_ZONES; i++) { | |
128 | *active += zones[i].nr_active; | |
129 | *inactive += zones[i].nr_inactive; | |
130 | *free += zones[i].free_pages; | |
131 | } | |
132 | } | |
133 | ||
134 | void get_zone_counts(unsigned long *active, | |
135 | unsigned long *inactive, unsigned long *free) | |
136 | { | |
137 | struct pglist_data *pgdat; | |
138 | ||
139 | *active = 0; | |
140 | *inactive = 0; | |
141 | *free = 0; | |
142 | for_each_online_pgdat(pgdat) { | |
143 | unsigned long l, m, n; | |
144 | __get_zone_counts(&l, &m, &n, pgdat); | |
145 | *active += l; | |
146 | *inactive += m; | |
147 | *free += n; | |
148 | } | |
149 | } | |
150 | ||
2244b95a CL |
151 | /* |
152 | * Manage combined zone based / global counters | |
153 | * | |
154 | * vm_stat contains the global counters, one atomic_long_t slot for | |
 | * each of the NR_VM_ZONE_STAT_ITEMS zoned statistics items. | |
155 | */ | |
156 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | |
157 | EXPORT_SYMBOL(vm_stat); | |
158 | ||
159 | #ifdef CONFIG_SMP | |
160 | ||
161 | #define STAT_THRESHOLD 32 /* a per-cpu differential beyond +/- this is folded via zone_page_state_add() and reset */ | |
162 | ||
163 | /* | |
164 | * Determine pointer to currently valid differential byte given a zone and | |
165 | * the item number. | |
166 | * | |
167 | * Preemption must be off | |
168 | */ | |
169 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |
170 | { | |
171 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | |
172 | } | |
173 | ||
174 | /* | |
175 | * For use when we know that interrupts are disabled. | |
176 | */ | |
177 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
178 | int delta) | |
179 | { | |
180 | s8 *p; | |
181 | long x; | |
182 | ||
183 | p = diff_pointer(zone, item); | |
184 | x = delta + *p; | |
185 | ||
186 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | |
187 | zone_page_state_add(x, zone, item); | |
188 | x = 0; | |
189 | } | |
190 | ||
191 | *p = x; | |
192 | } | |
193 | EXPORT_SYMBOL(__mod_zone_page_state); | |
194 | ||
195 | /* | |
196 | * For an unknown interrupt state | |
197 | */ | |
198 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
199 | int delta) | |
200 | { | |
201 | unsigned long flags; | |
202 | ||
203 | local_irq_save(flags); | |
204 | __mod_zone_page_state(zone, item, delta); | |
205 | local_irq_restore(flags); | |
206 | } | |
207 | EXPORT_SYMBOL(mod_zone_page_state); | |
208 | ||
209 | /* | |
210 | * Optimized increment and decrement functions. | |
211 | * | |
212 | * These are only for a single page and therefore can take a struct page * | |
213 | * argument instead of struct zone *. This allows the inclusion of the code | |
214 | * generated for page_zone(page) into the optimized functions. | |
215 | * | |
216 | * No overflow check is necessary and therefore the differential can be | |
217 | * incremented or decremented in place which may allow the compilers to | |
218 | * generate better code. | |
219 | * | |
220 | * The increment or decrement is known and therefore one boundary check can | |
221 | * be omitted. | |
222 | * | |
223 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | |
224 | * However, the code must first determine the differential location in a zone | |
225 | * based on the processor number and then inc/dec the counter. There is no | |
226 | * guarantee without disabling preemption that the processor will not change | |
227 | * in between and therefore the atomicity vs. interrupt cannot be exploited | |
228 | * in a useful way here. | |
229 | */ | |
230 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
231 | { | |
232 | struct zone *zone = page_zone(page); | |
233 | s8 *p = diff_pointer(zone, item); | |
234 | ||
235 | (*p)++; | |
236 | ||
237 | if (unlikely(*p > STAT_THRESHOLD)) { | |
238 | zone_page_state_add(*p, zone, item); | |
239 | *p = 0; | |
240 | } | |
241 | } | |
242 | EXPORT_SYMBOL(__inc_zone_page_state); | |
243 | ||
244 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
245 | { | |
246 | struct zone *zone = page_zone(page); | |
247 | s8 *p = diff_pointer(zone, item); | |
248 | ||
249 | (*p)--; | |
250 | ||
251 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
252 | zone_page_state_add(*p, zone, item); | |
253 | *p = 0; | |
254 | } | |
255 | } | |
256 | EXPORT_SYMBOL(__dec_zone_page_state); | |
257 | ||
258 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
259 | { | |
260 | unsigned long flags; | |
261 | struct zone *zone; | |
262 | s8 *p; | |
263 | ||
264 | zone = page_zone(page); | |
265 | local_irq_save(flags); | |
266 | p = diff_pointer(zone, item); | |
267 | ||
268 | (*p)++; | |
269 | ||
270 | if (unlikely(*p > STAT_THRESHOLD)) { | |
271 | zone_page_state_add(*p, zone, item); | |
272 | *p = 0; | |
273 | } | |
274 | local_irq_restore(flags); | |
275 | } | |
276 | EXPORT_SYMBOL(inc_zone_page_state); | |
277 | ||
278 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
279 | { | |
280 | unsigned long flags; | |
281 | struct zone *zone; | |
282 | s8 *p; | |
283 | ||
284 | zone = page_zone(page); | |
285 | local_irq_save(flags); | |
286 | p = diff_pointer(zone, item); | |
287 | ||
288 | (*p)--; | |
289 | ||
290 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
291 | zone_page_state_add(*p, zone, item); | |
292 | *p = 0; | |
293 | } | |
294 | local_irq_restore(flags); | |
295 | } | |
296 | EXPORT_SYMBOL(dec_zone_page_state); | |
297 | ||
298 | /* | |
299 | * Update the zone counters for one cpu. | |
300 | */ | |
301 | void refresh_cpu_vm_stats(int cpu) | |
302 | { | |
303 | struct zone *zone; | |
304 | int i; | |
305 | unsigned long flags; | |
306 | ||
307 | for_each_zone(zone) { | |
308 | struct per_cpu_pageset *pcp; | |
309 | ||
310 | pcp = zone_pcp(zone, cpu); | |
311 | ||
312 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
313 | if (pcp->vm_stat_diff[i]) { | |
314 | local_irq_save(flags); | |
315 | zone_page_state_add(pcp->vm_stat_diff[i], | |
316 | zone, i); | |
317 | pcp->vm_stat_diff[i] = 0; | |
318 | local_irq_restore(flags); | |
319 | } | |
320 | } | |
321 | } | |
322 | ||
323 | static void __refresh_cpu_vm_stats(void *dummy) | |
324 | { | |
325 | refresh_cpu_vm_stats(smp_processor_id()); | |
326 | } | |
327 | ||
328 | /* | |
329 | * Consolidate all counters. | |
330 | * | |
331 | * Note that the result is less inaccurate but still inaccurate | |
332 | * if concurrent processes are allowed to run. | |
333 | */ | |
334 | void refresh_vm_stats(void) | |
335 | { | |
336 | on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1); | |
337 | } | |
338 | EXPORT_SYMBOL(refresh_vm_stats); | |
339 | ||
340 | #endif | |
341 | ||
f6ac2354 CL |
342 | #ifdef CONFIG_PROC_FS |
343 | ||
344 | #include <linux/seq_file.h> | |
345 | ||
346 | static void *frag_start(struct seq_file *m, loff_t *pos) | |
347 | { | |
348 | pg_data_t *pgdat; | |
349 | loff_t node = *pos; | |
350 | for (pgdat = first_online_pgdat(); | |
351 | pgdat && node; | |
352 | pgdat = next_online_pgdat(pgdat)) | |
353 | --node; | |
354 | ||
355 | return pgdat; | |
356 | } | |
357 | ||
358 | static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) | |
359 | { | |
360 | pg_data_t *pgdat = (pg_data_t *)arg; | |
361 | ||
362 | (*pos)++; | |
363 | return next_online_pgdat(pgdat); | |
364 | } | |
365 | ||
/* seq_file stop hook: intentionally does nothing. */
static void frag_stop(struct seq_file *m, void *arg)
{
	return;
}
369 | ||
370 | /* | |
371 | * This walks the free areas for each zone. | |
372 | */ | |
373 | static int frag_show(struct seq_file *m, void *arg) | |
374 | { | |
375 | pg_data_t *pgdat = (pg_data_t *)arg; | |
376 | struct zone *zone; | |
377 | struct zone *node_zones = pgdat->node_zones; | |
378 | unsigned long flags; | |
379 | int order; | |
380 | ||
381 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | |
382 | if (!populated_zone(zone)) | |
383 | continue; | |
384 | ||
385 | spin_lock_irqsave(&zone->lock, flags); | |
386 | seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); | |
387 | for (order = 0; order < MAX_ORDER; ++order) | |
388 | seq_printf(m, "%6lu ", zone->free_area[order].nr_free); | |
389 | spin_unlock_irqrestore(&zone->lock, flags); | |
390 | seq_putc(m, '\n'); | |
391 | } | |
392 | return 0; | |
393 | } | |
394 | ||
395 | struct seq_operations fragmentation_op = { | |
396 | .start = frag_start, | |
397 | .next = frag_next, | |
398 | .stop = frag_stop, | |
399 | .show = frag_show, | |
400 | }; | |
401 | ||
/*
 * Names printed for each counter, indexed by position in the buffer that
 * vmstat_start() builds: the zoned counters first, then the page_state
 * fields.  zoneinfo_show() also uses the leading zoned entries.
 *
 * NOTE(review): the page state names presumably follow the declaration
 * order of struct page_state's fields - keep the two in sync.
 *
 * The table is never written, so both the pointers and the strings are
 * const.
 */
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_mapped",

	/* Page state */
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_page_table_pages",
	"nr_slab",

	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
	"nr_bounce",
};
460 | ||
461 | /* | |
462 | * Output information about zones in @pgdat. | |
 | * | |
 | * seq_file show hook; @arg is the node to report.  For every populated | |
 | * zone of that node this prints, in order: the page counts and | |
 | * watermarks, the zoned counters (named by the leading vmstat_text | |
 | * entries), the lowmem_reserve list, the per-cpu pagesets of online | |
 | * CPUs, and finally the reclaim state and start pfn.  Always returns 0. | |
463 | */ | |
464 | static int zoneinfo_show(struct seq_file *m, void *arg) | |
465 | { | |
466 | pg_data_t *pgdat = arg; | |
467 | struct zone *zone; | |
468 | struct zone *node_zones = pgdat->node_zones; | |
469 | unsigned long flags; | |
470 | ||
471 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { | |
472 | int i; | |
473 | ||
474 | if (!populated_zone(zone)) | |
475 | continue; | |
476 | ||
477 | spin_lock_irqsave(&zone->lock, flags); /* held until this zone's report is complete */ | |
478 | seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); | |
479 | seq_printf(m, | |
480 | "\n pages free %lu" | |
481 | "\n min %lu" | |
482 | "\n low %lu" | |
483 | "\n high %lu" | |
484 | "\n active %lu" | |
485 | "\n inactive %lu" | |
486 | "\n scanned %lu (a: %lu i: %lu)" | |
487 | "\n spanned %lu" | |
488 | "\n present %lu", | |
489 | zone->free_pages, | |
490 | zone->pages_min, | |
491 | zone->pages_low, | |
492 | zone->pages_high, | |
493 | zone->nr_active, | |
494 | zone->nr_inactive, | |
495 | zone->pages_scanned, | |
496 | zone->nr_scan_active, zone->nr_scan_inactive, | |
497 | zone->spanned_pages, | |
498 | zone->present_pages); | |
2244b95a CL |
499 | |
500 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) /* one line per zoned counter */ | |
501 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | |
502 | zone_page_state(zone, i)); | |
503 | ||
f6ac2354 CL |
504 | seq_printf(m, |
505 | "\n protection: (%lu", | |
506 | zone->lowmem_reserve[0]); | |
507 | for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) | |
508 | seq_printf(m, ", %lu", zone->lowmem_reserve[i]); | |
509 | seq_printf(m, | |
510 | ")" | |
511 | "\n pagesets"); | |
512 | for_each_online_cpu(i) { | |
513 | struct per_cpu_pageset *pageset; | |
514 | int j; | |
515 | ||
516 | pageset = zone_pcp(zone, i); | |
517 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { /* look for any non-empty pcp list */ | |
518 | if (pageset->pcp[j].count) | |
519 | break; | |
520 | } | |
521 | if (j == ARRAY_SIZE(pageset->pcp)) /* all lists empty: print nothing for this CPU */ | |
522 | continue; | |
523 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
524 | seq_printf(m, | |
525 | "\n cpu: %i pcp: %i" | |
526 | "\n count: %i" | |
527 | "\n high: %i" | |
528 | "\n batch: %i", | |
529 | i, j, | |
530 | pageset->pcp[j].count, | |
531 | pageset->pcp[j].high, | |
532 | pageset->pcp[j].batch); | |
533 | } | |
534 | #ifdef CONFIG_NUMA | |
535 | seq_printf(m, | |
536 | "\n numa_hit: %lu" | |
537 | "\n numa_miss: %lu" | |
538 | "\n numa_foreign: %lu" | |
539 | "\n interleave_hit: %lu" | |
540 | "\n local_node: %lu" | |
541 | "\n other_node: %lu", | |
542 | pageset->numa_hit, | |
543 | pageset->numa_miss, | |
544 | pageset->numa_foreign, | |
545 | pageset->interleave_hit, | |
546 | pageset->local_node, | |
547 | pageset->other_node); | |
548 | #endif | |
549 | } | |
550 | seq_printf(m, | |
551 | "\n all_unreclaimable: %u" | |
552 | "\n prev_priority: %i" | |
553 | "\n temp_priority: %i" | |
554 | "\n start_pfn: %lu", | |
555 | zone->all_unreclaimable, | |
556 | zone->prev_priority, | |
557 | zone->temp_priority, | |
558 | zone->zone_start_pfn); | |
559 | spin_unlock_irqrestore(&zone->lock, flags); | |
560 | seq_putc(m, '\n'); | |
561 | } | |
562 | return 0; | |
563 | } | |
564 | ||
565 | struct seq_operations zoneinfo_op = { | |
566 | .start = frag_start, /* iterate over all zones. The same as in | |
567 | * fragmentation. */ | |
568 | .next = frag_next, | |
569 | .stop = frag_stop, | |
570 | .show = zoneinfo_show, | |
571 | }; | |
572 | ||
573 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | |
574 | { | |
2244b95a | 575 | unsigned long *v; |
f6ac2354 | 576 | struct page_state *ps; |
2244b95a | 577 | int i; |
f6ac2354 CL |
578 | |
579 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
580 | return NULL; | |
581 | ||
2244b95a CL |
582 | v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) |
583 | + sizeof(*ps), GFP_KERNEL); | |
584 | m->private = v; | |
585 | if (!v) | |
f6ac2354 | 586 | return ERR_PTR(-ENOMEM); |
2244b95a CL |
587 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
588 | v[i] = global_page_state(i); | |
589 | ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS); | |
f6ac2354 CL |
590 | get_full_page_state(ps); |
591 | ps->pgpgin /= 2; /* sectors -> kbytes */ | |
592 | ps->pgpgout /= 2; | |
2244b95a | 593 | return v + *pos; |
f6ac2354 CL |
594 | } |
595 | ||
596 | static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) | |
597 | { | |
598 | (*pos)++; | |
599 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
600 | return NULL; | |
601 | return (unsigned long *)m->private + *pos; | |
602 | } | |
603 | ||
604 | static int vmstat_show(struct seq_file *m, void *arg) | |
605 | { | |
606 | unsigned long *l = arg; | |
607 | unsigned long off = l - (unsigned long *)m->private; | |
608 | ||
609 | seq_printf(m, "%s %lu\n", vmstat_text[off], *l); | |
610 | return 0; | |
611 | } | |
612 | ||
613 | static void vmstat_stop(struct seq_file *m, void *arg) | |
614 | { | |
615 | kfree(m->private); | |
616 | m->private = NULL; | |
617 | } | |
618 | ||
619 | struct seq_operations vmstat_op = { | |
620 | .start = vmstat_start, | |
621 | .next = vmstat_next, | |
622 | .stop = vmstat_stop, | |
623 | .show = vmstat_show, | |
624 | }; | |
625 | ||
626 | #endif /* CONFIG_PROC_FS */ | |
627 |