// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>

#include <asm/tlbflush.h>

#define SEQ_PUT_DEC(str, val) \
		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
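/*
 * Note: the values fed to this SEQ_PUT_DEC() are page counts; shifting
 * left by (PAGE_SHIFT - 10) converts pages to KiB before
 * seq_put_decimal_ull_width() prints them in an 8-column field.
 */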
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long text, lib, swap, anon, file, shmem;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	anon = get_mm_counter(mm, MM_ANONPAGES);
	file = get_mm_counter(mm, MM_FILEPAGES);
	shmem = get_mm_counter(mm, MM_SHMEMPAGES);

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = anon + file + shmem;
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	/* split executable areas between text and lib */
	text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
	text = min(text, mm->exec_vm << PAGE_SHIFT);
	lib = (mm->exec_vm << PAGE_SHIFT) - text;

	swap = get_mm_counter(mm, MM_SWAPENTS);
	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
	SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
	seq_put_decimal_ull_width(m,
		    " kB\nVmExe:\t", text >> 10, 8);
	seq_put_decimal_ull_width(m,
		    " kB\nVmLib:\t", lib >> 10, 8);
	seq_put_decimal_ull_width(m,
		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
	seq_puts(m, " kB\n");
	hugetlb_report_usage(m, mm);
}
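/*
 * Illustrative note: the block printed above is the VmPeak:/VmSize:/
 * VmLck:/.../VmSwap: section of /proc/<pid>/status, with hugetlb usage
 * lines appended by hugetlb_report_usage().
 */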
unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}
unsigned long task_statm(struct mm_struct *mm,
			 unsigned long *shared, unsigned long *text,
			 unsigned long *data, unsigned long *resident)
{
	*shared = get_mm_counter(mm, MM_FILEPAGES) +
			get_mm_counter(mm, MM_SHMEMPAGES);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->data_vm + mm->stack_vm;
	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
	return mm->total_vm;
}
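/*
 * Illustrative note: these counters back /proc/<pid>/statm, which
 * reports size, resident, shared, text and data in pages.
 */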
#ifdef CONFIG_NUMA
/*
 * Save get_task_policy() for show_numa_map().
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *task = priv->task;

	task_lock(task);
	priv->task_mempolicy = get_task_policy(task);
	mpol_get(priv->task_mempolicy);
	task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif
static void *m_start(struct seq_file *m, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = *ppos;
	struct mm_struct *mm;
	struct vm_area_struct *vma;

	/* See m_next(). Zero at the start or after lseek. */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return ERR_PTR(-ESRCH);

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm)) {
		put_task_struct(priv->task);
		priv->task = NULL;
		return NULL;
	}

	if (mmap_read_lock_killable(mm)) {
		mmput(mm);
		put_task_struct(priv->task);
		priv->task = NULL;
		return ERR_PTR(-EINTR);
	}

	hold_task_mempolicy(priv);
	priv->tail_vma = get_gate_vma(mm);

	vma = find_vma(mm, last_addr);
	if (vma)
		return vma;

	return priv->tail_vma;
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *next, *vma = v;

	if (vma == priv->tail_vma)
		next = NULL;
	else if (vma->vm_next)
		next = vma->vm_next;
	else
		next = priv->tail_vma;

	*ppos = next ? next->vm_start : -1UL;

	return next;
}
static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct mm_struct *mm = priv->mm;

	if (!priv->task)
		return;

	release_task_mempolicy(priv);
	mmap_read_unlock(mm);
	mmput(mm);
	put_task_struct(priv->task);
	priv->task = NULL;
}
static int proc_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops, int psize)
{
	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

	if (!priv)
		return -ENOMEM;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		int err = PTR_ERR(priv->mm);

		seq_release_private(inode, file);
		return err;
	}

	return 0;
}
static int proc_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	return seq_release_private(inode, file);
}
static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct proc_maps_private));
}
/*
 * Indicate if the VMA is a stack for the given task; for
 * /proc/PID/maps that is the stack of the main task.
 */
static int is_stack(struct vm_area_struct *vma)
{
	/*
	 * We make no effort to guess what a given thread considers to be
	 * its "stack".  It's not even well-defined for programs written
	 * in multiple threads.
	 */
	return vma->vm_start <= vma->vm_mm->start_stack &&
		vma->vm_end >= vma->vm_mm->start_stack;
}
static void show_vma_header_prefix(struct seq_file *m,
				   unsigned long start, unsigned long end,
				   vm_flags_t flags, unsigned long long pgoff,
				   dev_t dev, unsigned long ino)
{
	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_put_hex_ll(m, NULL, start, 8);
	seq_put_hex_ll(m, "-", end, 8);
	seq_putc(m, ' ');
	seq_putc(m, flags & VM_READ ? 'r' : '-');
	seq_putc(m, flags & VM_WRITE ? 'w' : '-');
	seq_putc(m, flags & VM_EXEC ? 'x' : '-');
	seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
	seq_put_hex_ll(m, " ", pgoff, 8);
	seq_put_hex_ll(m, " ", MAJOR(dev), 2);
	seq_put_hex_ll(m, ":", MINOR(dev), 2);
	seq_put_decimal_ull(m, " ", ino);
	seq_putc(m, ' ');
}
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	start = vma->vm_start;
	end = vma->vm_end;
	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_file_path(m, file, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		if (is_stack(vma))
			name = "[stack]";
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}
static int show_map(struct seq_file *m, void *v)
{
	show_map_vma(m, v);
	return 0;
}

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
	.open		= pid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};
/*
 * Proportional Set Size(PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we adopt a 64bit
 * fixed-point pss counter to minimize division errors. So (pss >>
 * PSS_SHIFT) would be the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
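/*
 * Worked example (illustrative, assuming a 4K PAGE_SIZE): a page shared
 * by three processes adds (4096 << PSS_SHIFT) / 3 == 5592405 to each
 * sharer's fixed-point pss accumulator, i.e. about 1365 bytes once the
 * final value is shifted right by PSS_SHIFT for reporting.
 */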
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	unsigned long anonymous;
	unsigned long lazyfree;
	unsigned long anonymous_thp;
	unsigned long shmem_thp;
	unsigned long file_thp;
	unsigned long swap;
	unsigned long shared_hugetlb;
	unsigned long private_hugetlb;
	u64 pss;
	u64 pss_anon;
	u64 pss_file;
	u64 pss_shmem;
	u64 pss_locked;
	u64 swap_pss;
	bool check_shmem_swap;
};
static void smaps_page_accumulate(struct mem_size_stats *mss,
		struct page *page, unsigned long size, unsigned long pss,
		bool dirty, bool locked, bool private)
{
	mss->pss += pss;

	if (PageAnon(page))
		mss->pss_anon += pss;
	else if (PageSwapBacked(page))
		mss->pss_shmem += pss;
	else
		mss->pss_file += pss;

	if (locked)
		mss->pss_locked += pss;

	if (dirty || PageDirty(page)) {
		if (private)
			mss->private_dirty += size;
		else
			mss->shared_dirty += size;
	} else {
		if (private)
			mss->private_clean += size;
		else
			mss->shared_clean += size;
	}
}
static void smaps_account(struct mem_size_stats *mss, struct page *page,
		bool compound, bool young, bool dirty, bool locked)
{
	int i, nr = compound ? compound_nr(page) : 1;
	unsigned long size = nr * PAGE_SIZE;

	/*
	 * First accumulate quantities that depend only on |size| and the type
	 * of the compound page.
	 */
	if (PageAnon(page)) {
		mss->anonymous += size;
		if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
			mss->lazyfree += size;
	}

	mss->resident += size;
	/* Accumulate the size in pages that have been accessed. */
	if (young || page_is_young(page) || PageReferenced(page))
		mss->referenced += size;

	/*
	 * Then accumulate quantities that may depend on sharing, or that may
	 * differ page-by-page.
	 *
	 * page_count(page) == 1 guarantees the page is mapped exactly once.
	 * If any subpage of the compound page mapped with PTE it would elevate
	 * page_count().
	 */
	if (page_count(page) == 1) {
		smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
			locked, true);
		return;
	}
	for (i = 0; i < nr; i++, page++) {
		int mapcount = page_mapcount(page);
		unsigned long pss = PAGE_SIZE << PSS_SHIFT;

		if (mapcount >= 2)
			pss /= mapcount;
		smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
				      mapcount < 2);
	}
}
#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
			  __always_unused int depth, struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;

	mss->swap += shmem_partial_swap_usage(
			walk->vma->vm_file->f_mapping, addr, end);

	return 0;
}
#else
#define smaps_pte_hole		NULL
#endif /* CONFIG_SHMEM */
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	bool locked = !!(vma->vm_flags & VM_LOCKED);
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent)) {
			int mapcount;

			mss->swap += PAGE_SIZE;
			mapcount = swp_swapcount(swpent);
			if (mapcount >= 2) {
				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

				do_div(pss_delta, mapcount);
				mss->swap_pss += pss_delta;
			} else {
				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
			}
		} else if (is_pfn_swap_entry(swpent))
			page = pfn_swap_entry_to_page(swpent);
	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
							&& pte_none(*pte))) {
		page = xa_load(&vma->vm_file->f_mapping->i_pages,
						linear_page_index(vma, addr));
		if (xa_is_value(page))
			mss->swap += PAGE_SIZE;
		return;
	}

	if (!page)
		return;

	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	bool locked = !!(vma->vm_flags & VM_LOCKED);
	struct page *page = NULL;

	if (pmd_present(*pmd)) {
		/* FOLL_DUMP will return -EFAULT on huge zero page */
		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
	} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
		swp_entry_t entry = pmd_to_swp_entry(*pmd);

		if (is_migration_entry(entry))
			page = pfn_swap_entry_to_page(entry);
	}
	if (IS_ERR_OR_NULL(page))
		return;
	if (PageAnon(page))
		mss->anonymous_thp += HPAGE_PMD_SIZE;
	else if (PageSwapBacked(page))
		mss->shmem_thp += HPAGE_PMD_SIZE;
	else if (is_zone_device_page(page))
		/* pass */;
	else
		mss->file_thp += HPAGE_PMD_SIZE;
	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
}
#endif
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		smaps_pmd_entry(pmd, addr, walk);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmd))
		goto out;
	/*
	 * The mmap_lock held all the way back in m_start() is what
	 * keeps khugepaged out of here and from collapsing things
	 * in here.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		smaps_pte_entry(pte, addr, walk);
	pte_unmap_unlock(pte - 1, ptl);
out:
	cond_resched();
	return 0;
}
static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
	/*
	 * Don't forget to update Documentation/ on changes.
	 */
	static const char mnemonics[BITS_PER_LONG][2] = {
		/*
		 * In case if we meet a flag we don't know about.
		 */
		[0 ... (BITS_PER_LONG-1)] = "??",

		[ilog2(VM_READ)]	= "rd",
		[ilog2(VM_WRITE)]	= "wr",
		[ilog2(VM_EXEC)]	= "ex",
		[ilog2(VM_SHARED)]	= "sh",
		[ilog2(VM_MAYREAD)]	= "mr",
		[ilog2(VM_MAYWRITE)]	= "mw",
		[ilog2(VM_MAYEXEC)]	= "me",
		[ilog2(VM_MAYSHARE)]	= "ms",
		[ilog2(VM_GROWSDOWN)]	= "gd",
		[ilog2(VM_PFNMAP)]	= "pf",
		[ilog2(VM_LOCKED)]	= "lo",
		[ilog2(VM_IO)]		= "io",
		[ilog2(VM_SEQ_READ)]	= "sr",
		[ilog2(VM_RAND_READ)]	= "rr",
		[ilog2(VM_DONTCOPY)]	= "dc",
		[ilog2(VM_DONTEXPAND)]	= "de",
		[ilog2(VM_ACCOUNT)]	= "ac",
		[ilog2(VM_NORESERVE)]	= "nr",
		[ilog2(VM_HUGETLB)]	= "ht",
		[ilog2(VM_SYNC)]	= "sf",
		[ilog2(VM_ARCH_1)]	= "ar",
		[ilog2(VM_WIPEONFORK)]	= "wf",
		[ilog2(VM_DONTDUMP)]	= "dd",
#ifdef CONFIG_ARM64_BTI
		[ilog2(VM_ARM64_BTI)]	= "bt",
#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
		[ilog2(VM_SOFTDIRTY)]	= "sd",
#endif
		[ilog2(VM_MIXEDMAP)]	= "mm",
		[ilog2(VM_HUGEPAGE)]	= "hg",
		[ilog2(VM_NOHUGEPAGE)]	= "nh",
		[ilog2(VM_MERGEABLE)]	= "mg",
		[ilog2(VM_UFFD_MISSING)]= "um",
		[ilog2(VM_UFFD_WP)]	= "uw",
#ifdef CONFIG_ARM64_MTE
		[ilog2(VM_MTE)]		= "mt",
		[ilog2(VM_MTE_ALLOWED)]	= "",
#endif
#ifdef CONFIG_ARCH_HAS_PKEYS
		/* These come out via ProtectionKey: */
		[ilog2(VM_PKEY_BIT0)]	= "",
		[ilog2(VM_PKEY_BIT1)]	= "",
		[ilog2(VM_PKEY_BIT2)]	= "",
		[ilog2(VM_PKEY_BIT3)]	= "",
#if VM_PKEY_BIT4
		[ilog2(VM_PKEY_BIT4)]	= "",
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
		[ilog2(VM_UFFD_MINOR)]	= "ui",
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
	};
	size_t i;

	seq_puts(m, "VmFlags: ");
	for (i = 0; i < BITS_PER_LONG; i++) {
		if (!mnemonics[i][0])
			continue;
		if (vma->vm_flags & (1UL << i)) {
			seq_putc(m, mnemonics[i][0]);
			seq_putc(m, mnemonics[i][1]);
			seq_putc(m, ' ');
		}
	}
	seq_putc(m, '\n');
}
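/*
 * Illustrative example: a private, readable and executable file mapping
 * would typically show "VmFlags: rd ex mr mw me" in /proc/<pid>/smaps,
 * each two-letter code coming from the mnemonics table above.
 */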
#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (is_pfn_swap_entry(swpent))
			page = pfn_swap_entry_to_page(swpent);
	}
	if (page) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2)
			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
		else
			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
	}
	return 0;
}
#else
#define smaps_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */
static const struct mm_walk_ops smaps_walk_ops = {
	.pmd_entry		= smaps_pte_range,
	.hugetlb_entry		= smaps_hugetlb_range,
};

static const struct mm_walk_ops smaps_shmem_walk_ops = {
	.pmd_entry		= smaps_pte_range,
	.hugetlb_entry		= smaps_hugetlb_range,
	.pte_hole		= smaps_pte_hole,
};
/*
 * Gather mem stats from @vma with the indicated beginning
 * address @start, and keep them in @mss.
 *
 * Use vm_start of @vma as the beginning address if @start is 0.
 */
static void smap_gather_stats(struct vm_area_struct *vma,
		struct mem_size_stats *mss, unsigned long start)
{
	const struct mm_walk_ops *ops = &smaps_walk_ops;

	/* Invalid start */
	if (start >= vma->vm_end)
		return;

#ifdef CONFIG_SHMEM
	/* In case of smaps_rollup, reset the value from previous vma */
	mss->check_shmem_swap = false;
	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
		/*
		 * For shared or readonly shmem mappings we know that all
		 * swapped out pages belong to the shmem object, and we can
		 * obtain the swap value much more efficiently. For private
		 * writable mappings, we might have COW pages that are
		 * not affected by the parent swapped out pages of the shmem
		 * object, so we have to distinguish them during the page walk.
		 * Unless we know that the shmem object (or the part mapped by
		 * our VMA) has no swapped out pages at all.
		 */
		unsigned long shmem_swapped = shmem_swap_usage(vma);

		if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
					!(vma->vm_flags & VM_WRITE))) {
			mss->swap += shmem_swapped;
		} else {
			mss->check_shmem_swap = true;
			ops = &smaps_shmem_walk_ops;
		}
	}
#endif
	/* mmap_lock is held in m_start */
	if (!start)
		walk_page_vma(vma, ops, mss);
	else
		walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
}
#undef SEQ_PUT_DEC
#define SEQ_PUT_DEC(str, val) \
		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
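/*
 * Here, unlike the task_mem() variant of SEQ_PUT_DEC() above, the
 * accumulated mem_size_stats values are byte counts, so ">> 10"
 * converts bytes to KiB.
 */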
/* Show the contents common for smaps and smaps_rollup */
static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
	bool rollup_mode)
{
	SEQ_PUT_DEC("Rss: ", mss->resident);
	SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
	if (rollup_mode) {
		/*
		 * These are meaningful only for smaps_rollup, otherwise two of
		 * them are zero, and the other one is the same as Pss.
		 */
		SEQ_PUT_DEC(" kB\nPss_Anon: ",
			mss->pss_anon >> PSS_SHIFT);
		SEQ_PUT_DEC(" kB\nPss_File: ",
			mss->pss_file >> PSS_SHIFT);
		SEQ_PUT_DEC(" kB\nPss_Shmem: ",
			mss->pss_shmem >> PSS_SHIFT);
	}
	SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
	SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
	SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
	SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
	SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
	SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
	SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
	SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
	SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
	SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
	SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
	seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
				  mss->private_hugetlb >> 10, 7);
	SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
	SEQ_PUT_DEC(" kB\nSwapPss: ",
			mss->swap_pss >> PSS_SHIFT);
	SEQ_PUT_DEC(" kB\nLocked: ",
			mss->pss_locked >> PSS_SHIFT);
	seq_puts(m, " kB\n");
}
static int show_smap(struct seq_file *m, void *v)
{
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;

	memset(&mss, 0, sizeof(mss));

	smap_gather_stats(vma, &mss, 0);

	show_map_vma(m, vma);

	SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
	SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
	SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
	seq_puts(m, " kB\n");

	__show_smap(m, &mss, false);

	seq_printf(m, "THPeligible: %d\n",
		   transparent_hugepage_active(vma));

	if (arch_pkeys_enabled())
		seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
	show_smap_vma_flags(m, vma);

	return 0;
}
static int show_smaps_rollup(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct mem_size_stats mss;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long last_vma_end = 0;
	int ret = 0;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return -ESRCH;

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm)) {
		ret = -ESRCH;
		goto out_put_task;
	}

	memset(&mss, 0, sizeof(mss));

	ret = mmap_read_lock_killable(mm);
	if (ret)
		goto out_put_mm;

	hold_task_mempolicy(priv);

	for (vma = priv->mm->mmap; vma;) {
		smap_gather_stats(vma, &mss, 0);
		last_vma_end = vma->vm_end;

		/*
		 * Release mmap_lock temporarily if someone wants to
		 * access it for write request.
		 */
		if (mmap_lock_is_contended(mm)) {
			mmap_read_unlock(mm);
			ret = mmap_read_lock_killable(mm);
			if (ret) {
				release_task_mempolicy(priv);
				goto out_put_mm;
			}

			/*
			 * After dropping the lock, there are four cases to
			 * consider. See the following example for explanation.
			 *
			 *   +------+------+-----------+
			 *   | VMA1 | VMA2 | VMA3      |
			 *   +------+------+-----------+
			 *   |      |      |           |
			 *  4k     8k     16k         400k
			 *
			 * Suppose we drop the lock after reading VMA2 due to
			 * contention, then we get:
			 *
			 *	last_vma_end = 16k
			 *
			 * 1) VMA2 is freed, but VMA3 exists:
			 *
			 *    find_vma(mm, 16k - 1) will return VMA3.
			 *    In this case, just continue from VMA3.
			 *
			 * 2) VMA2 still exists:
			 *
			 *    find_vma(mm, 16k - 1) will return VMA2.
			 *    Iterate the loop like the original one.
			 *
			 * 3) No more VMAs can be found:
			 *
			 *    find_vma(mm, 16k - 1) will return NULL.
			 *    No more things to do, just break.
			 *
			 * 4) (last_vma_end - 1) is the middle of a vma (VMA'):
			 *
			 *    find_vma(mm, 16k - 1) will return VMA' whose range
			 *    contains last_vma_end.
			 *    Iterate VMA' from last_vma_end.
			 */
			vma = find_vma(mm, last_vma_end - 1);
			/* Case 3 above */
			if (!vma)
				break;

			/* Case 1 above */
			if (vma->vm_start >= last_vma_end)
				continue;

			/* Case 4 above */
			if (vma->vm_end > last_vma_end)
				smap_gather_stats(vma, &mss, last_vma_end);
		}
		/* Case 2 above */
		vma = vma->vm_next;
	}

	show_vma_header_prefix(m, priv->mm->mmap->vm_start,
			       last_vma_end, 0, 0, 0, 0);
	seq_pad(m, ' ');
	seq_puts(m, "[rollup]\n");

	__show_smap(m, &mss, true);

	release_task_mempolicy(priv);
	mmap_read_unlock(mm);

out_put_mm:
	mmput(mm);
out_put_task:
	put_task_struct(priv->task);
	priv->task = NULL;

	return ret;
}
static const struct seq_operations proc_pid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

static int smaps_rollup_open(struct inode *inode, struct file *file)
{
	int ret;
	struct proc_maps_private *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
	if (!priv)
		return -ENOMEM;

	ret = single_open(file, show_smaps_rollup, priv);
	if (ret)
		goto out_free;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		ret = PTR_ERR(priv->mm);

		single_release(inode, file);
		goto out_free;
	}

	return 0;

out_free:
	kfree(priv);
	return ret;
}

static int smaps_rollup_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	kfree(priv);
	return single_release(inode, file);
}

const struct file_operations proc_pid_smaps_operations = {
	.open		= pid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_pid_smaps_rollup_operations = {
	.open		= smaps_rollup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= smaps_rollup_release,
};
enum clear_refs_types {
	CLEAR_REFS_ALL = 1,
	CLEAR_REFS_ANON,
	CLEAR_REFS_MAPPED,
	CLEAR_REFS_SOFT_DIRTY,
	CLEAR_REFS_MM_HIWATER_RSS,
	CLEAR_REFS_LAST,
};

struct clear_refs_private {
	enum clear_refs_types type;
};
#ifdef CONFIG_MEM_SOFT_DIRTY

static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	struct page *page;

	if (!pte_write(pte))
		return false;
	if (!is_cow_mapping(vma->vm_flags))
		return false;
	if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
		return false;
	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return false;
	return page_maybe_dma_pinned(page);
}

static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/admin-guide/mm/soft-dirty.rst for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		pte_t old_pte;

		if (pte_is_pinned(vma, addr, ptent))
			return;
		old_pte = ptep_modify_prot_start(vma, addr, pte);
		ptent = pte_wrprotect(old_pte);
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t old, pmd = *pmdp;

	if (pmd_present(pmd)) {
		/* See comment in change_huge_pmd() */
		old = pmdp_invalidate(vma, addr, pmdp);
		if (pmd_dirty(old))
			pmd = pmd_mkdirty(pmd);
		if (pmd_young(old))
			pmd = pmd_mkyoung(pmd);

		pmd = pmd_wrprotect(pmd);
		pmd = pmd_clear_soft_dirty(pmd);

		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
		pmd = pmd_swp_clear_soft_dirty(pmd);
		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	}
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
}
#endif
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty_pmd(vma, addr, pmd);
			goto out;
		}

		if (!pmd_present(*pmd))
			goto out;

		page = pmd_page(*pmd);

		/* Clear accessed and referenced bits. */
		pmdp_test_and_clear_young(vma, addr, pmd);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
out:
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}
static int clear_refs_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	/*
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
	 * Writing 4 to /proc/pid/clear_refs affects all pages.
	 */
	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
		return 1;
	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
		return 1;
	return 0;
}

static const struct mm_walk_ops clear_refs_walk_ops = {
	.pmd_entry		= clear_refs_pte_range,
	.test_walk		= clear_refs_test_walk,
};
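/*
 * Illustrative userspace sequence (not part of this file): write "4" to
 * /proc/<pid>/clear_refs to clear the soft-dirty bits, let the task run,
 * then read /proc/<pid>/pagemap and test bit 55 of each entry to find
 * pages written since the clear.  See
 * Documentation/admin-guide/mm/soft-dirty.rst.
 */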
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct mmu_notifier_range range;
		struct clear_refs_private cp = {
			.type = type,
		};

		if (mmap_write_lock_killable(mm)) {
			count = -EINTR;
			goto out_mm;
		}
		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size to this mm's current rss value.
			 */
			reset_mm_hiwater_rss(mm);
			goto out_unlock;
		}

		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				vma->vm_flags &= ~VM_SOFTDIRTY;
				vma_set_page_prot(vma);
			}

			inc_tlb_flush_pending(mm);
			mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
						0, NULL, mm, 0, -1UL);
			mmu_notifier_invalidate_range_start(&range);
		}
		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
				&cp);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			mmu_notifier_invalidate_range_end(&range);
			flush_tlb_mm(mm);
			dec_tlb_flush_pending(mm);
		}
out_unlock:
		mmap_write_unlock(mm);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);

	return count;
}
const struct file_operations proc_clear_refs_operations = {
	.write		= clear_refs_write,
	.llseek		= noop_llseek,
};
struct pagemapread {
	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
	pagemap_entry_t *buffer;
	bool show_pfn;
};

#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
#define PAGEMAP_WALK_MASK	(PMD_MASK)

#define PM_ENTRY_BYTES		sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS		55
#define PM_PFRAME_MASK		GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY		BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE	BIT_ULL(56)
#define PM_UFFD_WP		BIT_ULL(57)
#define PM_FILE			BIT_ULL(61)
#define PM_SWAP			BIT_ULL(62)
#define PM_PRESENT		BIT_ULL(63)

#define PM_END_OF_BUFFER	1
static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
	return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
			  struct pagemapread *pm)
{
	pm->buffer[pm->pos++] = *pme;
	if (pm->pos >= pm->len)
		return PM_END_OF_BUFFER;
	return 0;
}
static int pagemap_pte_hole(unsigned long start, unsigned long end,
			    __always_unused int depth, struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr = start;
	int err = 0;

	while (addr < end) {
		struct vm_area_struct *vma = find_vma(walk->mm, addr);
		pagemap_entry_t pme = make_pme(0, 0);
		/* End of address space hole, which we mark as non-present. */
		unsigned long hole_end;

		if (vma)
			hole_end = min(end, vma->vm_start);
		else
			hole_end = end;

		for (; addr < hole_end; addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}

		if (!vma)
			break;

		/* Addresses in the VMA. */
		if (vma->vm_flags & VM_SOFTDIRTY)
			pme = make_pme(0, PM_SOFT_DIRTY);
		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}
	}
out:
	return err;
}
pte_to_pagemap_entry(struct pagemapread
*pm
,
1362 struct vm_area_struct
*vma
, unsigned long addr
, pte_t pte
)
1364 u64 frame
= 0, flags
= 0;
1365 struct page
*page
= NULL
;
1367 if (pte_present(pte
)) {
1369 frame
= pte_pfn(pte
);
1370 flags
|= PM_PRESENT
;
1371 page
= vm_normal_page(vma
, addr
, pte
);
1372 if (pte_soft_dirty(pte
))
1373 flags
|= PM_SOFT_DIRTY
;
1374 if (pte_uffd_wp(pte
))
1375 flags
|= PM_UFFD_WP
;
1376 } else if (is_swap_pte(pte
)) {
1378 if (pte_swp_soft_dirty(pte
))
1379 flags
|= PM_SOFT_DIRTY
;
1380 if (pte_swp_uffd_wp(pte
))
1381 flags
|= PM_UFFD_WP
;
1382 entry
= pte_to_swp_entry(pte
);
1384 frame
= swp_type(entry
) |
1385 (swp_offset(entry
) << MAX_SWAPFILES_SHIFT
);
1387 if (is_pfn_swap_entry(entry
))
1388 page
= pfn_swap_entry_to_page(entry
);
1391 if (page
&& !PageAnon(page
))
1393 if (page
&& page_mapcount(page
) == 1)
1394 flags
|= PM_MMAP_EXCLUSIVE
;
1395 if (vma
->vm_flags
& VM_SOFTDIRTY
)
1396 flags
|= PM_SOFT_DIRTY
;
1398 return make_pme(frame
, flags
);
1401 static int pagemap_pmd_range(pmd_t
*pmdp
, unsigned long addr
, unsigned long end
,
1402 struct mm_walk
*walk
)
1404 struct vm_area_struct
*vma
= walk
->vma
;
1405 struct pagemapread
*pm
= walk
->private;
1407 pte_t
*pte
, *orig_pte
;
1410 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1411 ptl
= pmd_trans_huge_lock(pmdp
, vma
);
1413 u64 flags
= 0, frame
= 0;
1415 struct page
*page
= NULL
;
1417 if (vma
->vm_flags
& VM_SOFTDIRTY
)
1418 flags
|= PM_SOFT_DIRTY
;
1420 if (pmd_present(pmd
)) {
1421 page
= pmd_page(pmd
);
1423 flags
|= PM_PRESENT
;
1424 if (pmd_soft_dirty(pmd
))
1425 flags
|= PM_SOFT_DIRTY
;
1426 if (pmd_uffd_wp(pmd
))
1427 flags
|= PM_UFFD_WP
;
1429 frame
= pmd_pfn(pmd
) +
1430 ((addr
& ~PMD_MASK
) >> PAGE_SHIFT
);
1432 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1433 else if (is_swap_pmd(pmd
)) {
1434 swp_entry_t entry
= pmd_to_swp_entry(pmd
);
1435 unsigned long offset
;
1438 offset
= swp_offset(entry
) +
1439 ((addr
& ~PMD_MASK
) >> PAGE_SHIFT
);
1440 frame
= swp_type(entry
) |
1441 (offset
<< MAX_SWAPFILES_SHIFT
);
1444 if (pmd_swp_soft_dirty(pmd
))
1445 flags
|= PM_SOFT_DIRTY
;
1446 if (pmd_swp_uffd_wp(pmd
))
1447 flags
|= PM_UFFD_WP
;
1448 VM_BUG_ON(!is_pmd_migration_entry(pmd
));
1449 page
= pfn_swap_entry_to_page(entry
);
1453 if (page
&& page_mapcount(page
) == 1)
1454 flags
|= PM_MMAP_EXCLUSIVE
;
1456 for (; addr
!= end
; addr
+= PAGE_SIZE
) {
1457 pagemap_entry_t pme
= make_pme(frame
, flags
);
1459 err
= add_to_pagemap(addr
, &pme
, pm
);
1463 if (flags
& PM_PRESENT
)
1465 else if (flags
& PM_SWAP
)
1466 frame
+= (1 << MAX_SWAPFILES_SHIFT
);
1473 if (pmd_trans_unstable(pmdp
))
1475 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1478 * We can assume that @vma always points to a valid one and @end never
1479 * goes beyond vma->vm_end.
1481 orig_pte
= pte
= pte_offset_map_lock(walk
->mm
, pmdp
, addr
, &ptl
);
1482 for (; addr
< end
; pte
++, addr
+= PAGE_SIZE
) {
1483 pagemap_entry_t pme
;
1485 pme
= pte_to_pagemap_entry(pm
, vma
, addr
, *pte
);
1486 err
= add_to_pagemap(addr
, &pme
, pm
);
1490 pte_unmap_unlock(orig_pte
, ptl
);
#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	struct vm_area_struct *vma = walk->vma;
	u64 flags = 0, frame = 0;
	int err = 0;
	pte_t pte;

	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	pte = huge_ptep_get(ptep);
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);

		if (!PageAnon(page))
			flags |= PM_FILE;

		if (page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		flags |= PM_PRESENT;
		if (pm->show_pfn)
			frame = pte_pfn(pte) +
				((addr & ~hmask) >> PAGE_SHIFT);
	}

	for (; addr != end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(frame, flags);

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			return err;
		if (pm->show_pfn && (flags & PM_PRESENT))
			frame++;
	}

	cond_resched();

	return err;
}
#else
#define pagemap_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */
static const struct mm_walk_ops pagemap_ops = {
	.pmd_entry	= pagemap_pmd_range,
	.pte_hole	= pagemap_pte_hole,
	.hugetlb_entry	= pagemap_hugetlb_range,
};
/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bit  55    pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
 * Bit  56    page exclusively mapped
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
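/*
 * Illustrative userspace sketch (not part of this file): looking up the
 * pagemap entry for one virtual address of the calling process.  Each
 * entry is 8 bytes and the entry index is vaddr / PAGE_SIZE:
 *
 *	int fd = open("/proc/self/pagemap", O_RDONLY);
 *	uint64_t entry;
 *	off_t off = (vaddr / sysconf(_SC_PAGESIZE)) * sizeof(entry);
 *
 *	if (pread(fd, &entry, sizeof(entry), off) == sizeof(entry)) {
 *		int present = (entry >> 63) & 1;
 *		int swapped = (entry >> 62) & 1;
 *		uint64_t pfn = entry & ((1ULL << 55) - 1); // zero without CAP_SYS_ADMIN
 *	}
 *	close(fd);
 */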
static ssize_t pagemap_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	struct pagemapread pm;
	unsigned long src;
	unsigned long svpfn;
	unsigned long start_vaddr;
	unsigned long end_vaddr;
	int ret = 0, copied = 0;

	if (!mm || !mmget_not_zero(mm))
		goto out;

	ret = -EINVAL;
	/* file position must be aligned */
	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
		goto out_mm;

	ret = 0;
	if (!count)
		goto out_mm;

	/* do not disclose physical addresses: attack vector */
	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);

	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
	pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
	ret = -ENOMEM;
	if (!pm.buffer)
		goto out_mm;

	src = *ppos;
	svpfn = src / PM_ENTRY_BYTES;
	end_vaddr = mm->task_size;

	/* watch out for wraparound */
	start_vaddr = end_vaddr;
	if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
		start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);

	/* Ensure the address is inside the task */
	if (start_vaddr > mm->task_size)
		start_vaddr = end_vaddr;

	/*
	 * The odds are that this will stop walking way
	 * before end_vaddr, because the length of the
	 * user buffer is tracked in "pm", and the walk
	 * will stop when we hit the end of the buffer.
	 */
	ret = 0;
	while (count && (start_vaddr < end_vaddr)) {
		int len;
		unsigned long end;

		pm.pos = 0;
		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
		/* overflow ? */
		if (end < start_vaddr || end > end_vaddr)
			end = end_vaddr;
		ret = mmap_read_lock_killable(mm);
		if (ret)
			goto out_free;
		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
		mmap_read_unlock(mm);
		start_vaddr = end;

		len = min(count, PM_ENTRY_BYTES * pm.pos);
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_free;
		}
		copied += len;
		buf += len;
		count -= len;
	}
	*ppos += copied;
	if (!ret || ret == PM_END_OF_BUFFER)
		ret = copied;

out_free:
	kfree(pm.buffer);
out_mm:
	mmput(mm);
out:
	return ret;
}

static int pagemap_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;
	return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);
	return 0;
}

const struct file_operations proc_pagemap_operations = {
	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
	.open		= pagemap_open,
	.release	= pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
	struct proc_maps_private proc_maps;
	struct numa_maps md;
};

static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
			unsigned long nr_pages)
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageSwapCache(page))
		md->swapcache += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (PageWriteback(page))
		md->writeback += nr_pages;

	if (PageAnon(page))
		md->anon += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
		unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pte_present(pte))
		return NULL;

	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
					      struct vm_area_struct *vma,
					      unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pmd_present(pmd))
		return NULL;

	page = vm_normal_page_pmd(vma, addr, pmd);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}
#endif

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md = walk->private;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte;
	pte_t *pte;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		struct page *page;

		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
		if (page)
			gather_stats(page, md, pmd_dirty(*pmd),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
#endif
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	do {
		struct page *page = can_gather_numa_stats(*pte, vma, addr);
		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);

	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	pte_t huge_pte = huge_ptep_get(pte);
	struct numa_maps *md;
	struct page *page;

	if (!pte_present(huge_pte))
		return 0;

	page = pte_page(huge_pte);

	md = walk->private;
	gather_stats(page, md, pte_dirty(huge_pte), 1);
	return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	return 0;
}
#endif

static const struct mm_walk_ops show_numa_ops = {
	.hugetlb_entry = gather_hugetlb_stats,
	.pmd_entry = gather_pte_stats,
};

/*
 * Display pages allocated per node and memory policy via /proc.
 */
static int show_numa_map(struct seq_file *m, void *v)
{
	struct numa_maps_private *numa_priv = m->private;
	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
	struct vm_area_struct *vma = v;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mempolicy *pol;
	char buffer[64];
	int nid;

	if (!mm)
		return 0;

	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
	} else {
		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
	}

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_puts(m, " file=");
		seq_file_path(m, file, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_puts(m, " heap");
	} else if (is_stack(vma)) {
		seq_puts(m, " stack");
	}

	if (is_vm_hugetlb_page(vma))
		seq_puts(m, " huge");

	/* mmap_lock is held by m_start */
	walk_page_vma(vma, &show_numa_ops, md);

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(nid, N_MEMORY)
		if (md->node[nid])
			seq_printf(m, " N%d=%lu", nid, md->node[nid]);

	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
	seq_putc(m, '\n');
	return 0;
}
static const struct seq_operations proc_pid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_numa_map,
};

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
	return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
				sizeof(struct numa_maps_private));
}

const struct file_operations proc_pid_numa_maps_operations = {
	.open		= pid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

#endif /* CONFIG_NUMA */