	return 0;
}
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t pmd;
-	pte_t *pte, *orig_pte;
-	unsigned long _addr = addr & PMD_MASK;
-	unsigned long offset;
-	spinlock_t *ptl;
-	bool numa = false;
-	int last_cpupid;
-
-	spin_lock(&mm->page_table_lock);
-	pmd = *pmdp;
-	if (pmd_numa(pmd)) {
-		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-		numa = true;
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	if (!numa)
-		return 0;
-
-	/* we're in a page fault so some vma must be in the range */
-	BUG_ON(!vma);
-	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-	VM_BUG_ON(offset >= PMD_SIZE);
-	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-	pte += offset >> PAGE_SHIFT;
-	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-		pte_t pteval = *pte;
-		struct page *page;
-		int page_nid = -1;
-		int target_nid;
-		bool migrated = false;
-		int flags = 0;
-
-		if (!pte_present(pteval))
-			continue;
-		if (!pte_numa(pteval))
-			continue;
-		if (addr >= vma->vm_end) {
-			vma = find_vma(mm, addr);
-			/* there's a pte present so there must be a vma */
-			BUG_ON(!vma);
-			BUG_ON(addr < vma->vm_start);
-		}
-		if (pte_numa(pteval)) {
-			pteval = pte_mknonnuma(pteval);
-			set_pte_at(mm, addr, pte, pteval);
-		}
-		page = vm_normal_page(vma, addr, pteval);
-		if (unlikely(!page))
-			continue;
-
-		/*
-		 * Avoid grouping on DSO/COW pages in specific and RO pages
-		 * in general, RO pages shouldn't hurt as much anyway since
-		 * they can be in shared cache state.
-		 */
-		if (!pte_write(pteval))
-			flags |= TNF_NO_GROUP;
-
-		last_cpupid = page_cpupid_last(page);
-		page_nid = page_to_nid(page);
-		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-		pte_unmap_unlock(pte, ptl);
-		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated) {
-				page_nid = target_nid;
-				flags |= TNF_MIGRATED;
-			}
-		} else {
-			put_page(page);
-		}
-
-		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, flags);
-
-		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-	}
-	pte_unmap_unlock(orig_pte, ptl);
-
-	return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long addr, pmd_t *pmdp)
-{
-	BUG();
-	return 0;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
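The hunk above deletes the batch handler for NUMA hinting faults on regular
(non-huge) PMDs. For orientation, its control flow condenses to the outline
below. This is an illustrative sketch, not kernel source: the locking dance,
VMA-boundary checks, and migration details are collapsed into comments.

	/* Outline of the deleted do_pmd_numa_page() (illustrative only) */
	unsigned long start = addr & PMD_MASK;

	/* 1: clear the NUMA bit on the pmd under mm->page_table_lock */
	if (pmd_numa(*pmdp))
		set_pmd_at(mm, start, pmdp, pmd_mknonnuma(*pmdp));

	/* 2: walk every pte under the pmd, holding the PTL */
	for (addr = start; addr < start + PMD_SIZE; addr += PAGE_SIZE) {
		/* skip !pte_present and !pte_numa entries;
		 * clear pte_numa with pte_mknonnuma()/set_pte_at();
		 * drop the PTL (migration can sleep), migrate the page if
		 * numa_migrate_prep() picked a target node, account one
		 * task_numa_fault() for the page, then retake the PTL
		 * before the next iteration */
	}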
		}
	}
-	if (pmd_numa(*pmd))
-		return do_pmd_numa_page(mm, vma, address, pmd);
+	/* THP should already have been handled */
+	BUG_ON(pmd_numa(*pmd));
	/*
	 * Use __pte_alloc instead of pte_alloc_map, because we can't
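The replacement asserts a new invariant: by this point in __handle_mm_fault(),
a transparent huge page fault has already been dispatched (the pmd_numa case
for THP is handled by do_huge_pmd_numa_page()), so a regular pmd that still
carries the NUMA bit indicates a bug. Schematically (a simplified sketch of
the dispatch order; the THP branch is abbreviated):

	if (pmd_trans_huge(*pmd)) {
		/* huge pmd: a NUMA-marked entry is handled up here via
		 * do_huge_pmd_numa_page(), and the fault returns early */
	}
	/* so a regular pmd can never carry the NUMA bit by now */
	BUG_ON(pmd_numa(*pmd));
	/* ...continue to pte allocation and handle_pte_fault()... */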
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
+		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;
-	bool all_same_cpupid = true;
-	int last_cpu = -1;
-	int last_pid = -1;
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
				page = vm_normal_page(vma, addr, oldpte);
				if (page) {
-					int cpupid = page_cpupid_last(page);
-					int this_cpu = cpupid_to_cpu(cpupid);
-					int this_pid = cpupid_to_pid(cpupid);
-
-					if (last_cpu == -1)
-						last_cpu = this_cpu;
-					if (last_pid == -1)
-						last_pid = this_pid;
-					if (last_cpu != this_cpu ||
-					    last_pid != this_pid) {
-						all_same_cpupid = false;
-					}
-
					if (!pte_numa(oldpte)) {
						ptent = pte_mknuma(ptent);
						updated = true;
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
-	*ret_all_same_cpupid = all_same_cpupid;
	return pages;
}
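With the cpupid sampling removed, the prot_numa branch of change_pte_range()
reduces to marking each mapped page's pte with pte_mknuma() and counting how
many were updated. Pieced together from the fragments above, it behaves
roughly like this (a condensed sketch: the !prot_numa protection-change path,
the ptep_modify_prot_start()/commit() pairing, and swap-entry handling are
left out):

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte) && prot_numa) {
			struct page *page = vm_normal_page(vma, addr, oldpte);

			/* mark only pages that are not already pte_numa */
			if (page && !pte_numa(oldpte)) {
				set_pte_at(mm, addr, pte, pte_mknuma(oldpte));
				pages++;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
	return pages;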
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-		pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-		pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
	pmd_t *pmd;
	unsigned long next;
	unsigned long pages = 0;
-	bool all_same_cpupid;
	pmd = pmd_offset(pud, addr);
	do {
		if (pmd_none_or_clear_bad(pmd))
			continue;
		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_cpupid);
+				 dirty_accountable, prot_numa);
		pages += this_pages;
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && this_pages && all_same_cpupid)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
	} while (pmd++, addr = next, addr != end);
	return pages;
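After this hunk, change_pmd_range() no longer promotes a pmd to pmd_numa when
every scanned page was last touched by the same cpu/pid pair; it only sums the
per-pte update counts returned by change_pte_range(). The loop settles into
roughly the following (a condensed sketch; the THP split/change_huge_pmd()
branch that precedes the pmd_none_or_clear_bad() check is omitted):

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		pages += change_pte_range(vma, pmd, addr, next, newprot,
					  dirty_accountable, prot_numa);
	} while (pmd++, addr = next, addr != end);
	return pages;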