#include <asm/pgalloc.h>
#include <asm/tlb.h>
-#ifdef HAVE_GENERIC_MMU_GATHER
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
static bool tlb_next_batch(struct mmu_gather *tlb)
{
return true;
}
-void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
-
- /* Is it from 0 to ~0? */
- tlb->fullmm = !(start | (end+1));
- tlb->need_flush_all = 0;
- tlb->local.next = NULL;
- tlb->local.nr = 0;
- tlb->local.max = ARRAY_SIZE(tlb->__pages);
- tlb->active = &tlb->local;
- tlb->batch_count = 0;
-
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
-#endif
- tlb->page_size = 0;
-
- __tlb_reset_range(tlb);
-}
-
-void tlb_flush_mmu_free(struct mmu_gather *tlb)
+static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
batch->nr = 0;
tlb->active = &tlb->local;
}
-void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-/* tlb_finish_mmu
- * Called at the end of the shootdown operation to free up any resources
- * that were required.
- */
-void arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
+static void tlb_batch_list_free(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch, *next;
- if (force) {
- __tlb_reset_range(tlb);
- __tlb_adjust_range(tlb, start, end - start);
- }
-
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
for (batch = tlb->local.next; batch; batch = next) {
next = batch->next;
free_pages((unsigned long)batch, 0);
tlb->local.next = NULL;
}
-/* __tlb_remove_page
- * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
- * handling the additional races in SMP caused by other CPUs caching valid
- * mappings in their TLBs. Returns the number of free page slots left.
- * When out of page slots we must call tlb_flush_mmu().
- *returns true if the caller should flush.
- */
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
struct mmu_gather_batch *batch;
VM_BUG_ON(!tlb->end);
+
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
VM_WARN_ON(tlb->page_size != page_size);
+#endif
batch = tlb->active;
/*
return false;
}
-#endif /* HAVE_GENERIC_MMU_GATHER */
+#endif /* HAVE_MMU_GATHER_NO_GATHER */
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
/*
* Invalidate page-table caches used by hardware walkers. Then we still
* need to RCU-sched wait while freeing the pages because software
free_page((unsigned long)batch);
}
-void tlb_table_flush(struct mmu_gather *tlb)
+static void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb_batch_pages_flush(tlb);
+#endif
+}
+
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+
/**
* tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
* @tlb: the mmu_gather structure to initialize
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- arch_tlb_gather_mmu(tlb, mm, start, end);
+ tlb->mm = mm;
+
+ /* Is it from 0 to ~0? */
+ tlb->fullmm = !(start | (end+1));
+
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb->need_flush_all = 0;
+ tlb->local.next = NULL;
+ tlb->local.nr = 0;
+ tlb->local.max = ARRAY_SIZE(tlb->__pages);
+ tlb->active = &tlb->local;
+ tlb->batch_count = 0;
+#endif
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb->batch = NULL;
+#endif
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+ tlb->page_size = 0;
+#endif
+
+ __tlb_reset_range(tlb);
inc_tlb_flush_pending(tlb->mm);
}
+/**
+ * tlb_finish_mmu - finish an mmu_gather structure
+ * @tlb: the mmu_gather structure to finish
+ * @start: start of the region that will be removed from the page-table
+ * @end: end of the region that will be removed from the page-table
+ *
+ * Called at the end of the shootdown operation to free up any resources that
+ * were required.
+ */
void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
/*
* If there are parallel threads are doing PTE changes on same range
- * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
- * flush by batching, a thread has stable TLB entry can fail to flush
- * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
- * forcefully if we detect parallel PTE batching threads.
+ * under non-exclusive lock (e.g., mmap_sem read-side) but defer TLB
+ * flush by batching, one thread may end up seeing inconsistent PTEs
+ * and result in having stale TLB entries. So flush TLB forcefully
+ * if we detect parallel PTE batching threads.
+ *
+ * However, some syscalls, e.g. munmap(), may free page tables, this
+ * needs force flush everything in the given range. Otherwise this
+ * may result in having stale TLB entries for some architectures,
+ * e.g. aarch64, that could specify flush what level TLB.
*/
- bool force = mm_tlb_flush_nested(tlb->mm);
+ if (mm_tlb_flush_nested(tlb->mm)) {
+ /*
+ * The aarch64 yields better performance with fullmm by
+ * avoiding multiple CPUs spamming TLBI messages at the
+ * same time.
+ *
+ * On x86 non-fullmm doesn't yield significant difference
+ * against fullmm.
+ */
+ tlb->fullmm = 1;
+ __tlb_reset_range(tlb);
+ tlb->freed_tables = 1;
+ }
- arch_tlb_finish_mmu(tlb, start, end, force);
+ tlb_flush_mmu(tlb);
+
+ /* keep the page table cache within bounds */
+ check_pgt_cache();
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb_batch_list_free(tlb);
+#endif
dec_tlb_flush_pending(tlb->mm);
}