mm: introduce memalloc_noreclaim_{save,restore}

[mirror_ubuntu-jammy-kernel.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 2c25de46c58fd68aa8e9f3657998e40136d329c7..f9e450c6b6e414d61b00d5a61be9cdea3b773e1b 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1832,9 +1832,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
   * Note that start_page and end_pages are not aligned on a pageblock
   * boundary. If alignment is required, use move_freepages_block()
   */
-int move_freepages(struct zone *zone,
+static int move_freepages(struct zone *zone,
                           struct page *start_page, struct page *end_page,
-                         int migratetype)
+                         int migratetype, int *num_movable)
  {
         struct page *page;
         unsigned int order;
@@ -1851,6 +1851,9 @@ int move_freepages(struct zone *zone,
         VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
  #endif
  
+       if (num_movable)
+               *num_movable = 0;
+
         for (page = start_page; page <= end_page;) {
                 if (!pfn_valid_within(page_to_pfn(page))) {
                         page++;
@@ -1861,6 +1864,15 @@ int move_freepages(struct zone *zone,
                 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
  
                 if (!PageBuddy(page)) {
+                       /*
+                        * We assume that pages that could be isolated for
+                        * migration are movable. But we don't actually try
+                        * isolating, as that would be expensive.
+                        */
+                       if (num_movable &&
+                                       (PageLRU(page) || __PageMovable(page)))
+                               (*num_movable)++;
+
                         page++;
                         continue;
                 }
@@ -1876,7 +1888,7 @@ int move_freepages(struct zone *zone,
  }
  
  int move_freepages_block(struct zone *zone, struct page *page,
-                               int migratetype)
+                               int migratetype, int *num_movable)
  {
         unsigned long start_pfn, end_pfn;
         struct page *start_page, *end_page;
@@ -1893,7 +1905,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
         if (!zone_spans_pfn(zone, end_pfn))
                 return 0;
  
-       return move_freepages(zone, start_page, end_page, migratetype);
+       return move_freepages(zone, start_page, end_page, migratetype,
+                                                               num_movable);
  }
  
  static void change_pageblock_range(struct page *pageblock_page,
@@ -1943,28 +1956,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
  /*
   * This function implements actual steal behaviour. If order is large enough,
   * we can steal whole pageblock. If not, we first move freepages in this
- * pageblock and check whether half of pages are moved or not. If half of
- * pages are moved, we can change migratetype of pageblock and permanently
- * use it's pages as requested migratetype in the future.
+ * pageblock to our migratetype and determine how many already-allocated pages
+ * are there in the pageblock with a compatible migratetype. If at least half
+ * of pages are free or compatible, we can change migratetype of the pageblock
+ * itself, so pages freed in the future will be put on the correct free list.
   */
  static void steal_suitable_fallback(struct zone *zone, struct page *page,
-                                                         int start_type)
+                                       int start_type, bool whole_block)
  {
         unsigned int current_order = page_order(page);
-       int pages;
+       struct free_area *area;
+       int free_pages, movable_pages, alike_pages;
+       int old_block_type;
+
+       old_block_type = get_pageblock_migratetype(page);
+
+       /*
+        * This can happen due to races and we want to prevent broken
+        * highatomic accounting.
+        */
+       if (is_migrate_highatomic(old_block_type))
+               goto single_page;
  
         /* Take ownership for orders >= pageblock_order */
         if (current_order >= pageblock_order) {
                 change_pageblock_range(page, current_order, start_type);
-               return;
+               goto single_page;
+       }
+
+       /* We are not allowed to try stealing from the whole block */
+       if (!whole_block)
+               goto single_page;
+
+       free_pages = move_freepages_block(zone, page, start_type,
+                                               &movable_pages);
+       /*
+        * Determine how many pages are compatible with our allocation.
+        * For movable allocation, it's the number of movable pages which
+        * we just obtained. For other types it's a bit more tricky.
+        */
+       if (start_type == MIGRATE_MOVABLE) {
+               alike_pages = movable_pages;
+       } else {
+               /*
+                * If we are falling back a RECLAIMABLE or UNMOVABLE allocation
+                * to MOVABLE pageblock, consider all non-movable pages as
+                * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
+                * vice versa, be conservative since we can't distinguish the
+                * exact migratetype of non-movable pages.
+                */
+               if (old_block_type == MIGRATE_MOVABLE)
+                       alike_pages = pageblock_nr_pages
+                                               - (free_pages + movable_pages);
+               else
+                       alike_pages = 0;
         }
  
-       pages = move_freepages_block(zone, page, start_type);
+       /* moving whole block can fail due to zone boundary conditions */
+       if (!free_pages)
+               goto single_page;
  
-       /* Claim the whole block if over half of it is free */
-       if (pages >= (1 << (pageblock_order-1)) ||
+       /*
+        * If a sufficient number of pages in the block are either free or of
+        * comparable migratability as our allocation, claim the whole block.
+        */
+       if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
                         page_group_by_mobility_disabled)
                 set_pageblock_migratetype(page, start_type);
+
+       return;
+
+single_page:
+       area = &zone->free_area[current_order];
+       list_move(&page->lru, &area->free_list[start_type]);
  }
  
  /*
@@ -2034,7 +2098,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
             && !is_migrate_cma(mt)) {
                 zone->nr_reserved_highatomic += pageblock_nr_pages;
                 set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
-               move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+               move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
         }
  
  out_unlock:
@@ -2111,7 +2175,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                          * may increase.
                          */
                         set_pageblock_migratetype(page, ac->migratetype);
-                       ret = move_freepages_block(zone, page, ac->migratetype);
+                       ret = move_freepages_block(zone, page, ac->migratetype,
+                                                                       NULL);
                         if (ret) {
                                 spin_unlock_irqrestore(&zone->lock, flags);
                                 return ret;
@@ -2123,8 +2188,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
         return false;
  }
  
-/* Remove an element from the buddy allocator from the fallback list */
-static inline struct page *
+/*
+ * Try finding a free buddy page on the fallback list and put it on the free
+ * list of requested migratetype, possibly along with other pages from the same
+ * block, depending on fragmentation avoidance heuristics. Returns true if
+ * fallback was found so that __rmqueue_smallest() can grab it.
+ */
+static inline bool
  __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
  {
         struct free_area *area;
@@ -2145,32 +2215,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
  
                 page = list_first_entry(&area->free_list[fallback_mt],
                                                 struct page, lru);
-               if (can_steal && !is_migrate_highatomic_page(page))
-                       steal_suitable_fallback(zone, page, start_migratetype);
-
-               /* Remove the page from the freelists */
-               area->nr_free--;
-               list_del(&page->lru);
-               rmv_page_order(page);
  
-               expand(zone, page, order, current_order, area,
-                                       start_migratetype);
-               /*
-                * The pcppage_migratetype may differ from pageblock's
-                * migratetype depending on the decisions in
-                * find_suitable_fallback(). This is OK as long as it does not
-                * differ for MIGRATE_CMA pageblocks. Those can be used as
-                * fallback only via special __rmqueue_cma_fallback() function
-                */
-               set_pcppage_migratetype(page, start_migratetype);
+               steal_suitable_fallback(zone, page, start_migratetype,
+                                                               can_steal);
  
                 trace_mm_page_alloc_extfrag(page, order, current_order,
                         start_migratetype, fallback_mt);
  
-               return page;
+               return true;
         }
  
-       return NULL;
+       return false;
  }
  
  /*
@@ -2182,13 +2237,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
  {
         struct page *page;
  
+retry:
         page = __rmqueue_smallest(zone, order, migratetype);
         if (unlikely(!page)) {
                 if (migratetype == MIGRATE_MOVABLE)
                         page = __rmqueue_cma_fallback(zone, order);
  
-               if (!page)
-                       page = __rmqueue_fallback(zone, order, migratetype);
+               if (!page && __rmqueue_fallback(zone, order, migratetype))
+                       goto retry;
         }
  
         trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3227,14 +3283,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                 enum compact_priority prio, enum compact_result *compact_result)
  {
         struct page *page;
+       unsigned int noreclaim_flag;
  
         if (!order)
                 return NULL;
  
-       current->flags |= PF_MEMALLOC;
+       noreclaim_flag = memalloc_noreclaim_save();
         *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
                                                                         prio);
-       current->flags &= ~PF_MEMALLOC;
+       memalloc_noreclaim_restore(noreclaim_flag);
  
         if (*compact_result <= COMPACT_INACTIVE)
                 return NULL;
@@ -3381,12 +3438,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
  {
         struct reclaim_state reclaim_state;
         int progress;
+       unsigned int noreclaim_flag;
  
         cond_resched();
  
         /* We now go into synchronous reclaim */
         cpuset_memory_pressure_bump();
-       current->flags |= PF_MEMALLOC;
+       noreclaim_flag = memalloc_noreclaim_save();
         lockdep_set_current_reclaim_state(gfp_mask);
         reclaim_state.reclaimed_slab = 0;
         current->reclaim_state = &reclaim_state;
@@ -3396,7 +3454,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
  
         current->reclaim_state = NULL;
         lockdep_clear_current_reclaim_state();
-       current->flags &= ~PF_MEMALLOC;
+       memalloc_noreclaim_restore(noreclaim_flag);
  
         cond_resched();
  
@@ -3609,6 +3667,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                                 struct alloc_context *ac)
  {
         bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+       const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
         struct page *page = NULL;
         unsigned int alloc_flags;
         unsigned long did_some_progress;
@@ -3676,12 +3735,17 @@ retry_cpuset:
  
         /*
          * For costly allocations, try direct compaction first, as it's likely
-        * that we have enough base pages and don't need to reclaim. Don't try
-        * that for allocations that are allowed to ignore watermarks, as the
-        * ALLOC_NO_WATERMARKS attempt didn't yet happen.
+        * that we have enough base pages and don't need to reclaim. For non-
+        * movable high-order allocations, do that as well, as compaction will
+        * try to prevent permanent fragmentation by migrating from blocks of
+        * the same migratetype.
+        * Don't try this for allocations that are allowed to ignore
+        * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
          */
-       if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
-               !gfp_pfmemalloc_allowed(gfp_mask)) {
+       if (can_direct_reclaim &&
+                       (costly_order ||
+                          (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
+                       && !gfp_pfmemalloc_allowed(gfp_mask)) {
                 page = __alloc_pages_direct_compact(gfp_mask, order,
                                                 alloc_flags, ac,
                                                 INIT_COMPACT_PRIORITY,
@@ -3693,7 +3757,7 @@ retry_cpuset:
                  * Checks for costly allocations with __GFP_NORETRY, which
                  * includes THP page fault allocations
                  */
-               if (gfp_mask & __GFP_NORETRY) {
+               if (costly_order && (gfp_mask & __GFP_NORETRY)) {
                         /*
                          * If compaction is deferred for high-order allocations,
                          * it is because sync compaction recently failed. If
@@ -3774,7 +3838,7 @@ retry:
          * Do not retry costly high order allocations unless they are
          * __GFP_REPEAT
          */
-       if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+       if (costly_order && !(gfp_mask & __GFP_REPEAT))
                 goto nopage;
  
         if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,