mm: vmscan: flatten kswapd priority loop

author Mel Gorman <mgorman@suse.de>

Wed, 3 Jul 2013 22:01:45 +0000 (15:01 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)
author Mel Gorman <mgorman@suse.de>
Wed, 3 Jul 2013 22:01:45 +0000 (15:01 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 26ad67f1962cd79bc1d80d9e44156cb2a6e66e09..1c10ee51221537b003d281d2e73b311136c43b61 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
  /*
   * kswapd shrinks the zone by the number of pages required to reach
   * the high watermark.
+ *
+ * Returns true if kswapd scanned at least the requested number of pages to
+ * reclaim. This is used to determine if the scanning priority needs to be
+ * raised.
   */
-static void kswapd_shrink_zone(struct zone *zone,
+static bool kswapd_shrink_zone(struct zone *zone,
                                struct scan_control *sc,
                                unsigned long lru_pages)
  {
@@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone,
  
         if (nr_slab == 0 && !zone_reclaimable(zone))
                 zone->all_unreclaimable = 1;
+
+       return sc->nr_scanned >= sc->nr_to_reclaim;
  }
  
  /*
@@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone,
  static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                                         int *classzone_idx)
  {
-       bool pgdat_is_balanced = false;
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
         unsigned long nr_soft_reclaimed;
         unsigned long nr_soft_scanned;
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
+               .priority = DEF_PRIORITY,
                 .may_unmap = 1,
                 .may_swap = 1,
+               .may_writepage = !laptop_mode,
                 .order = order,
                 .target_mem_cgroup = NULL,
         };
-loop_again:
-       sc.priority = DEF_PRIORITY;
-       sc.nr_reclaimed = 0;
-       sc.may_writepage = !laptop_mode;
         count_vm_event(PAGEOUTRUN);
  
         do {
                 unsigned long lru_pages = 0;
+               bool raise_priority = true;
+
+               sc.nr_reclaimed = 0;
  
                 /*
                  * Scan in the highmem->dma direction for the highest
@@ -2762,10 +2768,8 @@ loop_again:
                         }
                 }
  
-               if (i < 0) {
-                       pgdat_is_balanced = true;
+               if (i < 0)
                         goto out;
-               }
  
                 for (i = 0; i <= end_zone; i++) {
                         struct zone *zone = pgdat->node_zones + i;
@@ -2832,8 +2836,16 @@ loop_again:
  
                         if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
                             !zone_balanced(zone, testorder,
-                                          balance_gap, end_zone))
-                               kswapd_shrink_zone(zone, &sc, lru_pages);
+                                          balance_gap, end_zone)) {
+                               /*
+                                * There should be no need to raise the
+                                * scanning priority if enough pages are
+                                * already being scanned that high
+                                * watermark would be met at 100% efficiency.
+                                */
+                               if (kswapd_shrink_zone(zone, &sc, lru_pages))
+                                       raise_priority = false;
+                       }
  
                         /*
                          * If we're getting trouble reclaiming, start doing
@@ -2868,46 +2880,29 @@ loop_again:
                                 pfmemalloc_watermark_ok(pgdat))
                         wake_up(&pgdat->pfmemalloc_wait);
  
-               if (pgdat_balanced(pgdat, order, *classzone_idx)) {
-                       pgdat_is_balanced = true;
-                       break;          /* kswapd: all done */
-               }
-
                 /*
-                * We do this so kswapd doesn't build up large priorities for
-                * example when it is freeing in parallel with allocators. It
-                * matches the direct reclaim path behaviour in terms of impact
-                * on zone->*_priority.
+                * Fragmentation may mean that the system cannot be rebalanced
+                * for high-order allocations in all zones. If twice the
+                * allocation size has been reclaimed and the zones are still
+                * not balanced then recheck the watermarks at order-0 to
+                * prevent kswapd reclaiming excessively. Assume that a process
+                * requesting a high-order page can direct reclaim/compact.
                  */
-               if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
-                       break;
-       } while (--sc.priority >= 0);
-
-out:
-       if (!pgdat_is_balanced) {
-               cond_resched();
+               if (order && sc.nr_reclaimed >= 2UL << order)
+                       order = sc.order = 0;
  
-               try_to_freeze();
+               /* Check if kswapd should be suspending */
+               if (try_to_freeze() || kthread_should_stop())
+                       break;
  
                 /*
-                * Fragmentation may mean that the system cannot be
-                * rebalanced for high-order allocations in all zones.
-                * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
-                * it means the zones have been fully scanned and are still
-                * not balanced. For high-order allocations, there is
-                * little point trying all over again as kswapd may
-                * infinite loop.
-                *
-                * Instead, recheck all watermarks at order-0 as they
-                * are the most important. If watermarks are ok, kswapd will go
-                * back to sleep. High-order users can still perform direct
-                * reclaim if they wish.
+                * Raise priority if scanning rate is too low or there was no
+                * progress in reclaiming pages
                  */
-               if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
-                       order = sc.order = 0;
-
-               goto loop_again;
-       }
+               if (raise_priority || !sc.nr_reclaimed)
+                       sc.priority--;
+       } while (sc.priority >= 0 &&
+                !pgdat_balanced(pgdat, order, *classzone_idx));
  
         /*
          * If kswapd was reclaiming at a higher order, it has the option of
@@ -2936,6 +2931,7 @@ out:
                         compact_pgdat(pgdat, order);
         }
  
+out:
         /*
          * Return the order we were reclaiming at so prepare_kswapd_sleep()
          * makes a decision on the order we were last reclaiming at. However,
author	Mel Gorman <mgorman@suse.de>
	Wed, 3 Jul 2013 22:01:45 +0000 (15:01 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)