mm: throttle on IO only when there are too many dirty and writeback pages

author Michal Hocko <mhocko@suse.com>

Fri, 20 May 2016 23:57:03 +0000 (16:57 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
author Michal Hocko <mhocko@suse.com>
Fri, 20 May 2016 23:57:03 +0000 (16:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c

index 0c6317b7db38a086191a043b5ec480e4f5264e27..ed173b8ae8f24a317e11da29735f0a5faa9e361e 100644 (file)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -957,9 +957,8 @@ EXPORT_SYMBOL(congestion_wait);
   * jiffies for either a BDI to exit congestion of the given @sync queue
   * or a write to complete.
   *
- * In the absence of zone congestion, a short sleep or a cond_resched is
- * performed to yield the processor and to allow other subsystems to make
- * a forward progress.
+ * In the absence of zone congestion, cond_resched() is called to yield
+ * the processor if necessary but otherwise does not sleep.
   *
   * The return value is 0 if the sleep is for the full timeout. Otherwise,
   * it is the number of jiffies that were still remaining when the function
@@ -979,20 +978,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
          */
         if (atomic_read(&nr_wb_congested[sync]) == 0 ||
             !test_bit(ZONE_CONGESTED, &zone->flags)) {
-
-               /*
-                * Memory allocation/reclaim might be called from a WQ
-                * context and the current implementation of the WQ
-                * concurrency control doesn't recognize that a particular
-                * WQ is congested if the worker thread is looping without
-                * ever sleeping. Therefore we have to do a short sleep
-                * here rather than calling cond_resched().
-                */
-               if (current->flags & PF_WQ_WORKER)
-                       schedule_timeout_uninterruptible(1);
-               else
-                       cond_resched();
-
+               cond_resched();
                 /* In case we scheduled, work out time remaining */
                 ret = timeout - (jiffies - start);
                 if (ret < 0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index fa39efc3a692e7040924af6781678e6c1dda036f..f51c302126a19232bf17c925a9ac09e420725841 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3436,8 +3436,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
         for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
                                         ac->nodemask) {
                 unsigned long available;
+               unsigned long reclaimable;
  
-               available = zone_reclaimable_pages(zone);
+               available = reclaimable = zone_reclaimable_pages(zone);
                 available -= DIV_ROUND_UP(no_progress_loops * available,
                                           MAX_RECLAIM_RETRIES);
                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
@@ -3447,9 +3448,41 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                  * available?
                  */
                 if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
-                               ac->high_zoneidx, alloc_flags, available)) {
-                       /* Wait for some write requests to complete then retry */
-                       wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
+                               ac_classzone_idx(ac), alloc_flags, available)) {
+                       /*
+                        * If we didn't make any progress and have a lot of
+                        * dirty + writeback pages then we should wait for
+                        * an IO to complete to slow down the reclaim and
+                        * prevent a premature OOM.
+                        */
+                       if (!did_some_progress) {
+                               unsigned long writeback;
+                               unsigned long dirty;
+
+                               writeback = zone_page_state_snapshot(zone,
+                                                                    NR_WRITEBACK);
+                               dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
+
+                               if (2*(writeback + dirty) > reclaimable) {
+                                       congestion_wait(BLK_RW_ASYNC, HZ/10);
+                                       return true;
+                               }
+                       }
+
+                       /*
+                        * Memory allocation/reclaim might be called from a WQ
+                        * context and the current implementation of the WQ
+                        * concurrency control doesn't recognize that
+                        * a particular WQ is congested if the worker thread is
+                        * looping without ever sleeping. Therefore we have to
+                        * do a short sleep here rather than calling
+                        * cond_resched().
+                        */
+                       if (current->flags & PF_WQ_WORKER)
+                               schedule_timeout_uninterruptible(1);
+                       else
+                               cond_resched();
+
                         return true;
                 }
         }
author	Michal Hocko <mhocko@suse.com>
	Fri, 20 May 2016 23:57:03 +0000 (16:57 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
mm/backing-dev.c		patch | blob | blame | history
mm/page_alloc.c		patch | blob | blame | history