[pve-kernel-jessie.git] / 0002-Revert-mm-oom-rework-oom-detection.patch

From 6e2588df3dc3d1704eae939ed9c9425000f48069 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Wed, 4 Jan 2017 11:29:26 +0100
Subject: [PATCH 2/2] Revert "mm, oom: rework oom detection"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit c630ec12d831521b0566481eb56d7257b051911e.

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 include/linux/swap.h |   1 -
 mm/page_alloc.c      | 100 +++++----------------------------------------------
 mm/vmscan.c          |  25 ++++++++++---
 3 files changed, 29 insertions(+), 97 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1498c5a..d8ca2ea 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -318,7 +318,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 						struct vm_area_struct *vma);
 
 /* linux/mm/vmscan.c */
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f13b503..56319cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2988,77 +2988,6 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
 	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
 }
 
-/*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
- * Checks whether it makes sense to retry the reclaim to make a forward progress
- * for the given allocation request.
- * The reclaim feedback represented by did_some_progress (any progress during
- * the last reclaim round), pages_reclaimed (cumulative number of reclaimed
- * pages) and no_progress_loops (number of reclaim rounds without any progress
- * in a row) is considered as well as the reclaimable pages on the applicable
- * zone list (with a backoff mechanism which is a function of no_progress_loops).
- *
- * Returns true if a retry is viable or false to enter the oom path.
- */
-static inline bool
-should_reclaim_retry(gfp_t gfp_mask, unsigned order,
-		     struct alloc_context *ac, int alloc_flags,
-		     bool did_some_progress, unsigned long pages_reclaimed,
-		     int no_progress_loops)
-{
-	struct zone *zone;
-	struct zoneref *z;
-
-	/*
-	 * Make sure we converge to OOM if we cannot make any progress
-	 * several times in the row.
-	 */
-	if (no_progress_loops > MAX_RECLAIM_RETRIES)
-		return false;
-
-	if (order > PAGE_ALLOC_COSTLY_ORDER) {
-		if (pages_reclaimed >= (1<<order))
-			return false;
-
-		if (did_some_progress)
-			return true;
-	}
-
-	/*
-	 * Keep reclaiming pages while there is a chance this will lead somewhere.
-	 * If none of the target zones can satisfy our allocation request even
-	 * if all reclaimable pages are considered then we are screwed and have
-	 * to go OOM.
-	 */
-	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
-					ac->nodemask) {
-		unsigned long available;
-
-		available = zone_reclaimable_pages(zone);
-		available -= DIV_ROUND_UP(no_progress_loops * available,
-					  MAX_RECLAIM_RETRIES);
-		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
-
-		/*
-		 * Would the allocation succeed if we reclaimed the whole
-		 * available?
-		 */
-		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
-				ac->high_zoneidx, alloc_flags, available)) {
-			/* Wait for some write requests to complete then retry */
-			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
-			return true;
-		}
-	}
-
-	return false;
-}
-
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
@@ -3071,7 +3000,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
 	bool deferred_compaction = false;
 	int contended_compaction = COMPACT_CONTENDED_NONE;
-	int no_progress_loops = 0;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3223,24 +3151,14 @@ retry:
 	if (gfp_mask & __GFP_NORETRY)
 		goto noretry;
 
-	/*
-	 * Do not retry costly high order allocations unless they are
-	 * __GFP_REPEAT
-	 */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
-		goto noretry;
-
-	if (did_some_progress) {
-		no_progress_loops = 0;
-		pages_reclaimed += did_some_progress;
-	} else {
-		no_progress_loops++;
-	}
-
-	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
-				 did_some_progress > 0, pages_reclaimed,
-				 no_progress_loops))
+	/* Keep reclaiming pages as long as there is reasonable progress */
+	pages_reclaimed += did_some_progress;
+	if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
+	    ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
+		/* Wait for some write requests to complete then retry */
+		wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto retry;
+	}
 
 	/* Reclaim has failed us, start killing things */
 	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
@@ -3248,10 +3166,8 @@ retry:
 		goto got_pg;
 
 	/* Retry as long as the OOM killer is making progress */
-	if (did_some_progress) {
-		no_progress_loops = 0;
+	if (did_some_progress)
 		goto retry;
-	}
 
 noretry:
 	/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 56f902d..3597160 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -192,7 +192,7 @@ static bool sane_reclaim(struct scan_control *sc)
 }
 #endif
 
-unsigned long zone_reclaimable_pages(struct zone *zone)
+static unsigned long zone_reclaimable_pages(struct zone *zone)
 {
 	unsigned long nr;
 
@@ -2492,8 +2492,10 @@ static inline bool compaction_ready(struct zone *zone, int order)
  *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
+ *
+ * Returns true if a zone was reclaimable.
  */
-static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
@@ -2501,6 +2503,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 	unsigned long nr_soft_scanned;
 	gfp_t orig_mask;
 	enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+	bool reclaimable = false;
 
 	/*
 	 * If the number of buffer_heads in the machine exceeds the maximum
@@ -2565,10 +2568,17 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 						&nr_soft_scanned);
 			sc->nr_reclaimed += nr_soft_reclaimed;
 			sc->nr_scanned += nr_soft_scanned;
+			if (nr_soft_reclaimed)
+				reclaimable = true;
 			/* need some check for avoid more shrink_zone() */
 		}
 
-		shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
+		if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
+			reclaimable = true;
+
+		if (global_reclaim(sc) &&
+		    !reclaimable && zone_reclaimable(zone))
+			reclaimable = true;
 	}
 
 	/*
@@ -2576,6 +2586,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 	 * promoted it to __GFP_HIGHMEM.
 	 */
 	sc->gfp_mask = orig_mask;
+
+	return reclaimable;
 }
 
 /*
@@ -2600,6 +2612,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	int initial_priority = sc->priority;
 	unsigned long total_scanned = 0;
 	unsigned long writeback_threshold;
+	bool zones_reclaimable;
 retry:
 	delayacct_freepages_start();
 
@@ -2610,7 +2623,7 @@ retry:
 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
 				sc->priority);
 		sc->nr_scanned = 0;
-		shrink_zones(zonelist, sc);
+		zones_reclaimable = shrink_zones(zonelist, sc);
 
 		total_scanned += sc->nr_scanned;
 		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
@@ -2657,6 +2670,10 @@ retry:
 		goto retry;
 	}
 
+	/* Any of the zones still reclaimable?  Don't OOM. */
+	if (zones_reclaimable)
+		return 1;
+
 	return 0;
 }
 
-- 
2.1.4
Commit	Line	Data
d354e29e FG	1	From 6e2588df3dc3d1704eae939ed9c9425000f48069 Mon Sep 17 00:00:00 2001
	2	From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
	3	Date: Wed, 4 Jan 2017 11:29:26 +0100
	4	Subject: [PATCH 2/2] Revert "mm, oom: rework oom detection"
	5	MIME-Version: 1.0
	6	Content-Type: text/plain; charset=UTF-8
	7	Content-Transfer-Encoding: 8bit
	8
	9	This reverts commit c630ec12d831521b0566481eb56d7257b051911e.
	10
	11	Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
	12	---
	13	include/linux/swap.h | 1 -
	14	mm/page_alloc.c | 100 +++++----------------------------------------------
	15	mm/vmscan.c | 25 ++++++++++---
	16	3 files changed, 29 insertions(+), 97 deletions(-)
	17
	18	diff --git a/include/linux/swap.h b/include/linux/swap.h
	19	index 1498c5a..d8ca2ea 100644
	20	--- a/include/linux/swap.h
	21	+++ b/include/linux/swap.h
	22	@@ -318,7 +318,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
	23	struct vm_area_struct *vma);
	24
	25	/* linux/mm/vmscan.c */
	26	-extern unsigned long zone_reclaimable_pages(struct zone *zone);
	27	extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
	28	gfp_t gfp_mask, nodemask_t *mask);
	29	extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
	30	diff --git a/mm/page_alloc.c b/mm/page_alloc.c
	31	index f13b503..56319cf 100644
	32	--- a/mm/page_alloc.c
	33	+++ b/mm/page_alloc.c
	34	@@ -2988,77 +2988,6 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
	35	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
	36	}
	37
	38	-/*
	39	- * Maximum number of reclaim retries without any progress before OOM killer
	40	- * is consider as the only way to move forward.
	41	- */
	42	-#define MAX_RECLAIM_RETRIES 16
	43	-
	44	-/*
	45	- * Checks whether it makes sense to retry the reclaim to make a forward progress
	46	- * for the given allocation request.
	47	- * The reclaim feedback represented by did_some_progress (any progress during
	48	- * the last reclaim round), pages_reclaimed (cumulative number of reclaimed
	49	- * pages) and no_progress_loops (number of reclaim rounds without any progress
	50	- * in a row) is considered as well as the reclaimable pages on the applicable
	51	- * zone list (with a backoff mechanism which is a function of no_progress_loops).
	52	- *
	53	- * Returns true if a retry is viable or false to enter the oom path.
	54	- */
	55	-static inline bool
	56	-should_reclaim_retry(gfp_t gfp_mask, unsigned order,
	57	- struct alloc_context *ac, int alloc_flags,
	58	- bool did_some_progress, unsigned long pages_reclaimed,
	59	- int no_progress_loops)
	60	-{
	61	- struct zone *zone;
	62	- struct zoneref *z;
	63	-
	64	- /*
65	- * Make sure we converge to OOM if we cannot make any progress
66	- * several times in the row.
67	- */
68	- if (no_progress_loops > MAX_RECLAIM_RETRIES)
69	- return false;
70	-
71	- if (order > PAGE_ALLOC_COSTLY_ORDER) {
72	- if (pages_reclaimed >= (1<<order))
73	- return false;
74	-
75	- if (did_some_progress)
76	- return true;
77	- }
78	-
79	- /*
80	- * Keep reclaiming pages while there is a chance this will lead somewhere.
81	- * If none of the target zones can satisfy our allocation request even
82	- * if all reclaimable pages are considered then we are screwed and have
83	- * to go OOM.
84	- */
85	- for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
86	- ac->nodemask) {
87	- unsigned long available;
88	-
89	- available = zone_reclaimable_pages(zone);
90	- available -= DIV_ROUND_UP(no_progress_loops * available,
91	- MAX_RECLAIM_RETRIES);
92	- available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
93	-
94	- /*
95	- * Would the allocation succeed if we reclaimed the whole
96	- * available?
97	- */
98	- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
99	- ac->high_zoneidx, alloc_flags, available)) {
100	- /* Wait for some write requests to complete then retry */
101	- wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
102	- return true;
103	- }
104	- }
105	-
106	- return false;
107	-}
108	-
109	static inline struct page *
110	__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
111	struct alloc_context *ac)
112	@@ -3071,7 +3000,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
113	enum migrate_mode migration_mode = MIGRATE_ASYNC;
114	bool deferred_compaction = false;
115	int contended_compaction = COMPACT_CONTENDED_NONE;
116	- int no_progress_loops = 0;
117
118	/*
119	* In the slowpath, we sanity check order to avoid ever trying to
120	@@ -3223,24 +3151,14 @@ retry:
121	if (gfp_mask & __GFP_NORETRY)
122	goto noretry;
123
124	- /*
125	- * Do not retry costly high order allocations unless they are
126	- * __GFP_REPEAT
127	- */
128	- if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
129	- goto noretry;
130	-
131	- if (did_some_progress) {
132	- no_progress_loops = 0;
133	- pages_reclaimed += did_some_progress;
134	- } else {
135	- no_progress_loops++;
136	- }
137	-
138	- if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
139	- did_some_progress > 0, pages_reclaimed,
140	- no_progress_loops))
141	+ /* Keep reclaiming pages as long as there is reasonable progress */
142	+ pages_reclaimed += did_some_progress;
	143	+ if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
144	+ ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
145	+ /* Wait for some write requests to complete then retry */
146	+ wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
147	goto retry;
148	+ }
149
150	/* Reclaim has failed us, start killing things */
151	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
152	@@ -3248,10 +3166,8 @@ retry:
153	goto got_pg;
154
155	/* Retry as long as the OOM killer is making progress */
156	- if (did_some_progress) {
157	- no_progress_loops = 0;
158	+ if (did_some_progress)
159	goto retry;
160	- }
161
162	noretry:
163	/*
164	diff --git a/mm/vmscan.c b/mm/vmscan.c
165	index 56f902d..3597160 100644
166	--- a/mm/vmscan.c
167	+++ b/mm/vmscan.c
168	@@ -192,7 +192,7 @@ static bool sane_reclaim(struct scan_control *sc)
169	}
170	#endif
171
172	-unsigned long zone_reclaimable_pages(struct zone *zone)
173	+static unsigned long zone_reclaimable_pages(struct zone *zone)
174	{
175	unsigned long nr;
176
177	@@ -2492,8 +2492,10 @@ static inline bool compaction_ready(struct zone *zone, int order)
178	*
179	* If a zone is deemed to be full of pinned pages then just give it a light
180	* scan then give up on it.
181	+ *
182	+ * Returns true if a zone was reclaimable.
183	*/
	184	-static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
	185	+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
186	{
187	struct zoneref *z;
188	struct zone *zone;
	189	@@ -2501,6 +2503,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
190	unsigned long nr_soft_scanned;
191	gfp_t orig_mask;
192	enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
193	+ bool reclaimable = false;
194
195	/*
196	* If the number of buffer_heads in the machine exceeds the maximum
	197	@@ -2565,10 +2568,17 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
198	&nr_soft_scanned);
199	sc->nr_reclaimed += nr_soft_reclaimed;
200	sc->nr_scanned += nr_soft_scanned;
201	+ if (nr_soft_reclaimed)
202	+ reclaimable = true;
203	/* need some check for avoid more shrink_zone() */
204	}
205
206	- shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
207	+ if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
208	+ reclaimable = true;
209	+
210	+ if (global_reclaim(sc) &&
211	+ !reclaimable && zone_reclaimable(zone))
212	+ reclaimable = true;
213	}
214
215	/*
	216	@@ -2576,6 +2586,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
217	* promoted it to __GFP_HIGHMEM.
218	*/
219	sc->gfp_mask = orig_mask;
220	+
221	+ return reclaimable;
222	}
223
224	/*
225	@@ -2600,6 +2612,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
226	int initial_priority = sc->priority;
227	unsigned long total_scanned = 0;
228	unsigned long writeback_threshold;
229	+ bool zones_reclaimable;
230	retry:
231	delayacct_freepages_start();
232
233	@@ -2610,7 +2623,7 @@ retry:
234	vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
235	sc->priority);
236	sc->nr_scanned = 0;
237	- shrink_zones(zonelist, sc);
238	+ zones_reclaimable = shrink_zones(zonelist, sc);
239
240	total_scanned += sc->nr_scanned;
241	if (sc->nr_reclaimed >= sc->nr_to_reclaim)
242	@@ -2657,6 +2670,10 @@ retry:
243	goto retry;
244	}
245
246	+ /* Any of the zones still reclaimable? Don't OOM. */
247	+ if (zones_reclaimable)
248	+ return 1;
249	+
250	return 0;
251	}
252
253	--
254	2.1.4
255