struct mem_cgroup *root_mem_cgroup __read_mostly;
+/* The number of times we should retry reclaim failures before giving up. */
#define MEM_CGROUP_RECLAIM_RETRIES 5
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;

return 0;
}
-static void reclaim_high(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- gfp_t gfp_mask)
+static unsigned long reclaim_high(struct mem_cgroup *memcg,
+ unsigned int nr_pages,
+ gfp_t gfp_mask)
{
+ unsigned long nr_reclaimed = 0;
+
do {
if (page_counter_read(&memcg->memory) <=
READ_ONCE(memcg->memory.high))
continue;
memcg_memory_event(memcg, MEMCG_HIGH);
- try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
+ gfp_mask, true);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
+
+ return nr_reclaimed;
}
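
For illustration, here is a minimal userspace sketch (not kernel code) of the shape reclaim_high() now has: walk up the hierarchy, reclaim from each ancestor that is over its high limit, and hand the accumulated progress back to the caller. All names here (fake_memcg, fake_reclaim, and so on) are hypothetical stand-ins for the kernel's memcg machinery:

#include <stdio.h>

/* Hypothetical stand-in for a memory cgroup node. */
struct fake_memcg {
	struct fake_memcg *parent;
	unsigned long usage;	/* pages currently charged */
	unsigned long high;	/* memory.high in pages */
};

/* Pretend reclaim: frees at most nr_pages, returns pages actually freed. */
static unsigned long fake_reclaim(struct fake_memcg *m, unsigned long nr_pages)
{
	unsigned long freed = m->usage < nr_pages ? m->usage : nr_pages;

	m->usage -= freed;
	return freed;
}

/*
 * Mirrors the reshaped reclaim_high(): visit each ancestor that is over
 * its high limit and report total progress instead of returning void.
 */
static unsigned long fake_reclaim_high(struct fake_memcg *m,
				       unsigned long nr_pages)
{
	unsigned long nr_reclaimed = 0;

	do {
		if (m->usage <= m->high)
			continue;
		nr_reclaimed += fake_reclaim(m, nr_pages);
	} while ((m = m->parent));

	return nr_reclaimed;
}

int main(void)
{
	struct fake_memcg root = { NULL, 500, 1000 };	/* under its high */
	struct fake_memcg child = { &root, 300, 100 };	/* over its high */

	printf("reclaimed %lu pages\n", fake_reclaim_high(&child, 64));
	return 0;
}

Returning the accumulated total rather than void is what lets the caller below distinguish "reclaim is stalled" from "reclaim is working but we are still over the limit".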
void mem_cgroup_handle_over_high(void)
{
unsigned long penalty_jiffies;
unsigned long pflags;
+ unsigned long nr_reclaimed;
unsigned int nr_pages = current->memcg_nr_pages_over_high;
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *memcg;
+ bool in_retry = false;
if (likely(!nr_pages))
return;
memcg = get_mem_cgroup_from_mm(current->mm);
- reclaim_high(memcg, nr_pages, GFP_KERNEL);
current->memcg_nr_pages_over_high = 0;
+retry_reclaim:
+ /*
+ * The allocating task should reclaim at least the batch size, but for
+ * subsequent retries we only want to do what's necessary to prevent oom
+ * or breaching resource isolation.
+ *
+ * This is distinct from memory.max or page allocator behaviour because
+ * memory.high is currently batched, whereas memory.max and the page
+ * allocator run every time an allocation is made.
+ */
+ nr_reclaimed = reclaim_high(memcg,
+ in_retry ? SWAP_CLUSTER_MAX : nr_pages,
+ GFP_KERNEL);
+
/*
 * memory.high is breached and reclaim is unable to keep up. Throttle
 * allocators proactively to slow down excessive growth.
 */
penalty_jiffies = calculate_high_delay(memcg, nr_pages);
if (penalty_jiffies <= HZ / 100)
goto out;
+ /*
+ * If reclaim is making forward progress but we're still over
+ * memory.high, we want to encourage that rather than doing allocator
+ * throttling.
+ */
+ if (nr_reclaimed || nr_retries--) {
+ in_retry = true;
+ goto retry_reclaim;
+ }
+
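
Putting the pieces together, the retry policy is: reclaim a full batch first, then SWAP_CLUSTER_MAX-sized chunks on retry; keep looping while forward progress is made; burn the bounded retry budget only when reclaim stalls; and throttle the allocator only as a last resort. Below is a standalone userspace sketch of that control flow, with hypothetical do_reclaim(), usage, and high stand-ins for the kernel's internals (the real code checks penalty_jiffies rather than raw usage):

#include <stdio.h>

#define RECLAIM_RETRIES	5
#define RETRY_CHUNK	32		/* stand-in for SWAP_CLUSTER_MAX */

static unsigned long usage = 400;	/* hypothetical pages charged */
static const unsigned long high = 100;	/* hypothetical memory.high */

/* Pretend reclaim: frees at most nr_pages, may stall (return 0). */
static unsigned long do_reclaim(unsigned long nr_pages)
{
	unsigned long freed = usage > nr_pages ? nr_pages : usage;

	usage -= freed;
	return freed;
}

static void handle_over_high_model(unsigned long nr_pages)
{
	int nr_retries = RECLAIM_RETRIES;
	int in_retry = 0;
	unsigned long nr_reclaimed;

	for (;;) {
		/* Full batch first, small chunks on each retry. */
		nr_reclaimed = do_reclaim(in_retry ? RETRY_CHUNK : nr_pages);

		if (usage <= high)	/* penalty would be negligible */
			return;

		/*
		 * Forward progress defers throttling indefinitely; lack
		 * of progress burns one of the bounded retries. Note the
		 * short-circuit: nr_retries only decrements on a stall.
		 */
		if (nr_reclaimed || nr_retries--) {
			in_retry = 1;
			continue;
		}
		break;
	}

	printf("throttling allocator\n");	/* the killable sleep */
}

int main(void)
{
	handle_over_high_model(64);
	printf("usage now %lu\n", usage);
	return 0;
}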
/*
* If we exit early, we're guaranteed to die (since
* schedule_timeout_killable sets TASK_KILLABLE). This means we don't