]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - mm/ksm.c
mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC
[mirror_ubuntu-zesty-kernel.git] / mm / ksm.c
index 9ae6011a41f895d56942175814d1fe0158a591b7..099dfa45d596cb222f26a6f90e1255e2e8350ab7 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -223,6 +223,12 @@ static unsigned int ksm_thread_pages_to_scan = 100;
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
 
+/* Checksum of an empty (zeroed) page */
+static unsigned int zero_checksum __read_mostly;
+
+/* Whether to merge empty (zeroed) pages with actual zero pages */
+static bool ksm_use_zero_pages __read_mostly;
+
 #ifdef CONFIG_NUMA
 /* Zeroed when merging across nodes is not allowed */
 static unsigned int ksm_merge_across_nodes = 1;
@@ -872,7 +878,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
        if (!ptep)
                goto out_mn;
 
-       if (pte_write(*ptep) || pte_dirty(*ptep)) {
+       if (pte_write(*ptep) || pte_dirty(*ptep) ||
+           (pte_protnone(*ptep) && pte_savedwrite(*ptep))) {
                pte_t entry;
 
                swapped = PageSwapCache(page);
@@ -897,7 +904,11 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
                }
                if (pte_dirty(entry))
                        set_page_dirty(page);
-               entry = pte_mkclean(pte_wrprotect(entry));
+
+               if (pte_protnone(entry))
+                       entry = pte_mkclean(pte_clear_savedwrite(entry));
+               else
+                       entry = pte_mkclean(pte_wrprotect(entry));
                set_pte_at_notify(mm, addr, ptep, entry);
        }
        *orig_pte = *ptep;
@@ -926,6 +937,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
        struct mm_struct *mm = vma->vm_mm;
        pmd_t *pmd;
        pte_t *ptep;
+       pte_t newpte;
        spinlock_t *ptl;
        unsigned long addr;
        int err = -EFAULT;
@@ -950,12 +962,22 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
                goto out_mn;
        }
 
-       get_page(kpage);
-       page_add_anon_rmap(kpage, vma, addr, false);
+       /*
+        * No need to check ksm_use_zero_pages here: we can only have a
+        * zero_page here if ksm_use_zero_pages was enabled already.
+        */
+       if (!is_zero_pfn(page_to_pfn(kpage))) {
+               get_page(kpage);
+               page_add_anon_rmap(kpage, vma, addr, false);
+               newpte = mk_pte(kpage, vma->vm_page_prot);
+       } else {
+               newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
+                                              vma->vm_page_prot));
+       }
 
        flush_cache_page(vma, addr, pte_pfn(*ptep));
        ptep_clear_flush_notify(vma, addr, ptep);
-       set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+       set_pte_at_notify(mm, addr, ptep, newpte);
 
        page_remove_rmap(page, false);
        if (!page_mapped(page))
@@ -1467,6 +1489,34 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
                return;
        }
 
+       /*
+        * Same checksum as an empty page. We attempt to merge it with the
+        * appropriate zero page if the user enabled this via sysfs.
+        */
+       if (ksm_use_zero_pages && (checksum == zero_checksum)) {
+               struct vm_area_struct *vma;
+
+               /*
+                * find_mergeable_vma() walks the mm's VMA tree and so must
+                * run under mmap_sem; it may also return NULL if the VMA is
+                * gone or no longer mergeable, so check before merging.
+                */
+               down_read(&rmap_item->mm->mmap_sem);
+               vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+               if (vma) {
+                       err = try_to_merge_one_page(vma, page,
+                                       ZERO_PAGE(rmap_item->address));
+               } else {
+                       /* If the vma is out of date, we do not need to continue. */
+                       err = 0;
+               }
+               up_read(&rmap_item->mm->mmap_sem);
+               /*
+                * In case of failure, the page was not really empty, so we
+                * need to continue. Otherwise we're done.
+                */
+               if (!err)
+                       return;
+       }
        tree_rmap_item =
                unstable_tree_search_insert(rmap_item, page, &tree_page);
        if (tree_rmap_item) {
@@ -1725,8 +1764,12 @@ static int ksm_scan_thread(void *nothing)
                try_to_freeze();
 
                if (ksmd_should_run()) {
-                       schedule_timeout_interruptible(
-                               msecs_to_jiffies(ksm_thread_sleep_millisecs));
+                       if (ksm_thread_sleep_millisecs >= 1000)
+                               schedule_timeout_interruptible(
+                                       round_jiffies_relative(msecs_to_jiffies(ksm_thread_sleep_millisecs)));
+                       else
+                               schedule_timeout_interruptible(
+                                       msecs_to_jiffies(ksm_thread_sleep_millisecs));
                } else {
                        wait_event_freezable(ksm_thread_wait,
                                ksmd_should_run() || kthread_should_stop());
@@ -2233,6 +2276,28 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 KSM_ATTR(merge_across_nodes);
 #endif
 
+static ssize_t use_zero_pages_show(struct kobject *kobj,
+                               struct kobj_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%u\n", ksm_use_zero_pages);
+}
+static ssize_t use_zero_pages_store(struct kobject *kobj,
+                                  struct kobj_attribute *attr,
+                                  const char *buf, size_t count)
+{
+       int err;
+       bool value;
+
+       err = kstrtobool(buf, &value);
+       if (err)
+               return -EINVAL;
+
+       ksm_use_zero_pages = value;
+
+       return count;
+}
+KSM_ATTR(use_zero_pages);
+
 static ssize_t pages_shared_show(struct kobject *kobj,
                                 struct kobj_attribute *attr, char *buf)
 {
@@ -2290,6 +2355,7 @@ static struct attribute *ksm_attrs[] = {
 #ifdef CONFIG_NUMA
        &merge_across_nodes_attr.attr,
 #endif
+       &use_zero_pages_attr.attr,
        NULL,
 };
 
@@ -2304,6 +2370,11 @@ static int __init ksm_init(void)
        struct task_struct *ksm_thread;
        int err;
 
+       /* The correct value depends on page size and endianness */
+       zero_checksum = calc_checksum(ZERO_PAGE(0));
+       /* Default to false for backwards compatibility */
+       ksm_use_zero_pages = false;
+
        err = ksm_slab_init();
        if (err)
                goto out;