Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
Merge misc fixes from Andrew Morton:
 "19 patches.

  Subsystems affected by this patch series: mm (userfaultfd, migration,
  memblock, mempolicy, slub, secretmem, and thp), ocfs2, binfmt, vfs,
  and misc"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Andrej Shadura
  mm/thp: decrease nr_thps in file's mapping on THP split
  mm/secretmem: fix NULL page->mapping dereference in page_is_secretmem()
  vfs: check fd has read access in kernel_read_file_from_fd()
  elfcore: correct reference to CONFIG_UML
  mm, slub: fix incorrect memcg slab count for bulk free
  mm, slub: fix potential use-after-free in slab_debugfs_fops
  mm, slub: fix potential memoryleak in kmem_cache_open()
  mm, slub: fix mismatch between reconstructed freelist depth and cnt
  mm, slub: fix two bugs in slab_debug_trace_open()
  mm/mempolicy: do not allow illegal MPOL_F_NUMA_BALANCING | MPOL_LOCAL in mbind()
  memblock: check memory total_size
  ocfs2: mount fails with buffer overflow in strlen
  ocfs2: fix data corruption after conversion from inline format
  mm/migrate: fix CPUHP state to update node demotion order
  mm/migrate: add CPU hotplug to demotion #ifdef
  mm/migrate: optimize hotplug-time demotion order updates
  userfaultfd: fix a race between writeprotect and exit_mmap()
  mm/userfaultfd: selftests: fix memory corruption with thp enabled

17 files changed:
.mailmap
fs/kernel_read_file.c
fs/ocfs2/alloc.c
fs/ocfs2/super.c
fs/userfaultfd.c
include/linux/cpuhotplug.h
include/linux/elfcore.h
include/linux/memory.h
include/linux/secretmem.h
mm/huge_memory.c
mm/memblock.c
mm/mempolicy.c
mm/migrate.c
mm/page_ext.c
mm/slab.c
mm/slub.c
tools/testing/selftests/vm/userfaultfd.c

index 6e849110cb4e3ad65e2b01eb1b7d2b6fe96c3215..90e614d2bf7e3dd34c1718f89e4d7756e639fb30 100644
--- a/.mailmap
+++ b/.mailmap
@@ -33,6 +33,8 @@ Al Viro <viro@zenIV.linux.org.uk>
 Andi Kleen <ak@linux.intel.com> <ak@suse.de>
 Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
 Andreas Herrmann <aherrman@de.ibm.com>
+Andrej Shadura <andrew.shadura@collabora.co.uk>
+Andrej Shadura <andrew@shadura.me> <andrew@beldisplaytech.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Murray <amurray@thegoodpenguin.co.uk> <amurray@embedded-bits.co.uk>
 Andrew Murray <amurray@thegoodpenguin.co.uk> <andrew.murray@arm.com>
index 87aac4c72c37da2f82cad443279d66f0b5a2cf8c..1b07550485b96415f0a79a7029b10bf5d78c643e 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf,
        struct fd f = fdget(fd);
        int ret = -EBADF;
 
-       if (!f.file)
+       if (!f.file || !(f.file->f_mode & FMODE_READ))
                goto out;
 
        ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);
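
A rough userspace illustration of the case this check now rejects (hypothetical test program, not part of the patch; the module path is a placeholder and assumes a writable file): finit_module(2) hands its fd to kernel_read_file_from_fd(), so a descriptor opened without read access is refused up front with EBADF rather than only failing noisily deeper in the read path.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	/* Write-only fd: FMODE_READ is not set on it. */
	int fd = open("/tmp/example_module.ko", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (syscall(SYS_finit_module, fd, "", 0) < 0)
		perror("finit_module");	/* expected: EBADF with this fix */
	close(fd);
	return 0;
}
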
index f1cc8258d34a4ba7200cdc4dfe514d9ab671b9f2..5d9ae17bd443f209ce4098e1bc30ef737cfca9bd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
 int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                                         struct buffer_head *di_bh)
 {
-       int ret, i, has_data, num_pages = 0;
+       int ret, has_data, num_pages = 0;
        int need_free = 0;
        u32 bit_off, num;
        handle_t *handle;
@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
        struct ocfs2_alloc_context *data_ac = NULL;
-       struct page **pages = NULL;
-       loff_t end = osb->s_clustersize;
+       struct page *page = NULL;
        struct ocfs2_extent_tree et;
        int did_quota = 0;
 
        has_data = i_size_read(inode) ? 1 : 0;
 
        if (has_data) {
-               pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
-                               sizeof(struct page *), GFP_NOFS);
-               if (pages == NULL) {
-                       ret = -ENOMEM;
-                       mlog_errno(ret);
-                       return ret;
-               }
-
                ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
                if (ret) {
                        mlog_errno(ret);
-                       goto free_pages;
+                       goto out;
                }
        }
 
@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
        }
 
        if (has_data) {
-               unsigned int page_end;
+               unsigned int page_end = min_t(unsigned, PAGE_SIZE,
+                                                       osb->s_clustersize);
                u64 phys;
 
                ret = dquot_alloc_space_nodirty(inode,
@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                 */
                block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
 
-               /*
-                * Non sparse file systems zero on extend, so no need
-                * to do that now.
-                */
-               if (!ocfs2_sparse_alloc(osb) &&
-                   PAGE_SIZE < osb->s_clustersize)
-                       end = PAGE_SIZE;
-
-               ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
+               ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
+                                          &num_pages);
                if (ret) {
                        mlog_errno(ret);
                        need_free = 1;
@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                 * This should populate the 1st page for us and mark
                 * it up to date.
                 */
-               ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
+               ret = ocfs2_read_inline_data(inode, page, di_bh);
                if (ret) {
                        mlog_errno(ret);
                        need_free = 1;
                        goto out_unlock;
                }
 
-               page_end = PAGE_SIZE;
-               if (PAGE_SIZE > osb->s_clustersize)
-                       page_end = osb->s_clustersize;
-
-               for (i = 0; i < num_pages; i++)
-                       ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
-                                                pages[i], i > 0, &phys);
+               ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
+                                        &phys);
        }
 
        spin_lock(&oi->ip_lock);
@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
        }
 
 out_unlock:
-       if (pages)
-               ocfs2_unlock_and_free_pages(pages, num_pages);
+       if (page)
+               ocfs2_unlock_and_free_pages(&page, num_pages);
 
 out_commit:
        if (ret < 0 && did_quota)
@@ -7205,8 +7185,6 @@ out_commit:
 out:
        if (data_ac)
                ocfs2_free_alloc_context(data_ac);
-free_pages:
-       kfree(pages);
        return ret;
 }
 
index c86bd4e60e207dc5765164b6b9bd5cbf7b5c18c9..5c914ce9b3ac95636296e308cc2f61400f41ef55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
        }
 
        if (ocfs2_clusterinfo_valid(osb)) {
+               /*
+                * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+                * terminated, so make sure no overflow happens here by using
+                * memcpy. Destination strings will always be null terminated
+                * because osb is allocated using kzalloc.
+                */
                osb->osb_stackflags =
                        OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
-               strlcpy(osb->osb_cluster_stack,
+               memcpy(osb->osb_cluster_stack,
                       OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
-                      OCFS2_STACK_LABEL_LEN + 1);
+                      OCFS2_STACK_LABEL_LEN);
                if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
                        mlog(ML_ERROR,
                             "couldn't mount because of an invalid "
@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
                        status = -EINVAL;
                        goto bail;
                }
-               strlcpy(osb->osb_cluster_name,
+               memcpy(osb->osb_cluster_name,
                        OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
-                       OCFS2_CLUSTER_NAME_LEN + 1);
+                       OCFS2_CLUSTER_NAME_LEN);
        } else {
                /* The empty string is identical with classic tools that
                 * don't know about s_cluster_info. */
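
A minimal standalone sketch of the pattern the two hunks above switch to (not ocfs2 code; LABEL_LEN is a stand-in for OCFS2_STACK_LABEL_LEN): the on-disk ci_stack/ci_cluster fields are fixed width and need not be NUL terminated, and strlcpy() calls strlen() on its source, so it could read past the field. Copying exactly the field width into a zeroed, one-byte-larger destination is bounded and still yields a NUL-terminated string (in the kernel the zeroing comes from kzalloc()).

#include <string.h>

#define LABEL_LEN 4	/* stand-in for the on-disk field width */

static void copy_label(char dst[LABEL_LEN + 1], const char src[LABEL_LEN])
{
	memset(dst, 0, LABEL_LEN + 1);	/* destination stays NUL terminated */
	memcpy(dst, src, LABEL_LEN);	/* never reads beyond the on-disk field */
}
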
index 003f0d31743eb3cd95de8c8da21a4d0d731daf73..22bf14ab2d163b189ba3f9a3fedc5fe53ec7bccb 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1827,9 +1827,15 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
        if (mode_wp && mode_dontwake)
                return -EINVAL;
 
-       ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
-                                 uffdio_wp.range.len, mode_wp,
-                                 &ctx->mmap_changing);
+       if (mmget_not_zero(ctx->mm)) {
+               ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
+                                         uffdio_wp.range.len, mode_wp,
+                                         &ctx->mmap_changing);
+               mmput(ctx->mm);
+       } else {
+               return -ESRCH;
+       }
+
        if (ret)
                return ret;
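
The fix above follows the usual pattern for ioctls that can race with the owner task's exit_mmap(): take a temporary reference on mm_users, or report that the address space is already gone. A minimal sketch of that pattern (illustrative helper, not from the patch):

#include <linux/sched/mm.h>

/* Run op() against an mm only while its user count pins it alive. */
static int with_live_mm(struct mm_struct *mm, int (*op)(struct mm_struct *mm))
{
	int ret;

	if (!mmget_not_zero(mm))	/* mm_users already zero: task is exiting */
		return -ESRCH;
	ret = op(mm);			/* exit_mmap() cannot run while we hold the ref */
	mmput(mm);			/* drop the temporary reference */
	return ret;
}
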
 
index 832d8a74fa5960becb611c7e7adaec760c88c495..991911048857a8710a4ec3d2bd1c9f0faeccb6ee 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -72,6 +72,8 @@ enum cpuhp_state {
        CPUHP_SLUB_DEAD,
        CPUHP_DEBUG_OBJ_DEAD,
        CPUHP_MM_WRITEBACK_DEAD,
+       /* Must be after CPUHP_MM_VMSTAT_DEAD */
+       CPUHP_MM_DEMOTION_DEAD,
        CPUHP_MM_VMSTAT_DEAD,
        CPUHP_SOFTIRQ_DEAD,
        CPUHP_NET_MVNETA_DEAD,
@@ -240,6 +242,8 @@ enum cpuhp_state {
        CPUHP_AP_BASE_CACHEINFO_ONLINE,
        CPUHP_AP_ONLINE_DYN,
        CPUHP_AP_ONLINE_DYN_END         = CPUHP_AP_ONLINE_DYN + 30,
+       /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+       CPUHP_AP_MM_DEMOTION_ONLINE,
        CPUHP_AP_X86_HPET_ONLINE,
        CPUHP_AP_X86_KVM_CLK_ONLINE,
        CPUHP_AP_DTPM_CPU_ONLINE,
index 2aaa15779d50bb723b6c5a10922798c0a3bf3395..957ebec35aad01e73a70f464925b5af27becbf87 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
 #endif
 }
 
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
 /*
  * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the gate DSO contents.  Dumping its
index 7efc0a7c14c9db85bbfea80faaf531d49639e715..182c606adb060ff962f6f34e4bd76afb9ab63b50 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 #define register_hotmemory_notifier(nb)                register_memory_notifier(nb)
 #define unregister_hotmemory_notifier(nb)      unregister_memory_notifier(nb)
 #else
-#define hotplug_memory_notifier(fn, pri)       ({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+       return 0;
+}
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
 #define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
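
The old `({ 0; })` stub never referenced its arguments, so a notifier callback that exists only to be registered here was flagged as defined-but-unused whenever CONFIG_MEMORY_HOTPLUG=n, which is why callers such as mm/page_ext.c and mm/slab.c needed the #ifdefs that later hunks in this series remove. A rough illustration of a caller that now builds cleanly either way (hypothetical names):

#include <linux/init.h>
#include <linux/memory.h>
#include <linux/notifier.h>

static int example_mem_callback(struct notifier_block *nb,
				unsigned long action, void *arg)
{
	return notifier_from_errno(0);	/* would react to MEM_ONLINE/MEM_OFFLINE */
}

static int __init example_init(void)
{
	/*
	 * With the old macro, example_mem_callback() was never referenced
	 * when CONFIG_MEMORY_HOTPLUG=n and triggered "defined but not used".
	 * The static inline stub still "uses" and type-checks both arguments,
	 * and the compiler discards the dead call on its own.
	 */
	hotplug_memory_notifier(example_mem_callback, 100);
	return 0;
}
late_initcall(example_init);
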
index 21c3771e6a56b0cc4304370ebf9673273a0cf7dc..988528b5da438fa2e96f4166446361ee58958e02 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
        mapping = (struct address_space *)
                ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
 
-       if (mapping != page->mapping)
+       if (!mapping || mapping != page->mapping)
                return false;
 
        return mapping->a_ops == &secretmem_aops;
index 5e9ef0fc261e9747717f112a53bafd8f96df7393..92192cb086c79a2f675e9b412b938ada20cb216f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2700,12 +2700,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                if (mapping) {
                        int nr = thp_nr_pages(head);
 
-                       if (PageSwapBacked(head))
+                       if (PageSwapBacked(head)) {
                                __mod_lruvec_page_state(head, NR_SHMEM_THPS,
                                                        -nr);
-                       else
+                       } else {
                                __mod_lruvec_page_state(head, NR_FILE_THPS,
                                                        -nr);
+                               filemap_nr_thps_dec(mapping);
+                       }
                }
 
                __split_huge_page(page, list, end);
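
nr_thps is only consulted on open-for-write, where the page cache is dropped while the mapping still appears to hold file THPs (huge pages cannot be written back yet); if a split never decrements the counter, that truncation keeps firing on every later writer. A hedged paraphrase of that consumer in fs/open.c (not part of this patch, details simplified):

/* Paraphrased: drop read-only file THPs before the first writer proceeds. */
if ((filp->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
	truncate_pagecache(inode, 0);
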
index 5c3503c98b2f113ca3de1f0a76e863a316d5c8cd..b91df5cf54d346c59e856c3dd7fba6dc663ee39f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1692,7 +1692,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
        if (!size)
                return;
 
-       if (memblock.memory.cnt <= 1) {
+       if (!memblock_memory->total_size) {
                pr_warn("%s: No memory registered yet\n", __func__);
                return;
        }
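
For context on why the condition changed (field values simplified from the static initializer in mm/memblock.c, so treat this as an illustration): the memory type always starts with one empty placeholder region, so .cnt never drops below 1 and the old `cnt <= 1` test also fired on systems that had registered exactly one real region; a zero total_size is the unambiguous sign that nothing has been registered yet.

#include <linux/memblock.h>

/* Boot-time state of memblock.memory, before any memblock_add(): */
static struct memblock_type memory_at_boot = {
	.cnt		= 1,	/* one empty bootstrap entry */
	.total_size	= 0,	/* no memory registered yet */
};
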
index 1592b081c58ef6dd63c6f075ad24722f2be7cb5d..d12e0608fced235dc9137d0628437046299c7cfc 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -856,16 +856,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
                goto out;
        }
 
-       if (flags & MPOL_F_NUMA_BALANCING) {
-               if (new && new->mode == MPOL_BIND) {
-                       new->flags |= (MPOL_F_MOF | MPOL_F_MORON);
-               } else {
-                       ret = -EINVAL;
-                       mpol_put(new);
-                       goto out;
-               }
-       }
-
        ret = mpol_set_nodemask(new, nodes, scratch);
        if (ret) {
                mpol_put(new);
@@ -1458,7 +1448,11 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
                return -EINVAL;
        if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
                return -EINVAL;
-
+       if (*flags & MPOL_F_NUMA_BALANCING) {
+               if (*mode != MPOL_BIND)
+                       return -EINVAL;
+               *flags |= (MPOL_F_MOF | MPOL_F_MORON);
+       }
        return 0;
 }
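
Because sanitize_mpol_flags() is shared by set_mempolicy(2) and mbind(2), moving the check here is what makes mbind() reject the illegal combination named in the patch title. A userspace illustration (hypothetical test, not from the patch; link with -lnuma, and the fallback defines assume an older numaif.h):

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef MPOL_LOCAL
#define MPOL_LOCAL		4		/* from uapi/linux/mempolicy.h */
#endif
#ifndef MPOL_F_NUMA_BALANCING
#define MPOL_F_NUMA_BALANCING	(1 << 13)	/* from uapi/linux/mempolicy.h */
#endif

int main(void)
{
	unsigned long nodemask = 1;		/* node 0 */
	void *p = aligned_alloc(4096, 4096);

	if (!p)
		return 1;

	/* Previously accepted by mbind(); now fails with EINVAL. */
	if (mbind(p, 4096, MPOL_LOCAL | MPOL_F_NUMA_BALANCING, NULL, 0, 0))
		perror("mbind(MPOL_LOCAL | MPOL_F_NUMA_BALANCING)");

	/* The one supported combination keeps working. */
	if (mbind(p, 4096, MPOL_BIND | MPOL_F_NUMA_BALANCING,
		  &nodemask, sizeof(nodemask) * 8, 0))
		perror("mbind(MPOL_BIND | MPOL_F_NUMA_BALANCING)");
	return 0;
}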
 
index a6a7743ee98f002c7f0ff3d410a2f99392c321cd..1852d787e6ab697d35c9e5a6992d2634f8695720 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3066,7 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
 EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-#if defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_HOTPLUG_CPU)
 /* Disable reclaim-based migration. */
 static void __disable_all_migrate_targets(void)
 {
@@ -3208,25 +3208,6 @@ static void set_migration_target_nodes(void)
        put_online_mems();
 }
 
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs.  That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
-       set_migration_target_nodes();
-       return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
-{
-       set_migration_target_nodes();
-       return 0;
-}
-
 /*
  * This leaves migrate-on-reclaim transiently disabled between
  * the MEM_GOING_OFFLINE and MEM_OFFLINE events.  This runs
@@ -3239,8 +3220,18 @@ static int migration_offline_cpu(unsigned int cpu)
  * set_migration_target_nodes().
  */
 static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
-                                                unsigned long action, void *arg)
+                                                unsigned long action, void *_arg)
 {
+       struct memory_notify *arg = _arg;
+
+       /*
+        * Only update the node migration order when a node is
+        * changing status, like online->offline.  This avoids
+        * the overhead of synchronize_rcu() in most cases.
+        */
+       if (arg->status_change_nid < 0)
+               return notifier_from_errno(0);
+
        switch (action) {
        case MEM_GOING_OFFLINE:
                /*
@@ -3274,13 +3265,31 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
        return notifier_from_errno(0);
 }
 
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs.  That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
 static int __init migrate_on_reclaim_init(void)
 {
        int ret;
 
-       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
-                               migration_online_cpu,
-                               migration_offline_cpu);
+       ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
+                                       NULL, migration_offline_cpu);
        /*
         * In the unlikely case that this fails, the automatic
         * migration targets may become suboptimal for nodes
@@ -3288,9 +3297,12 @@ static int __init migrate_on_reclaim_init(void)
         * rare case, do not bother trying to do anything special.
         */
        WARN_ON(ret < 0);
+       ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
+                               migration_online_cpu, NULL);
+       WARN_ON(ret < 0);
 
        hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
        return 0;
 }
 late_initcall(migrate_on_reclaim_init);
-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_HOTPLUG_CPU */
index dfb91653d359eecb15cc57f5e36d702d3b4b13a9..2a52fd9ed464aa7d780dc50440b7f09989ad15ce 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -269,7 +269,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
        total_usage += table_size;
        return 0;
 }
-#ifdef CONFIG_MEMORY_HOTPLUG
+
 static void free_page_ext(void *addr)
 {
        if (is_vmalloc_addr(addr)) {
@@ -374,8 +374,6 @@ static int __meminit page_ext_callback(struct notifier_block *self,
        return notifier_from_errno(ret);
 }
 
-#endif
-
 void __init page_ext_init(void)
 {
        unsigned long pfn;
index d0f725637663000ce14259df2e16b7f08cb7e504..874b3f8fe80da2525e8f197a8543f63c9a665543 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1095,7 +1095,7 @@ static int slab_offline_cpu(unsigned int cpu)
        return 0;
 }
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_NUMA)
 /*
  * Drains freelist for a node on each slab cache, used for memory hot-remove.
  * Returns -EBUSY if all objects cannot be drained so that the node is not
@@ -1157,7 +1157,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
 out:
        return notifier_from_errno(ret);
 }
-#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_NUMA */
 
 /*
  * swap the static kmem_cache_node with kmalloced memory
index 3d2025f7163b295c45c2b26a8c5260414b2fc238..d8f77346376d871666ddf57f11db08b9c74f871d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1701,7 +1701,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
 }
 
 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
-                                          void **head, void **tail)
+                                          void **head, void **tail,
+                                          int *cnt)
 {
 
        void *object;
@@ -1728,6 +1729,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                        *head = object;
                        if (!*tail)
                                *tail = object;
+               } else {
+                       /*
+                        * Adjust the reconstructed freelist depth
+                        * accordingly if object's reuse is delayed.
+                        */
+                       --(*cnt);
                }
        } while (object != old_tail);
 
@@ -3413,7 +3420,9 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
        struct kmem_cache_cpu *c;
        unsigned long tid;
 
-       memcg_slab_free_hook(s, &head, 1);
+       /* memcg_slab_free_hook() is already called for bulk free. */
+       if (!tail)
+               memcg_slab_free_hook(s, &head, 1);
 redo:
        /*
         * Determine the currently cpus per cpu slab.
@@ -3480,7 +3489,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
         * With KASAN enabled slab_free_freelist_hook modifies the freelist
         * to remove objects, whose reuse must be delayed.
         */
-       if (slab_free_freelist_hook(s, &head, &tail))
+       if (slab_free_freelist_hook(s, &head, &tail, &cnt))
                do_slab_free(s, page, head, tail, cnt, addr);
 }
 
@@ -4203,8 +4212,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
        if (alloc_kmem_cache_cpus(s))
                return 0;
 
-       free_kmem_cache_nodes(s);
 error:
+       __kmem_cache_release(s);
        return -EINVAL;
 }
 
@@ -4880,13 +4889,15 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
                return 0;
 
        err = sysfs_slab_add(s);
-       if (err)
+       if (err) {
                __kmem_cache_release(s);
+               return err;
+       }
 
        if (s->flags & SLAB_STORE_USER)
                debugfs_slab_add(s);
 
-       return err;
+       return 0;
 }
 
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
@@ -6108,9 +6119,14 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
        struct kmem_cache *s = file_inode(filep)->i_private;
        unsigned long *obj_map;
 
+       if (!t)
+               return -ENOMEM;
+
        obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
-       if (!obj_map)
+       if (!obj_map) {
+               seq_release_private(inode, filep);
                return -ENOMEM;
+       }
 
        if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
                alloc = TRACK_ALLOC;
@@ -6119,6 +6135,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
 
        if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
                bitmap_free(obj_map);
+               seq_release_private(inode, filep);
                return -ENOMEM;
        }
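
The slab_debug_trace_open() hunks above follow a general rule for seq_file users: once __seq_open_private() has succeeded, any later failure inside ->open() must be unwound with seq_release_private(), because the VFS will not call ->release() for an open that returned an error. A generic sketch (hypothetical example_open(), not code from this patch):

#include <linux/seq_file.h>
#include <linux/slab.h>

struct example_priv {
	void *scratch;		/* freed by the matching ->release() */
};

static const struct seq_operations example_seq_ops;	/* assumed defined elsewhere */

static int example_open(struct inode *inode, struct file *filp)
{
	struct example_priv *priv =
		__seq_open_private(filp, &example_seq_ops, sizeof(*priv));

	if (!priv)
		return -ENOMEM;

	priv->scratch = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!priv->scratch) {
		seq_release_private(inode, filp);	/* undo __seq_open_private() */
		return -ENOMEM;
	}
	return 0;
}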
 
index 10ab56c2484ae6211e30b6e58b389bcb56130380..60aa1a4fc69b63e165d241b4c14bce5f20c8a13d 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
        uffd_test_ops->allocate_area((void **)&area_src);
        uffd_test_ops->allocate_area((void **)&area_dst);
 
-       uffd_test_ops->release_pages(area_src);
-       uffd_test_ops->release_pages(area_dst);
-
        userfaultfd_open(features);
 
        count_verify = malloc(nr_pages * sizeof(unsigned long long));
@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
                *(area_count(area_src, nr) + 1) = 1;
        }
 
+       /*
+        * After initialization of area_src, we must explicitly release pages
+        * for area_dst to make sure it's fully empty.  Otherwise we could have
+        * some area_dst pages erroneously initialized with zero pages,
+        * hence we could hit memory corruption later in the test.
+        *
+        * One example is when THP is globally enabled, above allocate_area()
+        * calls could have the two areas merged into a single VMA (as they
+        * will have the same VMA flags so they're mergeable).  When we
+        * initialize the area_src above, it's possible that some part of
+        * area_dst could have been faulted in via one huge THP that will be
+        * shared between area_src and area_dst.  It could cause part of
+        * area_dst not to be trapped by missing userfaults.
+        *
+        * This release_pages() will guarantee even if that happened, we'll
+        * proactively split the thp and drop any accidentally initialized
+        * pages within area_dst.
+        */
+       uffd_test_ops->release_pages(area_dst);
+
        pipefd = malloc(sizeof(int) * nr_cpus * 2);
        if (!pipefd)
                err("pipefd");