Merge branch 'akpm' (patches from Andrew)

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
diff --git a/.mailmap b/.mailmap

index 6e849110cb4e3ad65e2b01eb1b7d2b6fe96c3215..90e614d2bf7e3dd34c1718f89e4d7756e639fb30 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -33,6 +33,8 @@ Al Viro <viro@zenIV.linux.org.uk>
  Andi Kleen <ak@linux.intel.com> <ak@suse.de>
  Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
  Andreas Herrmann <aherrman@de.ibm.com>
+Andrej Shadura <andrew.shadura@collabora.co.uk>
+Andrej Shadura <andrew@shadura.me> <andrew@beldisplaytech.com>
  Andrew Morton <akpm@linux-foundation.org>
  Andrew Murray <amurray@thegoodpenguin.co.uk> <amurray@embedded-bits.co.uk>
  Andrew Murray <amurray@thegoodpenguin.co.uk> <andrew.murray@arm.com>
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c

index 87aac4c72c37da2f82cad443279d66f0b5a2cf8c..1b07550485b96415f0a79a7029b10bf5d78c643e 100644 (file)
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf,
         struct fd f = fdget(fd);
         int ret = -EBADF;
  
-       if (!f.file)
+       if (!f.file || !(f.file->f_mode & FMODE_READ))
                 goto out;
  
         ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c

index f1cc8258d34a4ba7200cdc4dfe514d9ab671b9f2..5d9ae17bd443f209ce4098e1bc30ef737cfca9bd 100644 (file)
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
  int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                                          struct buffer_head *di_bh)
  {
-       int ret, i, has_data, num_pages = 0;
+       int ret, has_data, num_pages = 0;
         int need_free = 0;
         u32 bit_off, num;
         handle_t *handle;
@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
         struct ocfs2_alloc_context *data_ac = NULL;
-       struct page **pages = NULL;
-       loff_t end = osb->s_clustersize;
+       struct page *page = NULL;
         struct ocfs2_extent_tree et;
         int did_quota = 0;
  
         has_data = i_size_read(inode) ? 1 : 0;
  
         if (has_data) {
-               pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
-                               sizeof(struct page *), GFP_NOFS);
-               if (pages == NULL) {
-                       ret = -ENOMEM;
-                       mlog_errno(ret);
-                       return ret;
-               }
-
                 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
                 if (ret) {
                         mlog_errno(ret);
-                       goto free_pages;
+                       goto out;
                 }
         }
  
@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
         }
  
         if (has_data) {
-               unsigned int page_end;
+               unsigned int page_end = min_t(unsigned, PAGE_SIZE,
+                                                       osb->s_clustersize);
                 u64 phys;
  
                 ret = dquot_alloc_space_nodirty(inode,
@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                  */
                 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
  
-               /*
-                * Non sparse file systems zero on extend, so no need
-                * to do that now.
-                */
-               if (!ocfs2_sparse_alloc(osb) &&
-                   PAGE_SIZE < osb->s_clustersize)
-                       end = PAGE_SIZE;
-
-               ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
+               ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
+                                          &num_pages);
                 if (ret) {
                         mlog_errno(ret);
                         need_free = 1;
@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
                  * This should populate the 1st page for us and mark
                  * it up to date.
                  */
-               ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
+               ret = ocfs2_read_inline_data(inode, page, di_bh);
                 if (ret) {
                         mlog_errno(ret);
                         need_free = 1;
                         goto out_unlock;
                 }
  
-               page_end = PAGE_SIZE;
-               if (PAGE_SIZE > osb->s_clustersize)
-                       page_end = osb->s_clustersize;
-
-               for (i = 0; i < num_pages; i++)
-                       ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
-                                                pages[i], i > 0, &phys);
+               ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
+                                        &phys);
         }
  
         spin_lock(&oi->ip_lock);
@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
         }
  
  out_unlock:
-       if (pages)
-               ocfs2_unlock_and_free_pages(pages, num_pages);
+       if (page)
+               ocfs2_unlock_and_free_pages(&page, num_pages);
  
  out_commit:
         if (ret < 0 && did_quota)
@@ -7205,8 +7185,6 @@ out_commit:
  out:
         if (data_ac)
                 ocfs2_free_alloc_context(data_ac);
-free_pages:
-       kfree(pages);
         return ret;
  }
  
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c

index c86bd4e60e207dc5765164b6b9bd5cbf7b5c18c9..5c914ce9b3ac95636296e308cc2f61400f41ef55 100644 (file)
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
         }
  
         if (ocfs2_clusterinfo_valid(osb)) {
+               /*
+                * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+                * terminated, so make sure no overflow happens here by using
+                * memcpy. Destination strings will always be null terminated
+                * because osb is allocated using kzalloc.
+                */
                 osb->osb_stackflags =
                         OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
-               strlcpy(osb->osb_cluster_stack,
+               memcpy(osb->osb_cluster_stack,
                        OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
-                      OCFS2_STACK_LABEL_LEN + 1);
+                      OCFS2_STACK_LABEL_LEN);
                 if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
                         mlog(ML_ERROR,
                              "couldn't mount because of an invalid "
@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
                         status = -EINVAL;
                         goto bail;
                 }
-               strlcpy(osb->osb_cluster_name,
+               memcpy(osb->osb_cluster_name,
                         OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
-                       OCFS2_CLUSTER_NAME_LEN + 1);
+                       OCFS2_CLUSTER_NAME_LEN);
         } else {
                 /* The empty string is identical with classic tools that
                  * don't know about s_cluster_info. */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c

index 003f0d31743eb3cd95de8c8da21a4d0d731daf73..22bf14ab2d163b189ba3f9a3fedc5fe53ec7bccb 100644 (file)
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1827,9 +1827,15 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
         if (mode_wp && mode_dontwake)
                 return -EINVAL;
  
-       ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
-                                 uffdio_wp.range.len, mode_wp,
-                                 &ctx->mmap_changing);
+       if (mmget_not_zero(ctx->mm)) {
+               ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
+                                         uffdio_wp.range.len, mode_wp,
+                                         &ctx->mmap_changing);
+               mmput(ctx->mm);
+       } else {
+               return -ESRCH;
+       }
+
         if (ret)
                 return ret;
  
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h

index 832d8a74fa5960becb611c7e7adaec760c88c495..991911048857a8710a4ec3d2bd1c9f0faeccb6ee 100644 (file)
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -72,6 +72,8 @@ enum cpuhp_state {
         CPUHP_SLUB_DEAD,
         CPUHP_DEBUG_OBJ_DEAD,
         CPUHP_MM_WRITEBACK_DEAD,
+       /* Must be after CPUHP_MM_VMSTAT_DEAD */
+       CPUHP_MM_DEMOTION_DEAD,
         CPUHP_MM_VMSTAT_DEAD,
         CPUHP_SOFTIRQ_DEAD,
         CPUHP_NET_MVNETA_DEAD,
@@ -240,6 +242,8 @@ enum cpuhp_state {
         CPUHP_AP_BASE_CACHEINFO_ONLINE,
         CPUHP_AP_ONLINE_DYN,
         CPUHP_AP_ONLINE_DYN_END         = CPUHP_AP_ONLINE_DYN + 30,
+       /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+       CPUHP_AP_MM_DEMOTION_ONLINE,
         CPUHP_AP_X86_HPET_ONLINE,
         CPUHP_AP_X86_KVM_CLK_ONLINE,
         CPUHP_AP_DTPM_CPU_ONLINE,
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h

index 2aaa15779d50bb723b6c5a10922798c0a3bf3395..957ebec35aad01e73a70f464925b5af27becbf87 100644 (file)
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
  #endif
  }
  
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
  /*
   * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
   * extra segments containing the gate DSO contents.  Dumping its
diff --git a/include/linux/memory.h b/include/linux/memory.h

index 7efc0a7c14c9db85bbfea80faaf531d49639e715..182c606adb060ff962f6f34e4bd76afb9ab63b50 100644 (file)
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
  #define register_hotmemory_notifier(nb)                register_memory_notifier(nb)
  #define unregister_hotmemory_notifier(nb)      unregister_memory_notifier(nb)
  #else
-#define hotplug_memory_notifier(fn, pri)       ({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+       return 0;
+}
  /* These aren't inline functions due to a GCC bug. */
  #define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
  #define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h

index 21c3771e6a56b0cc4304370ebf9673273a0cf7dc..988528b5da438fa2e96f4166446361ee58958e02 100644 (file)
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
         mapping = (struct address_space *)
                 ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
  
-       if (mapping != page->mapping)
+       if (!mapping || mapping != page->mapping)
                 return false;
  
         return mapping->a_ops == &secretmem_aops;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 5e9ef0fc261e9747717f112a53bafd8f96df7393..92192cb086c79a2f675e9b412b938ada20cb216f 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2700,12 +2700,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 if (mapping) {
                         int nr = thp_nr_pages(head);
  
-                       if (PageSwapBacked(head))
+                       if (PageSwapBacked(head)) {
                                 __mod_lruvec_page_state(head, NR_SHMEM_THPS,
                                                         -nr);
-                       else
+                       } else {
                                 __mod_lruvec_page_state(head, NR_FILE_THPS,
                                                         -nr);
+                               filemap_nr_thps_dec(mapping);
+                       }
                 }
  
                 __split_huge_page(page, list, end);
diff --git a/mm/memblock.c b/mm/memblock.c

index 5c3503c98b2f113ca3de1f0a76e863a316d5c8cd..b91df5cf54d346c59e856c3dd7fba6dc663ee39f 100644 (file)
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1692,7 +1692,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
         if (!size)
                 return;
  
-       if (memblock.memory.cnt <= 1) {
+       if (!memblock_memory->total_size) {
                 pr_warn("%s: No memory registered yet\n", __func__);
                 return;
         }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 1592b081c58ef6dd63c6f075ad24722f2be7cb5d..d12e0608fced235dc9137d0628437046299c7cfc 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -856,16 +856,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
                 goto out;
         }
  
-       if (flags & MPOL_F_NUMA_BALANCING) {
-               if (new && new->mode == MPOL_BIND) {
-                       new->flags |= (MPOL_F_MOF | MPOL_F_MORON);
-               } else {
-                       ret = -EINVAL;
-                       mpol_put(new);
-                       goto out;
-               }
-       }
-
         ret = mpol_set_nodemask(new, nodes, scratch);
         if (ret) {
                 mpol_put(new);
@@ -1458,7 +1448,11 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
                 return -EINVAL;
         if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
                 return -EINVAL;
-
+       if (*flags & MPOL_F_NUMA_BALANCING) {
+               if (*mode != MPOL_BIND)
+                       return -EINVAL;
+               *flags |= (MPOL_F_MOF | MPOL_F_MORON);
+       }
         return 0;
  }
  
diff --git a/mm/migrate.c b/mm/migrate.c

index a6a7743ee98f002c7f0ff3d410a2f99392c321cd..1852d787e6ab697d35c9e5a6992d2634f8695720 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3066,7 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
  EXPORT_SYMBOL(migrate_vma_finalize);
  #endif /* CONFIG_DEVICE_PRIVATE */
  
-#if defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_HOTPLUG_CPU)
  /* Disable reclaim-based migration. */
  static void __disable_all_migrate_targets(void)
  {
@@ -3208,25 +3208,6 @@ static void set_migration_target_nodes(void)
         put_online_mems();
  }
  
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs.  That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
-       set_migration_target_nodes();
-       return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
-{
-       set_migration_target_nodes();
-       return 0;
-}
-
  /*
   * This leaves migrate-on-reclaim transiently disabled between
   * the MEM_GOING_OFFLINE and MEM_OFFLINE events.  This runs
@@ -3239,8 +3220,18 @@ static int migration_offline_cpu(unsigned int cpu)
   * set_migration_target_nodes().
   */
  static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
-                                                unsigned long action, void *arg)
+                                                unsigned long action, void *_arg)
  {
+       struct memory_notify *arg = _arg;
+
+       /*
+        * Only update the node migration order when a node is
+        * changing status, like online->offline.  This avoids
+        * the overhead of synchronize_rcu() in most cases.
+        */
+       if (arg->status_change_nid < 0)
+               return notifier_from_errno(0);
+
         switch (action) {
         case MEM_GOING_OFFLINE:
                 /*
@@ -3274,13 +3265,31 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
         return notifier_from_errno(0);
  }
  
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs.  That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
  static int __init migrate_on_reclaim_init(void)
  {
         int ret;
  
-       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
-                               migration_online_cpu,
-                               migration_offline_cpu);
+       ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
+                                       NULL, migration_offline_cpu);
         /*
          * In the unlikely case that this fails, the automatic
          * migration targets may become suboptimal for nodes
@@ -3288,9 +3297,12 @@ static int __init migrate_on_reclaim_init(void)
          * rare case, do not bother trying to do anything special.
          */
         WARN_ON(ret < 0);
+       ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
+                               migration_online_cpu, NULL);
+       WARN_ON(ret < 0);
  
         hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
         return 0;
  }
  late_initcall(migrate_on_reclaim_init);
-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/mm/page_ext.c b/mm/page_ext.c

index dfb91653d359eecb15cc57f5e36d702d3b4b13a9..2a52fd9ed464aa7d780dc50440b7f09989ad15ce 100644 (file)
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -269,7 +269,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
         total_usage += table_size;
         return 0;
  }
-#ifdef CONFIG_MEMORY_HOTPLUG
+
  static void free_page_ext(void *addr)
  {
         if (is_vmalloc_addr(addr)) {
@@ -374,8 +374,6 @@ static int __meminit page_ext_callback(struct notifier_block *self,
         return notifier_from_errno(ret);
  }
  
-#endif
-
  void __init page_ext_init(void)
  {
         unsigned long pfn;
diff --git a/mm/slab.c b/mm/slab.c

index d0f725637663000ce14259df2e16b7f08cb7e504..874b3f8fe80da2525e8f197a8543f63c9a665543 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1095,7 +1095,7 @@ static int slab_offline_cpu(unsigned int cpu)
         return 0;
  }
  
-#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_NUMA)
  /*
   * Drains freelist for a node on each slab cache, used for memory hot-remove.
   * Returns -EBUSY if all objects cannot be drained so that the node is not
@@ -1157,7 +1157,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
  out:
         return notifier_from_errno(ret);
  }
-#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_NUMA */
  
  /*
   * swap the static kmem_cache_node with kmalloced memory
diff --git a/mm/slub.c b/mm/slub.c

index 3d2025f7163b295c45c2b26a8c5260414b2fc238..d8f77346376d871666ddf57f11db08b9c74f871d 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1701,7 +1701,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
  }
  
  static inline bool slab_free_freelist_hook(struct kmem_cache *s,
-                                          void **head, void **tail)
+                                          void **head, void **tail,
+                                          int *cnt)
  {
  
         void *object;
@@ -1728,6 +1729,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                         *head = object;
                         if (!*tail)
                                 *tail = object;
+               } else {
+                       /*
+                        * Adjust the reconstructed freelist depth
+                        * accordingly if object's reuse is delayed.
+                        */
+                       --(*cnt);
                 }
         } while (object != old_tail);
  
@@ -3413,7 +3420,9 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
         struct kmem_cache_cpu *c;
         unsigned long tid;
  
-       memcg_slab_free_hook(s, &head, 1);
+       /* memcg_slab_free_hook() is already called for bulk free. */
+       if (!tail)
+               memcg_slab_free_hook(s, &head, 1);
  redo:
         /*
          * Determine the currently cpus per cpu slab.
@@ -3480,7 +3489,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
          * With KASAN enabled slab_free_freelist_hook modifies the freelist
          * to remove objects, whose reuse must be delayed.
          */
-       if (slab_free_freelist_hook(s, &head, &tail))
+       if (slab_free_freelist_hook(s, &head, &tail, &cnt))
                 do_slab_free(s, page, head, tail, cnt, addr);
  }
  
@@ -4203,8 +4212,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
         if (alloc_kmem_cache_cpus(s))
                 return 0;
  
-       free_kmem_cache_nodes(s);
  error:
+       __kmem_cache_release(s);
         return -EINVAL;
  }
  
@@ -4880,13 +4889,15 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
                 return 0;
  
         err = sysfs_slab_add(s);
-       if (err)
+       if (err) {
                 __kmem_cache_release(s);
+               return err;
+       }
  
         if (s->flags & SLAB_STORE_USER)
                 debugfs_slab_add(s);
  
-       return err;
+       return 0;
  }
  
  void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
@@ -6108,9 +6119,14 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
         struct kmem_cache *s = file_inode(filep)->i_private;
         unsigned long *obj_map;
  
+       if (!t)
+               return -ENOMEM;
+
         obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
-       if (!obj_map)
+       if (!obj_map) {
+               seq_release_private(inode, filep);
                 return -ENOMEM;
+       }
  
         if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
                 alloc = TRACK_ALLOC;
@@ -6119,6 +6135,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
  
         if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
                 bitmap_free(obj_map);
+               seq_release_private(inode, filep);
                 return -ENOMEM;
         }
  
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c

index 10ab56c2484ae6211e30b6e58b389bcb56130380..60aa1a4fc69b63e165d241b4c14bce5f20c8a13d 100644 (file)
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
         uffd_test_ops->allocate_area((void **)&area_src);
         uffd_test_ops->allocate_area((void **)&area_dst);
  
-       uffd_test_ops->release_pages(area_src);
-       uffd_test_ops->release_pages(area_dst);
-
         userfaultfd_open(features);
  
         count_verify = malloc(nr_pages * sizeof(unsigned long long));
@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
                 *(area_count(area_src, nr) + 1) = 1;
         }
  
+       /*
+        * After initialization of area_src, we must explicitly release pages
+        * for area_dst to make sure it's fully empty.  Otherwise we could have
+        * some area_dst pages be erroneously initialized with zero pages,
+        * hence we could hit memory corruption later in the test.
+        *
+        * One example is when THP is globally enabled, above allocate_area()
+        * calls could have the two areas merged into a single VMA (as they
+        * will have the same VMA flags so they're mergeable).  When we
+        * initialize the area_src above, it's possible that some part of
+        * area_dst could have been faulted in via one huge THP that will be
+        * shared between area_src and area_dst.  That could leave some of
+        * area_dst untrapped by missing userfaults.
+        *
+        * This release_pages() will guarantee even if that happened, we'll
+        * proactively split the thp and drop any accidentally initialized
+        * pages within area_dst.
+        */
+       uffd_test_ops->release_pages(area_dst);
+
         pipefd = malloc(sizeof(int) * nr_cpus * 2);
         if (!pipefd)
                 err("pipefd");
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 19 Oct 2021 15:41:36 +0000 (05:41 -1000)
.mailmap		patch \| blob \| blame \| history
fs/kernel_read_file.c		patch \| blob \| blame \| history
fs/ocfs2/alloc.c		patch \| blob \| blame \| history
fs/ocfs2/super.c		patch \| blob \| blame \| history
fs/userfaultfd.c		patch \| blob \| blame \| history
include/linux/cpuhotplug.h		patch \| blob \| blame \| history
include/linux/elfcore.h		patch \| blob \| blame \| history
include/linux/memory.h		patch \| blob \| blame \| history
include/linux/secretmem.h		patch \| blob \| blame \| history
mm/huge_memory.c		patch \| blob \| blame \| history
mm/memblock.c		patch \| blob \| blame \| history
mm/mempolicy.c		patch \| blob \| blame \| history
mm/migrate.c		patch \| blob \| blame \| history
mm/page_ext.c		patch \| blob \| blame \| history
mm/slab.c		patch \| blob \| blame \| history
mm/slub.c		patch \| blob \| blame \| history
tools/testing/selftests/vm/userfaultfd.c		patch \| blob \| blame \| history