Merge branch 'akpm' (fixes from Andrew Morton)

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)
diff --git a/MAINTAINERS b/MAINTAINERS

index ea4d0058fd1b68b7d5e11f5228a4867aa0b96eea..60b1163dba28a4dc880eeda43a1596d37081d469 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4716,6 +4716,7 @@ L:        linux-iio@vger.kernel.org
  S:     Maintained
  F:     drivers/iio/
  F:     drivers/staging/iio/
+F:     include/linux/iio/
  
  IKANOS/ADI EAGLE ADSL USB DRIVER
  M:     Matthieu Castet <castet.matthieu@free.fr>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c

index 2ad0b5bce44be89494d9cb5f75da19b9e459cd10..3920ee45aa5942dd816a775180eeb16f662e804c 100644 (file)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
         }
  
         if (page_zero_filled(uncmem)) {
-               kunmap_atomic(user_mem);
+               if (user_mem)
+                       kunmap_atomic(user_mem);
                 /* Free memory associated with this sector now. */
                 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                 zram_free_page(zram, index);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c

index 9d3e9c50066aaf5856350cf3bc85576a79bab900..89326acd45615e50cc60c09cdd3531137b813138 100644 (file)
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                                               &fsnotify_mark_srcu);
         }
  
+       /*
+        * We need to merge inode & vfsmount mark lists so that inode mark
+        * ignore masks are properly reflected for mount mark notifications.
+        * That's why this traversal is so complicated...
+        */
         while (inode_node || vfsmount_node) {
-               inode_group = vfsmount_group = NULL;
+               inode_group = NULL;
+               inode_mark = NULL;
+               vfsmount_group = NULL;
+               vfsmount_mark = NULL;
  
                 if (inode_node) {
                         inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                         vfsmount_group = vfsmount_mark->group;
                 }
  
-               if (inode_group > vfsmount_group) {
-                       /* handle inode */
-                       ret = send_to_group(to_tell, inode_mark, NULL, mask,
-                                           data, data_is, cookie, file_name);
-                       /* we didn't use the vfsmount_mark */
-                       vfsmount_group = NULL;
-               } else if (vfsmount_group > inode_group) {
-                       ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
-                                           data, data_is, cookie, file_name);
-                       inode_group = NULL;
-               } else {
-                       ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-                                           mask, data, data_is, cookie,
-                                           file_name);
+               if (inode_group && vfsmount_group) {
+                       int cmp = fsnotify_compare_groups(inode_group,
+                                                         vfsmount_group);
+                       if (cmp > 0) {
+                               inode_group = NULL;
+                               inode_mark = NULL;
+                       } else if (cmp < 0) {
+                               vfsmount_group = NULL;
+                               vfsmount_mark = NULL;
+                       }
                 }
+               ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+                                   data, data_is, cookie, file_name);
  
                 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
                         goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h

index 9c0898c4cfe1ce771a8a0832adda51c80225ce59..3b68b0ae0a97cb6beb6b642f3098de5c8aaec4d0 100644 (file)
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
  /* protects reads of inode and vfsmount marks list */
  extern struct srcu_struct fsnotify_mark_srcu;
  
+/* compare two groups for sorting of marks lists */
+extern int fsnotify_compare_groups(struct fsnotify_group *a,
+                                  struct fsnotify_group *b);
+
  extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
                                                 __u32 mask);
  /* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c

index e8497144b32342437377748f26a616168455339a..dfbf5447eea4cea8fdf664ff5b0232f8a61e68b0 100644 (file)
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
  {
         struct fsnotify_mark *lmark, *last = NULL;
         int ret = 0;
+       int cmp;
  
         mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
  
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
                         goto out;
                 }
  
-               if (mark->group->priority < lmark->group->priority)
-                       continue;
-
-               if ((mark->group->priority == lmark->group->priority) &&
-                   (mark->group < lmark->group))
+               cmp = fsnotify_compare_groups(lmark->group, mark->group);
+               if (cmp < 0)
                         continue;
  
                 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c

index d90deaa08e78f6e82cde921f5914000bc9ca4a97..34c38fabf514f1a892e10e2e24ff4a3d252ec000 100644 (file)
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -209,6 +209,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
         mark->ignored_mask = mask;
  }
  
+/*
+ * Sorting function for lists of fsnotify marks.
+ *
+ * Fanotify supports different notification classes (reflected as priority of
+ * notification group). Events shall be passed to notification groups in
+ * decreasing priority order. To achieve this marks in notification lists for
+ * inodes and vfsmounts are sorted so that priorities of corresponding groups
+ * are descending.
+ *
+ * Furthermore correct handling of the ignore mask requires processing inode
+ * and vfsmount marks of each group together. Using the group address as
+ * further sort criterion provides a unique sorting order and thus we can
+ * merge inode and vfsmount lists of marks in linear time and find groups
+ * present in both lists.
+ *
+ * A return value of 1 signifies that b has priority over a.
+ * A return value of 0 signifies that the two marks have to be handled together.
+ * A return value of -1 signifies that a has priority over b.
+ */
+int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+{
+       if (a == b)
+               return 0;
+       if (!a)
+               return 1;
+       if (!b)
+               return -1;
+       if (a->priority < b->priority)
+               return 1;
+       if (a->priority > b->priority)
+               return -1;
+       if (a < b)
+               return 1;
+       return -1;
+}
+
  /*
   * Attach an initialized mark to a given group and fs object.
   * These marks may be used for the fsnotify backend to determine which
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c

index ac851e8376b1931d88adcf4ff5eaa8bd2445a635..faefa72a11ebaacff32569535e69a9005a0d4f0f 100644 (file)
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
         struct mount *m = real_mount(mnt);
         struct fsnotify_mark *lmark, *last = NULL;
         int ret = 0;
+       int cmp;
  
         mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
  
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
                         goto out;
                 }
  
-               if (mark->group->priority < lmark->group->priority)
-                       continue;
-
-               if ((mark->group->priority == lmark->group->priority) &&
-                   (mark->group < lmark->group))
+               cmp = fsnotify_compare_groups(lmark->group, mark->group);
+               if (cmp < 0)
                         continue;
  
                 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h

index 4e2bd4c95b66ff245fa78d64a455267ac646fcff..0995c2de8162c2f6368647503ddda9017a68f6c6 100644 (file)
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
  extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
  
  extern unsigned long free_all_bootmem(void);
+extern void reset_node_managed_pages(pg_data_t *pgdat);
  extern void reset_all_zones_managed_pages(void);
  
  extern void free_bootmem_node(pg_data_t *pgdat,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 48bf12ef6620ccc863c27afc615a2e2e460a6c99..ffe66e381c04237fb54a0447741f39a40d7c5d71 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -431,6 +431,15 @@ struct zone {
          */
         int                     nr_migrate_reserve_block;
  
+#ifdef CONFIG_MEMORY_ISOLATION
+       /*
+        * Number of isolated pageblocks. It is used to solve the incorrect
+        * freepage counting problem caused by racy retrieval of a
+        * pageblock's migratetype. Protected by zone->lock.
+        */
+       unsigned long           nr_isolate_pageblock;
+#endif
+
  #ifdef CONFIG_MEMORY_HOTPLUG
         /* see spanned/present_pages for more description */
         seqlock_t               span_seqlock;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h

index 3fff8e774067904bb73b5817ba471099a80555da..2dc1e1697b451ce678781a55776a15c8934be7a5 100644 (file)
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,10 @@
  #define __LINUX_PAGEISOLATION_H
  
  #ifdef CONFIG_MEMORY_ISOLATION
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+       return zone->nr_isolate_pageblock;
+}
  static inline bool is_migrate_isolate_page(struct page *page)
  {
         return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
         return migratetype == MIGRATE_ISOLATE;
  }
  #else
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+       return false;
+}
  static inline bool is_migrate_isolate_page(struct page *page)
  {
         return false;
diff --git a/kernel/panic.c b/kernel/panic.c

index d09dc5c32c6740e41a5987cca0252ad86b43bdbd..cf80672b79246dd439f64cea16d3e631e7d35f2f 100644 (file)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
   *  'I' - Working around severe firmware bug.
   *  'O' - Out-of-tree module has been loaded.
   *  'E' - Unsigned module has been loaded.
+ *  'L' - A soft lockup has previously occurred.
   *
   *     The string is overwritten by the next call to print_tainted().
   */
diff --git a/mm/bootmem.c b/mm/bootmem.c

index 8a000cebb0d7428d5ec48dcfa979086c57e85109..477be696511d669230b47c73d52a8b3c1836c457 100644 (file)
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
  
  static int reset_managed_pages_done __initdata;
  
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
  {
         struct zone *z;
  
-       if (reset_managed_pages_done)
-               return;
-
         for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                 z->managed_pages = 0;
  }
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
  {
         struct pglist_data *pgdat;
  
+       if (reset_managed_pages_done)
+               return;
+
         for_each_online_pgdat(pgdat)
                 reset_node_managed_pages(pgdat);
+
         reset_managed_pages_done = 1;
  }
  
diff --git a/mm/compaction.c b/mm/compaction.c

index ec74cf0123efd3944894cc0b159b385d6b837f25..f9792ba3537ccc830594e7954715ca66eb2e9654 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
  
                 block_end_pfn = min(block_end_pfn, end_pfn);
  
+               /*
+                * pfn could pass block_end_pfn if the isolated freepage
+                * is larger than pageblock order. In that case, adjust the
+                * scanning range to the pageblock that contains pfn.
+                */
+               if (pfn >= block_end_pfn) {
+                       block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                       block_end_pfn = min(block_end_pfn, end_pfn);
+               }
+
                 if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
                         break;
  
@@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
         }
  
         acct_isolated(zone, cc);
-       /* Record where migration scanner will be restarted */
-       cc->migrate_pfn = low_pfn;
+       /*
+        * Record where migration scanner will be restarted. If we end up in
+        * the same pageblock as the free scanner, make the scanners fully
+        * meet so that compact_finished() terminates compaction.
+        */
+       cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
  
         return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
  }
diff --git a/mm/internal.h b/mm/internal.h

index 829304090b90e8ff57ee3eaf5281987deccb7e55..a4f90ba7068ef0af12ccdff8b3dc7408f772b447 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
  /*
   * in mm/page_alloc.c
   */
+
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ *     B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (buddy2) is #8 its order
+ * 1 buddy is #10:
+ *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ *     P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline unsigned long
+__find_buddy_index(unsigned long page_idx, unsigned int order)
+{
+       return page_idx ^ (1 << order);
+}
+
+extern int __isolate_free_page(struct page *page, unsigned int order);
  extern void __free_pages_bootmem(struct page *page, unsigned int order);
  extern void prep_compound_page(struct page *page, unsigned long order);
  #ifdef CONFIG_MEMORY_FAILURE
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 252e1dbbed86e9a81011ac8d135d9580969a1141..1bf4807cb21e49ccbd1bb4232574507e8d41384c 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
  #include <linux/stop_machine.h>
  #include <linux/hugetlb.h>
  #include <linux/memblock.h>
+#include <linux/bootmem.h>
  
  #include <asm/tlbflush.h>
  
@@ -1066,6 +1067,16 @@ out:
  }
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
  
+static void reset_node_present_pages(pg_data_t *pgdat)
+{
+       struct zone *z;
+
+       for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+               z->present_pages = 0;
+
+       pgdat->node_present_pages = 0;
+}
+
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  {
@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
         build_all_zonelists(pgdat, NULL);
         mutex_unlock(&zonelists_mutex);
  
+       /*
+        * zone->managed_pages is set to an approximate value in
+        * free_area_init_core(), which would cause
+        * /sys/devices/system/node/nodeX/meminfo to report wrong data.
+        * So reset it to 0 before any memory is onlined.
+        */
+       reset_node_managed_pages(pgdat);
+
+       /*
+        * When memory is hot-added, all the memory is in offline state. So
+        * clear all zones' present_pages because they will be updated in
+        * online_pages() and offline_pages().
+        */
+       reset_node_present_pages(pgdat);
+
         return pgdat;
  }
  
diff --git a/mm/nobootmem.c b/mm/nobootmem.c

index 7c7ab32ee5032dad07354f438b5832649aaa044b..90b50468333e38563d4388096e584b6c23fa9132 100644 (file)
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
  
  static int reset_managed_pages_done __initdata;
  
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
  {
         struct zone *z;
  
-       if (reset_managed_pages_done)
-               return;
         for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                 z->managed_pages = 0;
  }
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
  {
         struct pglist_data *pgdat;
  
+       if (reset_managed_pages_done)
+               return;
+
         for_each_online_pgdat(pgdat)
                 reset_node_managed_pages(pgdat);
+
         reset_managed_pages_done = 1;
  }
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 9cd36b822444433539fbe0cc3acf8f312172345d..616a2c956b4b2a6aee5cc1f7d0098cf4a4cd5912 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -466,29 +466,6 @@ static inline void rmv_page_order(struct page *page)
         set_page_private(page, 0);
  }
  
-/*
- * Locate the struct page for both the matching buddy in our
- * pair (buddy1) and the combined O(n+1) page they form (page).
- *
- * 1) Any buddy B1 will have an order O twin B2 which satisfies
- * the following equation:
- *     B2 = B1 ^ (1 << O)
- * For example, if the starting buddy (buddy2) is #8 its order
- * 1 buddy is #10:
- *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
- *
- * 2) Any buddy B will have an order O+1 parent P which
- * satisfies the following equation:
- *     P = B & ~(1 << O)
- *
- * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
- */
-static inline unsigned long
-__find_buddy_index(unsigned long page_idx, unsigned int order)
-{
-       return page_idx ^ (1 << order);
-}
-
  /*
   * This function checks whether a page is free && is the buddy
   * we can do coalesce a page and its buddy if
@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
         unsigned long combined_idx;
         unsigned long uninitialized_var(buddy_idx);
         struct page *buddy;
+       int max_order = MAX_ORDER;
  
         VM_BUG_ON(!zone_is_initialized(zone));
  
@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
                         return;
  
         VM_BUG_ON(migratetype == -1);
+       if (is_migrate_isolate(migratetype)) {
+               /*
+                * We restrict the max order of merging to prevent merges
+                * between freepages on an isolated pageblock and a normal
+                * pageblock. Without this, pageblock isolation
+                * could cause incorrect freepage accounting.
+                */
+               max_order = min(MAX_ORDER, pageblock_order + 1);
+       } else {
+               __mod_zone_freepage_state(zone, 1 << order, migratetype);
+       }
  
-       page_idx = pfn & ((1 << MAX_ORDER) - 1);
+       page_idx = pfn & ((1 << max_order) - 1);
  
         VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
         VM_BUG_ON_PAGE(bad_range(zone, page), page);
  
-       while (order < MAX_ORDER-1) {
+       while (order < max_order - 1) {
                 buddy_idx = __find_buddy_index(page_idx, order);
                 buddy = page + (buddy_idx - page_idx);
                 if (!page_is_buddy(page, buddy, order))
@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
                  */
                 if (page_is_guard(buddy)) {
                         clear_page_guard_flag(buddy);
-                       set_page_private(page, 0);
-                       __mod_zone_freepage_state(zone, 1 << order,
-                                                 migratetype);
+                       set_page_private(buddy, 0);
+                       if (!is_migrate_isolate(migratetype)) {
+                               __mod_zone_freepage_state(zone, 1 << order,
+                                                         migratetype);
+                       }
                 } else {
                         list_del(&buddy->lru);
                         zone->free_area[order].nr_free--;
@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         /* must delete as __free_one_page list manipulates */
                         list_del(&page->lru);
                         mt = get_freepage_migratetype(page);
+                       if (unlikely(has_isolate_pageblock(zone)))
+                               mt = get_pageblock_migratetype(page);
+
                         /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                         __free_one_page(page, page_to_pfn(page), zone, 0, mt);
                         trace_mm_page_pcpu_drain(page, 0, mt);
-                       if (likely(!is_migrate_isolate_page(page))) {
-                               __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
-                               if (is_migrate_cma(mt))
-                                       __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
-                       }
                 } while (--to_free && --batch_free && !list_empty(list));
         }
         spin_unlock(&zone->lock);
@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
         if (nr_scanned)
                 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  
+       if (unlikely(has_isolate_pageblock(zone) ||
+               is_migrate_isolate(migratetype))) {
+               migratetype = get_pfnblock_migratetype(page, pfn);
+       }
         __free_one_page(page, pfn, zone, order, migratetype);
-       if (unlikely(!is_migrate_isolate(migratetype)))
-               __mod_zone_freepage_state(zone, 1 << order, migratetype);
         spin_unlock(&zone->lock);
  }
  
@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
  }
  EXPORT_SYMBOL_GPL(split_page);
  
-static int __isolate_free_page(struct page *page, unsigned int order)
+int __isolate_free_page(struct page *page, unsigned int order)
  {
         unsigned long watermark;
         struct zone *zone;
@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
  
         /* Make sure the range is really isolated. */
         if (test_pages_isolated(outer_start, end, false)) {
-               pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
-                      outer_start, end);
+               pr_info("%s: [%lx, %lx) PFNs busy\n",
+                       __func__, outer_start, end);
                 ret = -EBUSY;
                 goto done;
         }
  
-
         /* Grab isolated pages from freelists. */
         outer_end = isolate_freepages_range(&cc, outer_start, end);
         if (!outer_end) {
diff --git a/mm/page_isolation.c b/mm/page_isolation.c

index d1473b2e9481731988695755a618baa0991556a7..c8778f7e208e8a4a640e2c12f091956f4aa33575 100644 (file)
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -60,6 +60,7 @@ out:
                 int migratetype = get_pageblock_migratetype(page);
  
                 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+               zone->nr_isolate_pageblock++;
                 nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
  
                 __mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
  {
         struct zone *zone;
         unsigned long flags, nr_pages;
+       struct page *isolated_page = NULL;
+       unsigned int order;
+       unsigned long page_idx, buddy_idx;
+       struct page *buddy;
  
         zone = page_zone(page);
         spin_lock_irqsave(&zone->lock, flags);
         if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
                 goto out;
-       nr_pages = move_freepages_block(zone, page, migratetype);
-       __mod_zone_freepage_state(zone, nr_pages, migratetype);
+
+       /*
+        * Because freepages on an isolated pageblock are restricted from
+        * merging beyond pageblock_order (to avoid the freepage counting
+        * problem), it is possible that there is a free buddy page.
+        * move_freepages_block() doesn't take care of merging, so we need
+        * another approach in order to merge them. Isolating and then
+        * freeing the page will cause these pages to be merged.
+        */
+       if (PageBuddy(page)) {
+               order = page_order(page);
+               if (order >= pageblock_order) {
+                       page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+                       buddy_idx = __find_buddy_index(page_idx, order);
+                       buddy = page + (buddy_idx - page_idx);
+
+                       if (!is_migrate_isolate_page(buddy)) {
+                               __isolate_free_page(page, order);
+                               set_page_refcounted(page);
+                               isolated_page = page;
+                       }
+               }
+       }
+
+       /*
+        * If we isolated a freepage of at least pageblock_order, there
+        * should be no other freepage in the range, so we can avoid the
+        * costly pageblock scan for moving freepages.
+        */
+       if (!isolated_page) {
+               nr_pages = move_freepages_block(zone, page, migratetype);
+               __mod_zone_freepage_state(zone, nr_pages, migratetype);
+       }
         set_pageblock_migratetype(page, migratetype);
+       zone->nr_isolate_pageblock--;
  out:
         spin_unlock_irqrestore(&zone->lock, flags);
+       if (isolated_page)
+               __free_pages(isolated_page, order);
  }
  
  static inline struct page *
diff --git a/mm/slab_common.c b/mm/slab_common.c

index 406944207b61dbd607bc7f2d2b244b6998f47254..dcdab81bd240bafe3bec02cb32bf3390763a90e9 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
                 if (s->size - size >= sizeof(void *))
                         continue;
  
+               if (IS_ENABLED(CONFIG_SLAB) && align &&
+                       (align > s->align || s->align % align))
+                       continue;
+
                 return s;
         }
         return NULL;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 14 Nov 2014 00:57:25 +0000 (16:57 -0800)
MAINTAINERS		patch \| blob \| blame \| history
drivers/block/zram/zram_drv.c		patch \| blob \| blame \| history
fs/notify/fsnotify.c		patch \| blob \| blame \| history
fs/notify/fsnotify.h		patch \| blob \| blame \| history
fs/notify/inode_mark.c		patch \| blob \| blame \| history
fs/notify/mark.c		patch \| blob \| blame \| history
fs/notify/vfsmount_mark.c		patch \| blob \| blame \| history
include/linux/bootmem.h		patch \| blob \| blame \| history
include/linux/mmzone.h		patch \| blob \| blame \| history
include/linux/page-isolation.h		patch \| blob \| blame \| history
kernel/panic.c		patch \| blob \| blame \| history
mm/bootmem.c		patch \| blob \| blame \| history
mm/compaction.c		patch \| blob \| blame \| history
mm/internal.h		patch \| blob \| blame \| history
mm/memory_hotplug.c		patch \| blob \| blame \| history
mm/nobootmem.c		patch \| blob \| blame \| history
mm/page_alloc.c		patch \| blob \| blame \| history
mm/page_isolation.c		patch \| blob \| blame \| history
mm/slab_common.c		patch \| blob \| blame \| history