ARM: bcm2835: dt: Add the DSI module nodes and clocks.

[mirror_ubuntu-zesty-kernel.git] / mm / mmap.c
diff --git a/mm/mmap.c b/mm/mmap.c

index ca9d91bca0d6c61983707f23641507dee0540af2..09c728a1eeee248b3af92f401db9c3b037f64570 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -45,7 +45,7 @@
  #include <linux/moduleparam.h>
  #include <linux/pkeys.h>
  
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
  #include <asm/cacheflush.h>
  #include <asm/tlb.h>
  #include <asm/mmu_context.h>
@@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm,
   *             w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
   *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
   *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE:
+ *                                                             r: (no) no
+ *                                                             w: (no) no
+ *                                                             x: (yes) yes
   */
  pgprot_t protection_map[16] = {
         __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
@@ -111,13 +116,15 @@ static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
  void vma_set_page_prot(struct vm_area_struct *vma)
  {
         unsigned long vm_flags = vma->vm_flags;
+       pgprot_t vm_page_prot;
  
-       vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
-       if (vma_wants_writenotify(vma)) {
+       vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+       if (vma_wants_writenotify(vma, vm_page_prot)) {
                 vm_flags &= ~VM_SHARED;
-               vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
-                                                    vm_flags);
+               vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
         }
+       /* remove_migration_ptes reads vma->vm_page_prot without mmap_sem */
+       WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
  }
  
  /*
@@ -163,7 +170,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
         if (vma->vm_ops && vma->vm_ops->close)
                 vma->vm_ops->close(vma);
         if (vma->vm_file)
-               fput(vma->vm_file);
+               vma_fput(vma);
         mpol_put(vma_policy(vma));
         kmem_cache_free(vm_area_cachep, vma);
         return next;
@@ -176,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         unsigned long retval;
         unsigned long newbrk, oldbrk;
         struct mm_struct *mm = current->mm;
+       struct vm_area_struct *next;
         unsigned long min_brk;
         bool populate;
  
@@ -221,7 +229,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         }
  
         /* Check against existing mmap mappings. */
-       if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+       next = find_vma(mm, oldbrk);
+       if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                 goto out;
  
         /* Ok, looks good - let it rip. */
@@ -244,10 +253,22 @@ out:
  
  static long vma_compute_subtree_gap(struct vm_area_struct *vma)
  {
-       unsigned long max, subtree_gap;
-       max = vma->vm_start;
-       if (vma->vm_prev)
-               max -= vma->vm_prev->vm_end;
+       unsigned long max, prev_end, subtree_gap;
+
+       /*
+        * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+        * allow two stack_guard_gaps between them here, and when choosing
+        * an unmapped area; whereas when expanding we only require one.
+        * That's a little inconsistent, but keeps the code here simpler.
+        */
+       max = vm_start_gap(vma);
+       if (vma->vm_prev) {
+               prev_end = vm_end_gap(vma->vm_prev);
+               if (max > prev_end)
+                       max -= prev_end;
+               else
+                       max = 0;
+       }
         if (vma->vm_rb.rb_left) {
                 subtree_gap = rb_entry(vma->vm_rb.rb_left,
                                 struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -343,7 +364,7 @@ static void validate_mm(struct mm_struct *mm)
                         anon_vma_unlock_read(anon_vma);
                 }
  
-               highest_address = vma->vm_end;
+               highest_address = vm_end_gap(vma);
                 vma = vma->vm_next;
                 i++;
         }
@@ -395,14 +416,8 @@ static inline void vma_rb_insert(struct vm_area_struct *vma,
         rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
  }
  
-static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
  {
-       /*
-        * All rb_subtree_gap values must be consistent prior to erase,
-        * with the possible exception of the vma being erased.
-        */
-       validate_mm_rb(root, vma);
-
         /*
          * Note rb_erase_augmented is a fairly large inline function,
          * so make sure we instantiate it only once with our desired
@@ -411,6 +426,32 @@ static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
         rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
  }
  
+static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
+                                               struct rb_root *root,
+                                               struct vm_area_struct *ignore)
+{
+       /*
+        * All rb_subtree_gap values must be consistent prior to erase,
+        * with the possible exception of the "next" vma being erased if
+        * next->vm_start was reduced.
+        */
+       validate_mm_rb(root, ignore);
+
+       __vma_rb_erase(vma, root);
+}
+
+static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
+                                        struct rb_root *root)
+{
+       /*
+        * All rb_subtree_gap values must be consistent prior to erase,
+        * with the possible exception of the vma being erased.
+        */
+       validate_mm_rb(root, vma);
+
+       __vma_rb_erase(vma, root);
+}
+
  /*
   * vma has some anon_vma assigned, and is already inserted on that
   * anon_vma's interval trees.
@@ -512,7 +553,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
         if (vma->vm_next)
                 vma_gap_update(vma->vm_next);
         else
-               mm->highest_vm_end = vma->vm_end;
+               mm->highest_vm_end = vm_end_gap(vma);
  
         /*
          * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -594,14 +635,25 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         mm->map_count++;
  }
  
-static inline void
-__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev)
+static __always_inline void __vma_unlink_common(struct mm_struct *mm,
+                                               struct vm_area_struct *vma,
+                                               struct vm_area_struct *prev,
+                                               bool has_prev,
+                                               struct vm_area_struct *ignore)
  {
         struct vm_area_struct *next;
  
-       vma_rb_erase(vma, &mm->mm_rb);
-       prev->vm_next = next = vma->vm_next;
+       vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
+       next = vma->vm_next;
+       if (has_prev)
+               prev->vm_next = next;
+       else {
+               prev = vma->vm_prev;
+               if (prev)
+                       prev->vm_next = next;
+               else
+                       mm->mmap = next;
+       }
         if (next)
                 next->vm_prev = prev;
  
@@ -609,6 +661,13 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
         vmacache_invalidate(mm);
  }
  
+static inline void __vma_unlink_prev(struct mm_struct *mm,
+                                    struct vm_area_struct *vma,
+                                    struct vm_area_struct *prev)
+{
+       __vma_unlink_common(mm, vma, prev, true, vma);
+}
+
  /*
   * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
   * is already present in an i_mmap tree without adjusting the tree.
@@ -616,11 +675,12 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
   * are necessary.  The "insert" vma (if any) is to be inserted
   * before we drop the necessary locks.
   */
-int vma_adjust(struct vm_area_struct *vma, unsigned long start,
-       unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
+int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+       unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
+       struct vm_area_struct *expand)
  {
         struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *next = vma->vm_next;
+       struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
         struct address_space *mapping = NULL;
         struct rb_root *root = NULL;
         struct anon_vma *anon_vma = NULL;
@@ -636,9 +696,38 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         /*
                          * vma expands, overlapping all the next, and
                          * perhaps the one after too (mprotect case 6).
+                        * The only other cases that get here are
+                        * case 1, case 7 and case 8.
                          */
-                       remove_next = 1 + (end > next->vm_end);
-                       end = next->vm_end;
+                       if (next == expand) {
+                               /*
+                                * The only case where we don't expand "vma"
+                                * and we expand "next" instead is case 8.
+                                */
+                               VM_WARN_ON(end != next->vm_end);
+                               /*
+                                * remove_next == 3 means we're
+                                * removing "vma" and that to do so we
+                                * swapped "vma" and "next".
+                                */
+                               remove_next = 3;
+                               VM_WARN_ON(file != next->vm_file);
+                               swap(vma, next);
+                       } else {
+                               VM_WARN_ON(expand != vma);
+                               /*
+                                * cases 1, 6, 7: remove_next == 2 is case 6,
+                                * remove_next == 1 is case 1 or 7.
+                                */
+                               remove_next = 1 + (end > next->vm_end);
+                               VM_WARN_ON(remove_next == 2 &&
+                                          end != next->vm_next->vm_end);
+                               VM_WARN_ON(remove_next == 1 &&
+                                          end != next->vm_end);
+                               /* trim end to next, for case 6 first pass */
+                               end = next->vm_end;
+                       }
+
                         exporter = next;
                         importer = vma;
  
@@ -646,7 +735,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                          * If next doesn't have anon_vma, import from vma after
                          * next, if the vma overlaps with it.
                          */
-                       if (remove_next == 2 && next && !next->anon_vma)
+                       if (remove_next == 2 && !next->anon_vma)
                                 exporter = next->vm_next;
  
                 } else if (end > next->vm_start) {
@@ -657,6 +746,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
                         exporter = next;
                         importer = vma;
+                       VM_WARN_ON(expand != importer);
                 } else if (end < vma->vm_end) {
                         /*
                          * vma shrinks, and !insert tells it's not
@@ -666,6 +756,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
                         exporter = vma;
                         importer = next;
+                       VM_WARN_ON(expand != importer);
                 }
  
                 /*
@@ -683,7 +774,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
                 }
         }
  again:
-       vma_adjust_trans_huge(vma, start, end, adjust_next);
+       vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
  
         if (file) {
                 mapping = file->f_mapping;
@@ -709,8 +800,8 @@ again:
         if (!anon_vma && adjust_next)
                 anon_vma = next->anon_vma;
         if (anon_vma) {
-               VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
-                         anon_vma != next->anon_vma, next);
+               VM_WARN_ON(adjust_next && next->anon_vma &&
+                          anon_vma != next->anon_vma);
                 anon_vma_lock_write(anon_vma);
                 anon_vma_interval_tree_pre_update_vma(vma);
                 if (adjust_next)
@@ -750,7 +841,19 @@ again:
                  * vma_merge has merged next into vma, and needs
                  * us to remove next before dropping the locks.
                  */
-               __vma_unlink(mm, next, vma);
+               if (remove_next != 3)
+                       __vma_unlink_prev(mm, next, vma);
+               else
+                       /*
+                        * vma is not before next if they've been
+                        * swapped.
+                        *
+                        * pre-swap() next->vm_start was reduced so
+                        * tell validate_mm_rb to ignore pre-swap()
+                        * "next" (which is stored in post-swap()
+                        * "vma").
+                        */
+                       __vma_unlink_common(mm, next, NULL, false, vma);
                 if (file)
                         __remove_shared_vm_struct(next, file, mapping);
         } else if (insert) {
@@ -765,7 +868,7 @@ again:
                         vma_gap_update(vma);
                 if (end_changed) {
                         if (!next)
-                               mm->highest_vm_end = end;
+                               mm->highest_vm_end = vm_end_gap(vma);
                         else if (!adjust_next)
                                 vma_gap_update(next);
                 }
@@ -790,7 +893,7 @@ again:
         if (remove_next) {
                 if (file) {
                         uprobe_munmap(next, next->vm_start, next->vm_end);
-                       fput(file);
+                       vma_fput(vma);
                 }
                 if (next->anon_vma)
                         anon_vma_merge(vma, next);
@@ -802,7 +905,27 @@ again:
                  * we must remove another next too. It would clutter
                  * up the code too much to do both in one go.
                  */
-               next = vma->vm_next;
+               if (remove_next != 3) {
+                       /*
+                        * If "next" was removed and vma->vm_end was
+                        * expanded (up) over it, in turn
+                        * "next->vm_prev->vm_end" changed and the
+                        * "vma->vm_next" gap must be updated.
+                        */
+                       next = vma->vm_next;
+               } else {
+                       /*
+                        * For the scope of this comment, "next" and
+                        * "vma" are considered pre-swap(): if "vma" was
+                        * removed, next->vm_start was expanded (down)
+                        * over it and the "next" gap must be updated.
+                        * Because of the swap() the post-swap() "vma"
+                        * actually points to pre-swap() "next"
+                        * (post-swap() "next", by contrast, is now a
+                        * dangling pointer).
+                        */
+                       next = vma;
+               }
                 if (remove_next == 2) {
                         remove_next = 1;
                         end = next->vm_end;
@@ -810,8 +933,28 @@ again:
                 }
                 else if (next)
                         vma_gap_update(next);
-               else
-                       mm->highest_vm_end = end;
+               else {
+                       /*
+                        * If remove_next == 2 we obviously can't
+                        * reach this path.
+                        *
+                        * If remove_next == 3 we can't reach this
+                        * path because pre-swap() next is never
+                        * NULL. pre-swap() "next" is not being
+                        * removed and its next->vm_end is not altered
+                        * (and furthermore "end" already matches
+                        * next->vm_end in remove_next == 3).
+                        *
+                        * We reach this only in the remove_next == 1
+                        * case if the "next" vma that was removed was
+                        * the highest vma of the mm. However in such
+                        * case next->vm_end == "end" and the extended
+                        * "vma" has vma->vm_end == next->vm_end so
+                        * mm->highest_vm_end doesn't need any update
+                        * in remove_next == 1 case.
+                        */
+                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
+               }
         }
         if (insert && file)
                 uprobe_mmap(insert);
@@ -931,13 +1074,24 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
   *    cannot merge    might become    might become    might become
   *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
   *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
- *    mremap move:                                    PPPPNNNNNNNN 8
+ *    mremap move:                                    PPPPXXXXXXXX 8
   *        AAAA
   *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
   *    might become    case 1 below    case 2 below    case 3 below
   *
- * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
- * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
+ * It is important for case 8 that the vma NNNN overlapping the
+ * region AAAA is never going to be extended over XXXX. Instead XXXX must
+ * be extended in region AAAA and NNNN must be removed. This way in
+ * all cases where vma_merge succeeds, the moment vma_adjust drops the
+ * rmap_locks, the properties of the merged vma will be already
+ * correct for the whole merged range. Some of those properties like
+ * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
+ * be correct for the whole merged range immediately after the
+ * rmap_locks are released. Otherwise if XXXX would be removed and
+ * NNNN would be extended over the XXXX range, remove_migration_ptes
+ * or other rmap walkers (if working on addresses beyond the "end"
+ * parameter) may establish ptes with the wrong permissions of NNNN
+ * instead of the right permissions of XXXX.
   */
  struct vm_area_struct *vma_merge(struct mm_struct *mm,
                         struct vm_area_struct *prev, unsigned long addr,
@@ -962,9 +1116,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         else
                 next = mm->mmap;
         area = next;
-       if (next && next->vm_end == end)                /* cases 6, 7, 8 */
+       if (area && area->vm_end == end)                /* cases 6, 7, 8 */
                 next = next->vm_next;
  
+       /* verify some invariants that must be enforced by the caller */
+       VM_WARN_ON(prev && addr <= prev->vm_start);
+       VM_WARN_ON(area && end > area->vm_end);
+       VM_WARN_ON(addr >= end);
+
         /*
          * Can it merge with the predecessor?
          */
@@ -985,11 +1144,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                 is_mergeable_anon_vma(prev->anon_vma,
                                                       next->anon_vma, NULL)) {
                                                         /* cases 1, 6 */
-                       err = vma_adjust(prev, prev->vm_start,
-                               next->vm_end, prev->vm_pgoff, NULL);
+                       err = __vma_adjust(prev, prev->vm_start,
+                                        next->vm_end, prev->vm_pgoff, NULL,
+                                        prev);
                 } else                                  /* cases 2, 5, 7 */
-                       err = vma_adjust(prev, prev->vm_start,
-                               end, prev->vm_pgoff, NULL);
+                       err = __vma_adjust(prev, prev->vm_start,
+                                        end, prev->vm_pgoff, NULL, prev);
                 if (err)
                         return NULL;
                 khugepaged_enter_vma_merge(prev, vm_flags);
@@ -1005,11 +1165,18 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                              anon_vma, file, pgoff+pglen,
                                              vm_userfaultfd_ctx)) {
                 if (prev && addr < prev->vm_end)        /* case 4 */
-                       err = vma_adjust(prev, prev->vm_start,
-                               addr, prev->vm_pgoff, NULL);
-               else                                    /* cases 3, 8 */
-                       err = vma_adjust(area, addr, next->vm_end,
-                               next->vm_pgoff - pglen, NULL);
+                       err = __vma_adjust(prev, prev->vm_start,
+                                        addr, prev->vm_pgoff, NULL, next);
+               else {                                  /* cases 3, 8 */
+                       err = __vma_adjust(area, addr, next->vm_end,
+                                        next->vm_pgoff - pglen, NULL, next);
+                       /*
+                        * In case 3 area is already equal to next and
+                        * this is a noop, but in case 8 "area" has
+                        * been removed and next was expanded over it.
+                        */
+                       area = next;
+               }
                 if (err)
                         return NULL;
                 khugepaged_enter_vma_merge(area, vm_flags);
@@ -1381,7 +1548,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
   * to the private version (using protection_map[] without the
   * VM_SHARED bit).
   */
-int vma_wants_writenotify(struct vm_area_struct *vma)
+int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
  {
         vm_flags_t vm_flags = vma->vm_flags;
         const struct vm_operations_struct *vm_ops = vma->vm_ops;
@@ -1396,8 +1563,8 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
  
         /* The open routine did something to the protections that pgprot_modify
          * won't preserve? */
-       if (pgprot_val(vma->vm_page_prot) !=
-           pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
+       if (pgprot_val(vm_page_prot) !=
+           pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
                 return 0;
  
         /* Do we need to track softdirty? */
@@ -1574,8 +1741,8 @@ out:
         return addr;
  
  unmap_and_free_vma:
+       vma_fput(vma);
         vma->vm_file = NULL;
-       fput(file);
  
         /* Undo any partial mapping done by a device driver. */
         unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
@@ -1630,7 +1797,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  
         while (true) {
                 /* Visit left subtree if it looks promising */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
                         struct vm_area_struct *left =
                                 rb_entry(vma->vm_rb.rb_left,
@@ -1641,12 +1808,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
                         }
                 }
  
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
  check_current:
                 /* Check if current node has a suitable gap */
                 if (gap_start > high_limit)
                         return -ENOMEM;
-               if (gap_end >= low_limit && gap_end - gap_start >= length)
+               if (gap_end >= low_limit &&
+                   gap_end > gap_start && gap_end - gap_start >= length)
                         goto found;
  
                 /* Visit right subtree if it looks promising */
@@ -1668,8 +1836,8 @@ check_current:
                         vma = rb_entry(rb_parent(prev),
                                        struct vm_area_struct, vm_rb);
                         if (prev == vma->vm_rb.rb_left) {
-                               gap_start = vma->vm_prev->vm_end;
-                               gap_end = vma->vm_start;
+                               gap_start = vm_end_gap(vma->vm_prev);
+                               gap_end = vm_start_gap(vma);
                                 goto check_current;
                         }
                 }
@@ -1733,7 +1901,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
  
         while (true) {
                 /* Visit right subtree if it looks promising */
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
                 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
                         struct vm_area_struct *right =
                                 rb_entry(vma->vm_rb.rb_right,
@@ -1746,10 +1914,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
  
  check_current:
                 /* Check if current node has a suitable gap */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                 if (gap_end < low_limit)
                         return -ENOMEM;
-               if (gap_start <= high_limit && gap_end - gap_start >= length)
+               if (gap_start <= high_limit &&
+                   gap_end > gap_start && gap_end - gap_start >= length)
                         goto found;
  
                 /* Visit left subtree if it looks promising */
@@ -1772,7 +1941,7 @@ check_current:
                                        struct vm_area_struct, vm_rb);
                         if (prev == vma->vm_rb.rb_right) {
                                 gap_start = vma->vm_prev ?
-                                       vma->vm_prev->vm_end : 0;
+                                       vm_end_gap(vma->vm_prev) : 0;
                                 goto check_current;
                         }
                 }
@@ -1810,7 +1979,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                 unsigned long len, unsigned long pgoff, unsigned long flags)
  {
         struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
         struct vm_unmapped_area_info info;
  
         if (len > TASK_SIZE - mmap_min_addr)
@@ -1821,9 +1990,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
  
         if (addr) {
                 addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                         return addr;
         }
  
@@ -1846,7 +2016,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                           const unsigned long len, const unsigned long pgoff,
                           const unsigned long flags)
  {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
         struct mm_struct *mm = current->mm;
         unsigned long addr = addr0;
         struct vm_unmapped_area_info info;
@@ -1861,9 +2031,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         /* requesting a specific address */
         if (addr) {
                 addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)) &&
+                               (!prev || addr >= vm_end_gap(prev)))
                         return addr;
         }
  
@@ -1998,21 +2169,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
   * update accounting. This is shared with both the
   * grow-up and grow-down cases.
   */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+                            unsigned long size, unsigned long grow)
  {
         struct mm_struct *mm = vma->vm_mm;
         struct rlimit *rlim = current->signal->rlim;
-       unsigned long new_start, actual_size;
+       unsigned long new_start;
  
         /* address space limit tests */
         if (!may_expand_vm(mm, vma->vm_flags, grow))
                 return -ENOMEM;
  
         /* Stack limit test */
-       actual_size = size;
-       if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-               actual_size -= PAGE_SIZE;
-       if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+       if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                 return -ENOMEM;
  
         /* mlock limit tests */
@@ -2050,16 +2219,33 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
  int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  {
         struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *next;
+       unsigned long gap_addr;
         int error = 0;
  
         if (!(vma->vm_flags & VM_GROWSUP))
                 return -EFAULT;
  
-       /* Guard against wrapping around to address 0. */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else
+       /* Guard against exceeding limits of the address space. */
+       address &= PAGE_MASK;
+       if (address >= TASK_SIZE)
                 return -ENOMEM;
+       address += PAGE_SIZE;
+
+       /* Enforce stack_guard_gap */
+       gap_addr = address + stack_guard_gap;
+
+       /* Guard against overflow */
+       if (gap_addr < address || gap_addr > TASK_SIZE)
+               gap_addr = TASK_SIZE;
+
+       next = vma->vm_next;
+       if (next && next->vm_start < gap_addr &&
+                       (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
  
         /* We must make sure the anon_vma is allocated. */
         if (unlikely(anon_vma_prepare(vma)))
@@ -2104,7 +2290,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 if (vma->vm_next)
                                         vma_gap_update(vma->vm_next);
                                 else
-                                       mm->highest_vm_end = address;
+                                       mm->highest_vm_end = vm_end_gap(vma);
                                 spin_unlock(&mm->page_table_lock);
  
                                 perf_event_mmap(vma);
@@ -2125,6 +2311,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                    unsigned long address)
  {
         struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *prev;
         int error;
  
         address &= PAGE_MASK;
@@ -2132,6 +2319,15 @@ int expand_downwards(struct vm_area_struct *vma,
         if (error)
                 return error;
  
+       /* Enforce stack_guard_gap */
+       prev = vma->vm_prev;
+       /* Check that both stack segments have the same anon_vma? */
+       if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
+                       (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
+               if (address - prev->vm_end < stack_guard_gap)
+                       return -ENOMEM;
+       }
+
         /* We must make sure the anon_vma is allocated. */
         if (unlikely(anon_vma_prepare(vma)))
                 return -ENOMEM;
@@ -2186,28 +2382,25 @@ int expand_downwards(struct vm_area_struct *vma,
         return error;
  }
  
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+       unsigned long val;
+       char *endptr;
+
+       val = simple_strtoul(p, &endptr, 10);
+       if (!*endptr)
+               stack_guard_gap = val << PAGE_SHIFT;
+       /* Return 1 (handled) so the option is not passed on to init. */
+       return 1;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
  #ifdef CONFIG_STACK_GROWSUP
  int expand_stack(struct vm_area_struct *vma, unsigned long address)
  {
-       struct vm_area_struct *next;
-
-       address &= PAGE_MASK;
-       next = vma->vm_next;
-       if (next && next->vm_start == address + PAGE_SIZE) {
-               if (!(next->vm_flags & VM_GROWSUP))
-                       return -ENOMEM;
-       }
         return expand_upwards(vma, address);
  }
  
@@ -2229,14 +2422,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
  #else
  int expand_stack(struct vm_area_struct *vma, unsigned long address)
  {
-       struct vm_area_struct *prev;
-
-       address &= PAGE_MASK;
-       prev = vma->vm_prev;
-       if (prev && prev->vm_end == address) {
-               if (!(prev->vm_flags & VM_GROWSDOWN))
-                       return -ENOMEM;
-       }
         return expand_downwards(vma, address);
  }
  
@@ -2334,7 +2519,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
                 vma->vm_prev = prev;
                 vma_gap_update(vma);
         } else
-               mm->highest_vm_end = prev ? prev->vm_end : 0;
+               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
         tail_vma->vm_next = NULL;
  
         /* Kill the cache */
@@ -2380,7 +2565,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out_free_mpol;
  
         if (new->vm_file)
-               get_file(new->vm_file);
+               vma_get_file(new);
  
         if (new->vm_ops && new->vm_ops->open)
                 new->vm_ops->open(new);
@@ -2399,7 +2584,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
         if (new->vm_ops && new->vm_ops->close)
                 new->vm_ops->close(new);
         if (new->vm_file)
-               fput(new->vm_file);
+               vma_fput(new);
         unlink_anon_vmas(new);
   out_free_mpol:
         mpol_put(vma_policy(new));
@@ -2550,7 +2735,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
         struct vm_area_struct *vma;
         unsigned long populate = 0;
         unsigned long ret = -EINVAL;
-       struct file *file;
+       struct file *file, *prfile;
  
         pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
                      current->comm, current->pid);
@@ -2625,10 +2810,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                 }
         }
  
-       file = get_file(vma->vm_file);
+       vma_get_file(vma);
+       file = vma->vm_file;
+       prfile = vma->vm_prfile;
         ret = do_mmap_pgoff(vma->vm_file, start, size,
                         prot, flags, pgoff, &populate);
+       if (!IS_ERR_VALUE(ret) && file && prfile) {
+               struct vm_area_struct *new_vma;
+
+               new_vma = find_vma(mm, ret);
+               if (!new_vma->vm_prfile)
+                       new_vma->vm_prfile = prfile;
+               if (new_vma != vma)
+                       get_file(prfile);
+       }
+       /*
+        * Two explicit fput()s instead of vma_fput(vma),
+        * because vma may no longer be valid at this point.
+        */
         fput(file);
+       if (prfile)
+               fput(prfile);
  out:
         up_write(&mm->mmap_sem);
         if (populate)
@@ -2903,7 +3105,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                 if (anon_vma_clone(new_vma, vma))
                         goto out_free_mempol;
                 if (new_vma->vm_file)
-                       get_file(new_vma->vm_file);
+                       vma_get_file(new_vma);
                 if (new_vma->vm_ops && new_vma->vm_ops->open)
                         new_vma->vm_ops->open(new_vma);
                 vma_link(mm, new_vma, prev, rb_link, rb_parent);
@@ -3063,6 +3265,14 @@ out:
         return ERR_PTR(ret);
  }
  
+bool vma_is_special_mapping(const struct vm_area_struct *vma,
+       const struct vm_special_mapping *sm)
+{
+       return (vma->vm_ops == &special_mapping_vmops ||
+               vma->vm_ops == &legacy_special_mapping_vmops) &&
+               vma->vm_private_data == sm;
+}
+
  /*
   * Called with mm->mmap_sem held for writing.
   * Insert a new vma covering the given region, with the given flags.