char value[0];
};
+/*
+ * shmem_fallocate and shmem_writepage communicate via inode->i_private
+ * (with i_mutex making sure that it has only one user at a time):
+ * we would prefer not to enlarge the shmem inode just for that.
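+ *
+ * shmem_fallocate() points i_private at an on-stack shmem_falloc for
+ * the duration of the call, and clears it again before returning, both
+ * under inode->i_lock; shmem_writepage() inspects it under the same
+ * lock, bumping nr_unswapped whenever it declines to swap a page out.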
+ */
+struct shmem_falloc {
+ pgoff_t start; /* start of range currently being fallocated */
+ pgoff_t next; /* the next page offset to be fallocated */
+ pgoff_t nr_falloced; /* how many new pages have been fallocated */
+ pgoff_t nr_unswapped; /* how often writepage refused to swap out */
+};
+
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
mutex_lock(&shmem_swaplist_mutex);
/*
* We needed to drop mutex to make that restrictive page
- * allocation; but the inode might already be freed by now,
- * and we cannot refer to inode or mapping or info to check.
- * However, we do hold page lock on the PageSwapCache page,
- * so can check if that still has our reference remaining.
+ * allocation, but the inode might have been freed while we
+ * dropped it: although a racing shmem_evict_inode() cannot
+ * complete without emptying the radix_tree, our page lock
+ * on this swapcache page is not enough to prevent that -
+ * free_swap_and_cache() of our swap entry will only
+ * trylock_page(), removing swap from the radix_tree regardless.
+ *
+ * We must not proceed to shmem_add_to_page_cache() if the
+ * inode has been freed, but of course we cannot rely on
+ * inode or mapping or info to check that. However, we can
+ * safely check if our swap entry is still in use (and here
+ * it can't have been reused for another page): if it's still
+ * in use, then the inode cannot have been freed yet, and we
+ * can safely proceed (if it's no longer in use, that tells us
+ * nothing about the inode, but we don't need to unuse swap).
*/
if (!page_swapcount(*pagep))
error = -ENOENT;
/*
* There's a faint possibility that swap page was replaced before
- * caller locked it: it will come back later with the right page.
+ * caller locked it: caller will come back later with the right page.
*/
- if (unlikely(!PageSwapCache(page)))
+ if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
goto out;
/*
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a
* fallocated page arriving here is to initialize it and write it now.
+ *
+ * That's okay for a page already fallocated earlier, but if we have
+ * not yet completed the fallocation, then (a) we want to keep track
+ * of this page in case we have to undo it, and (b) it may not be a
+ * good idea to continue anyway, once we're pushing into swap. So
+ * reactivate the page, and let shmem_fallocate() quit when too many.
*/
if (!PageUptodate(page)) {
+ if (inode->i_private) {
+ struct shmem_falloc *shmem_falloc;
+ spin_lock(&inode->i_lock);
+ shmem_falloc = inode->i_private;
+ if (shmem_falloc &&
+ index >= shmem_falloc->start &&
+ index < shmem_falloc->next)
+ shmem_falloc->nr_unswapped++;
+ else
+ shmem_falloc = NULL;
+ spin_unlock(&inode->i_lock);
+ if (shmem_falloc)
+ goto redirty;
+ }
clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
newpage = shmem_alloc_page(gfp, info, index);
if (!newpage)
return -ENOMEM;
- VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
- *pagep = newpage;
page_cache_get(newpage);
copy_highpage(newpage, oldpage);
+ flush_dcache_page(newpage);
- VM_BUG_ON(!PageLocked(oldpage));
__set_page_locked(newpage);
- VM_BUG_ON(!PageUptodate(oldpage));
SetPageUptodate(newpage);
- VM_BUG_ON(!PageSwapBacked(oldpage));
SetPageSwapBacked(newpage);
- VM_BUG_ON(!swap_index);
set_page_private(newpage, swap_index);
- VM_BUG_ON(!PageSwapCache(oldpage));
SetPageSwapCache(newpage);
/*
spin_lock_irq(&swap_mapping->tree_lock);
error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
newpage);
- __inc_zone_page_state(newpage, NR_FILE_PAGES);
- __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+ if (!error) {
+ __inc_zone_page_state(newpage, NR_FILE_PAGES);
+ __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+ }
spin_unlock_irq(&swap_mapping->tree_lock);
- BUG_ON(error);
- mem_cgroup_replace_page_cache(oldpage, newpage);
- lru_cache_add_anon(newpage);
+ if (unlikely(error)) {
+ /*
+ * Is this possible? I think not, now that our callers check
+ * both PageSwapCache and page_private after getting page lock;
+ * but be defensive: point oldpage at newpage, so the clearing
+ * and freeing below are applied to the new page instead.
+ */
+ oldpage = newpage;
+ } else {
+ mem_cgroup_replace_page_cache(oldpage, newpage);
+ lru_cache_add_anon(newpage);
+ *pagep = newpage;
+ }
ClearPageSwapCache(oldpage);
set_page_private(oldpage, 0);
unlock_page(oldpage);
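+ /*
+ * Two references remain on oldpage in either case: the swap
+ * cache's and the caller's on success, or the two taken on
+ * newpage along the error path above. Drop both.
+ */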
page_cache_release(oldpage);
page_cache_release(oldpage);
- return 0;
+ return error;
}
/*
/* We have to do this with page locked to prevent races */
lock_page(page);
- if (!PageSwapCache(page) || page->mapping) {
+ if (!PageSwapCache(page) || page_private(page) != swap.val ||
+ page->mapping) {
error = -EEXIST; /* try again */
goto failed;
}
return error;
}
+/*
+ * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
+ */
+static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
+ pgoff_t index, pgoff_t end, int origin)
+{
+ struct page *page;
+ struct pagevec pvec;
+ pgoff_t indices[PAGEVEC_SIZE];
+ bool done = false;
+ int i;
+
+ pagevec_init(&pvec, 0);
+ pvec.nr = 1; /* start small: we may be there already */
+ while (!done) {
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ pvec.nr, pvec.pages, indices);
+ if (!pvec.nr) {
+ if (origin == SEEK_DATA)
+ index = end;
+ break;
+ }
+ for (i = 0; i < pvec.nr; i++, index++) {
+ if (index < indices[i]) {
+ if (origin == SEEK_HOLE) {
+ done = true;
+ break;
+ }
+ index = indices[i];
+ }
+ page = pvec.pages[i];
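+ /*
+ * An exceptional entry here is swap: that is data. A present
+ * page which is !Uptodate was merely fallocated, still reads
+ * as zeroes, and so counts as a hole.
+ */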
+ if (page && !radix_tree_exceptional_entry(page)) {
+ if (!PageUptodate(page))
+ page = NULL;
+ }
+ if (index >= end ||
+ (page && origin == SEEK_DATA) ||
+ (!page && origin == SEEK_HOLE)) {
+ done = true;
+ break;
+ }
+ }
+ shmem_deswap_pagevec(&pvec);
+ pagevec_release(&pvec);
+ pvec.nr = PAGEVEC_SIZE;
+ cond_resched();
+ }
+ return index;
+}
+
+static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct address_space *mapping;
+ struct inode *inode;
+ pgoff_t start, end;
+ loff_t new_offset;
+
+ if (origin != SEEK_DATA && origin != SEEK_HOLE)
+ return generic_file_llseek_size(file, offset, origin,
+ MAX_LFS_FILESIZE);
+ mapping = file->f_mapping;
+ inode = mapping->host;
+ mutex_lock(&inode->i_mutex);
+ /* We're holding i_mutex so we can access i_size directly */
+
+ if (offset < 0)
+ offset = -EINVAL;
+ else if (offset >= inode->i_size)
+ offset = -ENXIO;
+ else {
+ start = offset >> PAGE_CACHE_SHIFT;
+ end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ new_offset = shmem_seek_hole_data(mapping, start, end, origin);
+ new_offset <<= PAGE_CACHE_SHIFT;
+ if (new_offset > offset) {
+ if (new_offset < inode->i_size)
+ offset = new_offset;
+ else if (origin == SEEK_DATA)
+ offset = -ENXIO;
+ else
+ offset = inode->i_size;
+ }
+ }
+
+ if (offset >= 0 && offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ mutex_unlock(&inode->i_mutex);
+ return offset;
+}
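
A minimal userspace sketch of what the new llseek provides (the file
path, sizes, and 4KiB page size below are assumptions for illustration,
not part of the patch): write one byte into the middle of a sparse
tmpfs file, then probe it with SEEK_DATA and SEEK_HOLE.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/shm/seek-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0)
		return 1;

	if (ftruncate(fd, 65536) != 0 ||	/* sixteen pages of hole */
	    pwrite(fd, "x", 1, 32768) != 1)	/* one data byte in page 8 */
		return 1;

	/* first data at or after 0: expect 32768 (page granularity) */
	printf("SEEK_DATA from 0:     %lld\n",
	       (long long)lseek(fd, 0, SEEK_DATA));
	/* first hole at or after the data: expect 36864 (next page) */
	printf("SEEK_HOLE from 32768: %lld\n",
	       (long long)lseek(fd, 32768, SEEK_HOLE));
	/* at or beyond i_size, SEEK_DATA fails with ENXIO */
	if (lseek(fd, 65536, SEEK_DATA) < 0)
		perror("SEEK_DATA at EOF");

	close(fd);
	return 0;
}

Offsets come back rounded to page boundaries, since
shmem_seek_hole_data() walks the radix_tree a page at a time.
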
+
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t len)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct shmem_falloc shmem_falloc;
pgoff_t start, index, end;
int error;
goto out;
}
+ shmem_falloc.start = start;
+ shmem_falloc.next = start;
+ shmem_falloc.nr_falloced = 0;
+ shmem_falloc.nr_unswapped = 0;
+ spin_lock(&inode->i_lock);
+ inode->i_private = &shmem_falloc;
+ spin_unlock(&inode->i_lock);
+
for (index = start; index < end; index++) {
struct page *page;
*/
if (signal_pending(current))
error = -EINTR;
+ else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
+ error = -ENOMEM;
else
error = shmem_getpage(inode, index, &page, SGP_FALLOC,
NULL);
shmem_undo_range(inode,
(loff_t)start << PAGE_CACHE_SHIFT,
(loff_t)index << PAGE_CACHE_SHIFT, true);
- goto ctime;
+ goto undone;
}
+ /*
+ * Inform shmem_writepage() how far we have reached.
+ * No need for lock or barrier: we have the page lock.
+ */
+ shmem_falloc.next++;
+ if (!PageUptodate(page))
+ shmem_falloc.nr_falloced++;
+
/*
* If !PageUptodate, leave it that way so that freeable pages
* can be recognized if we need to roll back on error later.
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
-ctime:
inode->i_ctime = CURRENT_TIME;
+undone:
+ spin_lock(&inode->i_lock);
+ inode->i_private = NULL;
+ spin_unlock(&inode->i_lock);
out:
mutex_unlock(&inode->i_mutex);
return error;
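
The fallocate side from userspace, again a sketch under assumptions
(path and size arbitrary): with the nr_unswapped check above, a
preallocation that drives tmpfs into swap thrashing now fails with
ENOMEM, and shmem_undo_range() removes the pages added so far instead
of leaving a partial allocation behind.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/shm/falloc-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd < 0)
		return 1;

	/* preallocate 1MiB: tmpfs instantiates every page up front */
	if (fallocate(fd, 0, 0, 1 << 20) != 0) {
		if (errno == ENOMEM)
			fprintf(stderr, "fallocate: out of memory, "
					"fallocation undone\n");
		else
			perror("fallocate");
	}
	close(fd);
	return 0;
}
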
return dentry;
}
-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
- int connectable)
+static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
+ struct inode *parent)
{
- struct inode *inode = dentry->d_inode;
-
if (*len < 3) {
*len = 3;
return 255;
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
#ifdef CONFIG_TMPFS
- .llseek = generic_file_llseek,
+ .llseek = shmem_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = shmem_file_aio_read,