index a8732fbed381a45bbce44fcdf0731ccfdc1a09ba..917c66584810c510b112036364644af24cbbcfeb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -369,6 +369,22 @@ restart:
                }
                spin_lock_irq(&mapping->tree_lock);
 
+               if (!entry) {
+                       /*
+                        * We needed to drop the page_tree lock while calling
+                        * radix_tree_preload() and we didn't have an entry to
+                        * lock.  See if another thread inserted an entry at
+                        * our index during this time.
+                        */
+                       entry = __radix_tree_lookup(&mapping->page_tree, index,
+                                       NULL, &slot);
+                       if (entry) {
+                               radix_tree_preload_end();
+                               spin_unlock_irq(&mapping->tree_lock);
+                               goto restart;
+                       }
+               }
+
                if (pmd_downgrade) {
                        radix_tree_delete(&mapping->page_tree, index);
                        mapping->nrexceptional--;
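
The new !entry branch above exists because tree_lock has to be dropped around radix_tree_preload(), which may sleep, so another thread can install an entry at the same index in the meantime and the lookup must be repeated once the lock is re-taken. The following is a minimal userspace sketch of the same drop-lock / recheck / retry shape, not kernel code: a pthread mutex and a fixed array stand in for tree_lock and the radix tree, and all names are illustrative.

#include <pthread.h>
#include <stdlib.h>

#define NSLOTS 64

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table[NSLOTS];

static void *get_or_create_slot(unsigned int index)
{
	void *entry, *new_entry;

restart:
	pthread_mutex_lock(&table_lock);
	entry = table[index];
	if (entry) {
		pthread_mutex_unlock(&table_lock);
		return entry;
	}
	/* Drop the lock for the allocation, which may sleep. */
	pthread_mutex_unlock(&table_lock);
	new_entry = malloc(128);
	if (!new_entry)
		return NULL;
	pthread_mutex_lock(&table_lock);
	if (table[index]) {
		/* Somebody inserted an entry at our index meanwhile: retry. */
		pthread_mutex_unlock(&table_lock);
		free(new_entry);
		goto restart;
	}
	table[index] = new_entry;
	pthread_mutex_unlock(&table_lock);
	return new_entry;
}

int main(void)
{
	return get_or_create_slot(3) ? 0 : 1;
}
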
@@ -384,19 +400,12 @@ restart:
                if (err) {
                        spin_unlock_irq(&mapping->tree_lock);
                        /*
-                        * Someone already created the entry?  This is a
-                        * normal failure when inserting PMDs in a range
-                        * that already contains PTEs.  In that case we want
-                        * to return -EEXIST immediately.
-                        */
-                       if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
-                               goto restart;
-                       /*
-                        * Our insertion of a DAX PMD entry failed, most
-                        * likely because it collided with a PTE sized entry
-                        * at a different index in the PMD range.  We haven't
-                        * inserted anything into the radix tree and have no
-                        * waiters to wake.
+                        * Our insertion of a DAX entry failed, most likely
+                        * because we were inserting a PMD entry and it
+                        * collided with a PTE sized entry at a different
+                        * index in the PMD range.  We haven't inserted
+                        * anything into the radix tree and have no waiters to
+                        * wake.
                         */
                        return ERR_PTR(err);
                }
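
grab_mapping_entry() reports the failure above through the kernel's ERR_PTR() convention, which the reworked fault handlers later in this patch unpack with IS_ERR()/PTR_ERR(). As a reminder of how that encoding works, here is a simplified, self-contained rendering of the idiom; the real macros live in <linux/err.h>, the ones below are local stand-ins for illustration only.

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	/* Errno values occupy the last page of the address space. */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup(int fail)
{
	static int value = 42;

	return fail ? ERR_PTR(-EEXIST) : &value;
}

int main(void)
{
	void *entry = lookup(1);

	if (IS_ERR(entry))
		printf("lookup failed: %ld\n", PTR_ERR(entry));
	return 0;
}
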
@@ -451,16 +460,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static int __dax_invalidate_mapping_entry(struct address_space *mapping,
+                                         pgoff_t index, bool trunc)
+{
+       int ret = 0;
+       void *entry;
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+
+       spin_lock_irq(&mapping->tree_lock);
+       entry = get_unlocked_mapping_entry(mapping, index, NULL);
+       if (!entry || !radix_tree_exceptional_entry(entry))
+               goto out;
+       if (!trunc &&
+           (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+            radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+               goto out;
+       radix_tree_delete(page_tree, index);
+       mapping->nrexceptional--;
+       ret = 1;
+out:
+       put_unlocked_mapping_entry(mapping, index, entry);
+       spin_unlock_irq(&mapping->tree_lock);
+       return ret;
+}
 /*
  * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
  * entry to get unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
-       void *entry;
+       int ret = __dax_invalidate_mapping_entry(mapping, index, true);
 
-       spin_lock_irq(&mapping->tree_lock);
-       entry = get_unlocked_mapping_entry(mapping, index, NULL);
        /*
         * This gets called from truncate / punch_hole path. As such, the caller
         * must hold locks protecting against concurrent modifications of the
@@ -468,16 +498,17 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
         * caller has seen exceptional entry for this index, we better find it
         * at that index as well...
         */
-       if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) {
-               spin_unlock_irq(&mapping->tree_lock);
-               return 0;
-       }
-       radix_tree_delete(&mapping->page_tree, index);
-       mapping->nrexceptional--;
-       spin_unlock_irq(&mapping->tree_lock);
-       dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+       WARN_ON_ONCE(!ret);
+       return ret;
+}
 
-       return 1;
+/*
+ * Invalidate exceptional DAX entry if it is clean.
+ */
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+                                     pgoff_t index)
+{
+       return __dax_invalidate_mapping_entry(mapping, index, false);
 }
 
 /*
@@ -488,15 +519,16 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
  * otherwise it will simply fall out of the page cache under memory
  * pressure without ever having been dirtied.
  */
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static int dax_load_hole(struct address_space *mapping, void **entry,
                         struct vm_fault *vmf)
 {
        struct page *page;
+       int ret;
 
        /* Hole page already exists? Return it...  */
-       if (!radix_tree_exceptional_entry(entry)) {
-               vmf->page = entry;
-               return VM_FAULT_LOCKED;
+       if (!radix_tree_exceptional_entry(*entry)) {
+               page = *entry;
+               goto out;
        }
 
        /* This will replace locked radix tree entry with a hole page */
@@ -504,8 +536,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
                                   vmf->gfp_mask | __GFP_ZERO);
        if (!page)
                return VM_FAULT_OOM;
+ out:
        vmf->page = page;
-       return VM_FAULT_LOCKED;
+       ret = finish_fault(vmf);
+       vmf->page = NULL;
+       *entry = page;
+       if (!ret) {
+               /* Grab reference for PTE that is now referencing the page */
+               get_page(page);
+               return VM_FAULT_NOPAGE;
+       }
+       return ret;
 }
 
 static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
@@ -630,8 +671,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
                                      pgoff_t index, unsigned long pfn)
 {
        struct vm_area_struct *vma;
-       pte_t *ptep;
-       pte_t pte;
+       pte_t pte, *ptep = NULL;
+       pmd_t *pmdp = NULL;
        spinlock_t *ptl;
        bool changed;
 
@@ -646,21 +687,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
                address = pgoff_address(index, vma);
                changed = false;
-               if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+               if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
                        continue;
-               if (pfn != pte_pfn(*ptep))
-                       goto unlock;
-               if (!pte_dirty(*ptep) && !pte_write(*ptep))
-                       goto unlock;
 
-               flush_cache_page(vma, address, pfn);
-               pte = ptep_clear_flush(vma, address, ptep);
-               pte = pte_wrprotect(pte);
-               pte = pte_mkclean(pte);
-               set_pte_at(vma->vm_mm, address, ptep, pte);
-               changed = true;
-unlock:
-               pte_unmap_unlock(ptep, ptl);
+               if (pmdp) {
+#ifdef CONFIG_FS_DAX_PMD
+                       pmd_t pmd;
+
+                       if (pfn != pmd_pfn(*pmdp))
+                               goto unlock_pmd;
+                       if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+                               goto unlock_pmd;
+
+                       flush_cache_page(vma, address, pfn);
+                       pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+                       pmd = pmd_wrprotect(pmd);
+                       pmd = pmd_mkclean(pmd);
+                       set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+                       changed = true;
+unlock_pmd:
+                       spin_unlock(ptl);
+#endif
+               } else {
+                       if (pfn != pte_pfn(*ptep))
+                               goto unlock_pte;
+                       if (!pte_dirty(*ptep) && !pte_write(*ptep))
+                               goto unlock_pte;
+
+                       flush_cache_page(vma, address, pfn);
+                       pte = ptep_clear_flush(vma, address, ptep);
+                       pte = pte_wrprotect(pte);
+                       pte = pte_mkclean(pte);
+                       set_pte_at(vma->vm_mm, address, ptep, pte);
+                       changed = true;
+unlock_pte:
+                       pte_unmap_unlock(ptep, ptl);
+               }
 
                if (changed)
                        mmu_notifier_invalidate_page(vma->vm_mm, address);
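
The PMD branch added above mirrors the existing PTE logic step for step: skip entries that do not map the pfn being cleaned or that are already clean and read-only, otherwise flush, clear the entry, drop the write bit, clear the dirty bit and reinstall it before notifying the MMU. A toy model of just that decision flow, using plain flag bits rather than real page-table entries (purely illustrative):

#include <stdbool.h>
#include <stdint.h>

#define F_WRITE	(1u << 0)
#define F_DIRTY	(1u << 1)

struct toy_pte {
	uint64_t pfn;
	unsigned int flags;
};

/* Returns true if the entry was downgraded and a flush/notify is due. */
static bool toy_mkclean(struct toy_pte *pte, uint64_t pfn)
{
	if (pte->pfn != pfn)
		return false;			/* maps some other page */
	if (!(pte->flags & (F_WRITE | F_DIRTY)))
		return false;			/* already clean and read-only */
	/* The real code clears and flushes the entry before modifying it. */
	pte->flags &= ~(F_WRITE | F_DIRTY);
	return true;
}

int main(void)
{
	struct toy_pte pte = { .pfn = 7, .flags = F_WRITE | F_DIRTY };

	return toy_mkclean(&pte, 7) ? 0 : 1;
}
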
@@ -908,7 +970,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
-#ifdef CONFIG_FS_IOMAP
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
        return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
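
dax_iomap_sector() above converts a byte position into a 512-byte sector: the page-aligned offset into the extent ((pos & PAGE_MASK) - iomap->offset) is shifted down by 9 and added to iomap->blkno, which iomap already expresses in 512-byte units. A standalone check with example numbers (the values are illustrative):

#include <assert.h>
#include <stdint.h>

#define PAGE_MASK (~((uint64_t)4096 - 1))

static uint64_t toy_iomap_sector(uint64_t blkno, uint64_t iomap_offset,
				 uint64_t pos)
{
	return blkno + (((pos & PAGE_MASK) - iomap_offset) >> 9);
}

int main(void)
{
	/* Extent starts at sector 2048 and covers file offset 1 MiB. */
	uint64_t sector = toy_iomap_sector(2048, 1024 * 1024,
					   1024 * 1024 + 8192 + 100);

	/* 8192 bytes past the extent start is 16 sectors further in. */
	assert(sector == 2048 + 16);
	return 0;
}
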
@@ -934,11 +995,27 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
        if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
                return -EIO;
 
+       /*
+        * A write can allocate a block for an area which has a hole page
+        * mapped into page tables. We have to tear down these mappings so
+        * that data written by write(2) is visible in mmap.
+        */
+       if (iomap->flags & IOMAP_F_NEW) {
+               invalidate_inode_pages2_range(inode->i_mapping,
+                                             pos >> PAGE_SHIFT,
+                                             (end - 1) >> PAGE_SHIFT);
+       }
+
        while (pos < end) {
                unsigned offset = pos & (PAGE_SIZE - 1);
                struct blk_dax_ctl dax = { 0 };
                ssize_t map_len;
 
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+
                dax.sector = dax_iomap_sector(iomap, pos);
                dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
                map_len = dax_map_atomic(iomap->bdev, &dax);
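
The IOMAP_F_NEW hunk above tears down hole-page mappings by turning the byte range [pos, end) into an inclusive page-index range for invalidate_inode_pages2_range(): first index pos >> PAGE_SHIFT, last index (end - 1) >> PAGE_SHIFT, where the "- 1" keeps a range that ends exactly on a page boundary from touching the following page. A quick standalone check of that arithmetic (illustrative values):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t pos = 3 * 4096 + 512;	/* range starts inside page 3 */
	uint64_t end = 6 * 4096;	/* ends exactly at the start of page 6 */

	assert((pos >> PAGE_SHIFT) == 3);
	assert(((end - 1) >> PAGE_SHIFT) == 5);	/* page 6 stays untouched */
	return 0;
}
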
@@ -992,23 +1069,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (iov_iter_rw(iter) == WRITE)
                flags |= IOMAP_WRITE;
 
-       /*
-        * Yes, even DAX files can have page cache attached to them:  A zeroed
-        * page is inserted into the pagecache when we have to serve a write
-        * fault on a hole.  It should never be dirtied and can simply be
-        * dropped from the pagecache once we get real data for the page.
-        *
-        * XXX: This is racy against mmap, and there's nothing we can do about
-        * it. We'll eventually need to shift this down even further so that
-        * we can check if we allocated blocks over a hole first.
-        */
-       if (mapping->nrpages) {
-               ret = invalidate_inode_pages2_range(mapping,
-                               pos >> PAGE_SHIFT,
-                               (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-       }
-
        while (iov_iter_count(iter)) {
                ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
                                iter, dax_iomap_actor);
@@ -1023,6 +1083,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
+static int dax_fault_return(int error)
+{
+       if (error == 0)
+               return VM_FAULT_NOPAGE;
+       if (error == -ENOMEM)
+               return VM_FAULT_OOM;
+       return VM_FAULT_SIGBUS;
+}
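
dax_fault_return() above folds the errno values the fault path can produce into the three VM_FAULT_* answers the MM core expects. A standalone check of that mapping; the VM_FAULT_* values below are defined locally only so the sketch builds on its own, so treat them as stand-ins rather than the kernel's definitions.

#include <assert.h>
#include <errno.h>

/* Local stand-ins so the sketch compiles outside the kernel tree. */
#define VM_FAULT_OOM	0x0001
#define VM_FAULT_SIGBUS	0x0002
#define VM_FAULT_NOPAGE	0x0100

static int toy_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	if (error == -ENOMEM)
		return VM_FAULT_OOM;
	return VM_FAULT_SIGBUS;
}

int main(void)
{
	assert(toy_fault_return(0) == VM_FAULT_NOPAGE);
	assert(toy_fault_return(-ENOMEM) == VM_FAULT_OOM);
	assert(toy_fault_return(-EIO) == VM_FAULT_SIGBUS);
	return 0;
}
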
+
 /**
  * dax_iomap_fault - handle a page fault on a DAX file
  * @vma: The virtual memory area where the fault occurred
@@ -1055,12 +1124,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        if (pos >= i_size_read(inode))
                return VM_FAULT_SIGBUS;
 
-       entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-       if (IS_ERR(entry)) {
-               error = PTR_ERR(entry);
-               goto out;
-       }
-
        if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
                flags |= IOMAP_WRITE;
 
@@ -1071,9 +1134,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
         */
        error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
        if (error)
-               goto unlock_entry;
+               return dax_fault_return(error);
        if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-               error = -EIO;           /* fs corruption? */
+               vmf_ret = dax_fault_return(-EIO);       /* fs corruption? */
+               goto finish_iomap;
+       }
+
+       entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+       if (IS_ERR(entry)) {
+               vmf_ret = dax_fault_return(PTR_ERR(entry));
                goto finish_iomap;
        }
 
@@ -1096,13 +1165,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                }
 
                if (error)
-                       goto finish_iomap;
+                       goto error_unlock_entry;
 
                __SetPageUptodate(vmf->cow_page);
                vmf_ret = finish_fault(vmf);
                if (!vmf_ret)
                        vmf_ret = VM_FAULT_DONE_COW;
-               goto finish_iomap;
+               goto unlock_entry;
        }
 
        switch (iomap.type) {
@@ -1114,12 +1183,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                }
                error = dax_insert_mapping(mapping, iomap.bdev, sector,
                                PAGE_SIZE, &entry, vma, vmf);
+               /* -EBUSY is fine, somebody else faulted on the same PTE */
+               if (error == -EBUSY)
+                       error = 0;
                break;
        case IOMAP_UNWRITTEN:
        case IOMAP_HOLE:
                if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-                       vmf_ret = dax_load_hole(mapping, entry, vmf);
-                       break;
+                       vmf_ret = dax_load_hole(mapping, &entry, vmf);
+                       goto unlock_entry;
                }
                /*FALLTHRU*/
        default:
@@ -1128,31 +1200,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                break;
        }
 
+ error_unlock_entry:
+       vmf_ret = dax_fault_return(error) | major;
+ unlock_entry:
+       put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
        if (ops->iomap_end) {
-               if (error || (vmf_ret & VM_FAULT_ERROR)) {
-                       /* keep previous error */
-                       ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-                                       &iomap);
-               } else {
-                       error = ops->iomap_end(inode, pos, PAGE_SIZE,
-                                       PAGE_SIZE, flags, &iomap);
-               }
-       }
- unlock_entry:
-       if (vmf_ret != VM_FAULT_LOCKED || error)
-               put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-       if (error == -ENOMEM)
-               return VM_FAULT_OOM | major;
-       /* -EBUSY is fine, somebody else faulted on the same PTE */
-       if (error < 0 && error != -EBUSY)
-               return VM_FAULT_SIGBUS | major;
-       if (vmf_ret) {
-               WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-               return vmf_ret;
+               int copied = PAGE_SIZE;
+
+               if (vmf_ret & VM_FAULT_ERROR)
+                       copied = 0;
+               /*
+                * The fault is done by now and there's no way back (another
+                * thread may already be happily using the PTE we have
+                * installed). Just ignore errors from ->iomap_end since we
+                * cannot do much with them.
+                */
+               ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
        }
-       return VM_FAULT_NOPAGE | major;
+       return vmf_ret;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);
 
@@ -1276,16 +1342,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
                goto fallback;
 
-       /*
-        * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-        * PMD or a HZP entry.  If it can't (because a 4k page is already in
-        * the tree, for instance), it will return -EEXIST and we just fall
-        * back to 4k entries.
-        */
-       entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-       if (IS_ERR(entry))
-               goto fallback;
-
        /*
         * Note that we don't use iomap_apply here.  We aren't doing I/O, only
         * setting up a mapping, so really we're using iomap_begin() as a way
@@ -1294,10 +1350,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        pos = (loff_t)pgoff << PAGE_SHIFT;
        error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
        if (error)
-               goto unlock_entry;
+               goto fallback;
+
        if (iomap.offset + iomap.length < pos + PMD_SIZE)
                goto finish_iomap;
 
+       /*
+        * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+        * PMD or a HZP entry.  If it can't (because a 4k page is already in
+        * the tree, for instance), it will return -EEXIST and we just fall
+        * back to 4k entries.
+        */
+       entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+       if (IS_ERR(entry))
+               goto finish_iomap;
+
        vmf.pgoff = pgoff;
        vmf.flags = flags;
        vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1310,7 +1377,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        case IOMAP_UNWRITTEN:
        case IOMAP_HOLE:
                if (WARN_ON_ONCE(write))
-                       goto finish_iomap;
+                       goto unlock_entry;
                result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
                                &entry);
                break;
@@ -1319,20 +1386,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                break;
        }
 
+ unlock_entry:
+       put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
        if (ops->iomap_end) {
-               if (result == VM_FAULT_FALLBACK) {
-                       ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-                                       &iomap);
-               } else {
-                       error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-                                       iomap_flags, &iomap);
-                       if (error)
-                               result = VM_FAULT_FALLBACK;
-               }
+               int copied = PMD_SIZE;
+
+               if (result == VM_FAULT_FALLBACK)
+                       copied = 0;
+               /*
+                * The fault is done by now and there's no way back (another
+                * thread may already be happily using the PMD we have
+                * installed). Just ignore errors from ->iomap_end since we
+                * cannot do much with them.
+                */
+               ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+                               &iomap);
        }
- unlock_entry:
-       put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
        if (result == VM_FAULT_FALLBACK) {
                split_huge_pmd(vma, pmd, address);
@@ -1342,4 +1412,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
 #endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */