git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
Merge tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jun 2020 22:48:24 +0000 (15:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jun 2020 22:48:24 +0000 (15:48 -0700)
Pull fuse updates from Miklos Szeredi:

 - Fix a rare deadlock in virtiofs

 - Fix st_blocks in writeback cache mode

 - Fix wrong checks in splice move causing spurious warnings

 - Fix a race between a GETATTR request and a FUSE_NOTIFY_INVAL_INODE
   notification

 - Use rb-tree instead of linear search for pages currently under
   writeout by userspace

 - Fix copy_file_range() inconsistencies

* tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: copy_file_range should truncate cache
  fuse: fix copy_file_range cache issues
  fuse: optimize writepages search
  fuse: update attr_version counter on fuse_notify_inval_inode()
  fuse: don't check refcount after stealing page
  fuse: fix weird page warning
  fuse: use dump_page
  virtiofs: do not use fuse_fill_super_common() for device installation
  fuse: always allow query of st_dev
  fuse: always flush dirty data on close(2)
  fuse: invalidate inode attr in writeback cache mode
  fuse: Update stale comment in queue_interrupt()
  fuse: BUG_ON correction in fuse_dev_splice_write()
  virtiofs: Add mount option and atime behavior to the doc
  virtiofs: schedule blocking async replies in separate worker

1  2 
fs/fuse/dev.c
fs/fuse/file.c

diff --combined fs/fuse/dev.c
index 8ccc97356cb5704f68cbb16aa4a1e9c683bda6cf,ec97cabe51ce87a414e00ecd9f38831858c4c98f..02b3c36b36766adc2c5c6e3d69363b192a3edbd8
@@@ -342,7 -342,7 +342,7 @@@ static int queue_interrupt(struct fuse_
                list_add_tail(&req->intr_entry, &fiq->interrupts);
                /*
                 * Pairs with smp_mb() implied by test_and_set_bit()
-                * from request_end().
+                * from fuse_request_end().
                 */
                smp_mb();
                if (test_bit(FR_FINISHED, &req->flags)) {
@@@ -764,16 -764,15 +764,15 @@@ static int fuse_check_page(struct page 
  {
        if (page_mapcount(page) ||
            page->mapping != NULL ||
-           page_count(page) != 1 ||
            (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
             ~(1 << PG_locked |
               1 << PG_referenced |
               1 << PG_uptodate |
               1 << PG_lru |
               1 << PG_active |
-              1 << PG_reclaim))) {
-               pr_warn("trying to steal weird page\n");
-               pr_warn("  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+              1 << PG_reclaim |
+              1 << PG_waiters))) {
+               dump_page(page, "fuse: trying to steal weird page");
                return 1;
        }
        return 0;
@@@ -805,7 -804,7 +804,7 @@@ static int fuse_try_move_page(struct fu
        if (cs->len != PAGE_SIZE)
                goto out_fallback;
  
 -      if (pipe_buf_steal(cs->pipe, buf) != 0)
 +      if (!pipe_buf_try_steal(cs->pipe, buf))
                goto out_fallback;
  
        newpage = buf->page;
        get_page(newpage);
  
        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 -              lru_cache_add_file(newpage);
 +              lru_cache_add(newpage);
  
        err = 0;
        spin_lock(&cs->req->waitq.lock);
@@@ -1977,8 -1976,9 +1976,9 @@@ static ssize_t fuse_dev_splice_write(st
                struct pipe_buffer *ibuf;
                struct pipe_buffer *obuf;
  
-               BUG_ON(nbuf >= pipe->ring_size);
-               BUG_ON(tail == head);
+               if (WARN_ON(nbuf >= count || tail == head))
+                       goto out_free;
                ibuf = &pipe->bufs[tail & mask];
                obuf = &bufs[nbuf];
  
@@@ -2081,7 -2081,7 +2081,7 @@@ static void end_polls(struct fuse_conn 
   * The same effect is usually achievable through killing the filesystem daemon
   * and all users of the filesystem.  The exception is the combination of an
   * asynchronous request and the tricky deadlock (see
 - * Documentation/filesystems/fuse.txt).
 + * Documentation/filesystems/fuse.rst).
   *
   * Aborting requests under I/O goes as follows: 1: Separate out unlocked
   * requests, they should be finished off immediately.  Locked requests will be
diff --combined fs/fuse/file.c
index bac51c32d660263112182c7fdeb8c917b3c33455,336d1cf72da00743953bab6a8338f334b8e249b2..e573b0cd2737dc1f8f83489584c4e05af45050d3
@@@ -357,7 -357,7 +357,7 @@@ u64 fuse_lock_owner_id(struct fuse_con
  
  struct fuse_writepage_args {
        struct fuse_io_args ia;
-       struct list_head writepages_entry;
+       struct rb_node writepages_entry;
        struct list_head queue_entry;
        struct fuse_writepage_args *next;
        struct inode *inode;
  static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
                                            pgoff_t idx_from, pgoff_t idx_to)
  {
-       struct fuse_writepage_args *wpa;
+       struct rb_node *n;
+       n = fi->writepages.rb_node;
  
-       list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
+       while (n) {
+               struct fuse_writepage_args *wpa;
                pgoff_t curr_index;
  
+               wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
                WARN_ON(get_fuse_inode(wpa->inode) != fi);
                curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
-               if (idx_from < curr_index + wpa->ia.ap.num_pages &&
-                   curr_index <= idx_to) {
+               if (idx_from >= curr_index + wpa->ia.ap.num_pages)
+                       n = n->rb_right;
+               else if (idx_to < curr_index)
+                       n = n->rb_left;
+               else
                        return wpa;
-               }
        }
        return NULL;
  }
@@@ -445,9 -451,6 +451,6 @@@ static int fuse_flush(struct file *file
        if (is_bad_inode(inode))
                return -EIO;
  
-       if (fc->no_flush)
-               return 0;
        err = write_inode_now(inode, 1);
        if (err)
                return err;
        if (err)
                return err;
  
+       err = 0;
+       if (fc->no_flush)
+               goto inval_attr_out;
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        inarg.lock_owner = fuse_lock_owner_id(fc, id);
                fc->no_flush = 1;
                err = 0;
        }
+ inval_attr_out:
+       /*
+        * In memory i_blocks is not maintained by fuse, if writeback cache is
+        * enabled, i_blocks from cached attr may not be accurate.
+        */
+       if (!err && fc->writeback_cache)
+               fuse_invalidate_attr(inode);
        return err;
  }
  
@@@ -712,6 -727,7 +727,7 @@@ static ssize_t fuse_async_req_send(stru
        spin_unlock(&io->lock);
  
        ia->ap.args.end = fuse_aio_complete_req;
+       ia->ap.args.may_block = io->should_dirty;
        err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
        if (err)
                fuse_aio_complete_req(fc, &ia->ap.args, err);
@@@ -915,40 -931,84 +931,40 @@@ static void fuse_send_readpages(struct 
        fuse_readpages_end(fc, &ap->args, err);
  }
  
 -struct fuse_fill_data {
 -      struct fuse_io_args *ia;
 -      struct file *file;
 -      struct inode *inode;
 -      unsigned int nr_pages;
 -      unsigned int max_pages;
 -};
 -
 -static int fuse_readpages_fill(void *_data, struct page *page)
 +static void fuse_readahead(struct readahead_control *rac)
  {
 -      struct fuse_fill_data *data = _data;
 -      struct fuse_io_args *ia = data->ia;
 -      struct fuse_args_pages *ap = &ia->ap;
 -      struct inode *inode = data->inode;
 +      struct inode *inode = rac->mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
 +      unsigned int i, max_pages, nr_pages = 0;
  
 -      fuse_wait_on_page_writeback(inode, page->index);
 -
 -      if (ap->num_pages &&
 -          (ap->num_pages == fc->max_pages ||
 -           (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
 -           ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
 -              data->max_pages = min_t(unsigned int, data->nr_pages,
 -                                      fc->max_pages);
 -              fuse_send_readpages(ia, data->file);
 -              data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
 -              if (!ia) {
 -                      unlock_page(page);
 -                      return -ENOMEM;
 -              }
 -              ap = &ia->ap;
 -      }
 -
 -      if (WARN_ON(ap->num_pages >= data->max_pages)) {
 -              unlock_page(page);
 -              fuse_io_free(ia);
 -              return -EIO;
 -      }
 -
 -      get_page(page);
 -      ap->pages[ap->num_pages] = page;
 -      ap->descs[ap->num_pages].length = PAGE_SIZE;
 -      ap->num_pages++;
 -      data->nr_pages--;
 -      return 0;
 -}
 -
 -static int fuse_readpages(struct file *file, struct address_space *mapping,
 -                        struct list_head *pages, unsigned nr_pages)
 -{
 -      struct inode *inode = mapping->host;
 -      struct fuse_conn *fc = get_fuse_conn(inode);
 -      struct fuse_fill_data data;
 -      int err;
 -
 -      err = -EIO;
        if (is_bad_inode(inode))
 -              goto out;
 +              return;
  
 -      data.file = file;
 -      data.inode = inode;
 -      data.nr_pages = nr_pages;
 -      data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
 -;
 -      data.ia = fuse_io_alloc(NULL, data.max_pages);
 -      err = -ENOMEM;
 -      if (!data.ia)
 -              goto out;
 +      max_pages = min_t(unsigned int, fc->max_pages,
 +                      fc->max_read / PAGE_SIZE);
  
 -      err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 -      if (!err) {
 -              if (data.ia->ap.num_pages)
 -                      fuse_send_readpages(data.ia, file);
 -              else
 -                      fuse_io_free(data.ia);
 +      for (;;) {
 +              struct fuse_io_args *ia;
 +              struct fuse_args_pages *ap;
 +
 +              nr_pages = readahead_count(rac) - nr_pages;
 +              if (nr_pages > max_pages)
 +                      nr_pages = max_pages;
 +              if (nr_pages == 0)
 +                      break;
 +              ia = fuse_io_alloc(NULL, nr_pages);
 +              if (!ia)
 +                      return;
 +              ap = &ia->ap;
 +              nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
 +              for (i = 0; i < nr_pages; i++) {
 +                      fuse_wait_on_page_writeback(inode,
 +                                                  readahead_index(rac) + i);
 +                      ap->descs[i].length = PAGE_SIZE;
 +              }
 +              ap->num_pages = nr_pages;
 +              fuse_send_readpages(ia, rac->file);
        }
 -out:
 -      return err;
  }
  
  static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@@ -1570,7 -1630,7 +1586,7 @@@ static void fuse_writepage_finish(struc
        struct backing_dev_info *bdi = inode_to_bdi(inode);
        int i;
  
-       list_del(&wpa->writepages_entry);
+       rb_erase(&wpa->writepages_entry, &fi->writepages);
        for (i = 0; i < ap->num_pages; i++) {
                dec_wb_stat(&bdi->wb, WB_WRITEBACK);
                dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@@ -1658,6 -1718,36 +1674,36 @@@ __acquires(fi->lock
        }
  }
  
+ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+ {
+       pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
+       pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
+       struct rb_node **p = &root->rb_node;
+       struct rb_node  *parent = NULL;
+       WARN_ON(!wpa->ia.ap.num_pages);
+       while (*p) {
+               struct fuse_writepage_args *curr;
+               pgoff_t curr_index;
+               parent = *p;
+               curr = rb_entry(parent, struct fuse_writepage_args,
+                               writepages_entry);
+               WARN_ON(curr->inode != wpa->inode);
+               curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
+               if (idx_from >= curr_index + curr->ia.ap.num_pages)
+                       p = &(*p)->rb_right;
+               else if (idx_to < curr_index)
+                       p = &(*p)->rb_left;
+               else
+                       return (void) WARN_ON(true);
+       }
+       rb_link_node(&wpa->writepages_entry, parent, p);
+       rb_insert_color(&wpa->writepages_entry, root);
+ }
  static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
                               int error)
  {
                wpa->next = next->next;
                next->next = NULL;
                next->ia.ff = fuse_file_get(wpa->ia.ff);
-               list_add(&next->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, next);
  
                /*
                 * Skip fuse_flush_writepages() to make it easy to crop requests
@@@ -1811,7 -1901,7 +1857,7 @@@ static int fuse_writepage_locked(struc
        inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
  
        spin_lock(&fi->lock);
-       list_add(&wpa->writepages_entry, &fi->writepages);
+       tree_insert(&fi->writepages, wpa);
        list_add_tail(&wpa->queue_entry, &fi->queued_writes);
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
@@@ -1923,10 -2013,10 +1969,10 @@@ static bool fuse_writepage_in_flight(st
        WARN_ON(new_ap->num_pages != 0);
  
        spin_lock(&fi->lock);
-       list_del(&new_wpa->writepages_entry);
+       rb_erase(&new_wpa->writepages_entry, &fi->writepages);
        old_wpa = fuse_find_writeback(fi, page->index, page->index);
        if (!old_wpa) {
-               list_add(&new_wpa->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, new_wpa);
                spin_unlock(&fi->lock);
                return false;
        }
@@@ -2041,7 -2131,7 +2087,7 @@@ static int fuse_writepages_fill(struct 
                wpa->inode = inode;
  
                spin_lock(&fi->lock);
-               list_add(&wpa->writepages_entry, &fi->writepages);
+               tree_insert(&fi->writepages, wpa);
                spin_unlock(&fi->lock);
  
                data->wpa = wpa;
@@@ -3235,13 -3325,11 +3281,11 @@@ static ssize_t __fuse_copy_file_range(s
        if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
                return -EXDEV;
  
-       if (fc->writeback_cache) {
-               inode_lock(inode_in);
-               err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
-               inode_unlock(inode_in);
-               if (err)
-                       return err;
-       }
+       inode_lock(inode_in);
+       err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
+       inode_unlock(inode_in);
+       if (err)
+               return err;
  
        inode_lock(inode_out);
  
        if (err)
                goto out;
  
-       if (fc->writeback_cache) {
-               err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
-               if (err)
-                       goto out;
-       }
+       /*
+        * Write out dirty pages in the destination file before sending the COPY
+        * request to userspace.  After the request is completed, truncate off
+        * pages (including partial ones) from the cache that have been copied,
+        * since these contain stale data at that point.
+        *
+        * This should be mostly correct, but if the COPY writes to partial
+        * pages (at the start or end) and the parts not covered by the COPY are
+        * written through a memory map after calling fuse_writeback_range(),
+        * then these partial page modifications will be lost on truncation.
+        *
+        * It is unlikely that someone would rely on such mixed style
+        * modifications.  Yet this does give less guarantees than if the
+        * copying was performed with write(2).
+        *
+        * To fix this a i_mmap_sem style lock could be used to prevent new
+        * faults while the copy is ongoing.
+        */
+       err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
+       if (err)
+               goto out;
  
        if (is_unstable)
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
        if (err)
                goto out;
  
+       truncate_inode_pages_range(inode_out->i_mapping,
+                                  ALIGN_DOWN(pos_out, PAGE_SIZE),
+                                  ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
        if (fc->writeback_cache) {
                fuse_write_update_size(inode_out, pos_out + outarg.size);
                file_update_time(file_out);
@@@ -3329,10 -3437,10 +3393,10 @@@ static const struct file_operations fus
  
  static const struct address_space_operations fuse_file_aops  = {
        .readpage       = fuse_readpage,
 +      .readahead      = fuse_readahead,
        .writepage      = fuse_writepage,
        .writepages     = fuse_writepages,
        .launder_page   = fuse_launder_page,
 -      .readpages      = fuse_readpages,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .bmap           = fuse_bmap,
        .direct_IO      = fuse_direct_IO,
@@@ -3351,5 -3459,5 +3415,5 @@@ void fuse_init_file_inode(struct inode 
        INIT_LIST_HEAD(&fi->queued_writes);
        fi->writectr = 0;
        init_waitqueue_head(&fi->page_waitq);
-       INIT_LIST_HEAD(&fi->writepages);
+       fi->writepages = RB_ROOT;
  }