Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 9 Aug 2014 20:03:34 +0000 (13:03 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 9 Aug 2014 20:03:34 +0000 (13:03 -0700)
Pull CIFS updates from Steve French:
 "The most visible change in this set is the additional of multi-credit
  support for SMB2/SMB3 which dramatically improves the large file i/o
  performance for these dialects and significantly increases the maximum
  i/o size used on the wire for SMB2/SMB3.

  Also reconnection behavior after network failure is improved"
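
The multi-credit work mentioned above rests on SMB2/SMB3 credit-based flow
control: a sender must hold enough credits to cover the size of each request,
and credits only come back when the server's response grants them (or
immediately, when a send fails). The sketch below is a minimal userspace model
of that idea and nothing more; the helper names echo wait_mtu_credits() and
add_credits_and_wake_if() seen in the diff, but the implementation is
hypothetical and is not taken from fs/cifs.

    /*
     * Minimal userspace model of SMB2/SMB3 credit accounting -- an
     * illustrative sketch only.  The names mirror the kernel helpers
     * wait_mtu_credits() and add_credits_and_wake_if() from the diff
     * below, but nothing here is taken from fs/cifs.
     */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    struct credit_pool {
            pthread_mutex_t lock;
            pthread_cond_t  avail;
            unsigned int    credits;  /* credits currently granted by the "server" */
            unsigned int    mtu;      /* bytes covered by a single credit */
    };

    /* Block until at least one credit is free, then take as many as the
     * payload needs, capped by what is available.  Reports the resulting
     * wire size through *wsize and returns the credits taken. */
    static unsigned int wait_mtu_credits(struct credit_pool *p,
                                         unsigned int want_bytes,
                                         unsigned int *wsize)
    {
            unsigned int need = (want_bytes + p->mtu - 1) / p->mtu;
            unsigned int got;

            pthread_mutex_lock(&p->lock);
            while (p->credits == 0)
                    pthread_cond_wait(&p->avail, &p->lock);
            got = need < p->credits ? need : p->credits;
            p->credits -= got;
            pthread_mutex_unlock(&p->lock);

            *wsize = got * p->mtu;
            return got;
    }

    /* Hand credits back -- either because the response arrived or because
     * the send failed -- and wake any sender waiting for them. */
    static void add_credits_and_wake(struct credit_pool *p, unsigned int n)
    {
            pthread_mutex_lock(&p->lock);
            p->credits += n;
            pthread_cond_broadcast(&p->avail);
            pthread_mutex_unlock(&p->lock);
    }

    static struct credit_pool pool = {
            .lock    = PTHREAD_MUTEX_INITIALIZER,
            .avail   = PTHREAD_COND_INITIALIZER,
            .credits = 16,
            .mtu     = 65536,
    };

    static void *writer(void *arg)
    {
            unsigned int wsize, credits;

            /* Each writer wants to push 1MB; the grant decides the wire size. */
            credits = wait_mtu_credits(&pool, 1024 * 1024, &wsize);
            printf("writer %ld: sending %u bytes with %u credits\n",
                   (long)(intptr_t)arg, wsize, credits);
            usleep(10000);                        /* request "on the wire" */
            add_credits_and_wake(&pool, credits); /* response grants them back */
            return NULL;
    }

    int main(void)
    {
            pthread_t t[4];

            for (long i = 0; i < 4; i++)
                    pthread_create(&t[i], NULL, writer, (void *)(intptr_t)i);
            for (long i = 0; i < 4; i++)
                    pthread_join(t[i], NULL);
            return 0;
    }

Built with something like "cc -pthread credit_model.c -o credit_model" (file
name assumed), the model runs four writers competing for a 16-credit pool and
shows how larger wire sizes are granted only when credits are available.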

* 'for-next' of git://git.samba.org/sfrench/cifs-2.6: (35 commits)
  Add worker function to set allocation size
  [CIFS] Fix incorrect hex vs. decimal in some debug print statements
  update CIFS TODO list
  Add Pavel to contributor list in cifs AUTHORS file
  Update cifs version
  CIFS: Fix STATUS_CANNOT_DELETE error mapping for SMB2
  CIFS: Optimize readpages in a short read case on reconnects
  CIFS: Optimize cifs_user_read() in a short read case on reconnects
  CIFS: Improve indentation in cifs_user_read()
  CIFS: Fix possible buffer corruption in cifs_user_read()
  CIFS: Count got bytes in read_into_pages()
  CIFS: Use separate var for the number of bytes got in async read
  CIFS: Indicate reconnect with ECONNABORTED error code
  CIFS: Use multicredits for SMB 2.1/3 reads
  CIFS: Fix rsize usage for sync read
  CIFS: Fix rsize usage in user read
  CIFS: Separate page reading from user read
  CIFS: Fix rsize usage in readpages
  CIFS: Separate page search from readpages
  CIFS: Use multicredits for SMB 2.1/3 writes
  ...

fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/misc.c

diff --combined fs/cifs/connect.c
index b98366f21f9e83370a0dc9babb9e193b451e1891,b0427f6ea97135a3fe056688023ed842a17efb92..03ed8a09581ca0104468d1207ff0cadefd0f9390
@@@ -557,7 -557,7 +557,7 @@@ cifs_readv_from_socket(struct TCP_Serve
                try_to_freeze();
  
                if (server_unresponsive(server)) {
-                       total_read = -EAGAIN;
+                       total_read = -ECONNABORTED;
                        break;
                }
  
                        break;
                } else if (server->tcpStatus == CifsNeedReconnect) {
                        cifs_reconnect(server);
-                       total_read = -EAGAIN;
+                       total_read = -ECONNABORTED;
                        break;
                } else if (length == -ERESTARTSYS ||
                           length == -EAGAIN ||
                        cifs_dbg(FYI, "Received no data or error: expecting %d\n"
                                 "got %d", to_read, length);
                        cifs_reconnect(server);
-                       total_read = -EAGAIN;
+                       total_read = -ECONNABORTED;
                        break;
                }
        }
@@@ -786,7 -786,7 +786,7 @@@ standard_receive3(struct TCP_Server_Inf
                cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
                cifs_reconnect(server);
                wake_up(&server->response_q);
-               return -EAGAIN;
+               return -ECONNABORTED;
        }
  
        /* switch to large buffer if too big for a small one */
@@@ -3934,6 -3934,13 +3934,6 @@@ cifs_sb_master_tcon(struct cifs_sb_inf
        return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
  }
  
 -static int
 -cifs_sb_tcon_pending_wait(void *unused)
 -{
 -      schedule();
 -      return signal_pending(current) ? -ERESTARTSYS : 0;
 -}
 -
  /* find and return a tlink with given uid */
  static struct tcon_link *
  tlink_rb_search(struct rb_root *root, kuid_t uid)
@@@ -4032,10 -4039,11 +4032,10 @@@ cifs_sb_tlink(struct cifs_sb_info *cifs
        } else {
  wait_for_construction:
                ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
 -                                cifs_sb_tcon_pending_wait,
                                  TASK_INTERRUPTIBLE);
                if (ret) {
                        cifs_put_tlink(tlink);
 -                      return ERR_PTR(ret);
 +                      return ERR_PTR(-ERESTARTSYS);
                }
  
                /* if it's good, return it */
diff --combined fs/cifs/file.c
index b88b1ade4d3d2589342b3c30217a9aa5ee54b09f,12b64e02eee1de915bfabca4cb04a35a158934a3..4ab2f79ffa7a4eb7f2e6ac3a2a8d67131d8f8080
@@@ -1670,8 -1670,8 +1670,8 @@@ cifs_write(struct cifsFileInfo *open_fi
                                        break;
                        }
  
-                       len = min((size_t)cifs_sb->wsize,
-                                 write_size - total_written);
+                       len = min(server->ops->wp_retry_size(dentry->d_inode),
+                                 (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
@@@ -1878,15 -1878,163 +1878,163 @@@ static int cifs_partialpagewrite(struc
        return rc;
  }
  
+ static struct cifs_writedata *
+ wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
+                         pgoff_t end, pgoff_t *index,
+                         unsigned int *found_pages)
+ {
+       unsigned int nr_pages;
+       struct page **pages;
+       struct cifs_writedata *wdata;
+       wdata = cifs_writedata_alloc((unsigned int)tofind,
+                                    cifs_writev_complete);
+       if (!wdata)
+               return NULL;
+       /*
+        * find_get_pages_tag seems to return a max of 256 on each
+        * iteration, so we must call it several times in order to
+        * fill the array or the wsize is effectively limited to
+        * 256 * PAGE_CACHE_SIZE.
+        */
+       *found_pages = 0;
+       pages = wdata->pages;
+       do {
+               nr_pages = find_get_pages_tag(mapping, index,
+                                             PAGECACHE_TAG_DIRTY, tofind,
+                                             pages);
+               *found_pages += nr_pages;
+               tofind -= nr_pages;
+               pages += nr_pages;
+       } while (nr_pages && tofind && *index <= end);
+       return wdata;
+ }
+ static unsigned int
+ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
+                   struct address_space *mapping,
+                   struct writeback_control *wbc,
+                   pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
+ {
+       unsigned int nr_pages = 0, i;
+       struct page *page;
+       for (i = 0; i < found_pages; i++) {
+               page = wdata->pages[i];
+               /*
+                * At this point we hold neither mapping->tree_lock nor
+                * lock on the page itself: the page may be truncated or
+                * invalidated (changing page->mapping to NULL), or even
+                * swizzled back from swapper_space to tmpfs file
+                * mapping
+                */
+               if (nr_pages == 0)
+                       lock_page(page);
+               else if (!trylock_page(page))
+                       break;
+               if (unlikely(page->mapping != mapping)) {
+                       unlock_page(page);
+                       break;
+               }
+               if (!wbc->range_cyclic && page->index > end) {
+                       *done = true;
+                       unlock_page(page);
+                       break;
+               }
+               if (*next && (page->index != *next)) {
+                       /* Not next consecutive page */
+                       unlock_page(page);
+                       break;
+               }
+               if (wbc->sync_mode != WB_SYNC_NONE)
+                       wait_on_page_writeback(page);
+               if (PageWriteback(page) ||
+                               !clear_page_dirty_for_io(page)) {
+                       unlock_page(page);
+                       break;
+               }
+               /*
+                * This actually clears the dirty bit in the radix tree.
+                * See cifs_writepage() for more commentary.
+                */
+               set_page_writeback(page);
+               if (page_offset(page) >= i_size_read(mapping->host)) {
+                       *done = true;
+                       unlock_page(page);
+                       end_page_writeback(page);
+                       break;
+               }
+               wdata->pages[i] = page;
+               *next = page->index + 1;
+               ++nr_pages;
+       }
+       /* reset index to refind any pages skipped */
+       if (nr_pages == 0)
+               *index = wdata->pages[0]->index + 1;
+       /* put any pages we aren't going to use */
+       for (i = nr_pages; i < found_pages; i++) {
+               page_cache_release(wdata->pages[i]);
+               wdata->pages[i] = NULL;
+       }
+       return nr_pages;
+ }
+ static int
+ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
+                struct address_space *mapping, struct writeback_control *wbc)
+ {
+       int rc = 0;
+       struct TCP_Server_Info *server;
+       unsigned int i;
+       wdata->sync_mode = wbc->sync_mode;
+       wdata->nr_pages = nr_pages;
+       wdata->offset = page_offset(wdata->pages[0]);
+       wdata->pagesz = PAGE_CACHE_SIZE;
+       wdata->tailsz = min(i_size_read(mapping->host) -
+                       page_offset(wdata->pages[nr_pages - 1]),
+                       (loff_t)PAGE_CACHE_SIZE);
+       wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
+       if (wdata->cfile != NULL)
+               cifsFileInfo_put(wdata->cfile);
+       wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
+       if (!wdata->cfile) {
+               cifs_dbg(VFS, "No writable handles for inode\n");
+               rc = -EBADF;
+       } else {
+               wdata->pid = wdata->cfile->pid;
+               server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+               rc = server->ops->async_writev(wdata, cifs_writedata_release);
+       }
+       for (i = 0; i < nr_pages; ++i)
+               unlock_page(wdata->pages[i]);
+       return rc;
+ }
  static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
  {
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+       struct TCP_Server_Info *server;
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
-       struct TCP_Server_Info *server;
-       struct page *page;
        int rc = 0;
  
        /*
                        range_whole = true;
                scanned = true;
        }
+       server = cifs_sb_master_tcon(cifs_sb)->ses->server;
  retry:
        while (!done && index <= end) {
-               unsigned int i, nr_pages, found_pages;
-               pgoff_t next = 0, tofind;
-               struct page **pages;
+               unsigned int i, nr_pages, found_pages, wsize, credits;
+               pgoff_t next = 0, tofind, saved_index = index;
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+                                                  &wsize, &credits);
+               if (rc)
+                       break;
  
-               tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
-                               end - index) + 1;
+               tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
  
-               wdata = cifs_writedata_alloc((unsigned int)tofind,
-                                            cifs_writev_complete);
+               wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
+                                                 &found_pages);
                if (!wdata) {
                        rc = -ENOMEM;
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
-               /*
-                * find_get_pages_tag seems to return a max of 256 on each
-                * iteration, so we must call it several times in order to
-                * fill the array or the wsize is effectively limited to
-                * 256 * PAGE_CACHE_SIZE.
-                */
-               found_pages = 0;
-               pages = wdata->pages;
-               do {
-                       nr_pages = find_get_pages_tag(mapping, &index,
-                                                       PAGECACHE_TAG_DIRTY,
-                                                       tofind, pages);
-                       found_pages += nr_pages;
-                       tofind -= nr_pages;
-                       pages += nr_pages;
-               } while (nr_pages && tofind && index <= end);
                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
-               nr_pages = 0;
-               for (i = 0; i < found_pages; i++) {
-                       page = wdata->pages[i];
-                       /*
-                        * At this point we hold neither mapping->tree_lock nor
-                        * lock on the page itself: the page may be truncated or
-                        * invalidated (changing page->mapping to NULL), or even
-                        * swizzled back from swapper_space to tmpfs file
-                        * mapping
-                        */
-                       if (nr_pages == 0)
-                               lock_page(page);
-                       else if (!trylock_page(page))
-                               break;
-                       if (unlikely(page->mapping != mapping)) {
-                               unlock_page(page);
-                               break;
-                       }
-                       if (!wbc->range_cyclic && page->index > end) {
-                               done = true;
-                               unlock_page(page);
-                               break;
-                       }
-                       if (next && (page->index != next)) {
-                               /* Not next consecutive page */
-                               unlock_page(page);
-                               break;
-                       }
-                       if (wbc->sync_mode != WB_SYNC_NONE)
-                               wait_on_page_writeback(page);
-                       if (PageWriteback(page) ||
-                                       !clear_page_dirty_for_io(page)) {
-                               unlock_page(page);
-                               break;
-                       }
-                       /*
-                        * This actually clears the dirty bit in the radix tree.
-                        * See cifs_writepage() for more commentary.
-                        */
-                       set_page_writeback(page);
-                       if (page_offset(page) >= i_size_read(mapping->host)) {
-                               done = true;
-                               unlock_page(page);
-                               end_page_writeback(page);
-                               break;
-                       }
-                       wdata->pages[i] = page;
-                       next = page->index + 1;
-                       ++nr_pages;
-               }
-               /* reset index to refind any pages skipped */
-               if (nr_pages == 0)
-                       index = wdata->pages[0]->index + 1;
-               /* put any pages we aren't going to use */
-               for (i = nr_pages; i < found_pages; i++) {
-                       page_cache_release(wdata->pages[i]);
-                       wdata->pages[i] = NULL;
-               }
+               nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
+                                              end, &index, &next, &done);
  
                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
+                       add_credits_and_wake_if(server, credits, 0);
                        continue;
                }
  
-               wdata->sync_mode = wbc->sync_mode;
-               wdata->nr_pages = nr_pages;
-               wdata->offset = page_offset(wdata->pages[0]);
-               wdata->pagesz = PAGE_CACHE_SIZE;
-               wdata->tailsz =
-                       min(i_size_read(mapping->host) -
-                           page_offset(wdata->pages[nr_pages - 1]),
-                           (loff_t)PAGE_CACHE_SIZE);
-               wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
-                                       wdata->tailsz;
-               do {
-                       if (wdata->cfile != NULL)
-                               cifsFileInfo_put(wdata->cfile);
-                       wdata->cfile = find_writable_file(CIFS_I(mapping->host),
-                                                         false);
-                       if (!wdata->cfile) {
-                               cifs_dbg(VFS, "No writable handles for inode\n");
-                               rc = -EBADF;
-                               break;
-                       }
-                       wdata->pid = wdata->cfile->pid;
-                       server = tlink_tcon(wdata->cfile->tlink)->ses->server;
-                       rc = server->ops->async_writev(wdata,
-                                                       cifs_writedata_release);
-               } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+               wdata->credits = credits;
  
-               for (i = 0; i < nr_pages; ++i)
-                       unlock_page(wdata->pages[i]);
+               rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
  
                /* send failure -- clean up the mess */
                if (rc != 0) {
+                       add_credits_and_wake_if(server, wdata->credits, 0);
                        for (i = 0; i < nr_pages; ++i) {
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                }
                kref_put(&wdata->refcount, cifs_writedata_release);
  
+               if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
+                       index = saved_index;
+                       continue;
+               }
                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;
@@@ -2362,123 -2413,109 +2413,109 @@@ cifs_uncached_writev_complete(struct wo
        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
  }
  
- /* attempt to send write to server, retry on any -EAGAIN errors */
  static int
- cifs_uncached_retry_writev(struct cifs_writedata *wdata)
+ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
+                     size_t *len, unsigned long *num_pages)
  {
-       int rc;
-       struct TCP_Server_Info *server;
+       size_t save_len, copied, bytes, cur_len = *len;
+       unsigned long i, nr_pages = *num_pages;
  
-       server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+       save_len = cur_len;
+       for (i = 0; i < nr_pages; i++) {
+               bytes = min_t(const size_t, cur_len, PAGE_SIZE);
+               copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
+               cur_len -= copied;
+               /*
+                * If we didn't copy as much as we expected, then that
+                * may mean we trod into an unmapped area. Stop copying
+                * at that point. On the next pass through the big
+                * loop, we'll likely end up getting a zero-length
+                * write and bailing out of it.
+                */
+               if (copied < bytes)
+                       break;
+       }
+       cur_len = save_len - cur_len;
+       *len = cur_len;
  
-       do {
-               if (wdata->cfile->invalidHandle) {
-                       rc = cifs_reopen_file(wdata->cfile, false);
-                       if (rc != 0)
-                               continue;
-               }
-               rc = server->ops->async_writev(wdata,
-                                              cifs_uncached_writedata_release);
-       } while (rc == -EAGAIN);
+       /*
+        * If we have no data to send, then that probably means that
+        * the copy above failed altogether. That's most likely because
+        * the address in the iovec was bogus. Return -EFAULT and let
+        * the caller free anything we allocated and bail out.
+        */
+       if (!cur_len)
+               return -EFAULT;
  
-       return rc;
+       /*
+        * i + 1 now represents the number of pages we actually used in
+        * the copy phase above.
+        */
+       *num_pages = i + 1;
+       return 0;
  }
  
- static ssize_t
- cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+ static int
+ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
+                    struct cifsFileInfo *open_file,
+                    struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
  {
-       unsigned long nr_pages, i;
-       size_t bytes, copied, len, cur_len;
-       ssize_t total_written = 0;
-       loff_t offset;
-       struct cifsFileInfo *open_file;
-       struct cifs_tcon *tcon;
-       struct cifs_sb_info *cifs_sb;
-       struct cifs_writedata *wdata, *tmp;
-       struct list_head wdata_list;
-       int rc;
+       int rc = 0;
+       size_t cur_len;
+       unsigned long nr_pages, num_pages, i;
+       struct cifs_writedata *wdata;
+       struct iov_iter saved_from;
+       loff_t saved_offset = offset;
        pid_t pid;
-       len = iov_iter_count(from);
-       rc = generic_write_checks(file, poffset, &len, 0);
-       if (rc)
-               return rc;
-       if (!len)
-               return 0;
-       iov_iter_truncate(from, len);
-       INIT_LIST_HEAD(&wdata_list);
-       cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-       open_file = file->private_data;
-       tcon = tlink_tcon(open_file->tlink);
-       if (!tcon->ses->server->ops->async_writev)
-               return -ENOSYS;
-       offset = *poffset;
+       struct TCP_Server_Info *server;
  
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;
  
+       server = tlink_tcon(open_file->tlink)->ses->server;
+       memcpy(&saved_from, from, sizeof(struct iov_iter));
        do {
-               size_t save_len;
+               unsigned int wsize, credits;
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+                                                  &wsize, &credits);
+               if (rc)
+                       break;
  
-               nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+               nr_pages = get_numpages(wsize, len, &cur_len);
                wdata = cifs_writedata_alloc(nr_pages,
                                             cifs_uncached_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
                if (rc) {
                        kfree(wdata);
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
-               save_len = cur_len;
-               for (i = 0; i < nr_pages; i++) {
-                       bytes = min_t(size_t, cur_len, PAGE_SIZE);
-                       copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
-                                                    from);
-                       cur_len -= copied;
-                       /*
-                        * If we didn't copy as much as we expected, then that
-                        * may mean we trod into an unmapped area. Stop copying
-                        * at that point. On the next pass through the big
-                        * loop, we'll likely end up getting a zero-length
-                        * write and bailing out of it.
-                        */
-                       if (copied < bytes)
-                               break;
-               }
-               cur_len = save_len - cur_len;
-               /*
-                * If we have no data to send, then that probably means that
-                * the copy above failed altogether. That's most likely because
-                * the address in the iovec was bogus. Set the rc to -EFAULT,
-                * free anything we allocated and bail out.
-                */
-               if (!cur_len) {
+               num_pages = nr_pages;
+               rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
+               if (rc) {
                        for (i = 0; i < nr_pages; i++)
                                put_page(wdata->pages[i]);
                        kfree(wdata);
-                       rc = -EFAULT;
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
                /*
-                * i + 1 now represents the number of pages we actually used in
-                * the copy phase above. Bring nr_pages down to that, and free
-                * any pages that we didn't use.
+                * Bring nr_pages down to the number of pages we actually used,
+                * and free any pages that we didn't use.
                 */
-               for ( ; nr_pages > i + 1; nr_pages--)
+               for ( ; nr_pages > num_pages; nr_pages--)
                        put_page(wdata->pages[nr_pages - 1]);
  
                wdata->sync_mode = WB_SYNC_ALL;
                wdata->bytes = cur_len;
                wdata->pagesz = PAGE_SIZE;
                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
-               rc = cifs_uncached_retry_writev(wdata);
+               wdata->credits = credits;
+               if (!wdata->cfile->invalidHandle ||
+                   !cifs_reopen_file(wdata->cfile, false))
+                       rc = server->ops->async_writev(wdata,
+                                       cifs_uncached_writedata_release);
                if (rc) {
+                       add_credits_and_wake_if(server, wdata->credits, 0);
                        kref_put(&wdata->refcount,
                                 cifs_uncached_writedata_release);
+                       if (rc == -EAGAIN) {
+                               memcpy(from, &saved_from,
+                                      sizeof(struct iov_iter));
+                               iov_iter_advance(from, offset - saved_offset);
+                               continue;
+                       }
                        break;
                }
  
-               list_add_tail(&wdata->list, &wdata_list);
+               list_add_tail(&wdata->list, wdata_list);
                offset += cur_len;
                len -= cur_len;
        } while (len > 0);
  
+       return rc;
+ }
+ static ssize_t
+ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+ {
+       size_t len;
+       ssize_t total_written = 0;
+       struct cifsFileInfo *open_file;
+       struct cifs_tcon *tcon;
+       struct cifs_sb_info *cifs_sb;
+       struct cifs_writedata *wdata, *tmp;
+       struct list_head wdata_list;
+       struct iov_iter saved_from;
+       int rc;
+       len = iov_iter_count(from);
+       rc = generic_write_checks(file, poffset, &len, 0);
+       if (rc)
+               return rc;
+       if (!len)
+               return 0;
+       iov_iter_truncate(from, len);
+       INIT_LIST_HEAD(&wdata_list);
+       cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+       open_file = file->private_data;
+       tcon = tlink_tcon(open_file->tlink);
+       if (!tcon->ses->server->ops->async_writev)
+               return -ENOSYS;
+       memcpy(&saved_from, from, sizeof(struct iov_iter));
+       rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
+                                 &wdata_list);
        /*
         * If at least one write was successfully sent, then discard any rc
         * value from the later writes. If the other write succeeds, then
@@@ -2529,7 -2617,25 +2617,25 @@@ restart_loop
  
                        /* resend call if it's a retryable error */
                        if (rc == -EAGAIN) {
-                               rc = cifs_uncached_retry_writev(wdata);
+                               struct list_head tmp_list;
+                               struct iov_iter tmp_from;
+                               INIT_LIST_HEAD(&tmp_list);
+                               list_del_init(&wdata->list);
+                               memcpy(&tmp_from, &saved_from,
+                                      sizeof(struct iov_iter));
+                               iov_iter_advance(&tmp_from,
+                                                wdata->offset - *poffset);
+                               rc = cifs_write_from_iter(wdata->offset,
+                                               wdata->bytes, &tmp_from,
+                                               open_file, cifs_sb, &tmp_list);
+                               list_splice(&tmp_list, &wdata_list);
+                               kref_put(&wdata->refcount,
+                                        cifs_uncached_writedata_release);
                                goto restart_loop;
                        }
                }
@@@ -2722,26 -2828,6 +2828,6 @@@ cifs_uncached_readdata_release(struct k
        cifs_readdata_release(refcount);
  }
  
- static int
- cifs_retry_async_readv(struct cifs_readdata *rdata)
- {
-       int rc;
-       struct TCP_Server_Info *server;
-       server = tlink_tcon(rdata->cfile->tlink)->ses->server;
-       do {
-               if (rdata->cfile->invalidHandle) {
-                       rc = cifs_reopen_file(rdata->cfile, true);
-                       if (rc != 0)
-                               continue;
-               }
-               rc = server->ops->async_readv(rdata);
-       } while (rc == -EAGAIN);
-       return rc;
- }
  /**
   * cifs_readdata_to_iov - copy data from pages in response to an iovec
   * @rdata:    the readdata response with list of pages holding data
  static int
  cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
  {
-       size_t remaining = rdata->bytes;
+       size_t remaining = rdata->got_bytes;
        unsigned int i;
  
        for (i = 0; i < rdata->nr_pages; i++) {
@@@ -2782,11 -2868,12 +2868,12 @@@ static in
  cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
  {
-       int total_read = 0, result = 0;
+       int result = 0;
        unsigned int i;
        unsigned int nr_pages = rdata->nr_pages;
        struct kvec iov;
  
+       rdata->got_bytes = 0;
        rdata->tailsz = PAGE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];
                if (result < 0)
                        break;
  
-               total_read += result;
+               rdata->got_bytes += result;
        }
  
-       return total_read > 0 ? total_read : result;
+       return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+                                               rdata->got_bytes : result;
  }
  
- ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+ static int
+ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
+                    struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
  {
-       struct file *file = iocb->ki_filp;
-       ssize_t rc;
-       size_t len, cur_len;
-       ssize_t total_read = 0;
-       loff_t offset = iocb->ki_pos;
-       unsigned int npages;
-       struct cifs_sb_info *cifs_sb;
-       struct cifs_tcon *tcon;
-       struct cifsFileInfo *open_file;
-       struct cifs_readdata *rdata, *tmp;
-       struct list_head rdata_list;
+       struct cifs_readdata *rdata;
+       unsigned int npages, rsize, credits;
+       size_t cur_len;
+       int rc;
        pid_t pid;
+       struct TCP_Server_Info *server;
  
-       len = iov_iter_count(to);
-       if (!len)
-               return 0;
-       INIT_LIST_HEAD(&rdata_list);
-       cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-       open_file = file->private_data;
-       tcon = tlink_tcon(open_file->tlink);
-       if (!tcon->ses->server->ops->async_readv)
-               return -ENOSYS;
+       server = tlink_tcon(open_file->tlink)->ses->server;
  
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;
  
-       if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-               cifs_dbg(FYI, "attempting read on write only file instance\n");
        do {
-               cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+                                                  &rsize, &credits);
+               if (rc)
+                       break;
+               cur_len = min_t(const size_t, len, rsize);
                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
  
                /* allocate a readdata struct */
                rdata = cifs_readdata_alloc(npages,
                                            cifs_uncached_readv_complete);
                if (!rdata) {
+                       add_credits_and_wake_if(server, credits, 0);
                        rc = -ENOMEM;
                        break;
                }
                rdata->pid = pid;
                rdata->pagesz = PAGE_SIZE;
                rdata->read_into_pages = cifs_uncached_read_into_pages;
+               rdata->credits = credits;
  
-               rc = cifs_retry_async_readv(rdata);
+               if (!rdata->cfile->invalidHandle ||
+                   !cifs_reopen_file(rdata->cfile, true))
+                       rc = server->ops->async_readv(rdata);
  error:
                if (rc) {
+                       add_credits_and_wake_if(server, rdata->credits, 0);
                        kref_put(&rdata->refcount,
                                 cifs_uncached_readdata_release);
+                       if (rc == -EAGAIN)
+                               continue;
                        break;
                }
  
-               list_add_tail(&rdata->list, &rdata_list);
+               list_add_tail(&rdata->list, rdata_list);
                offset += cur_len;
                len -= cur_len;
        } while (len > 0);
  
+       return rc;
+ }
+ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+ {
+       struct file *file = iocb->ki_filp;
+       ssize_t rc;
+       size_t len;
+       ssize_t total_read = 0;
+       loff_t offset = iocb->ki_pos;
+       struct cifs_sb_info *cifs_sb;
+       struct cifs_tcon *tcon;
+       struct cifsFileInfo *open_file;
+       struct cifs_readdata *rdata, *tmp;
+       struct list_head rdata_list;
+       len = iov_iter_count(to);
+       if (!len)
+               return 0;
+       INIT_LIST_HEAD(&rdata_list);
+       cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+       open_file = file->private_data;
+       tcon = tlink_tcon(open_file->tlink);
+       if (!tcon->ses->server->ops->async_readv)
+               return -ENOSYS;
+       if ((file->f_flags & O_ACCMODE) == O_WRONLY)
+               cifs_dbg(FYI, "attempting read on write only file instance\n");
+       rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
        /* if at least one read request send succeeded, then reset rc */
        if (!list_empty(&rdata_list))
                rc = 0;
  
        len = iov_iter_count(to);
        /* the loop below should proceed in the order of increasing offsets */
+ again:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
-       again:
                if (!rc) {
                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
-                       else if (rdata->result) {
-                               rc = rdata->result;
+                       else if (rdata->result == -EAGAIN) {
                                /* resend call if it's a retryable error */
-                               if (rc == -EAGAIN) {
-                                       rc = cifs_retry_async_readv(rdata);
-                                       goto again;
+                               struct list_head tmp_list;
+                               unsigned int got_bytes = rdata->got_bytes;
+                               list_del_init(&rdata->list);
+                               INIT_LIST_HEAD(&tmp_list);
+                               /*
+                                * Got a part of data and then reconnect has
+                                * happened -- fill the buffer and continue
+                                * reading.
+                                */
+                               if (got_bytes && got_bytes < rdata->bytes) {
+                                       rc = cifs_readdata_to_iov(rdata, to);
+                                       if (rc) {
+                                               kref_put(&rdata->refcount,
+                                               cifs_uncached_readdata_release);
+                                               continue;
+                                       }
                                }
-                       } else {
+                               rc = cifs_send_async_read(
+                                               rdata->offset + got_bytes,
+                                               rdata->bytes - got_bytes,
+                                               rdata->cfile, cifs_sb,
+                                               &tmp_list);
+                               list_splice(&tmp_list, &rdata_list);
+                               kref_put(&rdata->refcount,
+                                        cifs_uncached_readdata_release);
+                               goto again;
+                       } else if (rdata->result)
+                               rc = rdata->result;
+                       else
                                rc = cifs_readdata_to_iov(rdata, to);
-                       }
  
+                       /* if there was a short read -- discard anything left */
+                       if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
+                               rc = -ENODATA;
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
@@@ -3030,18 -3176,19 +3176,19 @@@ cifs_read(struct file *file, char *read
  
        for (total_read = 0, cur_offset = read_data; read_size > total_read;
             total_read += bytes_read, cur_offset += bytes_read) {
-               current_read_size = min_t(uint, read_size - total_read, rsize);
-               /*
-                * For windows me and 9x we do not want to request more than it
-                * negotiated since it will refuse the read then.
-                */
-               if ((tcon->ses) && !(tcon->ses->capabilities &
+               do {
+                       current_read_size = min_t(uint, read_size - total_read,
+                                                 rsize);
+                       /*
+                        * For windows me and 9x we do not want to request more
+                        * than it negotiated since it will refuse the read
+                        * then.
+                        */
+                       if ((tcon->ses) && !(tcon->ses->capabilities &
                                tcon->ses->server->vals->cap_large_files)) {
-                       current_read_size = min_t(uint, current_read_size,
-                                       CIFSMaxBufSize);
-               }
-               rc = -EAGAIN;
-               while (rc == -EAGAIN) {
+                               current_read_size = min_t(uint,
+                                       current_read_size, CIFSMaxBufSize);
+                       }
                        if (open_file->invalidHandle) {
                                rc = cifs_reopen_file(open_file, true);
                                if (rc != 0)
                        rc = server->ops->sync_read(xid, open_file, &io_parms,
                                                    &bytes_read, &cur_offset,
                                                    &buf_type);
-               }
+               } while (rc == -EAGAIN);
                if (rc || (bytes_read == 0)) {
                        if (total_read) {
                                break;
@@@ -3133,25 -3281,30 +3281,30 @@@ int cifs_file_mmap(struct file *file, s
  static void
  cifs_readv_complete(struct work_struct *work)
  {
-       unsigned int i;
+       unsigned int i, got_bytes;
        struct cifs_readdata *rdata = container_of(work,
                                                struct cifs_readdata, work);
  
+       got_bytes = rdata->got_bytes;
        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];
  
                lru_cache_add_file(page);
  
-               if (rdata->result == 0) {
+               if (rdata->result == 0 ||
+                   (rdata->result == -EAGAIN && got_bytes)) {
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                }
  
                unlock_page(page);
  
-               if (rdata->result == 0)
+               if (rdata->result == 0 ||
+                   (rdata->result == -EAGAIN && got_bytes))
                        cifs_readpage_to_fscache(rdata->mapping->host, page);
  
+               got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
                page_cache_release(page);
                rdata->pages[i] = NULL;
        }
@@@ -3162,7 -3315,7 +3315,7 @@@ static in
  cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
  {
-       int total_read = 0, result = 0;
+       int result = 0;
        unsigned int i;
        u64 eof;
        pgoff_t eof_index;
        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
  
+       rdata->got_bytes = 0;
        rdata->tailsz = PAGE_CACHE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];
                if (result < 0)
                        break;
  
-               total_read += result;
+               rdata->got_bytes += result;
        }
  
-       return total_read > 0 ? total_read : result;
+       return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+                                               rdata->got_bytes : result;
+ }
+ static int
+ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
+                   unsigned int rsize, struct list_head *tmplist,
+                   unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
+ {
+       struct page *page, *tpage;
+       unsigned int expected_index;
+       int rc;
+       INIT_LIST_HEAD(tmplist);
+       page = list_entry(page_list->prev, struct page, lru);
+       /*
+        * Lock the page and put it in the cache. Since no one else
+        * should have access to this page, we're safe to simply set
+        * PG_locked without checking it first.
+        */
+       __set_page_locked(page);
+       rc = add_to_page_cache_locked(page, mapping,
+                                     page->index, GFP_KERNEL);
+       /* give up if we can't stick it in the cache */
+       if (rc) {
+               __clear_page_locked(page);
+               return rc;
+       }
+       /* move first page to the tmplist */
+       *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+       *bytes = PAGE_CACHE_SIZE;
+       *nr_pages = 1;
+       list_move_tail(&page->lru, tmplist);
+       /* now try and add more pages onto the request */
+       expected_index = page->index + 1;
+       list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
+               /* discontinuity ? */
+               if (page->index != expected_index)
+                       break;
+               /* would this page push the read over the rsize? */
+               if (*bytes + PAGE_CACHE_SIZE > rsize)
+                       break;
+               __set_page_locked(page);
+               if (add_to_page_cache_locked(page, mapping, page->index,
+                                                               GFP_KERNEL)) {
+                       __clear_page_locked(page);
+                       break;
+               }
+               list_move_tail(&page->lru, tmplist);
+               (*bytes) += PAGE_CACHE_SIZE;
+               expected_index++;
+               (*nr_pages)++;
+       }
+       return rc;
  }
  
  static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct list_head tmplist;
        struct cifsFileInfo *open_file = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-       unsigned int rsize = cifs_sb->rsize;
+       struct TCP_Server_Info *server;
        pid_t pid;
  
-       /*
-        * Give up immediately if rsize is too small to read an entire page.
-        * The VFS will fall back to readpage. We should never reach this
-        * point however since we set ra_pages to 0 when the rsize is smaller
-        * than a cache page.
-        */
-       if (unlikely(rsize < PAGE_CACHE_SIZE))
-               return 0;
        /*
         * Reads as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative
                pid = current->tgid;
  
        rc = 0;
-       INIT_LIST_HEAD(&tmplist);
+       server = tlink_tcon(open_file->tlink)->ses->server;
  
        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
                 __func__, file, mapping, num_pages);
         * the rdata->pages, then we want them in increasing order.
         */
        while (!list_empty(page_list)) {
-               unsigned int i;
-               unsigned int bytes = PAGE_CACHE_SIZE;
-               unsigned int expected_index;
-               unsigned int nr_pages = 1;
+               unsigned int i, nr_pages, bytes, rsize;
                loff_t offset;
                struct page *page, *tpage;
                struct cifs_readdata *rdata;
+               unsigned credits;
  
-               page = list_entry(page_list->prev, struct page, lru);
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+                                                  &rsize, &credits);
+               if (rc)
+                       break;
  
                /*
-                * Lock the page and put it in the cache. Since no one else
-                * should have access to this page, we're safe to simply set
-                * PG_locked without checking it first.
+                * Give up immediately if rsize is too small to read an entire
+                * page. The VFS will fall back to readpage. We should never
+                * reach this point however since we set ra_pages to 0 when the
+                * rsize is smaller than a cache page.
                 */
-               __set_page_locked(page);
-               rc = add_to_page_cache_locked(page, mapping,
-                                             page->index, GFP_KERNEL);
+               if (unlikely(rsize < PAGE_CACHE_SIZE)) {
+                       add_credits_and_wake_if(server, credits, 0);
+                       return 0;
+               }
  
-               /* give up if we can't stick it in the cache */
+               rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
+                                        &nr_pages, &offset, &bytes);
                if (rc) {
-                       __clear_page_locked(page);
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
-               /* move first page to the tmplist */
-               offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-               list_move_tail(&page->lru, &tmplist);
-               /* now try and add more pages onto the request */
-               expected_index = page->index + 1;
-               list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
-                       /* discontinuity ? */
-                       if (page->index != expected_index)
-                               break;
-                       /* would this page push the read over the rsize? */
-                       if (bytes + PAGE_CACHE_SIZE > rsize)
-                               break;
-                       __set_page_locked(page);
-                       if (add_to_page_cache_locked(page, mapping,
-                                               page->index, GFP_KERNEL)) {
-                               __clear_page_locked(page);
-                               break;
-                       }
-                       list_move_tail(&page->lru, &tmplist);
-                       bytes += PAGE_CACHE_SIZE;
-                       expected_index++;
-                       nr_pages++;
-               }
                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                                page_cache_release(page);
                        }
                        rc = -ENOMEM;
+                       add_credits_and_wake_if(server, credits, 0);
                        break;
                }
  
                rdata->pid = pid;
                rdata->pagesz = PAGE_CACHE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;
+               rdata->credits = credits;
  
                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                        list_del(&page->lru);
                        rdata->pages[rdata->nr_pages++] = page;
                }
  
-               rc = cifs_retry_async_readv(rdata);
-               if (rc != 0) {
+               if (!rdata->cfile->invalidHandle ||
+                   !cifs_reopen_file(rdata->cfile, true))
+                       rc = server->ops->async_readv(rdata);
+               if (rc) {
+                       add_credits_and_wake_if(server, rdata->credits, 0);
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
                                lru_cache_add_file(page);
                                unlock_page(page);
                                page_cache_release(page);
+                               if (rc == -EAGAIN)
+                                       list_add_tail(&page->lru, &tmplist);
                        }
                        kref_put(&rdata->refcount, cifs_readdata_release);
+                       if (rc == -EAGAIN) {
+                               /* Re-add pages to the page_list and retry */
+                               list_splice(&tmplist, page_list);
+                               continue;
+                       }
                        break;
                }
  
@@@ -3618,6 -3812,13 +3812,6 @@@ static int cifs_launder_page(struct pag
        return rc;
  }
  
 -static int
 -cifs_pending_writers_wait(void *unused)
 -{
 -      schedule();
 -      return 0;
 -}
 -
  void cifs_oplock_break(struct work_struct *work)
  {
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
        int rc = 0;
  
        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
 -                      cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
 +                      TASK_UNINTERRUPTIBLE);
  
        server->ops->downgrade_oplock(server, cinode,
                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --combined fs/cifs/misc.c
index 6bf55d0ed4944a79341b53914617fdf4664b9a76,e65e17b3d484a9872c56b5a7d63836e7fba0d874..81340c6253eb36bcaf922c8e1b955e5d493099ca
@@@ -226,6 -226,15 +226,15 @@@ cifs_small_buf_release(void *buf_to_fre
        return;
  }
  
+ void
+ free_rsp_buf(int resp_buftype, void *rsp)
+ {
+       if (resp_buftype == CIFS_SMALL_BUFFER)
+               cifs_small_buf_release(rsp);
+       else if (resp_buftype == CIFS_LARGE_BUFFER)
+               cifs_buf_release(rsp);
+ }
  /* NB: MID can not be set if treeCon not passed in, in that
     case it is responsbility of caller to set the mid */
  void
@@@ -414,7 -423,7 +423,7 @@@ is_valid_oplock_break(char *buffer, str
                        return true;
                }
                if (pSMBr->hdr.Status.CifsError) {
-                       cifs_dbg(FYI, "notify err 0x%d\n",
+                       cifs_dbg(FYI, "notify err 0x%x\n",
                                 pSMBr->hdr.Status.CifsError);
                        return true;
                }
        if (pSMB->hdr.WordCount != 8)
                return false;
  
-       cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n",
+       cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n",
                 pSMB->LockType, pSMB->OplockLevel);
        if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
                return false;
@@@ -582,7 -591,7 +591,7 @@@ int cifs_get_writer(struct cifsInodeInf
  
  start:
        rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
 -                                 cifs_oplock_break_wait, TASK_KILLABLE);
 +                       TASK_KILLABLE);
        if (rc)
                return rc;