1 /*
2 * fs/cifs/file.c
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
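/*
 * Map the POSIX O_ACCMODE open flags to the generic SMB desired-access
 * bits requested on the wire.
 */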
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 		/* GENERIC_ALL is more permission than we need to request;
56 		   it can cause unnecessary access-denied errors on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
59 }
60
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
64 }
65
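/*
 * Translate VFS open flags into the SMB_O_* flags used by the CIFS
 * POSIX extensions open call.
 */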
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68 u32 posix_flags = 0;
69
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
76
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
84
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
96
97 return posix_flags;
98 }
99
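/*
 * Map O_CREAT/O_EXCL/O_TRUNC combinations to the CIFS create disposition
 * (see the flag mapping table in cifs_nt_open below).
 */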
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
112 }
113
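/*
 * Open a file using the CIFS POSIX extensions and, if the caller passed
 * a pinode, instantiate or refresh the inode from the returned metadata.
 */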
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
125
126 cifs_dbg(FYI, "posix open %s\n", full_path);
127
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
131
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
136 }
137
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
140
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
146
147 if (rc)
148 goto posix_open_ret;
149
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
155
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
165 }
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
168 }
169
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
173 }
174
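/*
 * Open a file with the standard (non-POSIX) SMB open call and refresh the
 * inode from the metadata returned by the server.
 */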
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
179 {
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
187
188 if (!server->ops->open)
189 return -ENOSYS;
190
191 desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194 * open flag mapping table:
195 *
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
203 *
204 * Note that there is not a direct match for the disposition
205 * FILE_SUPERSEDE (ie create whether or not the file exists);
206 * O_CREAT | O_TRUNC is similar, but truncates an existing
207 * file rather than creating a new one as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on the open call)
209 *?
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
216
217 disposition = cifs_get_disposition(f_flags);
218
219 /* BB pass O_SYNC flag through on file attributes .. BB */
220
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
231
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
234
235 oparms.tcon = tcon;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
241 oparms.fid = fid;
242 oparms.reconnect = false;
243
244 rc = server->ops->open(xid, &oparms, oplock, buf);
245
246 if (rc)
247 goto out;
248
249 if (tcon->unix_ext)
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 xid);
252 else
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254 xid, fid);
255
256 out:
257 kfree(buf);
258 return rc;
259 }
260
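/* Return true if any open instance of this inode holds byte-range locks. */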
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264 struct cifs_fid_locks *cur;
265 bool has_locks = false;
266
267 down_read(&cinode->lock_sem);
268 list_for_each_entry(cur, &cinode->llist, llist) {
269 if (!list_empty(&cur->locks)) {
270 has_locks = true;
271 break;
272 }
273 }
274 up_read(&cinode->lock_sem);
275 return has_locks;
276 }
277
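/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the tcon
 * and inode open-file lists, and apply the granted oplock level.
 */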
278 struct cifsFileInfo *
279 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
280 struct tcon_link *tlink, __u32 oplock)
281 {
282 struct dentry *dentry = file_dentry(file);
283 struct inode *inode = d_inode(dentry);
284 struct cifsInodeInfo *cinode = CIFS_I(inode);
285 struct cifsFileInfo *cfile;
286 struct cifs_fid_locks *fdlocks;
287 struct cifs_tcon *tcon = tlink_tcon(tlink);
288 struct TCP_Server_Info *server = tcon->ses->server;
289
290 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
291 if (cfile == NULL)
292 return cfile;
293
294 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
295 if (!fdlocks) {
296 kfree(cfile);
297 return NULL;
298 }
299
300 INIT_LIST_HEAD(&fdlocks->locks);
301 fdlocks->cfile = cfile;
302 cfile->llist = fdlocks;
303 down_write(&cinode->lock_sem);
304 list_add(&fdlocks->llist, &cinode->llist);
305 up_write(&cinode->lock_sem);
306
307 cfile->count = 1;
308 cfile->pid = current->tgid;
309 cfile->uid = current_fsuid();
310 cfile->dentry = dget(dentry);
311 cfile->f_flags = file->f_flags;
312 cfile->invalidHandle = false;
313 cfile->tlink = cifs_get_tlink(tlink);
314 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
315 mutex_init(&cfile->fh_mutex);
316 spin_lock_init(&cfile->file_info_lock);
317
318 cifs_sb_active(inode->i_sb);
319
320 /*
321 * If the server returned a read oplock and we have mandatory brlocks,
322 * set oplock level to None.
323 */
324 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
325 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
326 oplock = 0;
327 }
328
329 spin_lock(&tcon->open_file_lock);
330 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
331 oplock = fid->pending_open->oplock;
332 list_del(&fid->pending_open->olist);
333
334 fid->purge_cache = false;
335 server->ops->set_fid(cfile, fid, oplock);
336
337 list_add(&cfile->tlist, &tcon->openFileList);
338 atomic_inc(&tcon->num_local_opens);
339
340 	/* if readable file instance, put it first in the list */
341 if (file->f_mode & FMODE_READ)
342 list_add(&cfile->flist, &cinode->openFileList);
343 else
344 list_add_tail(&cfile->flist, &cinode->openFileList);
345 spin_unlock(&tcon->open_file_lock);
346
347 if (fid->purge_cache)
348 cifs_zap_mapping(inode);
349
350 file->private_data = cfile;
351 return cfile;
352 }
353
354 struct cifsFileInfo *
355 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
356 {
357 spin_lock(&cifs_file->file_info_lock);
358 cifsFileInfo_get_locked(cifs_file);
359 spin_unlock(&cifs_file->file_info_lock);
360 return cifs_file;
361 }
362
363 /*
364 * Release a reference on the file private data. This may involve closing
365 * the filehandle out on the server. Must be called without holding
366 * tcon->open_file_lock and cifs_file->file_info_lock.
367 */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370 struct inode *inode = d_inode(cifs_file->dentry);
371 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372 struct TCP_Server_Info *server = tcon->ses->server;
373 struct cifsInodeInfo *cifsi = CIFS_I(inode);
374 struct super_block *sb = inode->i_sb;
375 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376 struct cifsLockInfo *li, *tmp;
377 struct cifs_fid fid;
378 struct cifs_pending_open open;
379 bool oplock_break_cancelled;
380
381 spin_lock(&tcon->open_file_lock);
382
383 spin_lock(&cifs_file->file_info_lock);
384 if (--cifs_file->count > 0) {
385 spin_unlock(&cifs_file->file_info_lock);
386 spin_unlock(&tcon->open_file_lock);
387 return;
388 }
389 spin_unlock(&cifs_file->file_info_lock);
390
391 if (server->ops->get_lease_key)
392 server->ops->get_lease_key(inode, &fid);
393
394 /* store open in pending opens to make sure we don't miss lease break */
395 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397 /* remove it from the lists */
398 list_del(&cifs_file->flist);
399 list_del(&cifs_file->tlist);
400 atomic_dec(&tcon->num_local_opens);
401
402 if (list_empty(&cifsi->openFileList)) {
403 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404 d_inode(cifs_file->dentry));
405 		/*
406 		 * In strict cache mode we need to invalidate the mapping on
407 		 * the last close because it may cause an error when we open
408 		 * this file again and get at least a level II oplock.
409 		 */
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412 cifs_set_oplock_level(cifsi, 0);
413 }
414
415 spin_unlock(&tcon->open_file_lock);
416
417 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420 struct TCP_Server_Info *server = tcon->ses->server;
421 unsigned int xid;
422
423 xid = get_xid();
424 if (server->ops->close)
425 server->ops->close(xid, tcon, &cifs_file->fid);
426 _free_xid(xid);
427 }
428
429 if (oplock_break_cancelled)
430 cifs_done_oplock_break(cifsi);
431
432 cifs_del_pending_open(&open);
433
434 /*
435 * Delete any outstanding lock records. We'll lose them when the file
436 * is closed anyway.
437 */
438 down_write(&cifsi->lock_sem);
439 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440 list_del(&li->llist);
441 cifs_del_lock_waiters(li);
442 kfree(li);
443 }
444 list_del(&cifs_file->llist->llist);
445 kfree(cifs_file->llist);
446 up_write(&cifsi->lock_sem);
447
448 cifs_put_tlink(cifs_file->tlink);
449 dput(cifs_file->dentry);
450 cifs_sb_deactive(sb);
451 kfree(cifs_file);
452 }
453
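/*
 * ->open() for regular files: try a POSIX open when the server supports the
 * unix extensions, otherwise fall back to the standard SMB open.
 */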
454 int cifs_open(struct inode *inode, struct file *file)
455
456 {
457 int rc = -EACCES;
458 unsigned int xid;
459 __u32 oplock;
460 struct cifs_sb_info *cifs_sb;
461 struct TCP_Server_Info *server;
462 struct cifs_tcon *tcon;
463 struct tcon_link *tlink;
464 struct cifsFileInfo *cfile = NULL;
465 char *full_path = NULL;
466 bool posix_open_ok = false;
467 struct cifs_fid fid;
468 struct cifs_pending_open open;
469
470 xid = get_xid();
471
472 cifs_sb = CIFS_SB(inode->i_sb);
473 tlink = cifs_sb_tlink(cifs_sb);
474 if (IS_ERR(tlink)) {
475 free_xid(xid);
476 return PTR_ERR(tlink);
477 }
478 tcon = tlink_tcon(tlink);
479 server = tcon->ses->server;
480
481 full_path = build_path_from_dentry(file_dentry(file));
482 if (full_path == NULL) {
483 rc = -ENOMEM;
484 goto out;
485 }
486
487 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
488 inode, file->f_flags, full_path);
489
490 if (file->f_flags & O_DIRECT &&
491 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
492 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
493 file->f_op = &cifs_file_direct_nobrl_ops;
494 else
495 file->f_op = &cifs_file_direct_ops;
496 }
497
498 if (server->oplocks)
499 oplock = REQ_OPLOCK;
500 else
501 oplock = 0;
502
503 if (!tcon->broken_posix_open && tcon->unix_ext &&
504 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
505 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506 /* can not refresh inode info since size could be stale */
507 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
508 cifs_sb->mnt_file_mode /* ignored */,
509 file->f_flags, &oplock, &fid.netfid, xid);
510 if (rc == 0) {
511 cifs_dbg(FYI, "posix open succeeded\n");
512 posix_open_ok = true;
513 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
514 if (tcon->ses->serverNOS)
515 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
516 tcon->ses->serverName,
517 tcon->ses->serverNOS);
518 tcon->broken_posix_open = true;
519 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
520 (rc != -EOPNOTSUPP)) /* path not found or net err */
521 goto out;
522 /*
523 		 * Else fall through and retry the open the old way on
524 		 * network i/o or DFS errors.
525 */
526 }
527
528 if (server->ops->get_lease_key)
529 server->ops->get_lease_key(inode, &fid);
530
531 cifs_add_pending_open(&fid, tlink, &open);
532
533 if (!posix_open_ok) {
534 if (server->ops->get_lease_key)
535 server->ops->get_lease_key(inode, &fid);
536
537 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538 file->f_flags, &oplock, &fid, xid);
539 if (rc) {
540 cifs_del_pending_open(&open);
541 goto out;
542 }
543 }
544
545 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546 if (cfile == NULL) {
547 if (server->ops->close)
548 server->ops->close(xid, tcon, &fid);
549 cifs_del_pending_open(&open);
550 rc = -ENOMEM;
551 goto out;
552 }
553
554 cifs_fscache_set_inode_cookie(inode, file);
555
556 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557 /*
558 * Time to set mode which we can not set earlier due to
559 * problems creating new read-only files.
560 */
561 struct cifs_unix_set_info_args args = {
562 .mode = inode->i_mode,
563 .uid = INVALID_UID, /* no change */
564 .gid = INVALID_GID, /* no change */
565 .ctime = NO_CHANGE_64,
566 .atime = NO_CHANGE_64,
567 .mtime = NO_CHANGE_64,
568 .device = 0,
569 };
570 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571 cfile->pid);
572 }
573
574 out:
575 kfree(full_path);
576 free_xid(xid);
577 cifs_put_tlink(tlink);
578 return rc;
579 }
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
583 /*
584 * Try to reacquire byte range locks that were released when session
585 * to server was lost.
586 */
587 static int
588 cifs_relock_file(struct cifsFileInfo *cfile)
589 {
590 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
591 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
592 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
593 int rc = 0;
594
595 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
596 if (cinode->can_cache_brlcks) {
597 /* can cache locks - no need to relock */
598 up_read(&cinode->lock_sem);
599 return rc;
600 }
601
602 if (cap_unix(tcon->ses) &&
603 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
604 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
605 rc = cifs_push_posix_locks(cfile);
606 else
607 rc = tcon->ses->server->ops->push_mand_locks(cfile);
608
609 up_read(&cinode->lock_sem);
610 return rc;
611 }
612
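/*
 * Reopen a file whose handle was invalidated (e.g. after a reconnect).
 * If can_flush is set, write back dirty pages and refresh the inode.
 */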
613 static int
614 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
615 {
616 int rc = -EACCES;
617 unsigned int xid;
618 __u32 oplock;
619 struct cifs_sb_info *cifs_sb;
620 struct cifs_tcon *tcon;
621 struct TCP_Server_Info *server;
622 struct cifsInodeInfo *cinode;
623 struct inode *inode;
624 char *full_path = NULL;
625 int desired_access;
626 int disposition = FILE_OPEN;
627 int create_options = CREATE_NOT_DIR;
628 struct cifs_open_parms oparms;
629
630 xid = get_xid();
631 mutex_lock(&cfile->fh_mutex);
632 if (!cfile->invalidHandle) {
633 mutex_unlock(&cfile->fh_mutex);
634 rc = 0;
635 free_xid(xid);
636 return rc;
637 }
638
639 inode = d_inode(cfile->dentry);
640 cifs_sb = CIFS_SB(inode->i_sb);
641 tcon = tlink_tcon(cfile->tlink);
642 server = tcon->ses->server;
643
644 	/*
645 	 * Can not grab the rename sem here: various ops, including those
646 	 * that already hold the rename sem, can end up causing writepage to
647 	 * get called, and if the server was down that means we end up here;
648 	 * we can never tell if the caller already has the rename_sem.
649 	 */
650 full_path = build_path_from_dentry(cfile->dentry);
651 if (full_path == NULL) {
652 rc = -ENOMEM;
653 mutex_unlock(&cfile->fh_mutex);
654 free_xid(xid);
655 return rc;
656 }
657
658 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
659 inode, cfile->f_flags, full_path);
660
661 if (tcon->ses->server->oplocks)
662 oplock = REQ_OPLOCK;
663 else
664 oplock = 0;
665
666 if (tcon->unix_ext && cap_unix(tcon->ses) &&
667 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
668 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
669 /*
670 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
671 * original open. Must mask them off for a reopen.
672 */
673 unsigned int oflags = cfile->f_flags &
674 ~(O_CREAT | O_EXCL | O_TRUNC);
675
676 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
677 cifs_sb->mnt_file_mode /* ignored */,
678 oflags, &oplock, &cfile->fid.netfid, xid);
679 if (rc == 0) {
680 cifs_dbg(FYI, "posix reopen succeeded\n");
681 oparms.reconnect = true;
682 goto reopen_success;
683 }
684 		/*
685 		 * fall through and retry the open the old way on errors;
686 		 * in the reconnect path especially it is important to retry hard
687 		 */
688 }
689
690 desired_access = cifs_convert_flags(cfile->f_flags);
691
692 if (backup_cred(cifs_sb))
693 create_options |= CREATE_OPEN_BACKUP_INTENT;
694
695 if (server->ops->get_lease_key)
696 server->ops->get_lease_key(inode, &cfile->fid);
697
698 oparms.tcon = tcon;
699 oparms.cifs_sb = cifs_sb;
700 oparms.desired_access = desired_access;
701 oparms.create_options = create_options;
702 oparms.disposition = disposition;
703 oparms.path = full_path;
704 oparms.fid = &cfile->fid;
705 oparms.reconnect = true;
706
707 	/*
708 	 * Can not refresh the inode by passing in a file_info buf to be returned
709 	 * by ops->open and then calling get_inode_info with the returned buf,
710 	 * since the file might have write-behind data that needs to be flushed
711 	 * and the server's version of the file size can be stale. If we knew for
712 	 * sure that the inode was not dirty locally we could do this.
713 	 */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 if (rc == -ENOENT && oparms.reconnect == false) {
716 /* durable handle timeout is expired - open the file again */
717 rc = server->ops->open(xid, &oparms, &oplock, NULL);
718 /* indicate that we need to relock the file */
719 oparms.reconnect = true;
720 }
721
722 if (rc) {
723 mutex_unlock(&cfile->fh_mutex);
724 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
725 cifs_dbg(FYI, "oplock: %d\n", oplock);
726 goto reopen_error_exit;
727 }
728
729 reopen_success:
730 cfile->invalidHandle = false;
731 mutex_unlock(&cfile->fh_mutex);
732 cinode = CIFS_I(inode);
733
734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping);
736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
738
739 if (tcon->unix_ext)
740 rc = cifs_get_inode_info_unix(&inode, full_path,
741 inode->i_sb, xid);
742 else
743 rc = cifs_get_inode_info(&inode, full_path, NULL,
744 inode->i_sb, xid, NULL);
745 }
746 	/*
747 	 * Else we are already writing data out to the server and could deadlock
748 	 * if we tried to flush it; since we do not know whether we have data
749 	 * that would invalidate the current end of file on the server, we can
750 	 * not go to the server to get the new inode info.
751 	 */
752
753 /*
754 * If the server returned a read oplock and we have mandatory brlocks,
755 * set oplock level to None.
756 */
757 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
758 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
759 oplock = 0;
760 }
761
762 server->ops->set_fid(cfile, &cfile->fid, oplock);
763 if (oparms.reconnect)
764 cifs_relock_file(cfile);
765
766 reopen_error_exit:
767 kfree(full_path);
768 free_xid(xid);
769 return rc;
770 }
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774 if (file->private_data != NULL) {
775 cifsFileInfo_put(file->private_data);
776 file->private_data = NULL;
777 }
778
779 /* return code from the ->release op is always ignored */
780 return 0;
781 }
782
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786 struct cifsFileInfo *open_file;
787 struct list_head *tmp;
788 struct list_head *tmp1;
789 struct list_head tmp_list;
790
791 if (!tcon->use_persistent || !tcon->need_reopen_files)
792 return;
793
794 tcon->need_reopen_files = false;
795
796 	cifs_dbg(FYI, "Reopen persistent handles\n");
797 INIT_LIST_HEAD(&tmp_list);
798
799 /* list all files open on tree connection, reopen resilient handles */
800 spin_lock(&tcon->open_file_lock);
801 list_for_each(tmp, &tcon->openFileList) {
802 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803 if (!open_file->invalidHandle)
804 continue;
805 cifsFileInfo_get(open_file);
806 list_add_tail(&open_file->rlist, &tmp_list);
807 }
808 spin_unlock(&tcon->open_file_lock);
809
810 list_for_each_safe(tmp, tmp1, &tmp_list) {
811 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812 if (cifs_reopen_file(open_file, false /* do not flush */))
813 tcon->need_reopen_files = true;
814 list_del_init(&open_file->rlist);
815 cifsFileInfo_put(open_file);
816 }
817 }
818
819 int cifs_closedir(struct inode *inode, struct file *file)
820 {
821 int rc = 0;
822 unsigned int xid;
823 struct cifsFileInfo *cfile = file->private_data;
824 struct cifs_tcon *tcon;
825 struct TCP_Server_Info *server;
826 char *buf;
827
828 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
829
830 if (cfile == NULL)
831 return rc;
832
833 xid = get_xid();
834 tcon = tlink_tcon(cfile->tlink);
835 server = tcon->ses->server;
836
837 cifs_dbg(FYI, "Freeing private data in close dir\n");
838 spin_lock(&cfile->file_info_lock);
839 if (server->ops->dir_needs_close(cfile)) {
840 cfile->invalidHandle = true;
841 spin_unlock(&cfile->file_info_lock);
842 if (server->ops->close_dir)
843 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
844 else
845 rc = -ENOSYS;
846 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
847 /* not much we can do if it fails anyway, ignore rc */
848 rc = 0;
849 } else
850 spin_unlock(&cfile->file_info_lock);
851
852 buf = cfile->srch_inf.ntwrk_buf_start;
853 if (buf) {
854 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
855 cfile->srch_inf.ntwrk_buf_start = NULL;
856 if (cfile->srch_inf.smallBuf)
857 cifs_small_buf_release(buf);
858 else
859 cifs_buf_release(buf);
860 }
861
862 cifs_put_tlink(cfile->tlink);
863 kfree(file->private_data);
864 file->private_data = NULL;
865 /* BB can we lock the filestruct while this is going on? */
866 free_xid(xid);
867 return rc;
868 }
869
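/* Allocate and initialize a byte-range lock record for the calling process. */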
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873 struct cifsLockInfo *lock =
874 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875 if (!lock)
876 return lock;
877 lock->offset = offset;
878 lock->length = length;
879 lock->type = type;
880 lock->pid = current->tgid;
881 lock->flags = flags;
882 INIT_LIST_HEAD(&lock->blist);
883 init_waitqueue_head(&lock->block_q);
884 return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890 struct cifsLockInfo *li, *tmp;
891 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892 list_del_init(&li->blist);
893 wake_up(&li->block_q);
894 }
895 }
896
897 #define CIFS_LOCK_OP 0
898 #define CIFS_READ_OP 1
899 #define CIFS_WRITE_OP 2
900
901 /* @rw_check : 0 - no op, 1 - read, 2 - write */
902 static bool
903 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
904 __u64 length, __u8 type, __u16 flags,
905 struct cifsFileInfo *cfile,
906 struct cifsLockInfo **conf_lock, int rw_check)
907 {
908 struct cifsLockInfo *li;
909 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
910 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
911
912 list_for_each_entry(li, &fdlocks->locks, llist) {
913 if (offset + length <= li->offset ||
914 offset >= li->offset + li->length)
915 continue;
916 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
917 server->ops->compare_fids(cfile, cur_cfile)) {
918 /* shared lock prevents write op through the same fid */
919 if (!(li->type & server->vals->shared_lock_type) ||
920 rw_check != CIFS_WRITE_OP)
921 continue;
922 }
923 if ((type & server->vals->shared_lock_type) &&
924 ((server->ops->compare_fids(cfile, cur_cfile) &&
925 current->tgid == li->pid) || type == li->type))
926 continue;
927 if (rw_check == CIFS_LOCK_OP &&
928 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
929 server->ops->compare_fids(cfile, cur_cfile))
930 continue;
931 if (conf_lock)
932 *conf_lock = li;
933 return true;
934 }
935 return false;
936 }
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940 __u8 type, __u16 flags,
941 struct cifsLockInfo **conf_lock, int rw_check)
942 {
943 bool rc = false;
944 struct cifs_fid_locks *cur;
945 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947 list_for_each_entry(cur, &cinode->llist, llist) {
948 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949 flags, cfile, conf_lock,
950 rw_check);
951 if (rc)
952 break;
953 }
954
955 return rc;
956 }
957
958 /*
959 * Check if there is another lock that prevents us from setting the lock
960 * (mandatory style). If such a lock exists, update the flock structure with
961 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
962 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
963 * send a request to the server or 1 otherwise.
964 */
965 static int
966 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
967 __u8 type, struct file_lock *flock)
968 {
969 int rc = 0;
970 struct cifsLockInfo *conf_lock;
971 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
972 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
973 bool exist;
974
975 down_read(&cinode->lock_sem);
976
977 exist = cifs_find_lock_conflict(cfile, offset, length, type,
978 flock->fl_flags, &conf_lock,
979 CIFS_LOCK_OP);
980 if (exist) {
981 flock->fl_start = conf_lock->offset;
982 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
983 flock->fl_pid = conf_lock->pid;
984 if (conf_lock->type & server->vals->shared_lock_type)
985 flock->fl_type = F_RDLCK;
986 else
987 flock->fl_type = F_WRLCK;
988 } else if (!cinode->can_cache_brlcks)
989 rc = 1;
990 else
991 flock->fl_type = F_UNLCK;
992
993 up_read(&cinode->lock_sem);
994 return rc;
995 }
996
997 static void
998 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999 {
1000 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001 down_write(&cinode->lock_sem);
1002 list_add_tail(&lock->llist, &cfile->llist->locks);
1003 up_write(&cinode->lock_sem);
1004 }
1005
1006 /*
1007 * Set the byte-range lock (mandatory style). Returns:
1008 * 1) 0, if we set the lock and don't need to send a request to the server;
1009 * 2) 1, if no locks prevent us but we need to send a request to the server;
1010 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011 */
1012 static int
1013 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014 bool wait)
1015 {
1016 struct cifsLockInfo *conf_lock;
1017 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018 bool exist;
1019 int rc = 0;
1020
1021 try_again:
1022 exist = false;
1023 down_write(&cinode->lock_sem);
1024
1025 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026 lock->type, lock->flags, &conf_lock,
1027 CIFS_LOCK_OP);
1028 if (!exist && cinode->can_cache_brlcks) {
1029 list_add_tail(&lock->llist, &cfile->llist->locks);
1030 up_write(&cinode->lock_sem);
1031 return rc;
1032 }
1033
1034 if (!exist)
1035 rc = 1;
1036 else if (!wait)
1037 rc = -EACCES;
1038 else {
1039 list_add_tail(&lock->blist, &conf_lock->blist);
1040 up_write(&cinode->lock_sem);
1041 rc = wait_event_interruptible(lock->block_q,
1042 (lock->blist.prev == &lock->blist) &&
1043 (lock->blist.next == &lock->blist));
1044 if (!rc)
1045 goto try_again;
1046 down_write(&cinode->lock_sem);
1047 list_del_init(&lock->blist);
1048 }
1049
1050 up_write(&cinode->lock_sem);
1051 return rc;
1052 }
1053
1054 /*
1055 * Check if there is another lock that prevents us from setting the lock
1056 * (posix style). If such a lock exists, update the flock structure with
1057 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1058 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
1059 * send a request to the server or 1 otherwise.
1060 */
1061 static int
1062 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1063 {
1064 int rc = 0;
1065 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066 unsigned char saved_type = flock->fl_type;
1067
1068 if ((flock->fl_flags & FL_POSIX) == 0)
1069 return 1;
1070
1071 down_read(&cinode->lock_sem);
1072 posix_test_lock(file, flock);
1073
1074 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1075 flock->fl_type = saved_type;
1076 rc = 1;
1077 }
1078
1079 up_read(&cinode->lock_sem);
1080 return rc;
1081 }
1082
1083 /*
1084 * Set the byte-range lock (posix style). Returns:
1085 * 1) 0, if we set the lock and don't need to send a request to the server;
1086 * 2) 1, if we need to send a request to the server;
1087 * 3) <0, if an error occurs while setting the lock.
1088 */
1089 static int
1090 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1091 {
1092 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093 int rc = 1;
1094
1095 if ((flock->fl_flags & FL_POSIX) == 0)
1096 return rc;
1097
1098 try_again:
1099 down_write(&cinode->lock_sem);
1100 if (!cinode->can_cache_brlcks) {
1101 up_write(&cinode->lock_sem);
1102 return rc;
1103 }
1104
1105 rc = posix_lock_file(file, flock, NULL);
1106 up_write(&cinode->lock_sem);
1107 if (rc == FILE_LOCK_DEFERRED) {
1108 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109 if (!rc)
1110 goto try_again;
1111 locks_delete_block(flock);
1112 }
1113 return rc;
1114 }
1115
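/*
 * Send all cached byte-range locks for this open file to the server, batching
 * as many LOCKING_ANDX ranges per request as the server's maxBuf allows.
 */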
1116 int
1117 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118 {
1119 unsigned int xid;
1120 int rc = 0, stored_rc;
1121 struct cifsLockInfo *li, *tmp;
1122 struct cifs_tcon *tcon;
1123 unsigned int num, max_num, max_buf;
1124 LOCKING_ANDX_RANGE *buf, *cur;
1125 static const int types[] = {
1126 LOCKING_ANDX_LARGE_FILES,
1127 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1128 };
1129 int i;
1130
1131 xid = get_xid();
1132 tcon = tlink_tcon(cfile->tlink);
1133
1134 /*
1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136 * and check it before using.
1137 */
1138 max_buf = tcon->ses->server->maxBuf;
1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140 free_xid(xid);
1141 return -EINVAL;
1142 }
1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1149 sizeof(LOCKING_ANDX_RANGE);
1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151 if (!buf) {
1152 free_xid(xid);
1153 return -ENOMEM;
1154 }
1155
1156 for (i = 0; i < 2; i++) {
1157 cur = buf;
1158 num = 0;
1159 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160 if (li->type != types[i])
1161 continue;
1162 cur->Pid = cpu_to_le16(li->pid);
1163 cur->LengthLow = cpu_to_le32((u32)li->length);
1164 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1165 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1166 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1167 if (++num == max_num) {
1168 stored_rc = cifs_lockv(xid, tcon,
1169 cfile->fid.netfid,
1170 (__u8)li->type, 0, num,
1171 buf);
1172 if (stored_rc)
1173 rc = stored_rc;
1174 cur = buf;
1175 num = 0;
1176 } else
1177 cur++;
1178 }
1179
1180 if (num) {
1181 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182 (__u8)types[i], 0, num, buf);
1183 if (stored_rc)
1184 rc = stored_rc;
1185 }
1186 }
1187
1188 kfree(buf);
1189 free_xid(xid);
1190 return rc;
1191 }
1192
1193 static __u32
1194 hash_lockowner(fl_owner_t owner)
1195 {
1196 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1197 }
1198
1199 struct lock_to_push {
1200 struct list_head llist;
1201 __u64 offset;
1202 __u64 length;
1203 __u32 pid;
1204 __u16 netfid;
1205 __u8 type;
1206 };
1207
1208 static int
1209 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210 {
1211 struct inode *inode = d_inode(cfile->dentry);
1212 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213 struct file_lock *flock;
1214 struct file_lock_context *flctx = inode->i_flctx;
1215 unsigned int count = 0, i;
1216 int rc = 0, xid, type;
1217 struct list_head locks_to_send, *el;
1218 struct lock_to_push *lck, *tmp;
1219 __u64 length;
1220
1221 xid = get_xid();
1222
1223 if (!flctx)
1224 goto out;
1225
1226 spin_lock(&flctx->flc_lock);
1227 list_for_each(el, &flctx->flc_posix) {
1228 count++;
1229 }
1230 spin_unlock(&flctx->flc_lock);
1231
1232 INIT_LIST_HEAD(&locks_to_send);
1233
1234 /*
1235 * Allocating count locks is enough because no FL_POSIX locks can be
1236 * added to the list while we are holding cinode->lock_sem that
1237 * protects locking operations of this inode.
1238 */
1239 for (i = 0; i < count; i++) {
1240 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1241 if (!lck) {
1242 rc = -ENOMEM;
1243 goto err_out;
1244 }
1245 list_add_tail(&lck->llist, &locks_to_send);
1246 }
1247
1248 el = locks_to_send.next;
1249 spin_lock(&flctx->flc_lock);
1250 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251 if (el == &locks_to_send) {
1252 /*
1253 * The list ended. We don't have enough allocated
1254 * structures - something is really wrong.
1255 */
1256 cifs_dbg(VFS, "Can't push all brlocks!\n");
1257 break;
1258 }
1259 length = 1 + flock->fl_end - flock->fl_start;
1260 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1261 type = CIFS_RDLCK;
1262 else
1263 type = CIFS_WRLCK;
1264 lck = list_entry(el, struct lock_to_push, llist);
1265 lck->pid = hash_lockowner(flock->fl_owner);
1266 lck->netfid = cfile->fid.netfid;
1267 lck->length = length;
1268 lck->type = type;
1269 lck->offset = flock->fl_start;
1270 }
1271 spin_unlock(&flctx->flc_lock);
1272
1273 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1274 int stored_rc;
1275
1276 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277 lck->offset, lck->length, NULL,
1278 lck->type, 0);
1279 if (stored_rc)
1280 rc = stored_rc;
1281 list_del(&lck->llist);
1282 kfree(lck);
1283 }
1284
1285 out:
1286 free_xid(xid);
1287 return rc;
1288 err_out:
1289 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1290 list_del(&lck->llist);
1291 kfree(lck);
1292 }
1293 goto out;
1294 }
1295
1296 static int
1297 cifs_push_locks(struct cifsFileInfo *cfile)
1298 {
1299 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302 int rc = 0;
1303
1304 	/* we are going to update can_cache_brlcks here - need write access */
1305 down_write(&cinode->lock_sem);
1306 if (!cinode->can_cache_brlcks) {
1307 up_write(&cinode->lock_sem);
1308 return rc;
1309 }
1310
1311 if (cap_unix(tcon->ses) &&
1312 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1313 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314 rc = cifs_push_posix_locks(cfile);
1315 else
1316 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317
1318 cinode->can_cache_brlcks = false;
1319 up_write(&cinode->lock_sem);
1320 return rc;
1321 }
1322
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327 if (flock->fl_flags & FL_POSIX)
1328 cifs_dbg(FYI, "Posix\n");
1329 if (flock->fl_flags & FL_FLOCK)
1330 cifs_dbg(FYI, "Flock\n");
1331 if (flock->fl_flags & FL_SLEEP) {
1332 cifs_dbg(FYI, "Blocking lock\n");
1333 *wait_flag = true;
1334 }
1335 if (flock->fl_flags & FL_ACCESS)
1336 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337 if (flock->fl_flags & FL_LEASE)
1338 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339 if (flock->fl_flags &
1340 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344 *type = server->vals->large_lock_type;
1345 if (flock->fl_type == F_WRLCK) {
1346 cifs_dbg(FYI, "F_WRLCK\n");
1347 *type |= server->vals->exclusive_lock_type;
1348 *lock = 1;
1349 } else if (flock->fl_type == F_UNLCK) {
1350 cifs_dbg(FYI, "F_UNLCK\n");
1351 *type |= server->vals->unlock_lock_type;
1352 *unlock = 1;
1353 /* Check if unlock includes more than one lock range */
1354 } else if (flock->fl_type == F_RDLCK) {
1355 cifs_dbg(FYI, "F_RDLCK\n");
1356 *type |= server->vals->shared_lock_type;
1357 *lock = 1;
1358 } else if (flock->fl_type == F_EXLCK) {
1359 cifs_dbg(FYI, "F_EXLCK\n");
1360 *type |= server->vals->exclusive_lock_type;
1361 *lock = 1;
1362 } else if (flock->fl_type == F_SHLCK) {
1363 cifs_dbg(FYI, "F_SHLCK\n");
1364 *type |= server->vals->shared_lock_type;
1365 *lock = 1;
1366 } else
1367 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
1370 static int
1371 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372 bool wait_flag, bool posix_lck, unsigned int xid)
1373 {
1374 int rc = 0;
1375 __u64 length = 1 + flock->fl_end - flock->fl_start;
1376 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378 struct TCP_Server_Info *server = tcon->ses->server;
1379 __u16 netfid = cfile->fid.netfid;
1380
1381 if (posix_lck) {
1382 int posix_lock_type;
1383
1384 rc = cifs_posix_lock_test(file, flock);
1385 if (!rc)
1386 return rc;
1387
1388 if (type & server->vals->shared_lock_type)
1389 posix_lock_type = CIFS_RDLCK;
1390 else
1391 posix_lock_type = CIFS_WRLCK;
1392 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1393 hash_lockowner(flock->fl_owner),
1394 flock->fl_start, length, flock,
1395 posix_lock_type, wait_flag);
1396 return rc;
1397 }
1398
1399 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400 if (!rc)
1401 return rc;
1402
1403 /* BB we could chain these into one lock request BB */
1404 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1405 1, 0, false);
1406 if (rc == 0) {
1407 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1408 type, 0, 1, false);
1409 flock->fl_type = F_UNLCK;
1410 if (rc != 0)
1411 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1412 rc);
1413 return 0;
1414 }
1415
1416 if (type & server->vals->shared_lock_type) {
1417 flock->fl_type = F_WRLCK;
1418 return 0;
1419 }
1420
1421 type &= ~server->vals->exclusive_lock_type;
1422
1423 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1424 type | server->vals->shared_lock_type,
1425 1, 0, false);
1426 if (rc == 0) {
1427 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1428 type | server->vals->shared_lock_type, 0, 1, false);
1429 flock->fl_type = F_RDLCK;
1430 if (rc != 0)
1431 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1432 rc);
1433 } else
1434 flock->fl_type = F_WRLCK;
1435
1436 return 0;
1437 }
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442 struct list_head *li, *tmp;
1443 list_for_each_safe(li, tmp, source)
1444 list_move(li, dest);
1445 }
1446
1447 void
1448 cifs_free_llist(struct list_head *llist)
1449 {
1450 struct cifsLockInfo *li, *tmp;
1451 list_for_each_entry_safe(li, tmp, llist, llist) {
1452 cifs_del_lock_waiters(li);
1453 list_del(&li->llist);
1454 kfree(li);
1455 }
1456 }
1457
1458 int
1459 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1460 unsigned int xid)
1461 {
1462 int rc = 0, stored_rc;
1463 static const int types[] = {
1464 LOCKING_ANDX_LARGE_FILES,
1465 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1466 };
1467 unsigned int i;
1468 unsigned int max_num, num, max_buf;
1469 LOCKING_ANDX_RANGE *buf, *cur;
1470 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472 struct cifsLockInfo *li, *tmp;
1473 __u64 length = 1 + flock->fl_end - flock->fl_start;
1474 struct list_head tmp_llist;
1475
1476 INIT_LIST_HEAD(&tmp_llist);
1477
1478 /*
1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480 * and check it before using.
1481 */
1482 max_buf = tcon->ses->server->maxBuf;
1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484 return -EINVAL;
1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1491 sizeof(LOCKING_ANDX_RANGE);
1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493 if (!buf)
1494 return -ENOMEM;
1495
1496 down_write(&cinode->lock_sem);
1497 for (i = 0; i < 2; i++) {
1498 cur = buf;
1499 num = 0;
1500 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501 if (flock->fl_start > li->offset ||
1502 (flock->fl_start + length) <
1503 (li->offset + li->length))
1504 continue;
1505 if (current->tgid != li->pid)
1506 continue;
1507 if (types[i] != li->type)
1508 continue;
1509 if (cinode->can_cache_brlcks) {
1510 /*
1511 * We can cache brlock requests - simply remove
1512 * a lock from the file's list.
1513 */
1514 list_del(&li->llist);
1515 cifs_del_lock_waiters(li);
1516 kfree(li);
1517 continue;
1518 }
1519 cur->Pid = cpu_to_le16(li->pid);
1520 cur->LengthLow = cpu_to_le32((u32)li->length);
1521 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1522 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1523 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1524 /*
1525 * We need to save a lock here to let us add it again to
1526 * the file's list if the unlock range request fails on
1527 * the server.
1528 */
1529 list_move(&li->llist, &tmp_llist);
1530 if (++num == max_num) {
1531 stored_rc = cifs_lockv(xid, tcon,
1532 cfile->fid.netfid,
1533 li->type, num, 0, buf);
1534 if (stored_rc) {
1535 /*
1536 * We failed on the unlock range
1537 * request - add all locks from the tmp
1538 * list to the head of the file's list.
1539 */
1540 cifs_move_llist(&tmp_llist,
1541 &cfile->llist->locks);
1542 rc = stored_rc;
1543 } else
1544 /*
1545 				 * The unlock range request succeeded -
1546 * free the tmp list.
1547 */
1548 cifs_free_llist(&tmp_llist);
1549 cur = buf;
1550 num = 0;
1551 } else
1552 cur++;
1553 }
1554 if (num) {
1555 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556 types[i], num, 0, buf);
1557 if (stored_rc) {
1558 cifs_move_llist(&tmp_llist,
1559 &cfile->llist->locks);
1560 rc = stored_rc;
1561 } else
1562 cifs_free_llist(&tmp_llist);
1563 }
1564 }
1565
1566 up_write(&cinode->lock_sem);
1567 kfree(buf);
1568 return rc;
1569 }
1570
1571 static int
1572 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573 bool wait_flag, bool posix_lck, int lock, int unlock,
1574 unsigned int xid)
1575 {
1576 int rc = 0;
1577 __u64 length = 1 + flock->fl_end - flock->fl_start;
1578 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1579 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580 struct TCP_Server_Info *server = tcon->ses->server;
1581 struct inode *inode = d_inode(cfile->dentry);
1582
1583 if (posix_lck) {
1584 int posix_lock_type;
1585
1586 rc = cifs_posix_lock_set(file, flock);
1587 if (!rc || rc < 0)
1588 return rc;
1589
1590 if (type & server->vals->shared_lock_type)
1591 posix_lock_type = CIFS_RDLCK;
1592 else
1593 posix_lock_type = CIFS_WRLCK;
1594
1595 if (unlock == 1)
1596 posix_lock_type = CIFS_UNLCK;
1597
1598 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599 hash_lockowner(flock->fl_owner),
1600 flock->fl_start, length,
1601 NULL, posix_lock_type, wait_flag);
1602 goto out;
1603 }
1604
1605 if (lock) {
1606 struct cifsLockInfo *lock;
1607
1608 lock = cifs_lock_init(flock->fl_start, length, type,
1609 flock->fl_flags);
1610 if (!lock)
1611 return -ENOMEM;
1612
1613 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614 if (rc < 0) {
1615 kfree(lock);
1616 return rc;
1617 }
1618 if (!rc)
1619 goto out;
1620
1621 		/*
1622 		 * A Windows 7 server can delay breaking a lease from read to
1623 		 * None if we set a byte-range lock on a file - break it
1624 		 * explicitly before sending the lock to the server to be sure
1625 		 * the next read won't conflict with non-overlapping locks due
1626 		 * to page reading.
1627 		 */
1628 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1629 CIFS_CACHE_READ(CIFS_I(inode))) {
1630 cifs_zap_mapping(inode);
1631 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1632 inode);
1633 CIFS_I(inode)->oplock = 0;
1634 }
1635
1636 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637 type, 1, 0, wait_flag);
1638 if (rc) {
1639 kfree(lock);
1640 return rc;
1641 }
1642
1643 cifs_lock_add(cfile, lock);
1644 } else if (unlock)
1645 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646
1647 out:
1648 if (flock->fl_flags & FL_POSIX && !rc)
1649 rc = locks_lock_file_wait(file, flock);
1650 return rc;
1651 }
1652
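/*
 * ->lock() entry point for byte-range locks: handle F_GETLK queries and
 * dispatch lock/unlock requests in either posix or mandatory style.
 */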
1653 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1654 {
1655 int rc, xid;
1656 int lock = 0, unlock = 0;
1657 bool wait_flag = false;
1658 bool posix_lck = false;
1659 struct cifs_sb_info *cifs_sb;
1660 struct cifs_tcon *tcon;
1661 struct cifsInodeInfo *cinode;
1662 struct cifsFileInfo *cfile;
1663 __u16 netfid;
1664 __u32 type;
1665
1666 rc = -EACCES;
1667 xid = get_xid();
1668
1669 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1670 cmd, flock->fl_flags, flock->fl_type,
1671 flock->fl_start, flock->fl_end);
1672
1673 cfile = (struct cifsFileInfo *)file->private_data;
1674 tcon = tlink_tcon(cfile->tlink);
1675
1676 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1677 tcon->ses->server);
1678 cifs_sb = CIFS_FILE_SB(file);
1679 netfid = cfile->fid.netfid;
1680 cinode = CIFS_I(file_inode(file));
1681
1682 if (cap_unix(tcon->ses) &&
1683 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1684 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1685 posix_lck = true;
1686 /*
1687 * BB add code here to normalize offset and length to account for
1688 * negative length which we can not accept over the wire.
1689 */
1690 if (IS_GETLK(cmd)) {
1691 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1692 free_xid(xid);
1693 return rc;
1694 }
1695
1696 if (!lock && !unlock) {
1697 /*
1698 * if no lock or unlock then nothing to do since we do not
1699 * know what it is
1700 */
1701 free_xid(xid);
1702 return -EOPNOTSUPP;
1703 }
1704
1705 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1706 xid);
1707 free_xid(xid);
1708 return rc;
1709 }
1710
1711 /*
1712 * update the file size (if needed) after a write. Should be called with
1713 * the inode->i_lock held
1714 */
1715 void
1716 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1717 unsigned int bytes_written)
1718 {
1719 loff_t end_of_write = offset + bytes_written;
1720
1721 if (end_of_write > cifsi->server_eof)
1722 cifsi->server_eof = end_of_write;
1723 }
1724
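/*
 * Synchronously write write_size bytes from write_data at *offset, retrying
 * on -EAGAIN and reopening an invalidated handle, then update the cached EOF.
 */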
1725 static ssize_t
1726 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1727 size_t write_size, loff_t *offset)
1728 {
1729 int rc = 0;
1730 unsigned int bytes_written = 0;
1731 unsigned int total_written;
1732 struct cifs_sb_info *cifs_sb;
1733 struct cifs_tcon *tcon;
1734 struct TCP_Server_Info *server;
1735 unsigned int xid;
1736 struct dentry *dentry = open_file->dentry;
1737 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1738 struct cifs_io_parms io_parms;
1739
1740 cifs_sb = CIFS_SB(dentry->d_sb);
1741
1742 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1743 write_size, *offset, dentry);
1744
1745 tcon = tlink_tcon(open_file->tlink);
1746 server = tcon->ses->server;
1747
1748 if (!server->ops->sync_write)
1749 return -ENOSYS;
1750
1751 xid = get_xid();
1752
1753 for (total_written = 0; write_size > total_written;
1754 total_written += bytes_written) {
1755 rc = -EAGAIN;
1756 while (rc == -EAGAIN) {
1757 struct kvec iov[2];
1758 unsigned int len;
1759
1760 if (open_file->invalidHandle) {
1761 /* we could deadlock if we called
1762 filemap_fdatawait from here so tell
1763 reopen_file not to flush data to
1764 server now */
1765 rc = cifs_reopen_file(open_file, false);
1766 if (rc != 0)
1767 break;
1768 }
1769
1770 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1771 (unsigned int)write_size - total_written);
1772 /* iov[0] is reserved for smb header */
1773 iov[1].iov_base = (char *)write_data + total_written;
1774 iov[1].iov_len = len;
1775 io_parms.pid = pid;
1776 io_parms.tcon = tcon;
1777 io_parms.offset = *offset;
1778 io_parms.length = len;
1779 rc = server->ops->sync_write(xid, &open_file->fid,
1780 &io_parms, &bytes_written, iov, 1);
1781 }
1782 if (rc || (bytes_written == 0)) {
1783 if (total_written)
1784 break;
1785 else {
1786 free_xid(xid);
1787 return rc;
1788 }
1789 } else {
1790 spin_lock(&d_inode(dentry)->i_lock);
1791 cifs_update_eof(cifsi, *offset, bytes_written);
1792 spin_unlock(&d_inode(dentry)->i_lock);
1793 *offset += bytes_written;
1794 }
1795 }
1796
1797 cifs_stats_bytes_written(tcon, total_written);
1798
1799 if (total_written > 0) {
1800 spin_lock(&d_inode(dentry)->i_lock);
1801 if (*offset > d_inode(dentry)->i_size)
1802 i_size_write(d_inode(dentry), *offset);
1803 spin_unlock(&d_inode(dentry)->i_lock);
1804 }
1805 mark_inode_dirty_sync(d_inode(dentry));
1806 free_xid(xid);
1807 return total_written;
1808 }
1809
1810 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1811 bool fsuid_only)
1812 {
1813 struct cifsFileInfo *open_file = NULL;
1814 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1815 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1816
1817 /* only filter by fsuid on multiuser mounts */
1818 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1819 fsuid_only = false;
1820
1821 spin_lock(&tcon->open_file_lock);
1822 	/* we could simply take the first list entry since write-only entries
1823 	   are always at the end of the list, but since the first entry might
1824 	   have a close pending, we go through the whole list */
1825 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1826 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1827 continue;
1828 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1829 if (!open_file->invalidHandle) {
1830 /* found a good file */
1831 /* lock it so it will not be closed on us */
1832 cifsFileInfo_get(open_file);
1833 spin_unlock(&tcon->open_file_lock);
1834 return open_file;
1835 } /* else might as well continue, and look for
1836 another, or simply have the caller reopen it
1837 again rather than trying to fix this handle */
1838 } else /* write only file */
1839 break; /* write only files are last so must be done */
1840 }
1841 spin_unlock(&tcon->open_file_lock);
1842 return NULL;
1843 }
1844
1845 /* Return -EBADF if no handle is found and general rc otherwise */
1846 int
1847 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1848 struct cifsFileInfo **ret_file)
1849 {
1850 struct cifsFileInfo *open_file, *inv_file = NULL;
1851 struct cifs_sb_info *cifs_sb;
1852 struct cifs_tcon *tcon;
1853 bool any_available = false;
1854 int rc = -EBADF;
1855 unsigned int refind = 0;
1856
1857 *ret_file = NULL;
1858
1859 	/*
1860 	 * Having a null inode here (because mapping->host was set to zero by
1861 	 * the VFS or MM) should not happen, but we had reports of an oops (due
1862 	 * to it being zero) during stress test cases, so we need to check for it
1863 	 */
1864
1865 if (cifs_inode == NULL) {
1866 		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1867 dump_stack();
1868 return rc;
1869 }
1870
1871 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1872 tcon = cifs_sb_master_tcon(cifs_sb);
1873
1874 /* only filter by fsuid on multiuser mounts */
1875 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1876 fsuid_only = false;
1877
1878 spin_lock(&tcon->open_file_lock);
1879 refind_writable:
1880 if (refind > MAX_REOPEN_ATT) {
1881 spin_unlock(&tcon->open_file_lock);
1882 return rc;
1883 }
1884 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1885 if (!any_available && open_file->pid != current->tgid)
1886 continue;
1887 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1888 continue;
1889 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1890 if (!open_file->invalidHandle) {
1891 /* found a good writable file */
1892 cifsFileInfo_get(open_file);
1893 spin_unlock(&tcon->open_file_lock);
1894 *ret_file = open_file;
1895 return 0;
1896 } else {
1897 if (!inv_file)
1898 inv_file = open_file;
1899 }
1900 }
1901 }
1902 /* couldn't find a usable FH with the same pid, try any available */
1903 if (!any_available) {
1904 any_available = true;
1905 goto refind_writable;
1906 }
1907
1908 if (inv_file) {
1909 any_available = false;
1910 cifsFileInfo_get(inv_file);
1911 }
1912
1913 spin_unlock(&tcon->open_file_lock);
1914
1915 if (inv_file) {
1916 rc = cifs_reopen_file(inv_file, false);
1917 if (!rc) {
1918 *ret_file = inv_file;
1919 return 0;
1920 }
1921
1922 spin_lock(&tcon->open_file_lock);
1923 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1924 spin_unlock(&tcon->open_file_lock);
1925 cifsFileInfo_put(inv_file);
1926 ++refind;
1927 inv_file = NULL;
1928 spin_lock(&tcon->open_file_lock);
1929 goto refind_writable;
1930 }
1931
1932 return rc;
1933 }
1934
1935 struct cifsFileInfo *
1936 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1937 {
1938 struct cifsFileInfo *cfile;
1939 int rc;
1940
1941 rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1942 if (rc)
1943 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1944
1945 return cfile;
1946 }
1947
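/*
 * Write the [from, to) range of a page cache page back to the server using
 * any writable handle for the inode.  The range is clamped to i_size, and a
 * page that lies entirely beyond i_size is skipped.
 */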
1948 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1949 {
1950 struct address_space *mapping = page->mapping;
1951 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1952 char *write_data;
1953 int rc = -EFAULT;
1954 int bytes_written = 0;
1955 struct inode *inode;
1956 struct cifsFileInfo *open_file;
1957
1958 if (!mapping || !mapping->host)
1959 return -EFAULT;
1960
1961 inode = page->mapping->host;
1962
1963 offset += (loff_t)from;
1964 write_data = kmap(page);
1965 write_data += from;
1966
1967 if ((to > PAGE_SIZE) || (from > to)) {
1968 kunmap(page);
1969 return -EIO;
1970 }
1971
1972 /* racing with truncate? */
1973 if (offset > mapping->host->i_size) {
1974 kunmap(page);
1975 return 0; /* don't care */
1976 }
1977
1978 /* check to make sure that we are not extending the file */
1979 if (mapping->host->i_size - offset < (loff_t)to)
1980 to = (unsigned)(mapping->host->i_size - offset);
1981
1982 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
1983 if (!rc) {
1984 bytes_written = cifs_write(open_file, open_file->pid,
1985 write_data, to - from, &offset);
1986 cifsFileInfo_put(open_file);
1987 /* Does mm or vfs already set times? */
1988 inode->i_atime = inode->i_mtime = current_time(inode);
1989 if ((bytes_written > 0) && (offset))
1990 rc = 0;
1991 else if (bytes_written < 0)
1992 rc = bytes_written;
1993 else
1994 rc = -EFAULT;
1995 } else {
1996 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
1997 if (!is_retryable_error(rc))
1998 rc = -EIO;
1999 }
2000
2001 kunmap(page);
2002 return rc;
2003 }
2004
2005 static struct cifs_writedata *
2006 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2007 pgoff_t end, pgoff_t *index,
2008 unsigned int *found_pages)
2009 {
2010 struct cifs_writedata *wdata;
2011
2012 wdata = cifs_writedata_alloc((unsigned int)tofind,
2013 cifs_writev_complete);
2014 if (!wdata)
2015 return NULL;
2016
2017 *found_pages = find_get_pages_range_tag(mapping, index, end,
2018 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2019 return wdata;
2020 }
2021
2022 static unsigned int
2023 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2024 struct address_space *mapping,
2025 struct writeback_control *wbc,
2026 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2027 {
2028 unsigned int nr_pages = 0, i;
2029 struct page *page;
2030
2031 for (i = 0; i < found_pages; i++) {
2032 page = wdata->pages[i];
2033 /*
2034 * At this point we hold neither the i_pages lock nor the
2035 * page lock: the page may be truncated or invalidated
2036 * (changing page->mapping to NULL), or even swizzled
2037 * back from swapper_space to tmpfs file mapping
2038 */
2039
2040 if (nr_pages == 0)
2041 lock_page(page);
2042 else if (!trylock_page(page))
2043 break;
2044
2045 if (unlikely(page->mapping != mapping)) {
2046 unlock_page(page);
2047 break;
2048 }
2049
2050 if (!wbc->range_cyclic && page->index > end) {
2051 *done = true;
2052 unlock_page(page);
2053 break;
2054 }
2055
2056 if (*next && (page->index != *next)) {
2057 /* Not next consecutive page */
2058 unlock_page(page);
2059 break;
2060 }
2061
2062 if (wbc->sync_mode != WB_SYNC_NONE)
2063 wait_on_page_writeback(page);
2064
2065 if (PageWriteback(page) ||
2066 !clear_page_dirty_for_io(page)) {
2067 unlock_page(page);
2068 break;
2069 }
2070
2071 /*
2072 * This actually clears the dirty bit in the radix tree.
2073 * See cifs_writepage() for more commentary.
2074 */
2075 set_page_writeback(page);
2076 if (page_offset(page) >= i_size_read(mapping->host)) {
2077 *done = true;
2078 unlock_page(page);
2079 end_page_writeback(page);
2080 break;
2081 }
2082
2083 wdata->pages[i] = page;
2084 *next = page->index + 1;
2085 ++nr_pages;
2086 }
2087
2088 /* reset index to refind any pages skipped */
2089 if (nr_pages == 0)
2090 *index = wdata->pages[0]->index + 1;
2091
2092 /* put any pages we aren't going to use */
2093 for (i = nr_pages; i < found_pages; i++) {
2094 put_page(wdata->pages[i]);
2095 wdata->pages[i] = NULL;
2096 }
2097
2098 return nr_pages;
2099 }
2100
2101 static int
2102 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2103 struct address_space *mapping, struct writeback_control *wbc)
2104 {
2105 int rc;
2106 struct TCP_Server_Info *server =
2107 tlink_tcon(wdata->cfile->tlink)->ses->server;
2108
2109 wdata->sync_mode = wbc->sync_mode;
2110 wdata->nr_pages = nr_pages;
2111 wdata->offset = page_offset(wdata->pages[0]);
2112 wdata->pagesz = PAGE_SIZE;
2113 wdata->tailsz = min(i_size_read(mapping->host) -
2114 page_offset(wdata->pages[nr_pages - 1]),
2115 (loff_t)PAGE_SIZE);
2116 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2117 wdata->pid = wdata->cfile->pid;
2118
2119 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2120 if (rc)
2121 return rc;
2122
2123 if (wdata->cfile->invalidHandle)
2124 rc = -EAGAIN;
2125 else
2126 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2127
2128 return rc;
2129 }
2130
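/*
 * ->writepages() for cifs: gather runs of contiguous dirty pages into
 * cifs_writedata requests sized by the negotiated wsize and send them via
 * the server's async_writev op, falling back to generic_writepages() when
 * wsize is smaller than the page size.
 */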
2131 static int cifs_writepages(struct address_space *mapping,
2132 struct writeback_control *wbc)
2133 {
2134 struct inode *inode = mapping->host;
2135 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2136 struct TCP_Server_Info *server;
2137 bool done = false, scanned = false, range_whole = false;
2138 pgoff_t end, index;
2139 struct cifs_writedata *wdata;
2140 struct cifsFileInfo *cfile = NULL;
2141 int rc = 0;
2142 int saved_rc = 0;
2143 unsigned int xid;
2144
2145 /*
2146 * If wsize is smaller than the page cache size, default to writing
2147 * one page at a time via cifs_writepage
2148 */
2149 if (cifs_sb->wsize < PAGE_SIZE)
2150 return generic_writepages(mapping, wbc);
2151
2152 xid = get_xid();
2153 if (wbc->range_cyclic) {
2154 index = mapping->writeback_index; /* Start from prev offset */
2155 end = -1;
2156 } else {
2157 index = wbc->range_start >> PAGE_SHIFT;
2158 end = wbc->range_end >> PAGE_SHIFT;
2159 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2160 range_whole = true;
2161 scanned = true;
2162 }
2163 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2164 retry:
2165 while (!done && index <= end) {
2166 unsigned int i, nr_pages, found_pages, wsize;
2167 pgoff_t next = 0, tofind, saved_index = index;
2168 struct cifs_credits credits_on_stack;
2169 struct cifs_credits *credits = &credits_on_stack;
2170 int get_file_rc = 0;
2171
2172 if (cfile)
2173 cifsFileInfo_put(cfile);
2174
2175 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2176
2177 /* in case of an error store it to return later */
2178 if (rc)
2179 get_file_rc = rc;
2180
2181 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2182 &wsize, credits);
2183 if (rc != 0) {
2184 done = true;
2185 break;
2186 }
2187
2188 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2189
2190 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2191 &found_pages);
2192 if (!wdata) {
2193 rc = -ENOMEM;
2194 done = true;
2195 add_credits_and_wake_if(server, credits, 0);
2196 break;
2197 }
2198
2199 if (found_pages == 0) {
2200 kref_put(&wdata->refcount, cifs_writedata_release);
2201 add_credits_and_wake_if(server, credits, 0);
2202 break;
2203 }
2204
2205 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2206 end, &index, &next, &done);
2207
2208 /* nothing to write? */
2209 if (nr_pages == 0) {
2210 kref_put(&wdata->refcount, cifs_writedata_release);
2211 add_credits_and_wake_if(server, credits, 0);
2212 continue;
2213 }
2214
2215 wdata->credits = credits_on_stack;
2216 wdata->cfile = cfile;
2217 cfile = NULL;
2218
2219 if (!wdata->cfile) {
2220 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2221 get_file_rc);
2222 if (is_retryable_error(get_file_rc))
2223 rc = get_file_rc;
2224 else
2225 rc = -EBADF;
2226 } else
2227 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2228
2229 for (i = 0; i < nr_pages; ++i)
2230 unlock_page(wdata->pages[i]);
2231
2232 /* send failure -- clean up the mess */
2233 if (rc != 0) {
2234 add_credits_and_wake_if(server, &wdata->credits, 0);
2235 for (i = 0; i < nr_pages; ++i) {
2236 if (is_retryable_error(rc))
2237 redirty_page_for_writepage(wbc,
2238 wdata->pages[i]);
2239 else
2240 SetPageError(wdata->pages[i]);
2241 end_page_writeback(wdata->pages[i]);
2242 put_page(wdata->pages[i]);
2243 }
2244 if (!is_retryable_error(rc))
2245 mapping_set_error(mapping, rc);
2246 }
2247 kref_put(&wdata->refcount, cifs_writedata_release);
2248
2249 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2250 index = saved_index;
2251 continue;
2252 }
2253
2254 /* Return immediately if we received a signal during writing */
2255 if (is_interrupt_error(rc)) {
2256 done = true;
2257 break;
2258 }
2259
2260 if (rc != 0 && saved_rc == 0)
2261 saved_rc = rc;
2262
2263 wbc->nr_to_write -= nr_pages;
2264 if (wbc->nr_to_write <= 0)
2265 done = true;
2266
2267 index = next;
2268 }
2269
2270 if (!scanned && !done) {
2271 /*
2272 * We hit the last page and there is more work to be done: wrap
2273 * back to the start of the file
2274 */
2275 scanned = true;
2276 index = 0;
2277 goto retry;
2278 }
2279
2280 if (saved_rc != 0)
2281 rc = saved_rc;
2282
2283 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2284 mapping->writeback_index = index;
2285
2286 if (cfile)
2287 cifsFileInfo_put(cfile);
2288 free_xid(xid);
2289 return rc;
2290 }
2291
2292 static int
2293 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2294 {
2295 int rc;
2296 unsigned int xid;
2297
2298 xid = get_xid();
2299 /* BB add check for wbc flags */
2300 get_page(page);
2301 if (!PageUptodate(page))
2302 cifs_dbg(FYI, "ppw - page not up to date\n");
2303
2304 /*
2305 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2306 *
2307 * A writepage() implementation always needs to do either this,
2308 * or re-dirty the page with "redirty_page_for_writepage()" in
2309 * the case of a failure.
2310 *
2311 * Just unlocking the page will cause the radix tree tag-bits
2312 * to fail to update with the state of the page correctly.
2313 */
2314 set_page_writeback(page);
2315 retry_write:
2316 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2317 if (is_retryable_error(rc)) {
2318 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2319 goto retry_write;
2320 redirty_page_for_writepage(wbc, page);
2321 } else if (rc != 0) {
2322 SetPageError(page);
2323 mapping_set_error(page->mapping, rc);
2324 } else {
2325 SetPageUptodate(page);
2326 }
2327 end_page_writeback(page);
2328 put_page(page);
2329 free_xid(xid);
2330 return rc;
2331 }
2332
2333 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2334 {
2335 int rc = cifs_writepage_locked(page, wbc);
2336 unlock_page(page);
2337 return rc;
2338 }
2339
2340 static int cifs_write_end(struct file *file, struct address_space *mapping,
2341 loff_t pos, unsigned len, unsigned copied,
2342 struct page *page, void *fsdata)
2343 {
2344 int rc;
2345 struct inode *inode = mapping->host;
2346 struct cifsFileInfo *cfile = file->private_data;
2347 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2348 __u32 pid;
2349
2350 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2351 pid = cfile->pid;
2352 else
2353 pid = current->tgid;
2354
2355 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2356 page, pos, copied);
2357
2358 if (PageChecked(page)) {
2359 if (copied == len)
2360 SetPageUptodate(page);
2361 ClearPageChecked(page);
2362 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2363 SetPageUptodate(page);
2364
2365 if (!PageUptodate(page)) {
2366 char *page_data;
2367 unsigned offset = pos & (PAGE_SIZE - 1);
2368 unsigned int xid;
2369
2370 xid = get_xid();
2371 /* this is probably better than directly calling
2372 partialpage_write since in this function the file handle is
2373 already known, which we might as well leverage */
2374 /* BB check if anything else is missing from ppw,
2375 such as updating the last write time */
2376 page_data = kmap(page);
2377 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2378 /* if (rc < 0) should we set writebehind rc? */
2379 kunmap(page);
2380
2381 free_xid(xid);
2382 } else {
2383 rc = copied;
2384 pos += copied;
2385 set_page_dirty(page);
2386 }
2387
2388 if (rc > 0) {
2389 spin_lock(&inode->i_lock);
2390 if (pos > inode->i_size)
2391 i_size_write(inode, pos);
2392 spin_unlock(&inode->i_lock);
2393 }
2394
2395 unlock_page(page);
2396 put_page(page);
2397
2398 return rc;
2399 }
2400
2401 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2402 int datasync)
2403 {
2404 unsigned int xid;
2405 int rc = 0;
2406 struct cifs_tcon *tcon;
2407 struct TCP_Server_Info *server;
2408 struct cifsFileInfo *smbfile = file->private_data;
2409 struct inode *inode = file_inode(file);
2410 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2411
2412 rc = file_write_and_wait_range(file, start, end);
2413 if (rc)
2414 return rc;
2415 inode_lock(inode);
2416
2417 xid = get_xid();
2418
2419 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2420 file, datasync);
2421
2422 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2423 rc = cifs_zap_mapping(inode);
2424 if (rc) {
2425 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2426 rc = 0; /* don't care about it in fsync */
2427 }
2428 }
2429
2430 tcon = tlink_tcon(smbfile->tlink);
2431 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2432 server = tcon->ses->server;
2433 if (server->ops->flush)
2434 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2435 else
2436 rc = -ENOSYS;
2437 }
2438
2439 free_xid(xid);
2440 inode_unlock(inode);
2441 return rc;
2442 }
2443
2444 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2445 {
2446 unsigned int xid;
2447 int rc = 0;
2448 struct cifs_tcon *tcon;
2449 struct TCP_Server_Info *server;
2450 struct cifsFileInfo *smbfile = file->private_data;
2451 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2452 struct inode *inode = file->f_mapping->host;
2453
2454 rc = file_write_and_wait_range(file, start, end);
2455 if (rc)
2456 return rc;
2457 inode_lock(inode);
2458
2459 xid = get_xid();
2460
2461 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2462 file, datasync);
2463
2464 tcon = tlink_tcon(smbfile->tlink);
2465 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2466 server = tcon->ses->server;
2467 if (server->ops->flush)
2468 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2469 else
2470 rc = -ENOSYS;
2471 }
2472
2473 free_xid(xid);
2474 inode_unlock(inode);
2475 return rc;
2476 }
2477
2478 /*
2479 * As the file closes, flush all cached write data for this inode,
2480 * checking for write-behind errors.
2481 */
2482 int cifs_flush(struct file *file, fl_owner_t id)
2483 {
2484 struct inode *inode = file_inode(file);
2485 int rc = 0;
2486
2487 if (file->f_mode & FMODE_WRITE)
2488 rc = filemap_write_and_wait(inode->i_mapping);
2489
2490 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2491
2492 return rc;
2493 }
2494
2495 static int
2496 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2497 {
2498 int rc = 0;
2499 unsigned long i;
2500
2501 for (i = 0; i < num_pages; i++) {
2502 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2503 if (!pages[i]) {
2504 /*
2505 * save number of pages we have already allocated and
2506 * return with ENOMEM error
2507 */
2508 num_pages = i;
2509 rc = -ENOMEM;
2510 break;
2511 }
2512 }
2513
2514 if (rc) {
2515 for (i = 0; i < num_pages; i++)
2516 put_page(pages[i]);
2517 }
2518 return rc;
2519 }
2520
2521 static inline
2522 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2523 {
2524 size_t num_pages;
2525 size_t clen;
2526
2527 clen = min_t(const size_t, len, wsize);
2528 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2529
2530 if (cur_len)
2531 *cur_len = clen;
2532
2533 return num_pages;
2534 }
2535
2536 static void
2537 cifs_uncached_writedata_release(struct kref *refcount)
2538 {
2539 int i;
2540 struct cifs_writedata *wdata = container_of(refcount,
2541 struct cifs_writedata, refcount);
2542
2543 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2544 for (i = 0; i < wdata->nr_pages; i++)
2545 put_page(wdata->pages[i]);
2546 cifs_writedata_release(refcount);
2547 }
2548
2549 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2550
2551 static void
2552 cifs_uncached_writev_complete(struct work_struct *work)
2553 {
2554 struct cifs_writedata *wdata = container_of(work,
2555 struct cifs_writedata, work);
2556 struct inode *inode = d_inode(wdata->cfile->dentry);
2557 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2558
2559 spin_lock(&inode->i_lock);
2560 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2561 if (cifsi->server_eof > inode->i_size)
2562 i_size_write(inode, cifsi->server_eof);
2563 spin_unlock(&inode->i_lock);
2564
2565 complete(&wdata->done);
2566 collect_uncached_write_data(wdata->ctx);
2567 /* the below call can possibly free the last ref to aio ctx */
2568 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2569 }
2570
2571 static int
2572 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2573 size_t *len, unsigned long *num_pages)
2574 {
2575 size_t save_len, copied, bytes, cur_len = *len;
2576 unsigned long i, nr_pages = *num_pages;
2577
2578 save_len = cur_len;
2579 for (i = 0; i < nr_pages; i++) {
2580 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2581 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2582 cur_len -= copied;
2583 /*
2584 * If we didn't copy as much as we expected, then that
2585 * may mean we trod into an unmapped area. Stop copying
2586 * at that point. On the next pass through the big
2587 * loop, we'll likely end up getting a zero-length
2588 * write and bailing out of it.
2589 */
2590 if (copied < bytes)
2591 break;
2592 }
2593 cur_len = save_len - cur_len;
2594 *len = cur_len;
2595
2596 /*
2597 * If we have no data to send, then that probably means that
2598 * the copy above failed altogether. That's most likely because
2599 * the address in the iovec was bogus. Return -EFAULT and let
2600 * the caller free anything we allocated and bail out.
2601 */
2602 if (!cur_len)
2603 return -EFAULT;
2604
2605 /*
2606 * i + 1 now represents the number of pages we actually used in
2607 * the copy phase above.
2608 */
2609 *num_pages = i + 1;
2610 return 0;
2611 }
2612
2613 static int
2614 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2615 struct cifs_aio_ctx *ctx)
2616 {
2617 unsigned int wsize;
2618 struct cifs_credits credits;
2619 int rc;
2620 struct TCP_Server_Info *server =
2621 tlink_tcon(wdata->cfile->tlink)->ses->server;
2622
2623 /*
2624 * Wait for credits to resend this wdata.
2625 * Note: we are attempting to resend the whole wdata rather than in segments
2626 */
2627 do {
2628 rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize,
2629 &credits);
2630
2631 if (rc)
2632 goto out;
2633
2634 if (wsize < wdata->bytes) {
2635 add_credits_and_wake_if(server, &credits, 0);
2636 msleep(1000);
2637 }
2638 } while (wsize < wdata->bytes);
2639
2640 wdata->credits = credits;
2641 rc = -EAGAIN;
2642 while (rc == -EAGAIN) {
2643 rc = 0;
2644 if (wdata->cfile->invalidHandle)
2645 rc = cifs_reopen_file(wdata->cfile, false);
2646 if (!rc)
2647 rc = server->ops->async_writev(wdata,
2648 cifs_uncached_writedata_release);
2649 }
2650
2651 if (!rc) {
2652 list_add_tail(&wdata->list, wdata_list);
2653 return 0;
2654 }
2655
2656 add_credits_and_wake_if(server, &wdata->credits, 0);
2657 out:
2658 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2659
2660 return rc;
2661 }
2662
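/*
 * Split an uncached write into wsize-bounded cifs_writedata requests, issue
 * them asynchronously, and queue each successfully sent request on
 * wdata_list so the caller can collect the replies later.
 */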
2663 static int
2664 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2665 struct cifsFileInfo *open_file,
2666 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2667 struct cifs_aio_ctx *ctx)
2668 {
2669 int rc = 0;
2670 size_t cur_len;
2671 unsigned long nr_pages, num_pages, i;
2672 struct cifs_writedata *wdata;
2673 struct iov_iter saved_from = *from;
2674 loff_t saved_offset = offset;
2675 pid_t pid;
2676 struct TCP_Server_Info *server;
2677 struct page **pagevec;
2678 size_t start;
2679 unsigned int xid;
2680
2681 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2682 pid = open_file->pid;
2683 else
2684 pid = current->tgid;
2685
2686 server = tlink_tcon(open_file->tlink)->ses->server;
2687 xid = get_xid();
2688
2689 do {
2690 unsigned int wsize;
2691 struct cifs_credits credits_on_stack;
2692 struct cifs_credits *credits = &credits_on_stack;
2693
2694 if (open_file->invalidHandle) {
2695 rc = cifs_reopen_file(open_file, false);
2696 if (rc == -EAGAIN)
2697 continue;
2698 else if (rc)
2699 break;
2700 }
2701
2702 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2703 &wsize, credits);
2704 if (rc)
2705 break;
2706
2707 cur_len = min_t(const size_t, len, wsize);
2708
2709 if (ctx->direct_io) {
2710 ssize_t result;
2711
2712 result = iov_iter_get_pages_alloc(
2713 from, &pagevec, cur_len, &start);
2714 if (result < 0) {
2715 cifs_dbg(VFS,
2716 "direct_writev couldn't get user pages "
2717 "(rc=%zd) iter type %d iov_offset %zd "
2718 "count %zd\n",
2719 result, from->type,
2720 from->iov_offset, from->count);
2721 dump_stack();
2722
2723 rc = result;
2724 add_credits_and_wake_if(server, credits, 0);
2725 break;
2726 }
2727 cur_len = (size_t)result;
2728 iov_iter_advance(from, cur_len);
2729
2730 nr_pages =
2731 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2732
2733 wdata = cifs_writedata_direct_alloc(pagevec,
2734 cifs_uncached_writev_complete);
2735 if (!wdata) {
2736 rc = -ENOMEM;
2737 add_credits_and_wake_if(server, credits, 0);
2738 break;
2739 }
2740
2741
2742 wdata->page_offset = start;
2743 wdata->tailsz =
2744 nr_pages > 1 ?
2745 cur_len - (PAGE_SIZE - start) -
2746 (nr_pages - 2) * PAGE_SIZE :
2747 cur_len;
2748 } else {
2749 nr_pages = get_numpages(wsize, len, &cur_len);
2750 wdata = cifs_writedata_alloc(nr_pages,
2751 cifs_uncached_writev_complete);
2752 if (!wdata) {
2753 rc = -ENOMEM;
2754 add_credits_and_wake_if(server, credits, 0);
2755 break;
2756 }
2757
2758 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2759 if (rc) {
2760 kvfree(wdata->pages);
2761 kfree(wdata);
2762 add_credits_and_wake_if(server, credits, 0);
2763 break;
2764 }
2765
2766 num_pages = nr_pages;
2767 rc = wdata_fill_from_iovec(
2768 wdata, from, &cur_len, &num_pages);
2769 if (rc) {
2770 for (i = 0; i < nr_pages; i++)
2771 put_page(wdata->pages[i]);
2772 kvfree(wdata->pages);
2773 kfree(wdata);
2774 add_credits_and_wake_if(server, credits, 0);
2775 break;
2776 }
2777
2778 /*
2779 * Bring nr_pages down to the number of pages we
2780 * actually used, and free any pages that we didn't use.
2781 */
2782 for ( ; nr_pages > num_pages; nr_pages--)
2783 put_page(wdata->pages[nr_pages - 1]);
2784
2785 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2786 }
2787
2788 wdata->sync_mode = WB_SYNC_ALL;
2789 wdata->nr_pages = nr_pages;
2790 wdata->offset = (__u64)offset;
2791 wdata->cfile = cifsFileInfo_get(open_file);
2792 wdata->pid = pid;
2793 wdata->bytes = cur_len;
2794 wdata->pagesz = PAGE_SIZE;
2795 wdata->credits = credits_on_stack;
2796 wdata->ctx = ctx;
2797 kref_get(&ctx->refcount);
2798
2799 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2800
2801 if (!rc) {
2802 if (wdata->cfile->invalidHandle)
2803 rc = -EAGAIN;
2804 else
2805 rc = server->ops->async_writev(wdata,
2806 cifs_uncached_writedata_release);
2807 }
2808
2809 if (rc) {
2810 add_credits_and_wake_if(server, &wdata->credits, 0);
2811 kref_put(&wdata->refcount,
2812 cifs_uncached_writedata_release);
2813 if (rc == -EAGAIN) {
2814 *from = saved_from;
2815 iov_iter_advance(from, offset - saved_offset);
2816 continue;
2817 }
2818 break;
2819 }
2820
2821 list_add_tail(&wdata->list, wdata_list);
2822 offset += cur_len;
2823 len -= cur_len;
2824 } while (len > 0);
2825
2826 free_xid(xid);
2827 return rc;
2828 }
2829
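/*
 * Collect completions for the uncached write requests queued on ctx->list,
 * resending any request that failed with a retryable error, and report the
 * total bytes written (or the error) through the aio context.
 */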
2830 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2831 {
2832 struct cifs_writedata *wdata, *tmp;
2833 struct cifs_tcon *tcon;
2834 struct cifs_sb_info *cifs_sb;
2835 struct dentry *dentry = ctx->cfile->dentry;
2836 unsigned int i;
2837 int rc;
2838
2839 tcon = tlink_tcon(ctx->cfile->tlink);
2840 cifs_sb = CIFS_SB(dentry->d_sb);
2841
2842 mutex_lock(&ctx->aio_mutex);
2843
2844 if (list_empty(&ctx->list)) {
2845 mutex_unlock(&ctx->aio_mutex);
2846 return;
2847 }
2848
2849 rc = ctx->rc;
2850 /*
2851 * Wait for and collect replies for any successful sends in order of
2852 * increasing offset. Once an error is hit, return without waiting
2853 * for any more replies.
2854 */
2855 restart_loop:
2856 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2857 if (!rc) {
2858 if (!try_wait_for_completion(&wdata->done)) {
2859 mutex_unlock(&ctx->aio_mutex);
2860 return;
2861 }
2862
2863 if (wdata->result)
2864 rc = wdata->result;
2865 else
2866 ctx->total_len += wdata->bytes;
2867
2868 /* resend call if it's a retryable error */
2869 if (rc == -EAGAIN) {
2870 struct list_head tmp_list;
2871 struct iov_iter tmp_from = ctx->iter;
2872
2873 INIT_LIST_HEAD(&tmp_list);
2874 list_del_init(&wdata->list);
2875
2876 if (ctx->direct_io)
2877 rc = cifs_resend_wdata(
2878 wdata, &tmp_list, ctx);
2879 else {
2880 iov_iter_advance(&tmp_from,
2881 wdata->offset - ctx->pos);
2882
2883 rc = cifs_write_from_iter(wdata->offset,
2884 wdata->bytes, &tmp_from,
2885 ctx->cfile, cifs_sb, &tmp_list,
2886 ctx);
2887 }
2888
2889 list_splice(&tmp_list, &ctx->list);
2890
2891 kref_put(&wdata->refcount,
2892 cifs_uncached_writedata_release);
2893 goto restart_loop;
2894 }
2895 }
2896 list_del_init(&wdata->list);
2897 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2898 }
2899
2900 if (!ctx->direct_io)
2901 for (i = 0; i < ctx->npages; i++)
2902 put_page(ctx->bv[i].bv_page);
2903
2904 cifs_stats_bytes_written(tcon, ctx->total_len);
2905 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2906
2907 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2908
2909 mutex_unlock(&ctx->aio_mutex);
2910
2911 if (ctx->iocb && ctx->iocb->ki_complete)
2912 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2913 else
2914 complete(&ctx->done);
2915 }
2916
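/*
 * Common implementation of cifs_user_writev() and cifs_direct_writev():
 * set up an aio context, fan the iterator out into async write requests,
 * then either return -EIOCBQUEUED to async callers or wait for completion.
 */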
2917 static ssize_t __cifs_writev(
2918 struct kiocb *iocb, struct iov_iter *from, bool direct)
2919 {
2920 struct file *file = iocb->ki_filp;
2921 ssize_t total_written = 0;
2922 struct cifsFileInfo *cfile;
2923 struct cifs_tcon *tcon;
2924 struct cifs_sb_info *cifs_sb;
2925 struct cifs_aio_ctx *ctx;
2926 struct iov_iter saved_from = *from;
2927 size_t len = iov_iter_count(from);
2928 int rc;
2929
2930 /*
2931 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2932 * In this case, fall back to the non-direct write function.
2933 * This could be improved by getting pages directly in ITER_KVEC.
2934 */
2935 if (direct && from->type & ITER_KVEC) {
2936 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2937 direct = false;
2938 }
2939
2940 rc = generic_write_checks(iocb, from);
2941 if (rc <= 0)
2942 return rc;
2943
2944 cifs_sb = CIFS_FILE_SB(file);
2945 cfile = file->private_data;
2946 tcon = tlink_tcon(cfile->tlink);
2947
2948 if (!tcon->ses->server->ops->async_writev)
2949 return -ENOSYS;
2950
2951 ctx = cifs_aio_ctx_alloc();
2952 if (!ctx)
2953 return -ENOMEM;
2954
2955 ctx->cfile = cifsFileInfo_get(cfile);
2956
2957 if (!is_sync_kiocb(iocb))
2958 ctx->iocb = iocb;
2959
2960 ctx->pos = iocb->ki_pos;
2961
2962 if (direct) {
2963 ctx->direct_io = true;
2964 ctx->iter = *from;
2965 ctx->len = len;
2966 } else {
2967 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2968 if (rc) {
2969 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2970 return rc;
2971 }
2972 }
2973
2974 /* grab a lock here because the write response handlers can access ctx */
2975 mutex_lock(&ctx->aio_mutex);
2976
2977 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2978 cfile, cifs_sb, &ctx->list, ctx);
2979
2980 /*
2981 * If at least one write was successfully sent, then discard any rc
2982 * value from the later writes. If the other write succeeds, then
2983 * we'll end up returning whatever was written. If it fails, then
2984 * we'll get a new rc value from that.
2985 */
2986 if (!list_empty(&ctx->list))
2987 rc = 0;
2988
2989 mutex_unlock(&ctx->aio_mutex);
2990
2991 if (rc) {
2992 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2993 return rc;
2994 }
2995
2996 if (!is_sync_kiocb(iocb)) {
2997 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2998 return -EIOCBQUEUED;
2999 }
3000
3001 rc = wait_for_completion_killable(&ctx->done);
3002 if (rc) {
3003 mutex_lock(&ctx->aio_mutex);
3004 ctx->rc = rc = -EINTR;
3005 total_written = ctx->total_len;
3006 mutex_unlock(&ctx->aio_mutex);
3007 } else {
3008 rc = ctx->rc;
3009 total_written = ctx->total_len;
3010 }
3011
3012 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3013
3014 if (unlikely(!total_written))
3015 return rc;
3016
3017 iocb->ki_pos += total_written;
3018 return total_written;
3019 }
3020
3021 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3022 {
3023 return __cifs_writev(iocb, from, true);
3024 }
3025
3026 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3027 {
3028 return __cifs_writev(iocb, from, false);
3029 }
3030
3031 static ssize_t
3032 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3033 {
3034 struct file *file = iocb->ki_filp;
3035 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3036 struct inode *inode = file->f_mapping->host;
3037 struct cifsInodeInfo *cinode = CIFS_I(inode);
3038 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3039 ssize_t rc;
3040
3041 inode_lock(inode);
3042 /*
3043 * We need to hold the sem to be sure nobody modifies the lock list
3044 * with a brlock that prevents writing.
3045 */
3046 down_read(&cinode->lock_sem);
3047
3048 rc = generic_write_checks(iocb, from);
3049 if (rc <= 0)
3050 goto out;
3051
3052 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3053 server->vals->exclusive_lock_type, 0,
3054 NULL, CIFS_WRITE_OP))
3055 rc = __generic_file_write_iter(iocb, from);
3056 else
3057 rc = -EACCES;
3058 out:
3059 up_read(&cinode->lock_sem);
3060 inode_unlock(inode);
3061
3062 if (rc > 0)
3063 rc = generic_write_sync(iocb, rc);
3064 return rc;
3065 }
3066
3067 ssize_t
3068 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3069 {
3070 struct inode *inode = file_inode(iocb->ki_filp);
3071 struct cifsInodeInfo *cinode = CIFS_I(inode);
3072 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3073 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3074 iocb->ki_filp->private_data;
3075 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3076 ssize_t written;
3077
3078 written = cifs_get_writer(cinode);
3079 if (written)
3080 return written;
3081
3082 if (CIFS_CACHE_WRITE(cinode)) {
3083 if (cap_unix(tcon->ses) &&
3084 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3085 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3086 written = generic_file_write_iter(iocb, from);
3087 goto out;
3088 }
3089 written = cifs_writev(iocb, from);
3090 goto out;
3091 }
3092 /*
3093 * For non-oplocked files in strict cache mode we need to write the data
3094 * to the server exactly from pos to pos+len-1 rather than flush all
3095 * affected pages because it may cause an error with mandatory locks on
3096 * these pages but not on the region from pos to pos+len-1.
3097 */
3098 written = cifs_user_writev(iocb, from);
3099 if (CIFS_CACHE_READ(cinode)) {
3100 /*
3101 * We have read level caching and we have just sent a write
3102 * request to the server thus making data in the cache stale.
3103 * Zap the cache and set oplock/lease level to NONE to avoid
3104 * reading stale data from the cache. All subsequent read
3105 * operations will read new data from the server.
3106 */
3107 cifs_zap_mapping(inode);
3108 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3109 inode);
3110 cinode->oplock = 0;
3111 }
3112 out:
3113 cifs_put_writer(cinode);
3114 return written;
3115 }
3116
3117 static struct cifs_readdata *
3118 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3119 {
3120 struct cifs_readdata *rdata;
3121
3122 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3123 if (rdata != NULL) {
3124 rdata->pages = pages;
3125 kref_init(&rdata->refcount);
3126 INIT_LIST_HEAD(&rdata->list);
3127 init_completion(&rdata->done);
3128 INIT_WORK(&rdata->work, complete);
3129 }
3130
3131 return rdata;
3132 }
3133
3134 static struct cifs_readdata *
3135 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3136 {
3137 struct page **pages =
3138 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3139 struct cifs_readdata *ret = NULL;
3140
3141 if (pages) {
3142 ret = cifs_readdata_direct_alloc(pages, complete);
3143 if (!ret)
3144 kfree(pages);
3145 }
3146
3147 return ret;
3148 }
3149
3150 void
3151 cifs_readdata_release(struct kref *refcount)
3152 {
3153 struct cifs_readdata *rdata = container_of(refcount,
3154 struct cifs_readdata, refcount);
3155 #ifdef CONFIG_CIFS_SMB_DIRECT
3156 if (rdata->mr) {
3157 smbd_deregister_mr(rdata->mr);
3158 rdata->mr = NULL;
3159 }
3160 #endif
3161 if (rdata->cfile)
3162 cifsFileInfo_put(rdata->cfile);
3163
3164 kvfree(rdata->pages);
3165 kfree(rdata);
3166 }
3167
3168 static int
3169 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3170 {
3171 int rc = 0;
3172 struct page *page;
3173 unsigned int i;
3174
3175 for (i = 0; i < nr_pages; i++) {
3176 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3177 if (!page) {
3178 rc = -ENOMEM;
3179 break;
3180 }
3181 rdata->pages[i] = page;
3182 }
3183
3184 if (rc) {
3185 for (i = 0; i < nr_pages; i++) {
3186 put_page(rdata->pages[i]);
3187 rdata->pages[i] = NULL;
3188 }
3189 }
3190 return rc;
3191 }
3192
3193 static void
3194 cifs_uncached_readdata_release(struct kref *refcount)
3195 {
3196 struct cifs_readdata *rdata = container_of(refcount,
3197 struct cifs_readdata, refcount);
3198 unsigned int i;
3199
3200 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3201 for (i = 0; i < rdata->nr_pages; i++) {
3202 put_page(rdata->pages[i]);
3203 }
3204 cifs_readdata_release(refcount);
3205 }
3206
3207 /**
3208 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3209 * @rdata: the readdata response with list of pages holding data
3210 * @iter: destination for our data
3211 *
3212 * This function copies data from a list of pages in a readdata response into
3213 * an array of iovecs. It will first calculate where the data should go
3214 * based on the info in the readdata and then copy the data into that spot.
3215 */
3216 static int
3217 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3218 {
3219 size_t remaining = rdata->got_bytes;
3220 unsigned int i;
3221
3222 for (i = 0; i < rdata->nr_pages; i++) {
3223 struct page *page = rdata->pages[i];
3224 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3225 size_t written;
3226
3227 if (unlikely(iov_iter_is_pipe(iter))) {
3228 void *addr = kmap_atomic(page);
3229
3230 written = copy_to_iter(addr, copy, iter);
3231 kunmap_atomic(addr);
3232 } else
3233 written = copy_page_to_iter(page, 0, copy, iter);
3234 remaining -= written;
3235 if (written < copy && iov_iter_count(iter) > 0)
3236 break;
3237 }
3238 return remaining ? -EFAULT : 0;
3239 }
3240
3241 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3242
3243 static void
3244 cifs_uncached_readv_complete(struct work_struct *work)
3245 {
3246 struct cifs_readdata *rdata = container_of(work,
3247 struct cifs_readdata, work);
3248
3249 complete(&rdata->done);
3250 collect_uncached_read_data(rdata->ctx);
3251 /* the below call can possibly free the last ref to aio ctx */
3252 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3253 }
3254
3255 static int
3256 uncached_fill_pages(struct TCP_Server_Info *server,
3257 struct cifs_readdata *rdata, struct iov_iter *iter,
3258 unsigned int len)
3259 {
3260 int result = 0;
3261 unsigned int i;
3262 unsigned int nr_pages = rdata->nr_pages;
3263 unsigned int page_offset = rdata->page_offset;
3264
3265 rdata->got_bytes = 0;
3266 rdata->tailsz = PAGE_SIZE;
3267 for (i = 0; i < nr_pages; i++) {
3268 struct page *page = rdata->pages[i];
3269 size_t n;
3270 unsigned int segment_size = rdata->pagesz;
3271
3272 if (i == 0)
3273 segment_size -= page_offset;
3274 else
3275 page_offset = 0;
3276
3277
3278 if (len <= 0) {
3279 /* no need to hold page hostage */
3280 rdata->pages[i] = NULL;
3281 rdata->nr_pages--;
3282 put_page(page);
3283 continue;
3284 }
3285
3286 n = len;
3287 if (len >= segment_size)
3288 /* enough data to fill the page */
3289 n = segment_size;
3290 else
3291 rdata->tailsz = len;
3292 len -= n;
3293
3294 if (iter)
3295 result = copy_page_from_iter(
3296 page, page_offset, n, iter);
3297 #ifdef CONFIG_CIFS_SMB_DIRECT
3298 else if (rdata->mr)
3299 result = n;
3300 #endif
3301 else
3302 result = cifs_read_page_from_socket(
3303 server, page, page_offset, n);
3304 if (result < 0)
3305 break;
3306
3307 rdata->got_bytes += result;
3308 }
3309
3310 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3311 rdata->got_bytes : result;
3312 }
3313
3314 static int
3315 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3316 struct cifs_readdata *rdata, unsigned int len)
3317 {
3318 return uncached_fill_pages(server, rdata, NULL, len);
3319 }
3320
3321 static int
3322 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3323 struct cifs_readdata *rdata,
3324 struct iov_iter *iter)
3325 {
3326 return uncached_fill_pages(server, rdata, iter, iter->count);
3327 }
3328
3329 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3330 struct list_head *rdata_list,
3331 struct cifs_aio_ctx *ctx)
3332 {
3333 unsigned int rsize;
3334 struct cifs_credits credits;
3335 int rc;
3336 struct TCP_Server_Info *server =
3337 tlink_tcon(rdata->cfile->tlink)->ses->server;
3338
3339 /*
3340 * Wait for credits to resend this rdata.
3341 * Note: we are attempting to resend the whole rdata rather than in segments
3342 */
3343 do {
3344 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3345 &rsize, &credits);
3346
3347 if (rc)
3348 goto out;
3349
3350 if (rsize < rdata->bytes) {
3351 add_credits_and_wake_if(server, &credits, 0);
3352 msleep(1000);
3353 }
3354 } while (rsize < rdata->bytes);
3355
3356 rdata->credits = credits;
3357 rc = -EAGAIN;
3358 while (rc == -EAGAIN) {
3359 rc = 0;
3360 if (rdata->cfile->invalidHandle)
3361 rc = cifs_reopen_file(rdata->cfile, true);
3362 if (!rc)
3363 rc = server->ops->async_readv(rdata);
3364 }
3365
3366 if (!rc) {
3367 /* Add to aio pending list */
3368 list_add_tail(&rdata->list, rdata_list);
3369 return 0;
3370 }
3371
3372 add_credits_and_wake_if(server, &rdata->credits, 0);
3373 out:
3374 kref_put(&rdata->refcount,
3375 cifs_uncached_readdata_release);
3376
3377 return rc;
3378 }
3379
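/*
 * Split an uncached read into rsize-bounded cifs_readdata requests, issue
 * them via the server's async_readv op, and queue each successfully sent
 * request on rdata_list so the caller can collect the replies later.
 */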
3380 static int
3381 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3382 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3383 struct cifs_aio_ctx *ctx)
3384 {
3385 struct cifs_readdata *rdata;
3386 unsigned int npages, rsize;
3387 struct cifs_credits credits_on_stack;
3388 struct cifs_credits *credits = &credits_on_stack;
3389 size_t cur_len;
3390 int rc;
3391 pid_t pid;
3392 struct TCP_Server_Info *server;
3393 struct page **pagevec;
3394 size_t start;
3395 struct iov_iter direct_iov = ctx->iter;
3396
3397 server = tlink_tcon(open_file->tlink)->ses->server;
3398
3399 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3400 pid = open_file->pid;
3401 else
3402 pid = current->tgid;
3403
3404 if (ctx->direct_io)
3405 iov_iter_advance(&direct_iov, offset - ctx->pos);
3406
3407 do {
3408 if (open_file->invalidHandle) {
3409 rc = cifs_reopen_file(open_file, true);
3410 if (rc == -EAGAIN)
3411 continue;
3412 else if (rc)
3413 break;
3414 }
3415
3416 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3417 &rsize, credits);
3418 if (rc)
3419 break;
3420
3421 cur_len = min_t(const size_t, len, rsize);
3422
3423 if (ctx->direct_io) {
3424 ssize_t result;
3425
3426 result = iov_iter_get_pages_alloc(
3427 &direct_iov, &pagevec,
3428 cur_len, &start);
3429 if (result < 0) {
3430 cifs_dbg(VFS,
3431 "couldn't get user pages (rc=%zd)"
3432 " iter type %d"
3433 " iov_offset %zd count %zd\n",
3434 result, direct_iov.type,
3435 direct_iov.iov_offset,
3436 direct_iov.count);
3437 dump_stack();
3438
3439 rc = result;
3440 add_credits_and_wake_if(server, credits, 0);
3441 break;
3442 }
3443 cur_len = (size_t)result;
3444 iov_iter_advance(&direct_iov, cur_len);
3445
3446 rdata = cifs_readdata_direct_alloc(
3447 pagevec, cifs_uncached_readv_complete);
3448 if (!rdata) {
3449 add_credits_and_wake_if(server, credits, 0);
3450 rc = -ENOMEM;
3451 break;
3452 }
3453
3454 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3455 rdata->page_offset = start;
3456 rdata->tailsz = npages > 1 ?
3457 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3458 cur_len;
3459
3460 } else {
3461
3462 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3463 /* allocate a readdata struct */
3464 rdata = cifs_readdata_alloc(npages,
3465 cifs_uncached_readv_complete);
3466 if (!rdata) {
3467 add_credits_and_wake_if(server, credits, 0);
3468 rc = -ENOMEM;
3469 break;
3470 }
3471
3472 rc = cifs_read_allocate_pages(rdata, npages);
3473 if (rc) {
3474 kvfree(rdata->pages);
3475 kfree(rdata);
3476 add_credits_and_wake_if(server, credits, 0);
3477 break;
3478 }
3479
3480 rdata->tailsz = PAGE_SIZE;
3481 }
3482
3483 rdata->cfile = cifsFileInfo_get(open_file);
3484 rdata->nr_pages = npages;
3485 rdata->offset = offset;
3486 rdata->bytes = cur_len;
3487 rdata->pid = pid;
3488 rdata->pagesz = PAGE_SIZE;
3489 rdata->read_into_pages = cifs_uncached_read_into_pages;
3490 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3491 rdata->credits = credits_on_stack;
3492 rdata->ctx = ctx;
3493 kref_get(&ctx->refcount);
3494
3495 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3496
3497 if (!rc) {
3498 if (rdata->cfile->invalidHandle)
3499 rc = -EAGAIN;
3500 else
3501 rc = server->ops->async_readv(rdata);
3502 }
3503
3504 if (rc) {
3505 add_credits_and_wake_if(server, &rdata->credits, 0);
3506 kref_put(&rdata->refcount,
3507 cifs_uncached_readdata_release);
3508 if (rc == -EAGAIN) {
3509 iov_iter_revert(&direct_iov, cur_len);
3510 continue;
3511 }
3512 break;
3513 }
3514
3515 list_add_tail(&rdata->list, rdata_list);
3516 offset += cur_len;
3517 len -= cur_len;
3518 } while (len > 0);
3519
3520 return rc;
3521 }
3522
3523 static void
3524 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3525 {
3526 struct cifs_readdata *rdata, *tmp;
3527 struct iov_iter *to = &ctx->iter;
3528 struct cifs_sb_info *cifs_sb;
3529 struct cifs_tcon *tcon;
3530 unsigned int i;
3531 int rc;
3532
3533 tcon = tlink_tcon(ctx->cfile->tlink);
3534 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3535
3536 mutex_lock(&ctx->aio_mutex);
3537
3538 if (list_empty(&ctx->list)) {
3539 mutex_unlock(&ctx->aio_mutex);
3540 return;
3541 }
3542
3543 rc = ctx->rc;
3544 /* the loop below should proceed in the order of increasing offsets */
3545 again:
3546 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3547 if (!rc) {
3548 if (!try_wait_for_completion(&rdata->done)) {
3549 mutex_unlock(&ctx->aio_mutex);
3550 return;
3551 }
3552
3553 if (rdata->result == -EAGAIN) {
3554 /* resend call if it's a retryable error */
3555 struct list_head tmp_list;
3556 unsigned int got_bytes = rdata->got_bytes;
3557
3558 list_del_init(&rdata->list);
3559 INIT_LIST_HEAD(&tmp_list);
3560
3561 /*
3562 * We got part of the data and then a reconnect
3563 * happened -- fill the buffer and continue
3564 * reading.
3565 */
3566 if (got_bytes && got_bytes < rdata->bytes) {
3567 rc = 0;
3568 if (!ctx->direct_io)
3569 rc = cifs_readdata_to_iov(rdata, to);
3570 if (rc) {
3571 kref_put(&rdata->refcount,
3572 cifs_uncached_readdata_release);
3573 continue;
3574 }
3575 }
3576
3577 if (ctx->direct_io) {
3578 /*
3579 * Re-use rdata as this is a
3580 * direct I/O
3581 */
3582 rc = cifs_resend_rdata(
3583 rdata,
3584 &tmp_list, ctx);
3585 } else {
3586 rc = cifs_send_async_read(
3587 rdata->offset + got_bytes,
3588 rdata->bytes - got_bytes,
3589 rdata->cfile, cifs_sb,
3590 &tmp_list, ctx);
3591
3592 kref_put(&rdata->refcount,
3593 cifs_uncached_readdata_release);
3594 }
3595
3596 list_splice(&tmp_list, &ctx->list);
3597
3598 goto again;
3599 } else if (rdata->result)
3600 rc = rdata->result;
3601 else if (!ctx->direct_io)
3602 rc = cifs_readdata_to_iov(rdata, to);
3603
3604 /* if there was a short read -- discard anything left */
3605 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3606 rc = -ENODATA;
3607
3608 ctx->total_len += rdata->got_bytes;
3609 }
3610 list_del_init(&rdata->list);
3611 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3612 }
3613
3614 if (!ctx->direct_io) {
3615 for (i = 0; i < ctx->npages; i++) {
3616 if (ctx->should_dirty)
3617 set_page_dirty(ctx->bv[i].bv_page);
3618 put_page(ctx->bv[i].bv_page);
3619 }
3620
3621 ctx->total_len = ctx->len - iov_iter_count(to);
3622 }
3623
3624 /* mask nodata case */
3625 if (rc == -ENODATA)
3626 rc = 0;
3627
3628 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3629
3630 mutex_unlock(&ctx->aio_mutex);
3631
3632 if (ctx->iocb && ctx->iocb->ki_complete)
3633 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3634 else
3635 complete(&ctx->done);
3636 }
3637
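/*
 * Common implementation of cifs_user_readv() and cifs_direct_readv():
 * set up an aio context, send the read requests asynchronously, then either
 * return -EIOCBQUEUED to async callers or wait for the data to arrive.
 */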
3638 static ssize_t __cifs_readv(
3639 struct kiocb *iocb, struct iov_iter *to, bool direct)
3640 {
3641 size_t len;
3642 struct file *file = iocb->ki_filp;
3643 struct cifs_sb_info *cifs_sb;
3644 struct cifsFileInfo *cfile;
3645 struct cifs_tcon *tcon;
3646 ssize_t rc, total_read = 0;
3647 loff_t offset = iocb->ki_pos;
3648 struct cifs_aio_ctx *ctx;
3649
3650 /*
3651 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3652 * so fall back to the data copy read path.
3653 * This could be improved by getting pages directly in ITER_KVEC.
3654 */
3655 if (direct && to->type & ITER_KVEC) {
3656 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3657 direct = false;
3658 }
3659
3660 len = iov_iter_count(to);
3661 if (!len)
3662 return 0;
3663
3664 cifs_sb = CIFS_FILE_SB(file);
3665 cfile = file->private_data;
3666 tcon = tlink_tcon(cfile->tlink);
3667
3668 if (!tcon->ses->server->ops->async_readv)
3669 return -ENOSYS;
3670
3671 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3672 cifs_dbg(FYI, "attempting read on write only file instance\n");
3673
3674 ctx = cifs_aio_ctx_alloc();
3675 if (!ctx)
3676 return -ENOMEM;
3677
3678 ctx->cfile = cifsFileInfo_get(cfile);
3679
3680 if (!is_sync_kiocb(iocb))
3681 ctx->iocb = iocb;
3682
3683 if (iter_is_iovec(to))
3684 ctx->should_dirty = true;
3685
3686 if (direct) {
3687 ctx->pos = offset;
3688 ctx->direct_io = true;
3689 ctx->iter = *to;
3690 ctx->len = len;
3691 } else {
3692 rc = setup_aio_ctx_iter(ctx, to, READ);
3693 if (rc) {
3694 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3695 return rc;
3696 }
3697 len = ctx->len;
3698 }
3699
3700 /* grab a lock here because the read response handlers can access ctx */
3701 mutex_lock(&ctx->aio_mutex);
3702
3703 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3704
3705 /* if at least one read request was sent successfully, then reset rc */
3706 if (!list_empty(&ctx->list))
3707 rc = 0;
3708
3709 mutex_unlock(&ctx->aio_mutex);
3710
3711 if (rc) {
3712 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3713 return rc;
3714 }
3715
3716 if (!is_sync_kiocb(iocb)) {
3717 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3718 return -EIOCBQUEUED;
3719 }
3720
3721 rc = wait_for_completion_killable(&ctx->done);
3722 if (rc) {
3723 mutex_lock(&ctx->aio_mutex);
3724 ctx->rc = rc = -EINTR;
3725 total_read = ctx->total_len;
3726 mutex_unlock(&ctx->aio_mutex);
3727 } else {
3728 rc = ctx->rc;
3729 total_read = ctx->total_len;
3730 }
3731
3732 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3733
3734 if (total_read) {
3735 iocb->ki_pos += total_read;
3736 return total_read;
3737 }
3738 return rc;
3739 }
3740
3741 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3742 {
3743 return __cifs_readv(iocb, to, true);
3744 }
3745
3746 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3747 {
3748 return __cifs_readv(iocb, to, false);
3749 }
3750
3751 ssize_t
3752 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3753 {
3754 struct inode *inode = file_inode(iocb->ki_filp);
3755 struct cifsInodeInfo *cinode = CIFS_I(inode);
3756 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3757 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3758 iocb->ki_filp->private_data;
3759 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3760 int rc = -EACCES;
3761
3762 /*
3763 * In strict cache mode we need to read from the server all the time
3764 * if we don't have level II oplock because the server can delay mtime
3765 * change - so we can't make a decision about invalidating the inode.
3766 * And we can also fail with page reading if there are mandatory locks
3767 * on pages affected by this read but not on the region from pos to
3768 * pos+len-1.
3769 */
3770 if (!CIFS_CACHE_READ(cinode))
3771 return cifs_user_readv(iocb, to);
3772
3773 if (cap_unix(tcon->ses) &&
3774 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3775 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3776 return generic_file_read_iter(iocb, to);
3777
3778 /*
3779 * We need to hold the sem to be sure nobody modifies the lock list
3780 * with a brlock that prevents reading.
3781 */
3782 down_read(&cinode->lock_sem);
3783 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3784 tcon->ses->server->vals->shared_lock_type,
3785 0, NULL, CIFS_READ_OP))
3786 rc = generic_file_read_iter(iocb, to);
3787 up_read(&cinode->lock_sem);
3788 return rc;
3789 }
3790
3791 static ssize_t
3792 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3793 {
3794 int rc = -EACCES;
3795 unsigned int bytes_read = 0;
3796 unsigned int total_read;
3797 unsigned int current_read_size;
3798 unsigned int rsize;
3799 struct cifs_sb_info *cifs_sb;
3800 struct cifs_tcon *tcon;
3801 struct TCP_Server_Info *server;
3802 unsigned int xid;
3803 char *cur_offset;
3804 struct cifsFileInfo *open_file;
3805 struct cifs_io_parms io_parms;
3806 int buf_type = CIFS_NO_BUFFER;
3807 __u32 pid;
3808
3809 xid = get_xid();
3810 cifs_sb = CIFS_FILE_SB(file);
3811
3812 /* FIXME: set up handlers for larger reads and/or convert to async */
3813 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3814
3815 if (file->private_data == NULL) {
3816 rc = -EBADF;
3817 free_xid(xid);
3818 return rc;
3819 }
3820 open_file = file->private_data;
3821 tcon = tlink_tcon(open_file->tlink);
3822 server = tcon->ses->server;
3823
3824 if (!server->ops->sync_read) {
3825 free_xid(xid);
3826 return -ENOSYS;
3827 }
3828
3829 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3830 pid = open_file->pid;
3831 else
3832 pid = current->tgid;
3833
3834 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3835 cifs_dbg(FYI, "attempting read on write only file instance\n");
3836
3837 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3838 total_read += bytes_read, cur_offset += bytes_read) {
3839 do {
3840 current_read_size = min_t(uint, read_size - total_read,
3841 rsize);
3842 /*
3843 * For Windows ME and 9x we do not want to request more
3844 * than was negotiated, since the server will then refuse
3845 * the read.
3846 */
3847 if ((tcon->ses) && !(tcon->ses->capabilities &
3848 tcon->ses->server->vals->cap_large_files)) {
3849 current_read_size = min_t(uint,
3850 current_read_size, CIFSMaxBufSize);
3851 }
3852 if (open_file->invalidHandle) {
3853 rc = cifs_reopen_file(open_file, true);
3854 if (rc != 0)
3855 break;
3856 }
3857 io_parms.pid = pid;
3858 io_parms.tcon = tcon;
3859 io_parms.offset = *offset;
3860 io_parms.length = current_read_size;
3861 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3862 &bytes_read, &cur_offset,
3863 &buf_type);
3864 } while (rc == -EAGAIN);
3865
3866 if (rc || (bytes_read == 0)) {
3867 if (total_read) {
3868 break;
3869 } else {
3870 free_xid(xid);
3871 return rc;
3872 }
3873 } else {
3874 cifs_stats_bytes_read(tcon, total_read);
3875 *offset += bytes_read;
3876 }
3877 }
3878 free_xid(xid);
3879 return total_read;
3880 }
3881
3882 /*
3883 * If the page is mmap'ed into a process' page tables, then we need to make
3884 * sure that it doesn't change while being written back.
3885 */
3886 static vm_fault_t
3887 cifs_page_mkwrite(struct vm_fault *vmf)
3888 {
3889 struct page *page = vmf->page;
3890
3891 lock_page(page);
3892 return VM_FAULT_LOCKED;
3893 }
3894
3895 static const struct vm_operations_struct cifs_file_vm_ops = {
3896 .fault = filemap_fault,
3897 .map_pages = filemap_map_pages,
3898 .page_mkwrite = cifs_page_mkwrite,
3899 };
3900
3901 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3902 {
3903 int xid, rc = 0;
3904 struct inode *inode = file_inode(file);
3905
3906 xid = get_xid();
3907
3908 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3909 rc = cifs_zap_mapping(inode);
3910 if (!rc)
3911 rc = generic_file_mmap(file, vma);
3912 if (!rc)
3913 vma->vm_ops = &cifs_file_vm_ops;
3914
3915 free_xid(xid);
3916 return rc;
3917 }
3918
3919 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3920 {
3921 int rc, xid;
3922
3923 xid = get_xid();
3924
3925 rc = cifs_revalidate_file(file);
3926 if (rc)
3927 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3928 rc);
3929 if (!rc)
3930 rc = generic_file_mmap(file, vma);
3931 if (!rc)
3932 vma->vm_ops = &cifs_file_vm_ops;
3933
3934 free_xid(xid);
3935 return rc;
3936 }
3937
3938 static void
3939 cifs_readv_complete(struct work_struct *work)
3940 {
3941 unsigned int i, got_bytes;
3942 struct cifs_readdata *rdata = container_of(work,
3943 struct cifs_readdata, work);
3944
3945 got_bytes = rdata->got_bytes;
3946 for (i = 0; i < rdata->nr_pages; i++) {
3947 struct page *page = rdata->pages[i];
3948
3949 lru_cache_add_file(page);
3950
3951 if (rdata->result == 0 ||
3952 (rdata->result == -EAGAIN && got_bytes)) {
3953 flush_dcache_page(page);
3954 SetPageUptodate(page);
3955 }
3956
3957 unlock_page(page);
3958
3959 if (rdata->result == 0 ||
3960 (rdata->result == -EAGAIN && got_bytes))
3961 cifs_readpage_to_fscache(rdata->mapping->host, page);
3962
3963 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3964
3965 put_page(page);
3966 rdata->pages[i] = NULL;
3967 }
3968 kref_put(&rdata->refcount, cifs_readdata_release);
3969 }
3970
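/*
 * Fill the pages of a readpages request from the transport (socket, RDMA MR
 * or a supplied iterator).  A partially filled page has its tail zeroed,
 * pages past the server's EOF are zero-filled and marked up to date, and any
 * other leftover pages are dropped.
 */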
3971 static int
3972 readpages_fill_pages(struct TCP_Server_Info *server,
3973 struct cifs_readdata *rdata, struct iov_iter *iter,
3974 unsigned int len)
3975 {
3976 int result = 0;
3977 unsigned int i;
3978 u64 eof;
3979 pgoff_t eof_index;
3980 unsigned int nr_pages = rdata->nr_pages;
3981 unsigned int page_offset = rdata->page_offset;
3982
3983 /* determine the eof that the server (probably) has */
3984 eof = CIFS_I(rdata->mapping->host)->server_eof;
3985 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3986 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3987
3988 rdata->got_bytes = 0;
3989 rdata->tailsz = PAGE_SIZE;
3990 for (i = 0; i < nr_pages; i++) {
3991 struct page *page = rdata->pages[i];
3992 unsigned int to_read = rdata->pagesz;
3993 size_t n;
3994
3995 if (i == 0)
3996 to_read -= page_offset;
3997 else
3998 page_offset = 0;
3999
4000 n = to_read;
4001
4002 if (len >= to_read) {
4003 len -= to_read;
4004 } else if (len > 0) {
4005 /* enough for partial page, fill and zero the rest */
4006 zero_user(page, len + page_offset, to_read - len);
4007 n = rdata->tailsz = len;
4008 len = 0;
4009 } else if (page->index > eof_index) {
4010 /*
4011 * The VFS will not try to do readahead past the
4012 * i_size, but it's possible that we have outstanding
4013 * writes with gaps in the middle and the i_size hasn't
4014 * caught up yet. Populate those with zeroed out pages
4015 * to prevent the VFS from repeatedly attempting to
4016 * fill them until the writes are flushed.
4017 */
4018 zero_user(page, 0, PAGE_SIZE);
4019 lru_cache_add_file(page);
4020 flush_dcache_page(page);
4021 SetPageUptodate(page);
4022 unlock_page(page);
4023 put_page(page);
4024 rdata->pages[i] = NULL;
4025 rdata->nr_pages--;
4026 continue;
4027 } else {
4028 /* no need to hold page hostage */
4029 lru_cache_add_file(page);
4030 unlock_page(page);
4031 put_page(page);
4032 rdata->pages[i] = NULL;
4033 rdata->nr_pages--;
4034 continue;
4035 }
4036
4037 if (iter)
4038 result = copy_page_from_iter(
4039 page, page_offset, n, iter);
4040 #ifdef CONFIG_CIFS_SMB_DIRECT
4041 else if (rdata->mr)
4042 result = n;
4043 #endif
4044 else
4045 result = cifs_read_page_from_socket(
4046 server, page, page_offset, n);
4047 if (result < 0)
4048 break;
4049
4050 rdata->got_bytes += result;
4051 }
4052
4053 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4054 rdata->got_bytes : result;
4055 }
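/*
 * Return convention for readpages_fill_pages(): if any bytes landed in the
 * pages and the connection was not aborted, the byte count is returned so
 * the caller can treat the result as partial progress; otherwise the raw
 * error from the copy or socket read is propagated.
 */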
4056
4057 static int
4058 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4059 struct cifs_readdata *rdata, unsigned int len)
4060 {
4061 return readpages_fill_pages(server, rdata, NULL, len);
4062 }
4063
4064 static int
4065 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4066 struct cifs_readdata *rdata,
4067 struct iov_iter *iter)
4068 {
4069 return readpages_fill_pages(server, rdata, iter, iter->count);
4070 }
4071
4072 static int
4073 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4074 unsigned int rsize, struct list_head *tmplist,
4075 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4076 {
4077 struct page *page, *tpage;
4078 unsigned int expected_index;
4079 int rc;
4080 gfp_t gfp = readahead_gfp_mask(mapping);
4081
4082 INIT_LIST_HEAD(tmplist);
4083
4084 page = lru_to_page(page_list);
4085
4086 /*
4087 * Lock the page and put it in the cache. Since no one else
4088 * should have access to this page, we're safe to simply set
4089 * PG_locked without checking it first.
4090 */
4091 __SetPageLocked(page);
4092 rc = add_to_page_cache_locked(page, mapping,
4093 page->index, gfp);
4094
4095 /* give up if we can't stick it in the cache */
4096 if (rc) {
4097 __ClearPageLocked(page);
4098 return rc;
4099 }
4100
4101 /* move first page to the tmplist */
4102 *offset = (loff_t)page->index << PAGE_SHIFT;
4103 *bytes = PAGE_SIZE;
4104 *nr_pages = 1;
4105 list_move_tail(&page->lru, tmplist);
4106
4107 /* now try and add more pages onto the request */
4108 expected_index = page->index + 1;
4109 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4110 /* discontinuity ? */
4111 if (page->index != expected_index)
4112 break;
4113
4114 /* would this page push the read over the rsize? */
4115 if (*bytes + PAGE_SIZE > rsize)
4116 break;
4117
4118 __SetPageLocked(page);
4119 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4120 __ClearPageLocked(page);
4121 break;
4122 }
4123 list_move_tail(&page->lru, tmplist);
4124 (*bytes) += PAGE_SIZE;
4125 expected_index++;
4126 (*nr_pages)++;
4127 }
4128 return rc;
4129 }
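/*
 * readpages_get_pages() peels a run of pages off the tail of page_list:
 * consecutive indexes only, capped so the total stays within rsize, each
 * page locked and inserted into the page cache. On return, *offset, *bytes
 * and *nr_pages describe the single contiguous read to issue for tmplist.
 */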
4130
4131 static int cifs_readpages(struct file *file, struct address_space *mapping,
4132 struct list_head *page_list, unsigned num_pages)
4133 {
4134 int rc;
4135 struct list_head tmplist;
4136 struct cifsFileInfo *open_file = file->private_data;
4137 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4138 struct TCP_Server_Info *server;
4139 pid_t pid;
4140 unsigned int xid;
4141
4142 xid = get_xid();
4143 /*
4144 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4145 * immediately if the cookie is negative
4146 *
4147 * After this point, every page in the list might have PG_fscache set,
4148 * so we will need to clean that up off of every page we don't use.
4149 */
4150 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4151 &num_pages);
4152 if (rc == 0) {
4153 free_xid(xid);
4154 return rc;
4155 }
4156
4157 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4158 pid = open_file->pid;
4159 else
4160 pid = current->tgid;
4161
4162 rc = 0;
4163 server = tlink_tcon(open_file->tlink)->ses->server;
4164
4165 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4166 __func__, file, mapping, num_pages);
4167
4168 /*
4169 * Start with the page at end of list and move it to private
4170 * list. Do the same with any following pages until we hit
4171 * the rsize limit, hit an index discontinuity, or run out of
4172 * pages. Issue the async read and then start the loop again
4173 * until the list is empty.
4174 *
4175 * Note that list order is important. The page_list is in
4176 * the order of declining indexes. When we put the pages in
4177 * the rdata->pages, then we want them in increasing order.
4178 */
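/*
 * For example, if readahead hands us indexes {7, 6, 5, 4} (head to tail),
 * one pass below moves them onto tmplist and then into rdata->pages as
 * {4, 5, 6, 7}: a single four-page read starting at offset 4 << PAGE_SHIFT,
 * assuming all four pages fit within rsize.
 */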
4179 while (!list_empty(page_list)) {
4180 unsigned int i, nr_pages, bytes, rsize;
4181 loff_t offset;
4182 struct page *page, *tpage;
4183 struct cifs_readdata *rdata;
4184 struct cifs_credits credits_on_stack;
4185 struct cifs_credits *credits = &credits_on_stack;
4186
4187 if (open_file->invalidHandle) {
4188 rc = cifs_reopen_file(open_file, true);
4189 if (rc == -EAGAIN)
4190 continue;
4191 else if (rc)
4192 break;
4193 }
4194
4195 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4196 &rsize, credits);
4197 if (rc)
4198 break;
4199
4200 /*
4201 * Give up immediately if rsize is too small to read an entire
4202 * page. The VFS will fall back to readpage. We should never
4203 * reach this point however since we set ra_pages to 0 when the
4204 * rsize is smaller than a cache page.
4205 */
4206 if (unlikely(rsize < PAGE_SIZE)) {
4207 add_credits_and_wake_if(server, credits, 0);
4208 free_xid(xid);
4209 return 0;
4210 }
4211
4212 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4213 &nr_pages, &offset, &bytes);
4214 if (rc) {
4215 add_credits_and_wake_if(server, credits, 0);
4216 break;
4217 }
4218
4219 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4220 if (!rdata) {
4221 /* best to give up if we're out of mem */
4222 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4223 list_del(&page->lru);
4224 lru_cache_add_file(page);
4225 unlock_page(page);
4226 put_page(page);
4227 }
4228 rc = -ENOMEM;
4229 add_credits_and_wake_if(server, credits, 0);
4230 break;
4231 }
4232
4233 rdata->cfile = cifsFileInfo_get(open_file);
4234 rdata->mapping = mapping;
4235 rdata->offset = offset;
4236 rdata->bytes = bytes;
4237 rdata->pid = pid;
4238 rdata->pagesz = PAGE_SIZE;
4239 rdata->tailsz = PAGE_SIZE;
4240 rdata->read_into_pages = cifs_readpages_read_into_pages;
4241 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4242 rdata->credits = credits_on_stack;
4243
4244 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4245 list_del(&page->lru);
4246 rdata->pages[rdata->nr_pages++] = page;
4247 }
4248
4249 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4250
4251 if (!rc) {
4252 if (rdata->cfile->invalidHandle)
4253 rc = -EAGAIN;
4254 else
4255 rc = server->ops->async_readv(rdata);
4256 }
4257
4258 if (rc) {
4259 add_credits_and_wake_if(server, &rdata->credits, 0);
4260 for (i = 0; i < rdata->nr_pages; i++) {
4261 page = rdata->pages[i];
4262 lru_cache_add_file(page);
4263 unlock_page(page);
4264 put_page(page);
4265 }
4266 /* Fall back to readpage in error/reconnect cases */
4267 kref_put(&rdata->refcount, cifs_readdata_release);
4268 break;
4269 }
4270
4271 kref_put(&rdata->refcount, cifs_readdata_release);
4272 }
4273
4274 /* Any pages that have been shown to fscache but didn't get added to
4275 * the pagecache must be uncached before they get returned to the
4276 * allocator.
4277 */
4278 cifs_fscache_readpages_cancel(mapping->host, page_list);
4279 free_xid(xid);
4280 return rc;
4281 }
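/*
 * Credit handling in the loop above: wait_mtu_credits() reserves enough
 * credits for up to rsize bytes before the pages are gathered,
 * adjust_credits() trims the reservation to the bytes actually requested,
 * and every failure path returns the reservation via
 * add_credits_and_wake_if() so other waiters are not starved.
 */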
4282
4283 /*
4284 * cifs_readpage_worker must be called with the page pinned
4285 */
4286 static int cifs_readpage_worker(struct file *file, struct page *page,
4287 loff_t *poffset)
4288 {
4289 char *read_data;
4290 int rc;
4291
4292 /* Is the page cached? */
4293 rc = cifs_readpage_from_fscache(file_inode(file), page);
4294 if (rc == 0)
4295 goto read_complete;
4296
4297 read_data = kmap(page);
4298 /* for reads over a certain size could initiate async read ahead */
4299
4300 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4301
4302 if (rc < 0)
4303 goto io_error;
4304 else
4305 cifs_dbg(FYI, "Bytes read %d\n", rc);
4306
4307 /* we do not want atime to be less than mtime, it broke some apps */
4308 file_inode(file)->i_atime = current_time(file_inode(file));
4309 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4310 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4311 else
4312 file_inode(file)->i_atime = current_time(file_inode(file));
4313
4314 if (PAGE_SIZE > rc)
4315 memset(read_data + rc, 0, PAGE_SIZE - rc);
4316
4317 flush_dcache_page(page);
4318 SetPageUptodate(page);
4319
4320 /* send this page to the cache */
4321 cifs_readpage_to_fscache(file_inode(file), page);
4322
4323 rc = 0;
4324
4325 io_error:
4326 kunmap(page);
4327 unlock_page(page);
4328
4329 read_complete:
4330 return rc;
4331 }
4332
4333 static int cifs_readpage(struct file *file, struct page *page)
4334 {
4335 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4336 int rc = -EACCES;
4337 unsigned int xid;
4338
4339 xid = get_xid();
4340
4341 if (file->private_data == NULL) {
4342 rc = -EBADF;
4343 free_xid(xid);
4344 return rc;
4345 }
4346
4347 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4348 page, (int)offset, (int)offset);
4349
4350 rc = cifs_readpage_worker(file, page, &offset);
4351
4352 free_xid(xid);
4353 return rc;
4354 }
4355
4356 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4357 {
4358 struct cifsFileInfo *open_file;
4359 struct cifs_tcon *tcon =
4360 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4361
4362 spin_lock(&tcon->open_file_lock);
4363 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4364 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4365 spin_unlock(&tcon->open_file_lock);
4366 return 1;
4367 }
4368 }
4369 spin_unlock(&tcon->open_file_lock);
4370 return 0;
4371 }
4372
4373 /* We do not want to update the file size from the server for inodes
4374 open for write, to avoid races with writepage extending the file.
4375 In the future we could consider allowing the inode to be refreshed
4376 only on increases in the file size, but this is tricky to do
4377 without racing with writebehind page caching in the current Linux
4378 kernel design. */
4379 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4380 {
4381 if (!cifsInode)
4382 return true;
4383
4384 if (is_inode_writable(cifsInode)) {
4385 /* This inode is open for write at least once */
4386 struct cifs_sb_info *cifs_sb;
4387
4388 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4389 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4390 /* since no page cache to corrupt on directio
4391 we can change size safely */
4392 return true;
4393 }
4394
4395 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4396 return true;
4397
4398 return false;
4399 } else
4400 return true;
4401 }
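/*
 * In short: the server-reported size is applied unless the inode is open
 * for write on a cached (non direct I/O) mount and the new end of file
 * would not grow the file, which is exactly the case where a racing
 * writepage could be extending it locally.
 */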
4402
4403 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4404 loff_t pos, unsigned len, unsigned flags,
4405 struct page **pagep, void **fsdata)
4406 {
4407 int oncethru = 0;
4408 pgoff_t index = pos >> PAGE_SHIFT;
4409 loff_t offset = pos & (PAGE_SIZE - 1);
4410 loff_t page_start = pos & PAGE_MASK;
4411 loff_t i_size;
4412 struct page *page;
4413 int rc = 0;
4414
4415 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4416
4417 start:
4418 page = grab_cache_page_write_begin(mapping, index, flags);
4419 if (!page) {
4420 rc = -ENOMEM;
4421 goto out;
4422 }
4423
4424 if (PageUptodate(page))
4425 goto out;
4426
4427 /*
4428 * If we write a full page it will be up to date, no need to read from
4429 * the server. If the write is short, we'll end up doing a sync write
4430 * instead.
4431 */
4432 if (len == PAGE_SIZE)
4433 goto out;
4434
4435 /*
4436 * optimize away the read when we have an oplock, and we're not
4437 * expecting to use any of the data we'd be reading in. That
4438 * is, when the page lies beyond the EOF, or straddles the EOF
4439 * and the write will cover all of the existing data.
4440 */
4441 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4442 i_size = i_size_read(mapping->host);
4443 if (page_start >= i_size ||
4444 (offset == 0 && (pos + len) >= i_size)) {
4445 zero_user_segments(page, 0, offset,
4446 offset + len,
4447 PAGE_SIZE);
4448 /*
4449 * PageChecked means that the parts of the page
4450 * to which we're not writing are considered up
4451 * to date. Once the data is copied to the
4452 * page, it can be set uptodate.
4453 */
4454 SetPageChecked(page);
4455 goto out;
4456 }
4457 }
4458
4459 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4460 /*
4461 * might as well read a page, it is fast enough. If we get
4462 * an error, we don't need to return it. cifs_write_end will
4463 * do a sync write instead since PG_uptodate isn't set.
4464 */
4465 cifs_readpage_worker(file, page, &page_start);
4466 put_page(page);
4467 oncethru = 1;
4468 goto start;
4469 } else {
4470 /* we could try using another file handle if there is one -
4471 but how would we lock it to prevent close of that handle
4472 racing with this read? In any case
4473 this will be written out by write_end so is fine */
4474 }
4475 out:
4476 *pagep = page;
4477 return rc;
4478 }
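/*
 * Summary of the paths above: a full-page write or an already-uptodate
 * page needs no read; with a read oplock, a write beyond or covering EOF
 * only needs the untouched parts zeroed (PageChecked); otherwise the page
 * is read in once, and if that read fails cifs_write_end() falls back to
 * a sync write because PG_uptodate is not set.
 */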
4479
4480 static int cifs_release_page(struct page *page, gfp_t gfp)
4481 {
4482 if (PagePrivate(page))
4483 return 0;
4484
4485 return cifs_fscache_release_page(page, gfp);
4486 }
4487
4488 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4489 unsigned int length)
4490 {
4491 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4492
4493 if (offset == 0 && length == PAGE_SIZE)
4494 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4495 }
4496
4497 static int cifs_launder_page(struct page *page)
4498 {
4499 int rc = 0;
4500 loff_t range_start = page_offset(page);
4501 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4502 struct writeback_control wbc = {
4503 .sync_mode = WB_SYNC_ALL,
4504 .nr_to_write = 0,
4505 .range_start = range_start,
4506 .range_end = range_end,
4507 };
4508
4509 cifs_dbg(FYI, "Launder page: %p\n", page);
4510
4511 if (clear_page_dirty_for_io(page))
4512 rc = cifs_writepage_locked(page, &wbc);
4513
4514 cifs_fscache_invalidate_page(page, page->mapping->host);
4515 return rc;
4516 }
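/*
 * launder_page runs when a still-dirty page is about to be invalidated
 * (e.g. from invalidate_inode_pages2()): the page is written back
 * synchronously over exactly its byte range, and the fscache copy is
 * invalidated so no stale data outlives the page.
 */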
4517
4518 void cifs_oplock_break(struct work_struct *work)
4519 {
4520 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4521 oplock_break);
4522 struct inode *inode = d_inode(cfile->dentry);
4523 struct cifsInodeInfo *cinode = CIFS_I(inode);
4524 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4525 struct TCP_Server_Info *server = tcon->ses->server;
4526 int rc = 0;
4527
4528 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4529 TASK_UNINTERRUPTIBLE);
4530
4531 server->ops->downgrade_oplock(server, cinode,
4532 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4533
4534 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4535 cifs_has_mand_locks(cinode)) {
4536 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4537 inode);
4538 cinode->oplock = 0;
4539 }
4540
4541 if (inode && S_ISREG(inode->i_mode)) {
4542 if (CIFS_CACHE_READ(cinode))
4543 break_lease(inode, O_RDONLY);
4544 else
4545 break_lease(inode, O_WRONLY);
4546 rc = filemap_fdatawrite(inode->i_mapping);
4547 if (!CIFS_CACHE_READ(cinode)) {
4548 rc = filemap_fdatawait(inode->i_mapping);
4549 mapping_set_error(inode->i_mapping, rc);
4550 cifs_zap_mapping(inode);
4551 }
4552 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4553 }
4554
4555 rc = cifs_push_locks(cfile);
4556 if (rc)
4557 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4558
4559 /*
4560 * Releasing a stale oplock after a recent reconnect of the SMB session,
4561 * using a now-incorrect file handle, is not a data integrity issue, but
4562 * do not bother sending an oplock release if the session to the server
4563 * is still disconnected, since the server has already released the oplock.
4564 */
4565 if (!cfile->oplock_break_cancelled) {
4566 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4567 cinode);
4568 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4569 }
4570 cifs_done_oplock_break(cinode);
4571 }
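/*
 * Ordering in the oplock break worker above: wait for in-flight writers,
 * downgrade the cached oplock state, break any local (fcntl) lease on the
 * inode, flush (and, if read caching is lost, invalidate) the page cache,
 * re-push byte-range locks, and only then acknowledge the break to the
 * server, unless the break was already cancelled (e.g. after a reconnect).
 */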
4572
4573 /*
4574 * The presence of cifs_direct_io() in the address space ops vector
4575 * allows open() with the O_DIRECT flag, which would otherwise have failed.
4576 *
4577 * In the non-cached mode (mount with cache=none), we shunt off direct
4578 * read and write requests, so this method should never be called.
4579 *
4580 * Direct IO is not yet supported in the cached mode.
4581 */
4582 static ssize_t
4583 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4584 {
4585 /*
4586 * FIXME
4587 * Eventually need to support direct IO for non forcedirectio mounts
4588 */
4589 return -EINVAL;
4590 }
4591
4592
4593 const struct address_space_operations cifs_addr_ops = {
4594 .readpage = cifs_readpage,
4595 .readpages = cifs_readpages,
4596 .writepage = cifs_writepage,
4597 .writepages = cifs_writepages,
4598 .write_begin = cifs_write_begin,
4599 .write_end = cifs_write_end,
4600 .set_page_dirty = __set_page_dirty_nobuffers,
4601 .releasepage = cifs_release_page,
4602 .direct_IO = cifs_direct_io,
4603 .invalidatepage = cifs_invalidate_page,
4604 .launder_page = cifs_launder_page,
4605 };
4606
4607 /*
4608 * cifs_readpages requires the server to support a buffer large enough to
4609 * contain the header plus one complete page of data. Otherwise, we need
4610 * to leave cifs_readpages out of the address space operations.
4611 */
4612 const struct address_space_operations cifs_addr_ops_smallbuf = {
4613 .readpage = cifs_readpage,
4614 .writepage = cifs_writepage,
4615 .writepages = cifs_writepages,
4616 .write_begin = cifs_write_begin,
4617 .write_end = cifs_write_end,
4618 .set_page_dirty = __set_page_dirty_nobuffers,
4619 .releasepage = cifs_release_page,
4620 .invalidatepage = cifs_invalidate_page,
4621 .launder_page = cifs_launder_page,
4622 };
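/*
 * For reference, the choice between the two tables above is made per inode
 * at setup time, based on whether the server's negotiated buffer can hold a
 * header plus a full page. A minimal sketch of that selection (cf.
 * cifs_set_ops() in fs/cifs/inode.c; the field and macro names below are
 * quoted from memory and may differ slightly in a given tree):
 *
 *	if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf <
 *	    PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 */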