1 /*
2 * fs/cifs/file.c
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request;
56 it can cause an unnecessary access-denied error on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
59 }
60
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68 u32 posix_flags = 0;
69
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
76
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
84
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
96
97 return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
112 }
113
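/*
 * Illustrative sketch (not part of the original source): the userspace
 * view of the disposition mapping implemented by cifs_get_disposition()
 * above, assuming a plain open(2) against a file on a CIFS mount:
 *
 *	open(path, O_RDONLY)                           -> FILE_OPEN
 *	open(path, O_WRONLY | O_CREAT, 0644)           -> FILE_OPEN_IF
 *	open(path, O_RDWR | O_CREAT | O_EXCL, 0644)    -> FILE_CREATE
 *	open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644) -> FILE_OVERWRITE_IF
 *	open(path, O_WRONLY | O_TRUNC)                 -> FILE_OVERWRITE
 */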
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
125
126 cifs_dbg(FYI, "posix open %s\n", full_path);
127
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
131
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
136 }
137
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
140
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
146
147 if (rc)
148 goto posix_open_ret;
149
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
155
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
165 }
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
168 }
169
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
173 }
174
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
179 {
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
187
188 if (!server->ops->open)
189 return -ENOSYS;
190
191 desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194 * open flag mapping table:
195 *
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
203 *
204 * Note that there is no direct POSIX match for the disposition
205 * FILE_SUPERSEDE (ie create whether or not the file exists);
206 * O_CREAT | O_TRUNC is similar, but it truncates an existing
207 * file rather than creating a new one as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on the open call).
209 *
210 * O_SYNC is a reasonable match to the CIFS writethrough flag,
211 * and the read/write flags match reasonably. O_LARGEFILE
212 * is irrelevant because largefile support is always used
213 * by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
215 *********************************************************************/
216
217 disposition = cifs_get_disposition(f_flags);
218
219 /* BB pass O_SYNC flag through on file attributes .. BB */
220
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
231
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
234
235 oparms.tcon = tcon;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
241 oparms.fid = fid;
242 oparms.reconnect = false;
243
244 rc = server->ops->open(xid, &oparms, oplock, buf);
245
246 if (rc)
247 goto out;
248
249 if (tcon->unix_ext)
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 xid);
252 else
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254 xid, fid);
255
256 out:
257 kfree(buf);
258 return rc;
259 }
260
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264 struct cifs_fid_locks *cur;
265 bool has_locks = false;
266
267 down_read(&cinode->lock_sem);
268 list_for_each_entry(cur, &cinode->llist, llist) {
269 if (!list_empty(&cur->locks)) {
270 has_locks = true;
271 break;
272 }
273 }
274 up_read(&cinode->lock_sem);
275 return has_locks;
276 }
277
278 struct cifsFileInfo *
279 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
280 struct tcon_link *tlink, __u32 oplock)
281 {
282 struct dentry *dentry = file_dentry(file);
283 struct inode *inode = d_inode(dentry);
284 struct cifsInodeInfo *cinode = CIFS_I(inode);
285 struct cifsFileInfo *cfile;
286 struct cifs_fid_locks *fdlocks;
287 struct cifs_tcon *tcon = tlink_tcon(tlink);
288 struct TCP_Server_Info *server = tcon->ses->server;
289
290 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
291 if (cfile == NULL)
292 return cfile;
293
294 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
295 if (!fdlocks) {
296 kfree(cfile);
297 return NULL;
298 }
299
300 INIT_LIST_HEAD(&fdlocks->locks);
301 fdlocks->cfile = cfile;
302 cfile->llist = fdlocks;
303 down_write(&cinode->lock_sem);
304 list_add(&fdlocks->llist, &cinode->llist);
305 up_write(&cinode->lock_sem);
306
307 cfile->count = 1;
308 cfile->pid = current->tgid;
309 cfile->uid = current_fsuid();
310 cfile->dentry = dget(dentry);
311 cfile->f_flags = file->f_flags;
312 cfile->invalidHandle = false;
313 cfile->tlink = cifs_get_tlink(tlink);
314 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
315 mutex_init(&cfile->fh_mutex);
316 spin_lock_init(&cfile->file_info_lock);
317
318 cifs_sb_active(inode->i_sb);
319
320 /*
321 * If the server returned a read oplock and we have mandatory brlocks,
322 * set oplock level to None.
323 */
324 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
325 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
326 oplock = 0;
327 }
328
329 spin_lock(&tcon->open_file_lock);
330 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
331 oplock = fid->pending_open->oplock;
332 list_del(&fid->pending_open->olist);
333
334 fid->purge_cache = false;
335 server->ops->set_fid(cfile, fid, oplock);
336
337 list_add(&cfile->tlist, &tcon->openFileList);
338 atomic_inc(&tcon->num_local_opens);
339
340 /* if readable file instance, put it first in the list */
341 if (file->f_mode & FMODE_READ)
342 list_add(&cfile->flist, &cinode->openFileList);
343 else
344 list_add_tail(&cfile->flist, &cinode->openFileList);
345 spin_unlock(&tcon->open_file_lock);
346
347 if (fid->purge_cache)
348 cifs_zap_mapping(inode);
349
350 file->private_data = cfile;
351 return cfile;
352 }
353
354 struct cifsFileInfo *
355 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
356 {
357 spin_lock(&cifs_file->file_info_lock);
358 cifsFileInfo_get_locked(cifs_file);
359 spin_unlock(&cifs_file->file_info_lock);
360 return cifs_file;
361 }
362
363 /*
364 * Release a reference on the file private data. This may involve closing
365 * the filehandle out on the server. Must be called without holding
366 * tcon->open_file_lock and cifs_file->file_info_lock.
367 */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370 struct inode *inode = d_inode(cifs_file->dentry);
371 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372 struct TCP_Server_Info *server = tcon->ses->server;
373 struct cifsInodeInfo *cifsi = CIFS_I(inode);
374 struct super_block *sb = inode->i_sb;
375 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376 struct cifsLockInfo *li, *tmp;
377 struct cifs_fid fid;
378 struct cifs_pending_open open;
379 bool oplock_break_cancelled;
380
381 spin_lock(&tcon->open_file_lock);
382
383 spin_lock(&cifs_file->file_info_lock);
384 if (--cifs_file->count > 0) {
385 spin_unlock(&cifs_file->file_info_lock);
386 spin_unlock(&tcon->open_file_lock);
387 return;
388 }
389 spin_unlock(&cifs_file->file_info_lock);
390
391 if (server->ops->get_lease_key)
392 server->ops->get_lease_key(inode, &fid);
393
394 /* store open in pending opens to make sure we don't miss lease break */
395 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397 /* remove it from the lists */
398 list_del(&cifs_file->flist);
399 list_del(&cifs_file->tlist);
400 atomic_dec(&tcon->num_local_opens);
401
402 if (list_empty(&cifsi->openFileList)) {
403 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404 d_inode(cifs_file->dentry));
405 /*
406 * In strict cache mode we need to invalidate the mapping on the
407 * last close because it may cause an error when we open this file
408 * again and get at least a level II oplock.
409 */
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412 cifs_set_oplock_level(cifsi, 0);
413 }
414
415 spin_unlock(&tcon->open_file_lock);
416
417 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420 struct TCP_Server_Info *server = tcon->ses->server;
421 unsigned int xid;
422
423 xid = get_xid();
424 if (server->ops->close)
425 server->ops->close(xid, tcon, &cifs_file->fid);
426 _free_xid(xid);
427 }
428
429 if (oplock_break_cancelled)
430 cifs_done_oplock_break(cifsi);
431
432 cifs_del_pending_open(&open);
433
434 /*
435 * Delete any outstanding lock records. We'll lose them when the file
436 * is closed anyway.
437 */
438 down_write(&cifsi->lock_sem);
439 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440 list_del(&li->llist);
441 cifs_del_lock_waiters(li);
442 kfree(li);
443 }
444 list_del(&cifs_file->llist->llist);
445 kfree(cifs_file->llist);
446 up_write(&cifsi->lock_sem);
447
448 cifs_put_tlink(cifs_file->tlink);
449 dput(cifs_file->dentry);
450 cifs_sb_deactive(sb);
451 kfree(cifs_file);
452 }
453
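/*
 * Illustrative sketch (not part of the original source): the reference
 * counting contract implied by cifsFileInfo_get()/cifsFileInfo_put()
 * above. Any lookup that takes a reference (for example
 * find_readable_file() later in this file) must be paired with a put,
 * called without tcon->open_file_lock or file_info_lock held:
 *
 *	struct cifsFileInfo *ofile;
 *
 *	ofile = find_readable_file(CIFS_I(inode), false);
 *	if (ofile) {
 *		... use ofile; the handle cannot be freed under us ...
 *		cifsFileInfo_put(ofile); // may close the handle on the server
 *	}
 */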
454 int cifs_open(struct inode *inode, struct file *file)
455
456 {
457 int rc = -EACCES;
458 unsigned int xid;
459 __u32 oplock;
460 struct cifs_sb_info *cifs_sb;
461 struct TCP_Server_Info *server;
462 struct cifs_tcon *tcon;
463 struct tcon_link *tlink;
464 struct cifsFileInfo *cfile = NULL;
465 char *full_path = NULL;
466 bool posix_open_ok = false;
467 struct cifs_fid fid;
468 struct cifs_pending_open open;
469
470 xid = get_xid();
471
472 cifs_sb = CIFS_SB(inode->i_sb);
473 tlink = cifs_sb_tlink(cifs_sb);
474 if (IS_ERR(tlink)) {
475 free_xid(xid);
476 return PTR_ERR(tlink);
477 }
478 tcon = tlink_tcon(tlink);
479 server = tcon->ses->server;
480
481 full_path = build_path_from_dentry(file_dentry(file));
482 if (full_path == NULL) {
483 rc = -ENOMEM;
484 goto out;
485 }
486
487 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
488 inode, file->f_flags, full_path);
489
490 if (file->f_flags & O_DIRECT &&
491 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
492 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
493 file->f_op = &cifs_file_direct_nobrl_ops;
494 else
495 file->f_op = &cifs_file_direct_ops;
496 }
497
498 if (server->oplocks)
499 oplock = REQ_OPLOCK;
500 else
501 oplock = 0;
502
503 if (!tcon->broken_posix_open && tcon->unix_ext &&
504 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
505 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506 /* can not refresh inode info since size could be stale */
507 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
508 cifs_sb->mnt_file_mode /* ignored */,
509 file->f_flags, &oplock, &fid.netfid, xid);
510 if (rc == 0) {
511 cifs_dbg(FYI, "posix open succeeded\n");
512 posix_open_ok = true;
513 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
514 if (tcon->ses->serverNOS)
515 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
516 tcon->ses->serverName,
517 tcon->ses->serverNOS);
518 tcon->broken_posix_open = true;
519 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
520 (rc != -EOPNOTSUPP)) /* path not found or net err */
521 goto out;
522 /*
523 * Else fall through and retry the open the old way on network
524 * I/O or DFS errors.
525 */
526 }
527
528 if (server->ops->get_lease_key)
529 server->ops->get_lease_key(inode, &fid);
530
531 cifs_add_pending_open(&fid, tlink, &open);
532
533 if (!posix_open_ok) {
534 if (server->ops->get_lease_key)
535 server->ops->get_lease_key(inode, &fid);
536
537 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538 file->f_flags, &oplock, &fid, xid);
539 if (rc) {
540 cifs_del_pending_open(&open);
541 goto out;
542 }
543 }
544
545 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546 if (cfile == NULL) {
547 if (server->ops->close)
548 server->ops->close(xid, tcon, &fid);
549 cifs_del_pending_open(&open);
550 rc = -ENOMEM;
551 goto out;
552 }
553
554 cifs_fscache_set_inode_cookie(inode, file);
555
556 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557 /*
558 * Time to set the mode, which we could not set earlier due to
559 * problems creating new read-only files.
560 */
561 struct cifs_unix_set_info_args args = {
562 .mode = inode->i_mode,
563 .uid = INVALID_UID, /* no change */
564 .gid = INVALID_GID, /* no change */
565 .ctime = NO_CHANGE_64,
566 .atime = NO_CHANGE_64,
567 .mtime = NO_CHANGE_64,
568 .device = 0,
569 };
570 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571 cfile->pid);
572 }
573
574 out:
575 kfree(full_path);
576 free_xid(xid);
577 cifs_put_tlink(tlink);
578 return rc;
579 }
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
583 /*
584 * Try to reacquire byte-range locks that were released when the
585 * session to the server was lost.
586 */
587 static int
588 cifs_relock_file(struct cifsFileInfo *cfile)
589 {
590 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
591 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
592 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
593 int rc = 0;
594
595 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
596 if (cinode->can_cache_brlcks) {
597 /* can cache locks - no need to relock */
598 up_read(&cinode->lock_sem);
599 return rc;
600 }
601
602 if (cap_unix(tcon->ses) &&
603 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
604 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
605 rc = cifs_push_posix_locks(cfile);
606 else
607 rc = tcon->ses->server->ops->push_mand_locks(cfile);
608
609 up_read(&cinode->lock_sem);
610 return rc;
611 }
612
613 static int
614 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
615 {
616 int rc = -EACCES;
617 unsigned int xid;
618 __u32 oplock;
619 struct cifs_sb_info *cifs_sb;
620 struct cifs_tcon *tcon;
621 struct TCP_Server_Info *server;
622 struct cifsInodeInfo *cinode;
623 struct inode *inode;
624 char *full_path = NULL;
625 int desired_access;
626 int disposition = FILE_OPEN;
627 int create_options = CREATE_NOT_DIR;
628 struct cifs_open_parms oparms;
629
630 xid = get_xid();
631 mutex_lock(&cfile->fh_mutex);
632 if (!cfile->invalidHandle) {
633 mutex_unlock(&cfile->fh_mutex);
634 rc = 0;
635 free_xid(xid);
636 return rc;
637 }
638
639 inode = d_inode(cfile->dentry);
640 cifs_sb = CIFS_SB(inode->i_sb);
641 tcon = tlink_tcon(cfile->tlink);
642 server = tcon->ses->server;
643
644 /*
645 * Cannot grab the rename sem here because various ops, including those
646 * that already hold the rename sem, can end up causing writepage to get
647 * called; if the server was down we end up here, and we can never tell
648 * whether the caller already holds the rename_sem.
649 */
650 full_path = build_path_from_dentry(cfile->dentry);
651 if (full_path == NULL) {
652 rc = -ENOMEM;
653 mutex_unlock(&cfile->fh_mutex);
654 free_xid(xid);
655 return rc;
656 }
657
658 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
659 inode, cfile->f_flags, full_path);
660
661 if (tcon->ses->server->oplocks)
662 oplock = REQ_OPLOCK;
663 else
664 oplock = 0;
665
666 if (tcon->unix_ext && cap_unix(tcon->ses) &&
667 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
668 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
669 /*
670 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
671 * original open. Must mask them off for a reopen.
672 */
673 unsigned int oflags = cfile->f_flags &
674 ~(O_CREAT | O_EXCL | O_TRUNC);
675
676 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
677 cifs_sb->mnt_file_mode /* ignored */,
678 oflags, &oplock, &cfile->fid.netfid, xid);
679 if (rc == 0) {
680 cifs_dbg(FYI, "posix reopen succeeded\n");
681 oparms.reconnect = true;
682 goto reopen_success;
683 }
684 /*
685 * fall through and retry the open the old way on errors; especially
686 * in the reconnect path it is important to retry hard
687 */
688 }
689
690 desired_access = cifs_convert_flags(cfile->f_flags);
691
692 if (backup_cred(cifs_sb))
693 create_options |= CREATE_OPEN_BACKUP_INTENT;
694
695 if (server->ops->get_lease_key)
696 server->ops->get_lease_key(inode, &cfile->fid);
697
698 oparms.tcon = tcon;
699 oparms.cifs_sb = cifs_sb;
700 oparms.desired_access = desired_access;
701 oparms.create_options = create_options;
702 oparms.disposition = disposition;
703 oparms.path = full_path;
704 oparms.fid = &cfile->fid;
705 oparms.reconnect = true;
706
707 /*
708 * Cannot refresh the inode by passing in a file_info buf to be returned
709 * by ops->open and then calling get_inode_info with the returned buf,
710 * since the file might have write-behind data that needs to be flushed
711 * and the server's version of the file size can be stale. If we knew for
712 * sure that the inode was not dirty locally, we could do this.
713 */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 if (rc == -ENOENT && oparms.reconnect == false) {
716 /* durable handle timeout is expired - open the file again */
717 rc = server->ops->open(xid, &oparms, &oplock, NULL);
718 /* indicate that we need to relock the file */
719 oparms.reconnect = true;
720 }
721
722 if (rc) {
723 mutex_unlock(&cfile->fh_mutex);
724 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
725 cifs_dbg(FYI, "oplock: %d\n", oplock);
726 goto reopen_error_exit;
727 }
728
729 reopen_success:
730 cfile->invalidHandle = false;
731 mutex_unlock(&cfile->fh_mutex);
732 cinode = CIFS_I(inode);
733
734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping);
736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
738
739 if (tcon->unix_ext)
740 rc = cifs_get_inode_info_unix(&inode, full_path,
741 inode->i_sb, xid);
742 else
743 rc = cifs_get_inode_info(&inode, full_path, NULL,
744 inode->i_sb, xid, NULL);
745 }
746 /*
747 * Else we are already writing out data to the server and could deadlock
748 * if we tried to flush it; and since we do not know whether we have data
749 * that would invalidate the current end of file on the server, we cannot
750 * go to the server to get the new inode info.
751 */
752
753 /*
754 * If the server returned a read oplock and we have mandatory brlocks,
755 * set oplock level to None.
756 */
757 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
758 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
759 oplock = 0;
760 }
761
762 server->ops->set_fid(cfile, &cfile->fid, oplock);
763 if (oparms.reconnect)
764 cifs_relock_file(cfile);
765
766 reopen_error_exit:
767 kfree(full_path);
768 free_xid(xid);
769 return rc;
770 }
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774 if (file->private_data != NULL) {
775 cifsFileInfo_put(file->private_data);
776 file->private_data = NULL;
777 }
778
779 /* return code from the ->release op is always ignored */
780 return 0;
781 }
782
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786 struct cifsFileInfo *open_file;
787 struct list_head *tmp;
788 struct list_head *tmp1;
789 struct list_head tmp_list;
790
791 if (!tcon->use_persistent || !tcon->need_reopen_files)
792 return;
793
794 tcon->need_reopen_files = false;
795
796 cifs_dbg(FYI, "Reopen persistent handles\n");
797 INIT_LIST_HEAD(&tmp_list);
798
799 /* list all files open on tree connection, reopen persistent handles */
800 spin_lock(&tcon->open_file_lock);
801 list_for_each(tmp, &tcon->openFileList) {
802 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803 if (!open_file->invalidHandle)
804 continue;
805 cifsFileInfo_get(open_file);
806 list_add_tail(&open_file->rlist, &tmp_list);
807 }
808 spin_unlock(&tcon->open_file_lock);
809
810 list_for_each_safe(tmp, tmp1, &tmp_list) {
811 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812 if (cifs_reopen_file(open_file, false /* do not flush */))
813 tcon->need_reopen_files = true;
814 list_del_init(&open_file->rlist);
815 cifsFileInfo_put(open_file);
816 }
817 }
818
819 int cifs_closedir(struct inode *inode, struct file *file)
820 {
821 int rc = 0;
822 unsigned int xid;
823 struct cifsFileInfo *cfile = file->private_data;
824 struct cifs_tcon *tcon;
825 struct TCP_Server_Info *server;
826 char *buf;
827
828 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
829
830 if (cfile == NULL)
831 return rc;
832
833 xid = get_xid();
834 tcon = tlink_tcon(cfile->tlink);
835 server = tcon->ses->server;
836
837 cifs_dbg(FYI, "Freeing private data in close dir\n");
838 spin_lock(&cfile->file_info_lock);
839 if (server->ops->dir_needs_close(cfile)) {
840 cfile->invalidHandle = true;
841 spin_unlock(&cfile->file_info_lock);
842 if (server->ops->close_dir)
843 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
844 else
845 rc = -ENOSYS;
846 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
847 /* not much we can do if it fails anyway, ignore rc */
848 rc = 0;
849 } else
850 spin_unlock(&cfile->file_info_lock);
851
852 buf = cfile->srch_inf.ntwrk_buf_start;
853 if (buf) {
854 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
855 cfile->srch_inf.ntwrk_buf_start = NULL;
856 if (cfile->srch_inf.smallBuf)
857 cifs_small_buf_release(buf);
858 else
859 cifs_buf_release(buf);
860 }
861
862 cifs_put_tlink(cfile->tlink);
863 kfree(file->private_data);
864 file->private_data = NULL;
865 /* BB can we lock the filestruct while this is going on? */
866 free_xid(xid);
867 return rc;
868 }
869
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873 struct cifsLockInfo *lock =
874 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875 if (!lock)
876 return lock;
877 lock->offset = offset;
878 lock->length = length;
879 lock->type = type;
880 lock->pid = current->tgid;
881 lock->flags = flags;
882 INIT_LIST_HEAD(&lock->blist);
883 init_waitqueue_head(&lock->block_q);
884 return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890 struct cifsLockInfo *li, *tmp;
891 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892 list_del_init(&li->blist);
893 wake_up(&li->block_q);
894 }
895 }
896
897 #define CIFS_LOCK_OP 0
898 #define CIFS_READ_OP 1
899 #define CIFS_WRITE_OP 2
900
901 /* @rw_check : 0 - lock op, 1 - read op, 2 - write op */
902 static bool
903 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
904 __u64 length, __u8 type, __u16 flags,
905 struct cifsFileInfo *cfile,
906 struct cifsLockInfo **conf_lock, int rw_check)
907 {
908 struct cifsLockInfo *li;
909 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
910 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
911
912 list_for_each_entry(li, &fdlocks->locks, llist) {
913 if (offset + length <= li->offset ||
914 offset >= li->offset + li->length)
915 continue;
916 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
917 server->ops->compare_fids(cfile, cur_cfile)) {
918 /* shared lock prevents write op through the same fid */
919 if (!(li->type & server->vals->shared_lock_type) ||
920 rw_check != CIFS_WRITE_OP)
921 continue;
922 }
923 if ((type & server->vals->shared_lock_type) &&
924 ((server->ops->compare_fids(cfile, cur_cfile) &&
925 current->tgid == li->pid) || type == li->type))
926 continue;
927 if (rw_check == CIFS_LOCK_OP &&
928 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
929 server->ops->compare_fids(cfile, cur_cfile))
930 continue;
931 if (conf_lock)
932 *conf_lock = li;
933 return true;
934 }
935 return false;
936 }
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940 __u8 type, __u16 flags,
941 struct cifsLockInfo **conf_lock, int rw_check)
942 {
943 bool rc = false;
944 struct cifs_fid_locks *cur;
945 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947 list_for_each_entry(cur, &cinode->llist, llist) {
948 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949 flags, cfile, conf_lock,
950 rw_check);
951 if (rc)
952 break;
953 }
954
955 return rc;
956 }
957
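/*
 * Illustrative sketch (not part of the original source): an assumed
 * caller pattern for cifs_find_lock_conflict() above. The strict I/O
 * read path (not shown in this excerpt) asks, with lock_sem held for
 * read, whether a cached read may proceed despite mandatory locks:
 *
 *	down_read(&cinode->lock_sem);
 *	if (!cifs_find_lock_conflict(cfile, offset, count,
 *				     server->vals->shared_lock_type, 0,
 *				     NULL, CIFS_READ_OP))
 *		... safe to satisfy the read from the page cache ...
 *	up_read(&cinode->lock_sem);
 */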
958 /*
959 * Check if there is another lock that prevents us from setting the lock
960 * (mandatory style). If such a lock exists, update the flock structure with
961 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
962 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
963 * request the lock from the server, or 1 otherwise.
964 */
965 static int
966 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
967 __u8 type, struct file_lock *flock)
968 {
969 int rc = 0;
970 struct cifsLockInfo *conf_lock;
971 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
972 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
973 bool exist;
974
975 down_read(&cinode->lock_sem);
976
977 exist = cifs_find_lock_conflict(cfile, offset, length, type,
978 flock->fl_flags, &conf_lock,
979 CIFS_LOCK_OP);
980 if (exist) {
981 flock->fl_start = conf_lock->offset;
982 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
983 flock->fl_pid = conf_lock->pid;
984 if (conf_lock->type & server->vals->shared_lock_type)
985 flock->fl_type = F_RDLCK;
986 else
987 flock->fl_type = F_WRLCK;
988 } else if (!cinode->can_cache_brlcks)
989 rc = 1;
990 else
991 flock->fl_type = F_UNLCK;
992
993 up_read(&cinode->lock_sem);
994 return rc;
995 }
996
997 static void
998 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999 {
1000 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001 down_write(&cinode->lock_sem);
1002 list_add_tail(&lock->llist, &cfile->llist->locks);
1003 up_write(&cinode->lock_sem);
1004 }
1005
1006 /*
1007 * Set the byte-range lock (mandatory style). Returns:
1008 * 1) 0, if we set the lock and don't need to send a request to the server;
1009 * 2) 1, if no locks prevent us but we need to send a request to the server;
1010 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011 */
1012 static int
1013 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014 bool wait)
1015 {
1016 struct cifsLockInfo *conf_lock;
1017 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018 bool exist;
1019 int rc = 0;
1020
1021 try_again:
1022 exist = false;
1023 down_write(&cinode->lock_sem);
1024
1025 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026 lock->type, lock->flags, &conf_lock,
1027 CIFS_LOCK_OP);
1028 if (!exist && cinode->can_cache_brlcks) {
1029 list_add_tail(&lock->llist, &cfile->llist->locks);
1030 up_write(&cinode->lock_sem);
1031 return rc;
1032 }
1033
1034 if (!exist)
1035 rc = 1;
1036 else if (!wait)
1037 rc = -EACCES;
1038 else {
1039 list_add_tail(&lock->blist, &conf_lock->blist);
1040 up_write(&cinode->lock_sem);
1041 rc = wait_event_interruptible(lock->block_q,
1042 (lock->blist.prev == &lock->blist) &&
1043 (lock->blist.next == &lock->blist));
1044 if (!rc)
1045 goto try_again;
1046 down_write(&cinode->lock_sem);
1047 list_del_init(&lock->blist);
1048 }
1049
1050 up_write(&cinode->lock_sem);
1051 return rc;
1052 }
1053
1054 /*
1055 * Check if there is another lock that prevents us from setting the lock
1056 * (posix style). If such a lock exists, update the flock structure with
1057 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1058 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1059 * request the lock from the server, or 1 otherwise.
1060 */
1061 static int
1062 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1063 {
1064 int rc = 0;
1065 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066 unsigned char saved_type = flock->fl_type;
1067
1068 if ((flock->fl_flags & FL_POSIX) == 0)
1069 return 1;
1070
1071 down_read(&cinode->lock_sem);
1072 posix_test_lock(file, flock);
1073
1074 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1075 flock->fl_type = saved_type;
1076 rc = 1;
1077 }
1078
1079 up_read(&cinode->lock_sem);
1080 return rc;
1081 }
1082
1083 /*
1084 * Set the byte-range lock (posix style). Returns:
1085 * 1) 0, if we set the lock and don't need to send a request to the server;
1086 * 2) 1, if we need to send a request to the server;
1087 * 3) <0, if an error occurs while setting the lock.
1088 */
1089 static int
1090 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1091 {
1092 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093 int rc = 1;
1094
1095 if ((flock->fl_flags & FL_POSIX) == 0)
1096 return rc;
1097
1098 try_again:
1099 down_write(&cinode->lock_sem);
1100 if (!cinode->can_cache_brlcks) {
1101 up_write(&cinode->lock_sem);
1102 return rc;
1103 }
1104
1105 rc = posix_lock_file(file, flock, NULL);
1106 up_write(&cinode->lock_sem);
1107 if (rc == FILE_LOCK_DEFERRED) {
1108 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109 if (!rc)
1110 goto try_again;
1111 locks_delete_block(flock);
1112 }
1113 return rc;
1114 }
1115
1116 int
1117 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118 {
1119 unsigned int xid;
1120 int rc = 0, stored_rc;
1121 struct cifsLockInfo *li, *tmp;
1122 struct cifs_tcon *tcon;
1123 unsigned int num, max_num, max_buf;
1124 LOCKING_ANDX_RANGE *buf, *cur;
1125 static const int types[] = {
1126 LOCKING_ANDX_LARGE_FILES,
1127 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1128 };
1129 int i;
1130
1131 xid = get_xid();
1132 tcon = tlink_tcon(cfile->tlink);
1133
1134 /*
1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136 * and check it before using.
1137 */
1138 max_buf = tcon->ses->server->maxBuf;
1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140 free_xid(xid);
1141 return -EINVAL;
1142 }
1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1149 sizeof(LOCKING_ANDX_RANGE);
1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151 if (!buf) {
1152 free_xid(xid);
1153 return -ENOMEM;
1154 }
1155
1156 for (i = 0; i < 2; i++) {
1157 cur = buf;
1158 num = 0;
1159 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160 if (li->type != types[i])
1161 continue;
1162 cur->Pid = cpu_to_le16(li->pid);
1163 cur->LengthLow = cpu_to_le32((u32)li->length);
1164 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1165 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1166 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1167 if (++num == max_num) {
1168 stored_rc = cifs_lockv(xid, tcon,
1169 cfile->fid.netfid,
1170 (__u8)li->type, 0, num,
1171 buf);
1172 if (stored_rc)
1173 rc = stored_rc;
1174 cur = buf;
1175 num = 0;
1176 } else
1177 cur++;
1178 }
1179
1180 if (num) {
1181 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182 (__u8)types[i], 0, num, buf);
1183 if (stored_rc)
1184 rc = stored_rc;
1185 }
1186 }
1187
1188 kfree(buf);
1189 free_xid(xid);
1190 return rc;
1191 }
1192
1193 static __u32
1194 hash_lockowner(fl_owner_t owner)
1195 {
1196 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1197 }
1198
1199 struct lock_to_push {
1200 struct list_head llist;
1201 __u64 offset;
1202 __u64 length;
1203 __u32 pid;
1204 __u16 netfid;
1205 __u8 type;
1206 };
1207
1208 static int
1209 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210 {
1211 struct inode *inode = d_inode(cfile->dentry);
1212 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213 struct file_lock *flock;
1214 struct file_lock_context *flctx = inode->i_flctx;
1215 unsigned int count = 0, i;
1216 int rc = 0, xid, type;
1217 struct list_head locks_to_send, *el;
1218 struct lock_to_push *lck, *tmp;
1219 __u64 length;
1220
1221 xid = get_xid();
1222
1223 if (!flctx)
1224 goto out;
1225
1226 spin_lock(&flctx->flc_lock);
1227 list_for_each(el, &flctx->flc_posix) {
1228 count++;
1229 }
1230 spin_unlock(&flctx->flc_lock);
1231
1232 INIT_LIST_HEAD(&locks_to_send);
1233
1234 /*
1235 * Allocating count locks is enough because no FL_POSIX locks can be
1236 * added to the list while we are holding cinode->lock_sem that
1237 * protects locking operations of this inode.
1238 */
1239 for (i = 0; i < count; i++) {
1240 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1241 if (!lck) {
1242 rc = -ENOMEM;
1243 goto err_out;
1244 }
1245 list_add_tail(&lck->llist, &locks_to_send);
1246 }
1247
1248 el = locks_to_send.next;
1249 spin_lock(&flctx->flc_lock);
1250 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251 if (el == &locks_to_send) {
1252 /*
1253 * The list ended. We don't have enough allocated
1254 * structures - something is really wrong.
1255 */
1256 cifs_dbg(VFS, "Can't push all brlocks!\n");
1257 break;
1258 }
1259 length = 1 + flock->fl_end - flock->fl_start;
1260 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1261 type = CIFS_RDLCK;
1262 else
1263 type = CIFS_WRLCK;
1264 lck = list_entry(el, struct lock_to_push, llist);
1265 lck->pid = hash_lockowner(flock->fl_owner);
1266 lck->netfid = cfile->fid.netfid;
1267 lck->length = length;
1268 lck->type = type;
1269 lck->offset = flock->fl_start;
1270 }
1271 spin_unlock(&flctx->flc_lock);
1272
1273 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1274 int stored_rc;
1275
1276 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277 lck->offset, lck->length, NULL,
1278 lck->type, 0);
1279 if (stored_rc)
1280 rc = stored_rc;
1281 list_del(&lck->llist);
1282 kfree(lck);
1283 }
1284
1285 out:
1286 free_xid(xid);
1287 return rc;
1288 err_out:
1289 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1290 list_del(&lck->llist);
1291 kfree(lck);
1292 }
1293 goto out;
1294 }
1295
1296 static int
1297 cifs_push_locks(struct cifsFileInfo *cfile)
1298 {
1299 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302 int rc = 0;
1303
1304 /* we are going to update can_cache_brlcks here - need a write access */
1305 down_write(&cinode->lock_sem);
1306 if (!cinode->can_cache_brlcks) {
1307 up_write(&cinode->lock_sem);
1308 return rc;
1309 }
1310
1311 if (cap_unix(tcon->ses) &&
1312 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1313 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314 rc = cifs_push_posix_locks(cfile);
1315 else
1316 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317
1318 cinode->can_cache_brlcks = false;
1319 up_write(&cinode->lock_sem);
1320 return rc;
1321 }
1322
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327 if (flock->fl_flags & FL_POSIX)
1328 cifs_dbg(FYI, "Posix\n");
1329 if (flock->fl_flags & FL_FLOCK)
1330 cifs_dbg(FYI, "Flock\n");
1331 if (flock->fl_flags & FL_SLEEP) {
1332 cifs_dbg(FYI, "Blocking lock\n");
1333 *wait_flag = true;
1334 }
1335 if (flock->fl_flags & FL_ACCESS)
1336 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337 if (flock->fl_flags & FL_LEASE)
1338 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339 if (flock->fl_flags &
1340 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344 *type = server->vals->large_lock_type;
1345 if (flock->fl_type == F_WRLCK) {
1346 cifs_dbg(FYI, "F_WRLCK\n");
1347 *type |= server->vals->exclusive_lock_type;
1348 *lock = 1;
1349 } else if (flock->fl_type == F_UNLCK) {
1350 cifs_dbg(FYI, "F_UNLCK\n");
1351 *type |= server->vals->unlock_lock_type;
1352 *unlock = 1;
1353 /* Check if unlock includes more than one lock range */
1354 } else if (flock->fl_type == F_RDLCK) {
1355 cifs_dbg(FYI, "F_RDLCK\n");
1356 *type |= server->vals->shared_lock_type;
1357 *lock = 1;
1358 } else if (flock->fl_type == F_EXLCK) {
1359 cifs_dbg(FYI, "F_EXLCK\n");
1360 *type |= server->vals->exclusive_lock_type;
1361 *lock = 1;
1362 } else if (flock->fl_type == F_SHLCK) {
1363 cifs_dbg(FYI, "F_SHLCK\n");
1364 *type |= server->vals->shared_lock_type;
1365 *lock = 1;
1366 } else
1367 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
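/*
 * Illustrative sketch (not part of the original source): how a
 * userspace byte-range lock reaches cifs_read_flock() above, assuming
 * fd refers to a file on a CIFS mount:
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,  // -> exclusive_lock_type, *lock = 1
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 4096,
 *	};
 *	fcntl(fd, F_SETLKW, &fl); // blocking variant -> FL_SLEEP, wait_flag
 */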
1370 static int
1371 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372 bool wait_flag, bool posix_lck, unsigned int xid)
1373 {
1374 int rc = 0;
1375 __u64 length = 1 + flock->fl_end - flock->fl_start;
1376 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378 struct TCP_Server_Info *server = tcon->ses->server;
1379 __u16 netfid = cfile->fid.netfid;
1380
1381 if (posix_lck) {
1382 int posix_lock_type;
1383
1384 rc = cifs_posix_lock_test(file, flock);
1385 if (!rc)
1386 return rc;
1387
1388 if (type & server->vals->shared_lock_type)
1389 posix_lock_type = CIFS_RDLCK;
1390 else
1391 posix_lock_type = CIFS_WRLCK;
1392 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1393 hash_lockowner(flock->fl_owner),
1394 flock->fl_start, length, flock,
1395 posix_lock_type, wait_flag);
1396 return rc;
1397 }
1398
1399 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400 if (!rc)
1401 return rc;
1402
1403 /* BB we could chain these into one lock request BB */
1404 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1405 1, 0, false);
1406 if (rc == 0) {
1407 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1408 type, 0, 1, false);
1409 flock->fl_type = F_UNLCK;
1410 if (rc != 0)
1411 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1412 rc);
1413 return 0;
1414 }
1415
1416 if (type & server->vals->shared_lock_type) {
1417 flock->fl_type = F_WRLCK;
1418 return 0;
1419 }
1420
1421 type &= ~server->vals->exclusive_lock_type;
1422
1423 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1424 type | server->vals->shared_lock_type,
1425 1, 0, false);
1426 if (rc == 0) {
1427 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1428 type | server->vals->shared_lock_type, 0, 1, false);
1429 flock->fl_type = F_RDLCK;
1430 if (rc != 0)
1431 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1432 rc);
1433 } else
1434 flock->fl_type = F_WRLCK;
1435
1436 return 0;
1437 }
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442 struct list_head *li, *tmp;
1443 list_for_each_safe(li, tmp, source)
1444 list_move(li, dest);
1445 }
1446
1447 void
1448 cifs_free_llist(struct list_head *llist)
1449 {
1450 struct cifsLockInfo *li, *tmp;
1451 list_for_each_entry_safe(li, tmp, llist, llist) {
1452 cifs_del_lock_waiters(li);
1453 list_del(&li->llist);
1454 kfree(li);
1455 }
1456 }
1457
1458 int
1459 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1460 unsigned int xid)
1461 {
1462 int rc = 0, stored_rc;
1463 static const int types[] = {
1464 LOCKING_ANDX_LARGE_FILES,
1465 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1466 };
1467 unsigned int i;
1468 unsigned int max_num, num, max_buf;
1469 LOCKING_ANDX_RANGE *buf, *cur;
1470 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472 struct cifsLockInfo *li, *tmp;
1473 __u64 length = 1 + flock->fl_end - flock->fl_start;
1474 struct list_head tmp_llist;
1475
1476 INIT_LIST_HEAD(&tmp_llist);
1477
1478 /*
1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480 * and check it before using.
1481 */
1482 max_buf = tcon->ses->server->maxBuf;
1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484 return -EINVAL;
1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1491 sizeof(LOCKING_ANDX_RANGE);
1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493 if (!buf)
1494 return -ENOMEM;
1495
1496 down_write(&cinode->lock_sem);
1497 for (i = 0; i < 2; i++) {
1498 cur = buf;
1499 num = 0;
1500 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501 if (flock->fl_start > li->offset ||
1502 (flock->fl_start + length) <
1503 (li->offset + li->length))
1504 continue;
1505 if (current->tgid != li->pid)
1506 continue;
1507 if (types[i] != li->type)
1508 continue;
1509 if (cinode->can_cache_brlcks) {
1510 /*
1511 * We can cache brlock requests - simply remove
1512 * a lock from the file's list.
1513 */
1514 list_del(&li->llist);
1515 cifs_del_lock_waiters(li);
1516 kfree(li);
1517 continue;
1518 }
1519 cur->Pid = cpu_to_le16(li->pid);
1520 cur->LengthLow = cpu_to_le32((u32)li->length);
1521 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1522 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1523 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1524 /*
1525 * We need to save a lock here to let us add it again to
1526 * the file's list if the unlock range request fails on
1527 * the server.
1528 */
1529 list_move(&li->llist, &tmp_llist);
1530 if (++num == max_num) {
1531 stored_rc = cifs_lockv(xid, tcon,
1532 cfile->fid.netfid,
1533 li->type, num, 0, buf);
1534 if (stored_rc) {
1535 /*
1536 * We failed on the unlock range
1537 * request - add all locks from the tmp
1538 * list to the head of the file's list.
1539 */
1540 cifs_move_llist(&tmp_llist,
1541 &cfile->llist->locks);
1542 rc = stored_rc;
1543 } else
1544 /*
1545 * The unlock range request succeeded -
1546 * free the tmp list.
1547 */
1548 cifs_free_llist(&tmp_llist);
1549 cur = buf;
1550 num = 0;
1551 } else
1552 cur++;
1553 }
1554 if (num) {
1555 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556 types[i], num, 0, buf);
1557 if (stored_rc) {
1558 cifs_move_llist(&tmp_llist,
1559 &cfile->llist->locks);
1560 rc = stored_rc;
1561 } else
1562 cifs_free_llist(&tmp_llist);
1563 }
1564 }
1565
1566 up_write(&cinode->lock_sem);
1567 kfree(buf);
1568 return rc;
1569 }
1570
1571 static int
1572 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573 bool wait_flag, bool posix_lck, int lock, int unlock,
1574 unsigned int xid)
1575 {
1576 int rc = 0;
1577 __u64 length = 1 + flock->fl_end - flock->fl_start;
1578 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1579 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580 struct TCP_Server_Info *server = tcon->ses->server;
1581 struct inode *inode = d_inode(cfile->dentry);
1582
1583 if (posix_lck) {
1584 int posix_lock_type;
1585
1586 rc = cifs_posix_lock_set(file, flock);
1587 if (!rc || rc < 0)
1588 return rc;
1589
1590 if (type & server->vals->shared_lock_type)
1591 posix_lock_type = CIFS_RDLCK;
1592 else
1593 posix_lock_type = CIFS_WRLCK;
1594
1595 if (unlock == 1)
1596 posix_lock_type = CIFS_UNLCK;
1597
1598 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599 hash_lockowner(flock->fl_owner),
1600 flock->fl_start, length,
1601 NULL, posix_lock_type, wait_flag);
1602 goto out;
1603 }
1604
1605 if (lock) {
1606 struct cifsLockInfo *lock;
1607
1608 lock = cifs_lock_init(flock->fl_start, length, type,
1609 flock->fl_flags);
1610 if (!lock)
1611 return -ENOMEM;
1612
1613 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614 if (rc < 0) {
1615 kfree(lock);
1616 return rc;
1617 }
1618 if (!rc)
1619 goto out;
1620
1621 /*
1622 * A Windows 7 server can delay breaking a lease from read to None
1623 * if we set a byte-range lock on a file - break it explicitly
1624 * before sending the lock to the server to be sure the next
1625 * read won't conflict with non-overlapping locks due to
1626 * page reading.
1627 */
1628 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1629 CIFS_CACHE_READ(CIFS_I(inode))) {
1630 cifs_zap_mapping(inode);
1631 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1632 inode);
1633 CIFS_I(inode)->oplock = 0;
1634 }
1635
1636 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637 type, 1, 0, wait_flag);
1638 if (rc) {
1639 kfree(lock);
1640 return rc;
1641 }
1642
1643 cifs_lock_add(cfile, lock);
1644 } else if (unlock)
1645 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646
1647 out:
1648 if (flock->fl_flags & FL_POSIX) {
1649 /*
1650 * If this is a request to remove all locks because we
1651 * are closing the file, it doesn't matter if the
1652 * unlocking failed as both cifs.ko and the SMB server
1653 * remove the lock on file close
1654 */
1655 if (rc) {
1656 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1657 if (!(flock->fl_flags & FL_CLOSE))
1658 return rc;
1659 }
1660 rc = locks_lock_file_wait(file, flock);
1661 }
1662 return rc;
1663 }
1664
1665 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1666 {
1667 int rc, xid;
1668 int lock = 0, unlock = 0;
1669 bool wait_flag = false;
1670 bool posix_lck = false;
1671 struct cifs_sb_info *cifs_sb;
1672 struct cifs_tcon *tcon;
1673 struct cifsInodeInfo *cinode;
1674 struct cifsFileInfo *cfile;
1675 __u16 netfid;
1676 __u32 type;
1677
1678 rc = -EACCES;
1679 xid = get_xid();
1680
1681 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1682 cmd, flock->fl_flags, flock->fl_type,
1683 flock->fl_start, flock->fl_end);
1684
1685 cfile = (struct cifsFileInfo *)file->private_data;
1686 tcon = tlink_tcon(cfile->tlink);
1687
1688 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1689 tcon->ses->server);
1690 cifs_sb = CIFS_FILE_SB(file);
1691 netfid = cfile->fid.netfid;
1692 cinode = CIFS_I(file_inode(file));
1693
1694 if (cap_unix(tcon->ses) &&
1695 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1696 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1697 posix_lck = true;
1698 /*
1699 * BB add code here to normalize offset and length to account for
1700 * negative length which we can not accept over the wire.
1701 */
1702 if (IS_GETLK(cmd)) {
1703 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1704 free_xid(xid);
1705 return rc;
1706 }
1707
1708 if (!lock && !unlock) {
1709 /*
1710 * if neither lock nor unlock is set, there is nothing to do since
1711 * we do not know what the request is
1712 */
1713 free_xid(xid);
1714 return -EOPNOTSUPP;
1715 }
1716
1717 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1718 xid);
1719 free_xid(xid);
1720 return rc;
1721 }
1722
1723 /*
1724 * Update the file size (if needed) after a write. Should be called with
1725 * the inode->i_lock held.
1726 */
1727 void
1728 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1729 unsigned int bytes_written)
1730 {
1731 loff_t end_of_write = offset + bytes_written;
1732
1733 if (end_of_write > cifsi->server_eof)
1734 cifsi->server_eof = end_of_write;
1735 }
1736
1737 static ssize_t
1738 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1739 size_t write_size, loff_t *offset)
1740 {
1741 int rc = 0;
1742 unsigned int bytes_written = 0;
1743 unsigned int total_written;
1744 struct cifs_sb_info *cifs_sb;
1745 struct cifs_tcon *tcon;
1746 struct TCP_Server_Info *server;
1747 unsigned int xid;
1748 struct dentry *dentry = open_file->dentry;
1749 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1750 struct cifs_io_parms io_parms;
1751
1752 cifs_sb = CIFS_SB(dentry->d_sb);
1753
1754 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1755 write_size, *offset, dentry);
1756
1757 tcon = tlink_tcon(open_file->tlink);
1758 server = tcon->ses->server;
1759
1760 if (!server->ops->sync_write)
1761 return -ENOSYS;
1762
1763 xid = get_xid();
1764
1765 for (total_written = 0; write_size > total_written;
1766 total_written += bytes_written) {
1767 rc = -EAGAIN;
1768 while (rc == -EAGAIN) {
1769 struct kvec iov[2];
1770 unsigned int len;
1771
1772 if (open_file->invalidHandle) {
1773 /* we could deadlock if we called
1774 filemap_fdatawait from here so tell
1775 reopen_file not to flush data to
1776 server now */
1777 rc = cifs_reopen_file(open_file, false);
1778 if (rc != 0)
1779 break;
1780 }
1781
1782 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1783 (unsigned int)write_size - total_written);
1784 /* iov[0] is reserved for smb header */
1785 iov[1].iov_base = (char *)write_data + total_written;
1786 iov[1].iov_len = len;
1787 io_parms.pid = pid;
1788 io_parms.tcon = tcon;
1789 io_parms.offset = *offset;
1790 io_parms.length = len;
1791 rc = server->ops->sync_write(xid, &open_file->fid,
1792 &io_parms, &bytes_written, iov, 1);
1793 }
1794 if (rc || (bytes_written == 0)) {
1795 if (total_written)
1796 break;
1797 else {
1798 free_xid(xid);
1799 return rc;
1800 }
1801 } else {
1802 spin_lock(&d_inode(dentry)->i_lock);
1803 cifs_update_eof(cifsi, *offset, bytes_written);
1804 spin_unlock(&d_inode(dentry)->i_lock);
1805 *offset += bytes_written;
1806 }
1807 }
1808
1809 cifs_stats_bytes_written(tcon, total_written);
1810
1811 if (total_written > 0) {
1812 spin_lock(&d_inode(dentry)->i_lock);
1813 if (*offset > d_inode(dentry)->i_size)
1814 i_size_write(d_inode(dentry), *offset);
1815 spin_unlock(&d_inode(dentry)->i_lock);
1816 }
1817 mark_inode_dirty_sync(d_inode(dentry));
1818 free_xid(xid);
1819 return total_written;
1820 }
1821
1822 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1823 bool fsuid_only)
1824 {
1825 struct cifsFileInfo *open_file = NULL;
1826 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1827 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1828
1829 /* only filter by fsuid on multiuser mounts */
1830 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1831 fsuid_only = false;
1832
1833 spin_lock(&tcon->open_file_lock);
1834 /* we could simply get the first_list_entry since write-only entries
1835 are always at the end of the list but since the first entry might
1836 have a close pending, we go through the whole list */
1837 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1838 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1839 continue;
1840 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1841 if (!open_file->invalidHandle) {
1842 /* found a good file */
1843 /* lock it so it will not be closed on us */
1844 cifsFileInfo_get(open_file);
1845 spin_unlock(&tcon->open_file_lock);
1846 return open_file;
1847 } /* else might as well continue, and look for
1848 another, or simply have the caller reopen it
1849 again rather than trying to fix this handle */
1850 } else /* write only file */
1851 break; /* write only files are last so must be done */
1852 }
1853 spin_unlock(&tcon->open_file_lock);
1854 return NULL;
1855 }
1856
1857 /* Return -EBADF if no handle is found and general rc otherwise */
1858 int
1859 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1860 struct cifsFileInfo **ret_file)
1861 {
1862 struct cifsFileInfo *open_file, *inv_file = NULL;
1863 struct cifs_sb_info *cifs_sb;
1864 struct cifs_tcon *tcon;
1865 bool any_available = false;
1866 int rc = -EBADF;
1867 unsigned int refind = 0;
1868
1869 *ret_file = NULL;
1870
1871 /*
1872 * Having a null inode here (because mapping->host was set to zero by
1873 * the VFS or MM) should not happen, but we had reports of an oops (due
1874 * to it being zero) during stress test cases, so we need to check for it
1875 */
1876
1877 if (cifs_inode == NULL) {
1878 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1879 dump_stack();
1880 return rc;
1881 }
1882
1883 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1884 tcon = cifs_sb_master_tcon(cifs_sb);
1885
1886 /* only filter by fsuid on multiuser mounts */
1887 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1888 fsuid_only = false;
1889
1890 spin_lock(&tcon->open_file_lock);
1891 refind_writable:
1892 if (refind > MAX_REOPEN_ATT) {
1893 spin_unlock(&tcon->open_file_lock);
1894 return rc;
1895 }
1896 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1897 if (!any_available && open_file->pid != current->tgid)
1898 continue;
1899 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1900 continue;
1901 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1902 if (!open_file->invalidHandle) {
1903 /* found a good writable file */
1904 cifsFileInfo_get(open_file);
1905 spin_unlock(&tcon->open_file_lock);
1906 *ret_file = open_file;
1907 return 0;
1908 } else {
1909 if (!inv_file)
1910 inv_file = open_file;
1911 }
1912 }
1913 }
1914 /* couldn't find usable FH with same pid, try any available */
1915 if (!any_available) {
1916 any_available = true;
1917 goto refind_writable;
1918 }
1919
1920 if (inv_file) {
1921 any_available = false;
1922 cifsFileInfo_get(inv_file);
1923 }
1924
1925 spin_unlock(&tcon->open_file_lock);
1926
1927 if (inv_file) {
1928 rc = cifs_reopen_file(inv_file, false);
1929 if (!rc) {
1930 *ret_file = inv_file;
1931 return 0;
1932 }
1933
1934 spin_lock(&tcon->open_file_lock);
1935 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1936 spin_unlock(&tcon->open_file_lock);
1937 cifsFileInfo_put(inv_file);
1938 ++refind;
1939 inv_file = NULL;
1940 spin_lock(&tcon->open_file_lock);
1941 goto refind_writable;
1942 }
1943
1944 return rc;
1945 }
1946
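/*
 * Convenience wrapper around cifs_get_writable_file(): returns the
 * writable handle directly (or NULL), logging a debug message when no
 * handle could be found.
 */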
1947 struct cifsFileInfo *
1948 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1949 {
1950 struct cifsFileInfo *cfile;
1951 int rc;
1952
1953 rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1954 if (rc)
1955 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1956
1957 return cfile;
1958 }
1959
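/*
 * Write the byte range [from, to) of a page back to the server using any
 * writable handle for the inode.  The range is clamped so the write never
 * extends the file, and writes entirely beyond i_size (e.g. racing with
 * truncate) are silently dropped.
 */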
1960 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1961 {
1962 struct address_space *mapping = page->mapping;
1963 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1964 char *write_data;
1965 int rc = -EFAULT;
1966 int bytes_written = 0;
1967 struct inode *inode;
1968 struct cifsFileInfo *open_file;
1969
1970 if (!mapping || !mapping->host)
1971 return -EFAULT;
1972
1973 inode = page->mapping->host;
1974
1975 offset += (loff_t)from;
1976 write_data = kmap(page);
1977 write_data += from;
1978
1979 if ((to > PAGE_SIZE) || (from > to)) {
1980 kunmap(page);
1981 return -EIO;
1982 }
1983
1984 /* racing with truncate? */
1985 if (offset > mapping->host->i_size) {
1986 kunmap(page);
1987 return 0; /* don't care */
1988 }
1989
1990 /* check to make sure that we are not extending the file */
1991 if (mapping->host->i_size - offset < (loff_t)to)
1992 to = (unsigned)(mapping->host->i_size - offset);
1993
1994 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
1995 if (!rc) {
1996 bytes_written = cifs_write(open_file, open_file->pid,
1997 write_data, to - from, &offset);
1998 cifsFileInfo_put(open_file);
1999 /* Does mm or vfs already set times? */
2000 inode->i_atime = inode->i_mtime = current_time(inode);
2001 if ((bytes_written > 0) && (offset))
2002 rc = 0;
2003 else if (bytes_written < 0)
2004 rc = bytes_written;
2005 else
2006 rc = -EFAULT;
2007 } else {
2008 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2009 if (!is_retryable_error(rc))
2010 rc = -EIO;
2011 }
2012
2013 kunmap(page);
2014 return rc;
2015 }
2016
2017 static struct cifs_writedata *
2018 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2019 pgoff_t end, pgoff_t *index,
2020 unsigned int *found_pages)
2021 {
2022 struct cifs_writedata *wdata;
2023
2024 wdata = cifs_writedata_alloc((unsigned int)tofind,
2025 cifs_writev_complete);
2026 if (!wdata)
2027 return NULL;
2028
2029 *found_pages = find_get_pages_range_tag(mapping, index, end,
2030 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2031 return wdata;
2032 }
2033
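/*
 * Lock and claim as many of the found pages as can go into one request:
 * the pages must still belong to this mapping, be consecutive, lie within
 * the writeback range and still be dirty.  Claimed pages are marked for
 * writeback; the remaining pages are released.  Returns the number of
 * pages prepared.
 */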
2034 static unsigned int
2035 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2036 struct address_space *mapping,
2037 struct writeback_control *wbc,
2038 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2039 {
2040 unsigned int nr_pages = 0, i;
2041 struct page *page;
2042
2043 for (i = 0; i < found_pages; i++) {
2044 page = wdata->pages[i];
2045 /*
2046 * At this point we hold neither the i_pages lock nor the
2047 * page lock: the page may be truncated or invalidated
2048 * (changing page->mapping to NULL), or even swizzled
2049 * back from swapper_space to tmpfs file mapping
2050 */
2051
2052 if (nr_pages == 0)
2053 lock_page(page);
2054 else if (!trylock_page(page))
2055 break;
2056
2057 if (unlikely(page->mapping != mapping)) {
2058 unlock_page(page);
2059 break;
2060 }
2061
2062 if (!wbc->range_cyclic && page->index > end) {
2063 *done = true;
2064 unlock_page(page);
2065 break;
2066 }
2067
2068 if (*next && (page->index != *next)) {
2069 /* Not next consecutive page */
2070 unlock_page(page);
2071 break;
2072 }
2073
2074 if (wbc->sync_mode != WB_SYNC_NONE)
2075 wait_on_page_writeback(page);
2076
2077 if (PageWriteback(page) ||
2078 !clear_page_dirty_for_io(page)) {
2079 unlock_page(page);
2080 break;
2081 }
2082
2083 /*
2084 * This actually clears the dirty bit in the radix tree.
2085 * See cifs_writepage() for more commentary.
2086 */
2087 set_page_writeback(page);
2088 if (page_offset(page) >= i_size_read(mapping->host)) {
2089 *done = true;
2090 unlock_page(page);
2091 end_page_writeback(page);
2092 break;
2093 }
2094
2095 wdata->pages[i] = page;
2096 *next = page->index + 1;
2097 ++nr_pages;
2098 }
2099
2100 /* reset index to refind any pages skipped */
2101 if (nr_pages == 0)
2102 *index = wdata->pages[0]->index + 1;
2103
2104 /* put any pages we aren't going to use */
2105 for (i = nr_pages; i < found_pages; i++) {
2106 put_page(wdata->pages[i]);
2107 wdata->pages[i] = NULL;
2108 }
2109
2110 return nr_pages;
2111 }
2112
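/*
 * Fill in the remaining cifs_writedata fields (offset, page/tail sizes,
 * byte count, pid) for nr_pages locked pages and hand the request to the
 * server's async_writev op, after adjusting the reserved credits to the
 * actual request size.
 */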
2113 static int
2114 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2115 struct address_space *mapping, struct writeback_control *wbc)
2116 {
2117 int rc;
2118 struct TCP_Server_Info *server =
2119 tlink_tcon(wdata->cfile->tlink)->ses->server;
2120
2121 wdata->sync_mode = wbc->sync_mode;
2122 wdata->nr_pages = nr_pages;
2123 wdata->offset = page_offset(wdata->pages[0]);
2124 wdata->pagesz = PAGE_SIZE;
2125 wdata->tailsz = min(i_size_read(mapping->host) -
2126 page_offset(wdata->pages[nr_pages - 1]),
2127 (loff_t)PAGE_SIZE);
2128 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2129 wdata->pid = wdata->cfile->pid;
2130
2131 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2132 if (rc)
2133 return rc;
2134
2135 if (wdata->cfile->invalidHandle)
2136 rc = -EAGAIN;
2137 else
2138 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2139
2140 return rc;
2141 }
2142
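/*
 * ->writepages() for cifs: gather runs of contiguous dirty pages (up to
 * wsize bytes per request) and write them back asynchronously.  Falls
 * back to generic_writepages() when wsize is smaller than a page, and
 * retries a batch from its saved index when a WB_SYNC_ALL writeback hits
 * -EAGAIN.
 */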
2143 static int cifs_writepages(struct address_space *mapping,
2144 struct writeback_control *wbc)
2145 {
2146 struct inode *inode = mapping->host;
2147 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2148 struct TCP_Server_Info *server;
2149 bool done = false, scanned = false, range_whole = false;
2150 pgoff_t end, index;
2151 struct cifs_writedata *wdata;
2152 struct cifsFileInfo *cfile = NULL;
2153 int rc = 0;
2154 int saved_rc = 0;
2155 unsigned int xid;
2156
2157 /*
2158 * If wsize is smaller than the page cache size, default to writing
2159 * one page at a time via cifs_writepage
2160 */
2161 if (cifs_sb->wsize < PAGE_SIZE)
2162 return generic_writepages(mapping, wbc);
2163
2164 xid = get_xid();
2165 if (wbc->range_cyclic) {
2166 index = mapping->writeback_index; /* Start from prev offset */
2167 end = -1;
2168 } else {
2169 index = wbc->range_start >> PAGE_SHIFT;
2170 end = wbc->range_end >> PAGE_SHIFT;
2171 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2172 range_whole = true;
2173 scanned = true;
2174 }
2175 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2176 retry:
2177 while (!done && index <= end) {
2178 unsigned int i, nr_pages, found_pages, wsize;
2179 pgoff_t next = 0, tofind, saved_index = index;
2180 struct cifs_credits credits_on_stack;
2181 struct cifs_credits *credits = &credits_on_stack;
2182 int get_file_rc = 0;
2183
2184 if (cfile)
2185 cifsFileInfo_put(cfile);
2186
2187 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2188
2189 /* in case of an error store it to return later */
2190 if (rc)
2191 get_file_rc = rc;
2192
2193 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2194 &wsize, credits);
2195 if (rc != 0) {
2196 done = true;
2197 break;
2198 }
2199
2200 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2201
2202 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2203 &found_pages);
2204 if (!wdata) {
2205 rc = -ENOMEM;
2206 done = true;
2207 add_credits_and_wake_if(server, credits, 0);
2208 break;
2209 }
2210
2211 if (found_pages == 0) {
2212 kref_put(&wdata->refcount, cifs_writedata_release);
2213 add_credits_and_wake_if(server, credits, 0);
2214 break;
2215 }
2216
2217 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2218 end, &index, &next, &done);
2219
2220 /* nothing to write? */
2221 if (nr_pages == 0) {
2222 kref_put(&wdata->refcount, cifs_writedata_release);
2223 add_credits_and_wake_if(server, credits, 0);
2224 continue;
2225 }
2226
2227 wdata->credits = credits_on_stack;
2228 wdata->cfile = cfile;
2229 cfile = NULL;
2230
2231 if (!wdata->cfile) {
2232 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2233 get_file_rc);
2234 if (is_retryable_error(get_file_rc))
2235 rc = get_file_rc;
2236 else
2237 rc = -EBADF;
2238 } else
2239 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2240
2241 for (i = 0; i < nr_pages; ++i)
2242 unlock_page(wdata->pages[i]);
2243
2244 /* send failure -- clean up the mess */
2245 if (rc != 0) {
2246 add_credits_and_wake_if(server, &wdata->credits, 0);
2247 for (i = 0; i < nr_pages; ++i) {
2248 if (is_retryable_error(rc))
2249 redirty_page_for_writepage(wbc,
2250 wdata->pages[i]);
2251 else
2252 SetPageError(wdata->pages[i]);
2253 end_page_writeback(wdata->pages[i]);
2254 put_page(wdata->pages[i]);
2255 }
2256 if (!is_retryable_error(rc))
2257 mapping_set_error(mapping, rc);
2258 }
2259 kref_put(&wdata->refcount, cifs_writedata_release);
2260
2261 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2262 index = saved_index;
2263 continue;
2264 }
2265
2266 /* Return immediately if we received a signal during writing */
2267 if (is_interrupt_error(rc)) {
2268 done = true;
2269 break;
2270 }
2271
2272 if (rc != 0 && saved_rc == 0)
2273 saved_rc = rc;
2274
2275 wbc->nr_to_write -= nr_pages;
2276 if (wbc->nr_to_write <= 0)
2277 done = true;
2278
2279 index = next;
2280 }
2281
2282 if (!scanned && !done) {
2283 /*
2284 * We hit the last page and there is more work to be done: wrap
2285 * back to the start of the file
2286 */
2287 scanned = true;
2288 index = 0;
2289 goto retry;
2290 }
2291
2292 if (saved_rc != 0)
2293 rc = saved_rc;
2294
2295 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2296 mapping->writeback_index = index;
2297
2298 if (cfile)
2299 cifsFileInfo_put(cfile);
2300 free_xid(xid);
2301 return rc;
2302 }
2303
2304 static int
2305 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2306 {
2307 int rc;
2308 unsigned int xid;
2309
2310 xid = get_xid();
2311 /* BB add check for wbc flags */
2312 get_page(page);
2313 if (!PageUptodate(page))
2314 cifs_dbg(FYI, "ppw - page not up to date\n");
2315
2316 /*
2317 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2318 *
2319 * A writepage() implementation always needs to do either this,
2320 * or re-dirty the page with "redirty_page_for_writepage()" in
2321 * the case of a failure.
2322 *
2323 * Just unlocking the page will cause the radix tree tag-bits
2324 * to fail to update with the state of the page correctly.
2325 */
2326 set_page_writeback(page);
2327 retry_write:
2328 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2329 if (is_retryable_error(rc)) {
2330 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2331 goto retry_write;
2332 redirty_page_for_writepage(wbc, page);
2333 } else if (rc != 0) {
2334 SetPageError(page);
2335 mapping_set_error(page->mapping, rc);
2336 } else {
2337 SetPageUptodate(page);
2338 }
2339 end_page_writeback(page);
2340 put_page(page);
2341 free_xid(xid);
2342 return rc;
2343 }
2344
2345 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2346 {
2347 int rc = cifs_writepage_locked(page, wbc);
2348 unlock_page(page);
2349 return rc;
2350 }
2351
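/*
 * ->write_end() for buffered writes: mark the page up to date and dirty
 * when the copy covered it, otherwise fall back to writing the copied
 * bytes synchronously through the open handle, then update i_size if the
 * write extended the file.
 */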
2352 static int cifs_write_end(struct file *file, struct address_space *mapping,
2353 loff_t pos, unsigned len, unsigned copied,
2354 struct page *page, void *fsdata)
2355 {
2356 int rc;
2357 struct inode *inode = mapping->host;
2358 struct cifsFileInfo *cfile = file->private_data;
2359 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2360 __u32 pid;
2361
2362 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2363 pid = cfile->pid;
2364 else
2365 pid = current->tgid;
2366
2367 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2368 page, pos, copied);
2369
2370 if (PageChecked(page)) {
2371 if (copied == len)
2372 SetPageUptodate(page);
2373 ClearPageChecked(page);
2374 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2375 SetPageUptodate(page);
2376
2377 if (!PageUptodate(page)) {
2378 char *page_data;
2379 unsigned offset = pos & (PAGE_SIZE - 1);
2380 unsigned int xid;
2381
2382 xid = get_xid();
2383 /* this is probably better than directly calling
2384 cifs_partialpagewrite() since in this function the file handle
2385 is known, which we might as well leverage */
2386 /* BB check if anything else missing out of ppw
2387 such as updating last write time */
2388 page_data = kmap(page);
2389 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2390 /* if (rc < 0) should we set writebehind rc? */
2391 kunmap(page);
2392
2393 free_xid(xid);
2394 } else {
2395 rc = copied;
2396 pos += copied;
2397 set_page_dirty(page);
2398 }
2399
2400 if (rc > 0) {
2401 spin_lock(&inode->i_lock);
2402 if (pos > inode->i_size)
2403 i_size_write(inode, pos);
2404 spin_unlock(&inode->i_lock);
2405 }
2406
2407 unlock_page(page);
2408 put_page(page);
2409
2410 return rc;
2411 }
2412
2413 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2414 int datasync)
2415 {
2416 unsigned int xid;
2417 int rc = 0;
2418 struct cifs_tcon *tcon;
2419 struct TCP_Server_Info *server;
2420 struct cifsFileInfo *smbfile = file->private_data;
2421 struct inode *inode = file_inode(file);
2422 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2423
2424 rc = file_write_and_wait_range(file, start, end);
2425 if (rc)
2426 return rc;
2427 inode_lock(inode);
2428
2429 xid = get_xid();
2430
2431 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2432 file, datasync);
2433
2434 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2435 rc = cifs_zap_mapping(inode);
2436 if (rc) {
2437 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2438 rc = 0; /* don't care about it in fsync */
2439 }
2440 }
2441
2442 tcon = tlink_tcon(smbfile->tlink);
2443 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2444 server = tcon->ses->server;
2445 if (server->ops->flush)
2446 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2447 else
2448 rc = -ENOSYS;
2449 }
2450
2451 free_xid(xid);
2452 inode_unlock(inode);
2453 return rc;
2454 }
2455
2456 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2457 {
2458 unsigned int xid;
2459 int rc = 0;
2460 struct cifs_tcon *tcon;
2461 struct TCP_Server_Info *server;
2462 struct cifsFileInfo *smbfile = file->private_data;
2463 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2464 struct inode *inode = file->f_mapping->host;
2465
2466 rc = file_write_and_wait_range(file, start, end);
2467 if (rc)
2468 return rc;
2469 inode_lock(inode);
2470
2471 xid = get_xid();
2472
2473 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2474 file, datasync);
2475
2476 tcon = tlink_tcon(smbfile->tlink);
2477 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2478 server = tcon->ses->server;
2479 if (server->ops->flush)
2480 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2481 else
2482 rc = -ENOSYS;
2483 }
2484
2485 free_xid(xid);
2486 inode_unlock(inode);
2487 return rc;
2488 }
2489
2490 /*
2491 * As the file closes, flush all cached write data for this inode,
2492 * checking for write-behind errors.
2493 */
2494 int cifs_flush(struct file *file, fl_owner_t id)
2495 {
2496 struct inode *inode = file_inode(file);
2497 int rc = 0;
2498
2499 if (file->f_mode & FMODE_WRITE)
2500 rc = filemap_write_and_wait(inode->i_mapping);
2501
2502 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2503
2504 return rc;
2505 }
2506
2507 static int
2508 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2509 {
2510 int rc = 0;
2511 unsigned long i;
2512
2513 for (i = 0; i < num_pages; i++) {
2514 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2515 if (!pages[i]) {
2516 /*
2517 * save number of pages we have already allocated and
2518 * return with ENOMEM error
2519 */
2520 num_pages = i;
2521 rc = -ENOMEM;
2522 break;
2523 }
2524 }
2525
2526 if (rc) {
2527 for (i = 0; i < num_pages; i++)
2528 put_page(pages[i]);
2529 }
2530 return rc;
2531 }
2532
2533 static inline
2534 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2535 {
2536 size_t num_pages;
2537 size_t clen;
2538
2539 clen = min_t(const size_t, len, wsize);
2540 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2541
2542 if (cur_len)
2543 *cur_len = clen;
2544
2545 return num_pages;
2546 }
2547
2548 static void
2549 cifs_uncached_writedata_release(struct kref *refcount)
2550 {
2551 int i;
2552 struct cifs_writedata *wdata = container_of(refcount,
2553 struct cifs_writedata, refcount);
2554
2555 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2556 for (i = 0; i < wdata->nr_pages; i++)
2557 put_page(wdata->pages[i]);
2558 cifs_writedata_release(refcount);
2559 }
2560
2561 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2562
2563 static void
2564 cifs_uncached_writev_complete(struct work_struct *work)
2565 {
2566 struct cifs_writedata *wdata = container_of(work,
2567 struct cifs_writedata, work);
2568 struct inode *inode = d_inode(wdata->cfile->dentry);
2569 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2570
2571 spin_lock(&inode->i_lock);
2572 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2573 if (cifsi->server_eof > inode->i_size)
2574 i_size_write(inode, cifsi->server_eof);
2575 spin_unlock(&inode->i_lock);
2576
2577 complete(&wdata->done);
2578 collect_uncached_write_data(wdata->ctx);
2579 /* the below call can possibly free the last ref to aio ctx */
2580 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2581 }
2582
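/*
 * Copy up to *len bytes from the iterator into the wdata pages.  A short
 * copy (likely an unmapped user address) stops the loop early; on return
 * *len and *num_pages reflect what was actually copied, and -EFAULT is
 * returned if nothing could be copied at all.
 */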
2583 static int
2584 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2585 size_t *len, unsigned long *num_pages)
2586 {
2587 size_t save_len, copied, bytes, cur_len = *len;
2588 unsigned long i, nr_pages = *num_pages;
2589
2590 save_len = cur_len;
2591 for (i = 0; i < nr_pages; i++) {
2592 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2593 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2594 cur_len -= copied;
2595 /*
2596 * If we didn't copy as much as we expected, then that
2597 * may mean we trod into an unmapped area. Stop copying
2598 * at that point. On the next pass through the big
2599 * loop, we'll likely end up getting a zero-length
2600 * write and bailing out of it.
2601 */
2602 if (copied < bytes)
2603 break;
2604 }
2605 cur_len = save_len - cur_len;
2606 *len = cur_len;
2607
2608 /*
2609 * If we have no data to send, then that probably means that
2610 * the copy above failed altogether. That's most likely because
2611 * the address in the iovec was bogus. Return -EFAULT and let
2612 * the caller free anything we allocated and bail out.
2613 */
2614 if (!cur_len)
2615 return -EFAULT;
2616
2617 /*
2618 * i + 1 now represents the number of pages we actually used in
2619 * the copy phase above.
2620 */
2621 *num_pages = i + 1;
2622 return 0;
2623 }
2624
2625 static int
2626 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2627 struct cifs_aio_ctx *ctx)
2628 {
2629 unsigned int wsize;
2630 struct cifs_credits credits;
2631 int rc;
2632 struct TCP_Server_Info *server =
2633 tlink_tcon(wdata->cfile->tlink)->ses->server;
2634
2635 /*
2636 * Wait for credits to resend this wdata.
2637 * Note: we are attempting to resend the whole wdata rather than in segments
2638 */
2639 do {
2640 rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize,
2641 &credits);
2642
2643 if (rc)
2644 goto out;
2645
2646 if (wsize < wdata->bytes) {
2647 add_credits_and_wake_if(server, &credits, 0);
2648 msleep(1000);
2649 }
2650 } while (wsize < wdata->bytes);
2651
2652 wdata->credits = credits;
2653 rc = -EAGAIN;
2654 while (rc == -EAGAIN) {
2655 rc = 0;
2656 if (wdata->cfile->invalidHandle)
2657 rc = cifs_reopen_file(wdata->cfile, false);
2658 if (!rc)
2659 rc = server->ops->async_writev(wdata,
2660 cifs_uncached_writedata_release);
2661 }
2662
2663 if (!rc) {
2664 list_add_tail(&wdata->list, wdata_list);
2665 return 0;
2666 }
2667
2668 add_credits_and_wake_if(server, &wdata->credits, 0);
2669 out:
2670 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2671
2672 return rc;
2673 }
2674
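/*
 * Split an uncached or direct write into wsize-bounded chunks.  For
 * direct I/O the user pages are pinned via iov_iter_get_pages_alloc();
 * otherwise pages are allocated and the data is copied from the iterator.
 * Each chunk is sent as an async write and queued on wdata_list for
 * collect_uncached_write_data() to reap.
 */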
2675 static int
2676 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2677 struct cifsFileInfo *open_file,
2678 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2679 struct cifs_aio_ctx *ctx)
2680 {
2681 int rc = 0;
2682 size_t cur_len;
2683 unsigned long nr_pages, num_pages, i;
2684 struct cifs_writedata *wdata;
2685 struct iov_iter saved_from = *from;
2686 loff_t saved_offset = offset;
2687 pid_t pid;
2688 struct TCP_Server_Info *server;
2689 struct page **pagevec;
2690 size_t start;
2691 unsigned int xid;
2692
2693 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2694 pid = open_file->pid;
2695 else
2696 pid = current->tgid;
2697
2698 server = tlink_tcon(open_file->tlink)->ses->server;
2699 xid = get_xid();
2700
2701 do {
2702 unsigned int wsize;
2703 struct cifs_credits credits_on_stack;
2704 struct cifs_credits *credits = &credits_on_stack;
2705
2706 if (open_file->invalidHandle) {
2707 rc = cifs_reopen_file(open_file, false);
2708 if (rc == -EAGAIN)
2709 continue;
2710 else if (rc)
2711 break;
2712 }
2713
2714 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2715 &wsize, credits);
2716 if (rc)
2717 break;
2718
2719 cur_len = min_t(const size_t, len, wsize);
2720
2721 if (ctx->direct_io) {
2722 ssize_t result;
2723
2724 result = iov_iter_get_pages_alloc(
2725 from, &pagevec, cur_len, &start);
2726 if (result < 0) {
2727 cifs_dbg(VFS,
2728 "direct_writev couldn't get user pages "
2729 "(rc=%zd) iter type %d iov_offset %zd "
2730 "count %zd\n",
2731 result, from->type,
2732 from->iov_offset, from->count);
2733 dump_stack();
2734
2735 rc = result;
2736 add_credits_and_wake_if(server, credits, 0);
2737 break;
2738 }
2739 cur_len = (size_t)result;
2740 iov_iter_advance(from, cur_len);
2741
2742 nr_pages =
2743 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2744
2745 wdata = cifs_writedata_direct_alloc(pagevec,
2746 cifs_uncached_writev_complete);
2747 if (!wdata) {
2748 rc = -ENOMEM;
2749 add_credits_and_wake_if(server, credits, 0);
2750 break;
2751 }
2752
2753
2754 wdata->page_offset = start;
2755 wdata->tailsz =
2756 nr_pages > 1 ?
2757 cur_len - (PAGE_SIZE - start) -
2758 (nr_pages - 2) * PAGE_SIZE :
2759 cur_len;
2760 } else {
2761 nr_pages = get_numpages(wsize, len, &cur_len);
2762 wdata = cifs_writedata_alloc(nr_pages,
2763 cifs_uncached_writev_complete);
2764 if (!wdata) {
2765 rc = -ENOMEM;
2766 add_credits_and_wake_if(server, credits, 0);
2767 break;
2768 }
2769
2770 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2771 if (rc) {
2772 kvfree(wdata->pages);
2773 kfree(wdata);
2774 add_credits_and_wake_if(server, credits, 0);
2775 break;
2776 }
2777
2778 num_pages = nr_pages;
2779 rc = wdata_fill_from_iovec(
2780 wdata, from, &cur_len, &num_pages);
2781 if (rc) {
2782 for (i = 0; i < nr_pages; i++)
2783 put_page(wdata->pages[i]);
2784 kvfree(wdata->pages);
2785 kfree(wdata);
2786 add_credits_and_wake_if(server, credits, 0);
2787 break;
2788 }
2789
2790 /*
2791 * Bring nr_pages down to the number of pages we
2792 * actually used, and free any pages that we didn't use.
2793 */
2794 for ( ; nr_pages > num_pages; nr_pages--)
2795 put_page(wdata->pages[nr_pages - 1]);
2796
2797 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2798 }
2799
2800 wdata->sync_mode = WB_SYNC_ALL;
2801 wdata->nr_pages = nr_pages;
2802 wdata->offset = (__u64)offset;
2803 wdata->cfile = cifsFileInfo_get(open_file);
2804 wdata->pid = pid;
2805 wdata->bytes = cur_len;
2806 wdata->pagesz = PAGE_SIZE;
2807 wdata->credits = credits_on_stack;
2808 wdata->ctx = ctx;
2809 kref_get(&ctx->refcount);
2810
2811 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2812
2813 if (!rc) {
2814 if (wdata->cfile->invalidHandle)
2815 rc = -EAGAIN;
2816 else
2817 rc = server->ops->async_writev(wdata,
2818 cifs_uncached_writedata_release);
2819 }
2820
2821 if (rc) {
2822 add_credits_and_wake_if(server, &wdata->credits, 0);
2823 kref_put(&wdata->refcount,
2824 cifs_uncached_writedata_release);
2825 if (rc == -EAGAIN) {
2826 *from = saved_from;
2827 iov_iter_advance(from, offset - saved_offset);
2828 continue;
2829 }
2830 break;
2831 }
2832
2833 list_add_tail(&wdata->list, wdata_list);
2834 offset += cur_len;
2835 len -= cur_len;
2836 } while (len > 0);
2837
2838 free_xid(xid);
2839 return rc;
2840 }
2841
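/*
 * Reap the writes queued by cifs_write_from_iter(): under the ctx's
 * aio_mutex, wait for each wdata in order of increasing offset,
 * accumulating the completed byte count and resending any chunk that
 * failed with -EAGAIN.  Finally release the pinned pages (non-direct
 * case), invalidate the inode's page cache mapping and complete the
 * iocb or the synchronous waiter.
 */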
2842 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2843 {
2844 struct cifs_writedata *wdata, *tmp;
2845 struct cifs_tcon *tcon;
2846 struct cifs_sb_info *cifs_sb;
2847 struct dentry *dentry = ctx->cfile->dentry;
2848 unsigned int i;
2849 int rc;
2850
2851 tcon = tlink_tcon(ctx->cfile->tlink);
2852 cifs_sb = CIFS_SB(dentry->d_sb);
2853
2854 mutex_lock(&ctx->aio_mutex);
2855
2856 if (list_empty(&ctx->list)) {
2857 mutex_unlock(&ctx->aio_mutex);
2858 return;
2859 }
2860
2861 rc = ctx->rc;
2862 /*
2863 * Wait for and collect replies for any successful sends in order of
2864 * increasing offset. Once an error is hit, then return without waiting
2865 * for any more replies.
2866 */
2867 restart_loop:
2868 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2869 if (!rc) {
2870 if (!try_wait_for_completion(&wdata->done)) {
2871 mutex_unlock(&ctx->aio_mutex);
2872 return;
2873 }
2874
2875 if (wdata->result)
2876 rc = wdata->result;
2877 else
2878 ctx->total_len += wdata->bytes;
2879
2880 /* resend call if it's a retryable error */
2881 if (rc == -EAGAIN) {
2882 struct list_head tmp_list;
2883 struct iov_iter tmp_from = ctx->iter;
2884
2885 INIT_LIST_HEAD(&tmp_list);
2886 list_del_init(&wdata->list);
2887
2888 if (ctx->direct_io)
2889 rc = cifs_resend_wdata(
2890 wdata, &tmp_list, ctx);
2891 else {
2892 iov_iter_advance(&tmp_from,
2893 wdata->offset - ctx->pos);
2894
2895 rc = cifs_write_from_iter(wdata->offset,
2896 wdata->bytes, &tmp_from,
2897 ctx->cfile, cifs_sb, &tmp_list,
2898 ctx);
2899 }
2900
2901 list_splice(&tmp_list, &ctx->list);
2902
2903 kref_put(&wdata->refcount,
2904 cifs_uncached_writedata_release);
2905 goto restart_loop;
2906 }
2907 }
2908 list_del_init(&wdata->list);
2909 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2910 }
2911
2912 if (!ctx->direct_io)
2913 for (i = 0; i < ctx->npages; i++)
2914 put_page(ctx->bv[i].bv_page);
2915
2916 cifs_stats_bytes_written(tcon, ctx->total_len);
2917 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2918
2919 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2920
2921 mutex_unlock(&ctx->aio_mutex);
2922
2923 if (ctx->iocb && ctx->iocb->ki_complete)
2924 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2925 else
2926 complete(&ctx->done);
2927 }
2928
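/*
 * Common implementation behind cifs_user_writev() and
 * cifs_direct_writev(): run the generic write checks, set up a
 * cifs_aio_ctx, dispatch the chunked async writes and then either return
 * -EIOCBQUEUED for an async iocb or wait for completion and advance
 * ki_pos by the number of bytes written.
 */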
2929 static ssize_t __cifs_writev(
2930 struct kiocb *iocb, struct iov_iter *from, bool direct)
2931 {
2932 struct file *file = iocb->ki_filp;
2933 ssize_t total_written = 0;
2934 struct cifsFileInfo *cfile;
2935 struct cifs_tcon *tcon;
2936 struct cifs_sb_info *cifs_sb;
2937 struct cifs_aio_ctx *ctx;
2938 struct iov_iter saved_from = *from;
2939 size_t len = iov_iter_count(from);
2940 int rc;
2941
2942 /*
2943 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2944 * In this case, fall back to the non-direct write function.
2945 * This could be improved by getting pages directly in ITER_KVEC.
2946 */
2947 if (direct && from->type & ITER_KVEC) {
2948 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2949 direct = false;
2950 }
2951
2952 rc = generic_write_checks(iocb, from);
2953 if (rc <= 0)
2954 return rc;
2955
2956 cifs_sb = CIFS_FILE_SB(file);
2957 cfile = file->private_data;
2958 tcon = tlink_tcon(cfile->tlink);
2959
2960 if (!tcon->ses->server->ops->async_writev)
2961 return -ENOSYS;
2962
2963 ctx = cifs_aio_ctx_alloc();
2964 if (!ctx)
2965 return -ENOMEM;
2966
2967 ctx->cfile = cifsFileInfo_get(cfile);
2968
2969 if (!is_sync_kiocb(iocb))
2970 ctx->iocb = iocb;
2971
2972 ctx->pos = iocb->ki_pos;
2973
2974 if (direct) {
2975 ctx->direct_io = true;
2976 ctx->iter = *from;
2977 ctx->len = len;
2978 } else {
2979 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2980 if (rc) {
2981 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2982 return rc;
2983 }
2984 }
2985
2986 /* grab a lock here because write response handlers can access ctx */
2987 mutex_lock(&ctx->aio_mutex);
2988
2989 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2990 cfile, cifs_sb, &ctx->list, ctx);
2991
2992 /*
2993 * If at least one write was successfully sent, then discard any rc
2994 * value from the later writes. If the other writes succeed, then
2995 * we'll end up returning whatever was written. If they fail, then
2996 * we'll get a new rc value from that.
2997 */
2998 if (!list_empty(&ctx->list))
2999 rc = 0;
3000
3001 mutex_unlock(&ctx->aio_mutex);
3002
3003 if (rc) {
3004 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3005 return rc;
3006 }
3007
3008 if (!is_sync_kiocb(iocb)) {
3009 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3010 return -EIOCBQUEUED;
3011 }
3012
3013 rc = wait_for_completion_killable(&ctx->done);
3014 if (rc) {
3015 mutex_lock(&ctx->aio_mutex);
3016 ctx->rc = rc = -EINTR;
3017 total_written = ctx->total_len;
3018 mutex_unlock(&ctx->aio_mutex);
3019 } else {
3020 rc = ctx->rc;
3021 total_written = ctx->total_len;
3022 }
3023
3024 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3025
3026 if (unlikely(!total_written))
3027 return rc;
3028
3029 iocb->ki_pos += total_written;
3030 return total_written;
3031 }
3032
3033 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3034 {
3035 return __cifs_writev(iocb, from, true);
3036 }
3037
3038 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3039 {
3040 return __cifs_writev(iocb, from, false);
3041 }
3042
3043 static ssize_t
3044 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3045 {
3046 struct file *file = iocb->ki_filp;
3047 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3048 struct inode *inode = file->f_mapping->host;
3049 struct cifsInodeInfo *cinode = CIFS_I(inode);
3050 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3051 ssize_t rc;
3052
3053 inode_lock(inode);
3054 /*
3055 * We need to hold the sem to be sure nobody modifies lock list
3056 * with a brlock that prevents writing.
3057 */
3058 down_read(&cinode->lock_sem);
3059
3060 rc = generic_write_checks(iocb, from);
3061 if (rc <= 0)
3062 goto out;
3063
3064 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3065 server->vals->exclusive_lock_type, 0,
3066 NULL, CIFS_WRITE_OP))
3067 rc = __generic_file_write_iter(iocb, from);
3068 else
3069 rc = -EACCES;
3070 out:
3071 up_read(&cinode->lock_sem);
3072 inode_unlock(inode);
3073
3074 if (rc > 0)
3075 rc = generic_write_sync(iocb, rc);
3076 return rc;
3077 }
3078
3079 ssize_t
3080 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3081 {
3082 struct inode *inode = file_inode(iocb->ki_filp);
3083 struct cifsInodeInfo *cinode = CIFS_I(inode);
3084 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3085 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3086 iocb->ki_filp->private_data;
3087 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3088 ssize_t written;
3089
3090 written = cifs_get_writer(cinode);
3091 if (written)
3092 return written;
3093
3094 if (CIFS_CACHE_WRITE(cinode)) {
3095 if (cap_unix(tcon->ses) &&
3096 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3097 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3098 written = generic_file_write_iter(iocb, from);
3099 goto out;
3100 }
3101 written = cifs_writev(iocb, from);
3102 goto out;
3103 }
3104 /*
3105 * For non-oplocked files in strict cache mode we need to write the data
3106 * to the server exactly from the pos to pos+len-1 rather than flush all
3107 * affected pages because it may cause an error with mandatory locks on
3108 * these pages but not on the region from pos to pos+len-1.
3109 */
3110 written = cifs_user_writev(iocb, from);
3111 if (CIFS_CACHE_READ(cinode)) {
3112 /*
3113 * We have read level caching and we have just sent a write
3114 * request to the server thus making data in the cache stale.
3115 * Zap the cache and set oplock/lease level to NONE to avoid
3116 * reading stale data from the cache. All subsequent read
3117 * operations will read new data from the server.
3118 */
3119 cifs_zap_mapping(inode);
3120 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3121 inode);
3122 cinode->oplock = 0;
3123 }
3124 out:
3125 cifs_put_writer(cinode);
3126 return written;
3127 }
3128
3129 static struct cifs_readdata *
3130 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3131 {
3132 struct cifs_readdata *rdata;
3133
3134 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3135 if (rdata != NULL) {
3136 rdata->pages = pages;
3137 kref_init(&rdata->refcount);
3138 INIT_LIST_HEAD(&rdata->list);
3139 init_completion(&rdata->done);
3140 INIT_WORK(&rdata->work, complete);
3141 }
3142
3143 return rdata;
3144 }
3145
3146 static struct cifs_readdata *
3147 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3148 {
3149 struct page **pages =
3150 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3151 struct cifs_readdata *ret = NULL;
3152
3153 if (pages) {
3154 ret = cifs_readdata_direct_alloc(pages, complete);
3155 if (!ret)
3156 kfree(pages);
3157 }
3158
3159 return ret;
3160 }
3161
3162 void
3163 cifs_readdata_release(struct kref *refcount)
3164 {
3165 struct cifs_readdata *rdata = container_of(refcount,
3166 struct cifs_readdata, refcount);
3167 #ifdef CONFIG_CIFS_SMB_DIRECT
3168 if (rdata->mr) {
3169 smbd_deregister_mr(rdata->mr);
3170 rdata->mr = NULL;
3171 }
3172 #endif
3173 if (rdata->cfile)
3174 cifsFileInfo_put(rdata->cfile);
3175
3176 kvfree(rdata->pages);
3177 kfree(rdata);
3178 }
3179
3180 static int
3181 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3182 {
3183 int rc = 0;
3184 struct page *page;
3185 unsigned int i;
3186
3187 for (i = 0; i < nr_pages; i++) {
3188 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3189 if (!page) {
3190 rc = -ENOMEM;
3191 break;
3192 }
3193 rdata->pages[i] = page;
3194 }
3195
3196 if (rc) {
3197 for (i = 0; i < nr_pages; i++) {
3198 put_page(rdata->pages[i]);
3199 rdata->pages[i] = NULL;
3200 }
3201 }
3202 return rc;
3203 }
3204
3205 static void
3206 cifs_uncached_readdata_release(struct kref *refcount)
3207 {
3208 struct cifs_readdata *rdata = container_of(refcount,
3209 struct cifs_readdata, refcount);
3210 unsigned int i;
3211
3212 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3213 for (i = 0; i < rdata->nr_pages; i++) {
3214 put_page(rdata->pages[i]);
3215 }
3216 cifs_readdata_release(refcount);
3217 }
3218
3219 /**
3220 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3221 * @rdata: the readdata response with list of pages holding data
3222 * @iter: destination for our data
3223 *
3224 * This function copies data from a list of pages in a readdata response into
3225 * an array of iovecs. It will first calculate where the data should go
3226 * based on the info in the readdata and then copy the data into that spot.
3227 */
3228 static int
3229 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3230 {
3231 size_t remaining = rdata->got_bytes;
3232 unsigned int i;
3233
3234 for (i = 0; i < rdata->nr_pages; i++) {
3235 struct page *page = rdata->pages[i];
3236 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3237 size_t written;
3238
3239 if (unlikely(iov_iter_is_pipe(iter))) {
3240 void *addr = kmap_atomic(page);
3241
3242 written = copy_to_iter(addr, copy, iter);
3243 kunmap_atomic(addr);
3244 } else
3245 written = copy_page_to_iter(page, 0, copy, iter);
3246 remaining -= written;
3247 if (written < copy && iov_iter_count(iter) > 0)
3248 break;
3249 }
3250 return remaining ? -EFAULT : 0;
3251 }
3252
3253 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3254
3255 static void
3256 cifs_uncached_readv_complete(struct work_struct *work)
3257 {
3258 struct cifs_readdata *rdata = container_of(work,
3259 struct cifs_readdata, work);
3260
3261 complete(&rdata->done);
3262 collect_uncached_read_data(rdata->ctx);
3263 /* the below call can possibly free the last ref to aio ctx */
3264 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3265 }
3266
3267 static int
3268 uncached_fill_pages(struct TCP_Server_Info *server,
3269 struct cifs_readdata *rdata, struct iov_iter *iter,
3270 unsigned int len)
3271 {
3272 int result = 0;
3273 unsigned int i;
3274 unsigned int nr_pages = rdata->nr_pages;
3275 unsigned int page_offset = rdata->page_offset;
3276
3277 rdata->got_bytes = 0;
3278 rdata->tailsz = PAGE_SIZE;
3279 for (i = 0; i < nr_pages; i++) {
3280 struct page *page = rdata->pages[i];
3281 size_t n;
3282 unsigned int segment_size = rdata->pagesz;
3283
3284 if (i == 0)
3285 segment_size -= page_offset;
3286 else
3287 page_offset = 0;
3288
3289
3290 if (len <= 0) {
3291 /* no need to hold page hostage */
3292 rdata->pages[i] = NULL;
3293 rdata->nr_pages--;
3294 put_page(page);
3295 continue;
3296 }
3297
3298 n = len;
3299 if (len >= segment_size)
3300 /* enough data to fill the page */
3301 n = segment_size;
3302 else
3303 rdata->tailsz = len;
3304 len -= n;
3305
3306 if (iter)
3307 result = copy_page_from_iter(
3308 page, page_offset, n, iter);
3309 #ifdef CONFIG_CIFS_SMB_DIRECT
3310 else if (rdata->mr)
3311 result = n;
3312 #endif
3313 else
3314 result = cifs_read_page_from_socket(
3315 server, page, page_offset, n);
3316 if (result < 0)
3317 break;
3318
3319 rdata->got_bytes += result;
3320 }
3321
3322 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3323 rdata->got_bytes : result;
3324 }
3325
3326 static int
3327 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3328 struct cifs_readdata *rdata, unsigned int len)
3329 {
3330 return uncached_fill_pages(server, rdata, NULL, len);
3331 }
3332
3333 static int
3334 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3335 struct cifs_readdata *rdata,
3336 struct iov_iter *iter)
3337 {
3338 return uncached_fill_pages(server, rdata, iter, iter->count);
3339 }
3340
3341 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3342 struct list_head *rdata_list,
3343 struct cifs_aio_ctx *ctx)
3344 {
3345 unsigned int rsize;
3346 struct cifs_credits credits;
3347 int rc;
3348 struct TCP_Server_Info *server =
3349 tlink_tcon(rdata->cfile->tlink)->ses->server;
3350
3351 /*
3352 * Wait for credits to resend this rdata.
3353 * Note: we are attempting to resend the whole rdata rather than in segments
3354 */
3355 do {
3356 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3357 &rsize, &credits);
3358
3359 if (rc)
3360 goto out;
3361
3362 if (rsize < rdata->bytes) {
3363 add_credits_and_wake_if(server, &credits, 0);
3364 msleep(1000);
3365 }
3366 } while (rsize < rdata->bytes);
3367
3368 rdata->credits = credits;
3369 rc = -EAGAIN;
3370 while (rc == -EAGAIN) {
3371 rc = 0;
3372 if (rdata->cfile->invalidHandle)
3373 rc = cifs_reopen_file(rdata->cfile, true);
3374 if (!rc)
3375 rc = server->ops->async_readv(rdata);
3376 }
3377
3378 if (!rc) {
3379 /* Add to aio pending list */
3380 list_add_tail(&rdata->list, rdata_list);
3381 return 0;
3382 }
3383
3384 add_credits_and_wake_if(server, &rdata->credits, 0);
3385 out:
3386 kref_put(&rdata->refcount,
3387 cifs_uncached_readdata_release);
3388
3389 return rc;
3390 }
3391
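/*
 * Read-side counterpart of cifs_write_from_iter(): split an uncached or
 * direct read into rsize-bounded chunks, pinning the user pages for
 * direct I/O or allocating pages otherwise, and queue each async read on
 * rdata_list for collect_uncached_read_data() to reap.
 */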
3392 static int
3393 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3394 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3395 struct cifs_aio_ctx *ctx)
3396 {
3397 struct cifs_readdata *rdata;
3398 unsigned int npages, rsize;
3399 struct cifs_credits credits_on_stack;
3400 struct cifs_credits *credits = &credits_on_stack;
3401 size_t cur_len;
3402 int rc;
3403 pid_t pid;
3404 struct TCP_Server_Info *server;
3405 struct page **pagevec;
3406 size_t start;
3407 struct iov_iter direct_iov = ctx->iter;
3408
3409 server = tlink_tcon(open_file->tlink)->ses->server;
3410
3411 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3412 pid = open_file->pid;
3413 else
3414 pid = current->tgid;
3415
3416 if (ctx->direct_io)
3417 iov_iter_advance(&direct_iov, offset - ctx->pos);
3418
3419 do {
3420 if (open_file->invalidHandle) {
3421 rc = cifs_reopen_file(open_file, true);
3422 if (rc == -EAGAIN)
3423 continue;
3424 else if (rc)
3425 break;
3426 }
3427
3428 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3429 &rsize, credits);
3430 if (rc)
3431 break;
3432
3433 cur_len = min_t(const size_t, len, rsize);
3434
3435 if (ctx->direct_io) {
3436 ssize_t result;
3437
3438 result = iov_iter_get_pages_alloc(
3439 &direct_iov, &pagevec,
3440 cur_len, &start);
3441 if (result < 0) {
3442 cifs_dbg(VFS,
3443 "couldn't get user pages (rc=%zd)"
3444 " iter type %d"
3445 " iov_offset %zd count %zd\n",
3446 result, direct_iov.type,
3447 direct_iov.iov_offset,
3448 direct_iov.count);
3449 dump_stack();
3450
3451 rc = result;
3452 add_credits_and_wake_if(server, credits, 0);
3453 break;
3454 }
3455 cur_len = (size_t)result;
3456 iov_iter_advance(&direct_iov, cur_len);
3457
3458 rdata = cifs_readdata_direct_alloc(
3459 pagevec, cifs_uncached_readv_complete);
3460 if (!rdata) {
3461 add_credits_and_wake_if(server, credits, 0);
3462 rc = -ENOMEM;
3463 break;
3464 }
3465
3466 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3467 rdata->page_offset = start;
3468 rdata->tailsz = npages > 1 ?
3469 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3470 cur_len;
3471
3472 } else {
3473
3474 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3475 /* allocate a readdata struct */
3476 rdata = cifs_readdata_alloc(npages,
3477 cifs_uncached_readv_complete);
3478 if (!rdata) {
3479 add_credits_and_wake_if(server, credits, 0);
3480 rc = -ENOMEM;
3481 break;
3482 }
3483
3484 rc = cifs_read_allocate_pages(rdata, npages);
3485 if (rc) {
3486 kvfree(rdata->pages);
3487 kfree(rdata);
3488 add_credits_and_wake_if(server, credits, 0);
3489 break;
3490 }
3491
3492 rdata->tailsz = PAGE_SIZE;
3493 }
3494
3495 rdata->cfile = cifsFileInfo_get(open_file);
3496 rdata->nr_pages = npages;
3497 rdata->offset = offset;
3498 rdata->bytes = cur_len;
3499 rdata->pid = pid;
3500 rdata->pagesz = PAGE_SIZE;
3501 rdata->read_into_pages = cifs_uncached_read_into_pages;
3502 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3503 rdata->credits = credits_on_stack;
3504 rdata->ctx = ctx;
3505 kref_get(&ctx->refcount);
3506
3507 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3508
3509 if (!rc) {
3510 if (rdata->cfile->invalidHandle)
3511 rc = -EAGAIN;
3512 else
3513 rc = server->ops->async_readv(rdata);
3514 }
3515
3516 if (rc) {
3517 add_credits_and_wake_if(server, &rdata->credits, 0);
3518 kref_put(&rdata->refcount,
3519 cifs_uncached_readdata_release);
3520 if (rc == -EAGAIN) {
3521 iov_iter_revert(&direct_iov, cur_len);
3522 continue;
3523 }
3524 break;
3525 }
3526
3527 list_add_tail(&rdata->list, rdata_list);
3528 offset += cur_len;
3529 len -= cur_len;
3530 } while (len > 0);
3531
3532 return rc;
3533 }
3534
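/*
 * Reap the reads queued by cifs_send_async_read(): wait for each rdata in
 * order of increasing offset, copy the received data into the destination
 * iterator (unless this is direct I/O), resend chunks that failed with
 * -EAGAIN, and finally record ctx->rc and complete the iocb or the
 * synchronous waiter.
 */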
3535 static void
3536 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3537 {
3538 struct cifs_readdata *rdata, *tmp;
3539 struct iov_iter *to = &ctx->iter;
3540 struct cifs_sb_info *cifs_sb;
3541 struct cifs_tcon *tcon;
3542 unsigned int i;
3543 int rc;
3544
3545 tcon = tlink_tcon(ctx->cfile->tlink);
3546 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3547
3548 mutex_lock(&ctx->aio_mutex);
3549
3550 if (list_empty(&ctx->list)) {
3551 mutex_unlock(&ctx->aio_mutex);
3552 return;
3553 }
3554
3555 rc = ctx->rc;
3556 /* the loop below should proceed in the order of increasing offsets */
3557 again:
3558 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3559 if (!rc) {
3560 if (!try_wait_for_completion(&rdata->done)) {
3561 mutex_unlock(&ctx->aio_mutex);
3562 return;
3563 }
3564
3565 if (rdata->result == -EAGAIN) {
3566 /* resend call if it's a retryable error */
3567 struct list_head tmp_list;
3568 unsigned int got_bytes = rdata->got_bytes;
3569
3570 list_del_init(&rdata->list);
3571 INIT_LIST_HEAD(&tmp_list);
3572
3573 /*
3574 * Got a part of data and then reconnect has
3575 * happened -- fill the buffer and continue
3576 * reading.
3577 */
3578 if (got_bytes && got_bytes < rdata->bytes) {
3579 rc = 0;
3580 if (!ctx->direct_io)
3581 rc = cifs_readdata_to_iov(rdata, to);
3582 if (rc) {
3583 kref_put(&rdata->refcount,
3584 cifs_uncached_readdata_release);
3585 continue;
3586 }
3587 }
3588
3589 if (ctx->direct_io) {
3590 /*
3591 * Re-use rdata as this is a
3592 * direct I/O
3593 */
3594 rc = cifs_resend_rdata(
3595 rdata,
3596 &tmp_list, ctx);
3597 } else {
3598 rc = cifs_send_async_read(
3599 rdata->offset + got_bytes,
3600 rdata->bytes - got_bytes,
3601 rdata->cfile, cifs_sb,
3602 &tmp_list, ctx);
3603
3604 kref_put(&rdata->refcount,
3605 cifs_uncached_readdata_release);
3606 }
3607
3608 list_splice(&tmp_list, &ctx->list);
3609
3610 goto again;
3611 } else if (rdata->result)
3612 rc = rdata->result;
3613 else if (!ctx->direct_io)
3614 rc = cifs_readdata_to_iov(rdata, to);
3615
3616 /* if there was a short read -- discard anything left */
3617 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3618 rc = -ENODATA;
3619
3620 ctx->total_len += rdata->got_bytes;
3621 }
3622 list_del_init(&rdata->list);
3623 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3624 }
3625
3626 if (!ctx->direct_io) {
3627 for (i = 0; i < ctx->npages; i++) {
3628 if (ctx->should_dirty)
3629 set_page_dirty(ctx->bv[i].bv_page);
3630 put_page(ctx->bv[i].bv_page);
3631 }
3632
3633 ctx->total_len = ctx->len - iov_iter_count(to);
3634 }
3635
3636 /* mask nodata case */
3637 if (rc == -ENODATA)
3638 rc = 0;
3639
3640 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3641
3642 mutex_unlock(&ctx->aio_mutex);
3643
3644 if (ctx->iocb && ctx->iocb->ki_complete)
3645 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3646 else
3647 complete(&ctx->done);
3648 }
3649
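/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv():
 * set up a cifs_aio_ctx, dispatch the chunked async reads and then either
 * return -EIOCBQUEUED for an async iocb or wait for completion and
 * advance ki_pos by the number of bytes read.
 */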
3650 static ssize_t __cifs_readv(
3651 struct kiocb *iocb, struct iov_iter *to, bool direct)
3652 {
3653 size_t len;
3654 struct file *file = iocb->ki_filp;
3655 struct cifs_sb_info *cifs_sb;
3656 struct cifsFileInfo *cfile;
3657 struct cifs_tcon *tcon;
3658 ssize_t rc, total_read = 0;
3659 loff_t offset = iocb->ki_pos;
3660 struct cifs_aio_ctx *ctx;
3661
3662 /*
3663 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3664 * so fall back to the data copy read path.
3665 * This could be improved by getting pages directly in ITER_KVEC.
3666 */
3667 if (direct && to->type & ITER_KVEC) {
3668 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3669 direct = false;
3670 }
3671
3672 len = iov_iter_count(to);
3673 if (!len)
3674 return 0;
3675
3676 cifs_sb = CIFS_FILE_SB(file);
3677 cfile = file->private_data;
3678 tcon = tlink_tcon(cfile->tlink);
3679
3680 if (!tcon->ses->server->ops->async_readv)
3681 return -ENOSYS;
3682
3683 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3684 cifs_dbg(FYI, "attempting read on write only file instance\n");
3685
3686 ctx = cifs_aio_ctx_alloc();
3687 if (!ctx)
3688 return -ENOMEM;
3689
3690 ctx->cfile = cifsFileInfo_get(cfile);
3691
3692 if (!is_sync_kiocb(iocb))
3693 ctx->iocb = iocb;
3694
3695 if (iter_is_iovec(to))
3696 ctx->should_dirty = true;
3697
3698 if (direct) {
3699 ctx->pos = offset;
3700 ctx->direct_io = true;
3701 ctx->iter = *to;
3702 ctx->len = len;
3703 } else {
3704 rc = setup_aio_ctx_iter(ctx, to, READ);
3705 if (rc) {
3706 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3707 return rc;
3708 }
3709 len = ctx->len;
3710 }
3711
3712 /* grab a lock here because read response handlers can access ctx */
3713 mutex_lock(&ctx->aio_mutex);
3714
3715 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3716
3717 /* if at least one read request send succeeded, then reset rc */
3718 if (!list_empty(&ctx->list))
3719 rc = 0;
3720
3721 mutex_unlock(&ctx->aio_mutex);
3722
3723 if (rc) {
3724 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3725 return rc;
3726 }
3727
3728 if (!is_sync_kiocb(iocb)) {
3729 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3730 return -EIOCBQUEUED;
3731 }
3732
3733 rc = wait_for_completion_killable(&ctx->done);
3734 if (rc) {
3735 mutex_lock(&ctx->aio_mutex);
3736 ctx->rc = rc = -EINTR;
3737 total_read = ctx->total_len;
3738 mutex_unlock(&ctx->aio_mutex);
3739 } else {
3740 rc = ctx->rc;
3741 total_read = ctx->total_len;
3742 }
3743
3744 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3745
3746 if (total_read) {
3747 iocb->ki_pos += total_read;
3748 return total_read;
3749 }
3750 return rc;
3751 }
3752
3753 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3754 {
3755 return __cifs_readv(iocb, to, true);
3756 }
3757
3758 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3759 {
3760 return __cifs_readv(iocb, to, false);
3761 }
3762
3763 ssize_t
3764 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3765 {
3766 struct inode *inode = file_inode(iocb->ki_filp);
3767 struct cifsInodeInfo *cinode = CIFS_I(inode);
3768 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3769 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3770 iocb->ki_filp->private_data;
3771 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3772 int rc = -EACCES;
3773
3774 /*
3775 * In strict cache mode we need to read from the server all the time
3776 * if we don't have a level II oplock because the server can delay mtime
3777 * change - so we can't make a decision about invalidating the inode.
3778 * We can also fail when reading pages if there are mandatory locks
3779 * on pages affected by this read but not on the region from pos to
3780 * pos+len-1.
3781 */
3782 if (!CIFS_CACHE_READ(cinode))
3783 return cifs_user_readv(iocb, to);
3784
3785 if (cap_unix(tcon->ses) &&
3786 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3787 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3788 return generic_file_read_iter(iocb, to);
3789
3790 /*
3791 * We need to hold the sem to be sure nobody modifies lock list
3792 * with a brlock that prevents reading.
3793 */
3794 down_read(&cinode->lock_sem);
3795 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3796 tcon->ses->server->vals->shared_lock_type,
3797 0, NULL, CIFS_READ_OP))
3798 rc = generic_file_read_iter(iocb, to);
3799 up_read(&cinode->lock_sem);
3800 return rc;
3801 }
3802
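/*
 * Legacy synchronous read path: issue sync_read calls of at most rsize
 * bytes in a loop, reopening the handle and retrying on -EAGAIN, until
 * the request is satisfied or the server returns an error or zero bytes.
 */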
3803 static ssize_t
3804 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3805 {
3806 int rc = -EACCES;
3807 unsigned int bytes_read = 0;
3808 unsigned int total_read;
3809 unsigned int current_read_size;
3810 unsigned int rsize;
3811 struct cifs_sb_info *cifs_sb;
3812 struct cifs_tcon *tcon;
3813 struct TCP_Server_Info *server;
3814 unsigned int xid;
3815 char *cur_offset;
3816 struct cifsFileInfo *open_file;
3817 struct cifs_io_parms io_parms;
3818 int buf_type = CIFS_NO_BUFFER;
3819 __u32 pid;
3820
3821 xid = get_xid();
3822 cifs_sb = CIFS_FILE_SB(file);
3823
3824 /* FIXME: set up handlers for larger reads and/or convert to async */
3825 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3826
3827 if (file->private_data == NULL) {
3828 rc = -EBADF;
3829 free_xid(xid);
3830 return rc;
3831 }
3832 open_file = file->private_data;
3833 tcon = tlink_tcon(open_file->tlink);
3834 server = tcon->ses->server;
3835
3836 if (!server->ops->sync_read) {
3837 free_xid(xid);
3838 return -ENOSYS;
3839 }
3840
3841 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3842 pid = open_file->pid;
3843 else
3844 pid = current->tgid;
3845
3846 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3847 cifs_dbg(FYI, "attempting read on write only file instance\n");
3848
3849 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3850 total_read += bytes_read, cur_offset += bytes_read) {
3851 do {
3852 current_read_size = min_t(uint, read_size - total_read,
3853 rsize);
3854 /*
3855 * For Windows ME and 9x we do not want to request more
3856 * than was negotiated since the server will refuse the
3857 * read otherwise.
3858 */
3859 if ((tcon->ses) && !(tcon->ses->capabilities &
3860 tcon->ses->server->vals->cap_large_files)) {
3861 current_read_size = min_t(uint,
3862 current_read_size, CIFSMaxBufSize);
3863 }
3864 if (open_file->invalidHandle) {
3865 rc = cifs_reopen_file(open_file, true);
3866 if (rc != 0)
3867 break;
3868 }
3869 io_parms.pid = pid;
3870 io_parms.tcon = tcon;
3871 io_parms.offset = *offset;
3872 io_parms.length = current_read_size;
3873 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3874 &bytes_read, &cur_offset,
3875 &buf_type);
3876 } while (rc == -EAGAIN);
3877
3878 if (rc || (bytes_read == 0)) {
3879 if (total_read) {
3880 break;
3881 } else {
3882 free_xid(xid);
3883 return rc;
3884 }
3885 } else {
3886 cifs_stats_bytes_read(tcon, total_read);
3887 *offset += bytes_read;
3888 }
3889 }
3890 free_xid(xid);
3891 return total_read;
3892 }
3893
3894 /*
3895 * If the page is mmap'ed into a process' page tables, then we need to make
3896 * sure that it doesn't change while being written back.
3897 */
3898 static vm_fault_t
3899 cifs_page_mkwrite(struct vm_fault *vmf)
3900 {
3901 struct page *page = vmf->page;
3902
3903 lock_page(page);
3904 return VM_FAULT_LOCKED;
3905 }
3906
3907 static const struct vm_operations_struct cifs_file_vm_ops = {
3908 .fault = filemap_fault,
3909 .map_pages = filemap_map_pages,
3910 .page_mkwrite = cifs_page_mkwrite,
3911 };
3912
3913 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3914 {
3915 int xid, rc = 0;
3916 struct inode *inode = file_inode(file);
3917
3918 xid = get_xid();
3919
3920 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3921 rc = cifs_zap_mapping(inode);
3922 if (!rc)
3923 rc = generic_file_mmap(file, vma);
3924 if (!rc)
3925 vma->vm_ops = &cifs_file_vm_ops;
3926
3927 free_xid(xid);
3928 return rc;
3929 }
3930
3931 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3932 {
3933 int rc, xid;
3934
3935 xid = get_xid();
3936
3937 rc = cifs_revalidate_file(file);
3938 if (rc)
3939 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3940 rc);
3941 if (!rc)
3942 rc = generic_file_mmap(file, vma);
3943 if (!rc)
3944 vma->vm_ops = &cifs_file_vm_ops;
3945
3946 free_xid(xid);
3947 return rc;
3948 }
3949
3950 static void
3951 cifs_readv_complete(struct work_struct *work)
3952 {
3953 unsigned int i, got_bytes;
3954 struct cifs_readdata *rdata = container_of(work,
3955 struct cifs_readdata, work);
3956
3957 got_bytes = rdata->got_bytes;
3958 for (i = 0; i < rdata->nr_pages; i++) {
3959 struct page *page = rdata->pages[i];
3960
3961 lru_cache_add_file(page);
3962
3963 if (rdata->result == 0 ||
3964 (rdata->result == -EAGAIN && got_bytes)) {
3965 flush_dcache_page(page);
3966 SetPageUptodate(page);
3967 }
3968
3969 unlock_page(page);
3970
3971 if (rdata->result == 0 ||
3972 (rdata->result == -EAGAIN && got_bytes))
3973 cifs_readpage_to_fscache(rdata->mapping->host, page);
3974
3975 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3976
3977 put_page(page);
3978 rdata->pages[i] = NULL;
3979 }
3980 kref_put(&rdata->refcount, cifs_readdata_release);
3981 }
3982
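/*
 * Fill the readahead pages from the supplied iterator, the SMB Direct
 * memory registration, or the socket.  A partially received page has its
 * tail zeroed; pages beyond the received data are dropped from the
 * request, and pages past the server's EOF are pre-zeroed and marked up
 * to date so the VFS does not keep retrying them.
 */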
3983 static int
3984 readpages_fill_pages(struct TCP_Server_Info *server,
3985 struct cifs_readdata *rdata, struct iov_iter *iter,
3986 unsigned int len)
3987 {
3988 int result = 0;
3989 unsigned int i;
3990 u64 eof;
3991 pgoff_t eof_index;
3992 unsigned int nr_pages = rdata->nr_pages;
3993 unsigned int page_offset = rdata->page_offset;
3994
3995 /* determine the eof that the server (probably) has */
3996 eof = CIFS_I(rdata->mapping->host)->server_eof;
3997 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3998 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3999
4000 rdata->got_bytes = 0;
4001 rdata->tailsz = PAGE_SIZE;
4002 for (i = 0; i < nr_pages; i++) {
4003 struct page *page = rdata->pages[i];
4004 unsigned int to_read = rdata->pagesz;
4005 size_t n;
4006
4007 if (i == 0)
4008 to_read -= page_offset;
4009 else
4010 page_offset = 0;
4011
4012 n = to_read;
4013
4014 if (len >= to_read) {
4015 len -= to_read;
4016 } else if (len > 0) {
4017 /* enough for partial page, fill and zero the rest */
4018 zero_user(page, len + page_offset, to_read - len);
4019 n = rdata->tailsz = len;
4020 len = 0;
4021 } else if (page->index > eof_index) {
4022 /*
4023 * The VFS will not try to do readahead past the
4024 * i_size, but it's possible that we have outstanding
4025 * writes with gaps in the middle and the i_size hasn't
4026 * caught up yet. Populate those with zeroed out pages
4027 * to prevent the VFS from repeatedly attempting to
4028 * fill them until the writes are flushed.
4029 */
4030 zero_user(page, 0, PAGE_SIZE);
4031 lru_cache_add_file(page);
4032 flush_dcache_page(page);
4033 SetPageUptodate(page);
4034 unlock_page(page);
4035 put_page(page);
4036 rdata->pages[i] = NULL;
4037 rdata->nr_pages--;
4038 continue;
4039 } else {
4040 /* no need to hold page hostage */
4041 lru_cache_add_file(page);
4042 unlock_page(page);
4043 put_page(page);
4044 rdata->pages[i] = NULL;
4045 rdata->nr_pages--;
4046 continue;
4047 }
4048
4049 if (iter)
4050 result = copy_page_from_iter(
4051 page, page_offset, n, iter);
4052 #ifdef CONFIG_CIFS_SMB_DIRECT
4053 else if (rdata->mr)
4054 result = n;
4055 #endif
4056 else
4057 result = cifs_read_page_from_socket(
4058 server, page, page_offset, n);
4059 if (result < 0)
4060 break;
4061
4062 rdata->got_bytes += result;
4063 }
4064
4065 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4066 rdata->got_bytes : result;
4067 }
4068
4069 static int
4070 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4071 struct cifs_readdata *rdata, unsigned int len)
4072 {
4073 return readpages_fill_pages(server, rdata, NULL, len);
4074 }
4075
4076 static int
4077 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4078 struct cifs_readdata *rdata,
4079 struct iov_iter *iter)
4080 {
4081 return readpages_fill_pages(server, rdata, iter, iter->count);
4082 }
4083
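/*
 * Peel a run of contiguous pages off @page_list, add them to the page
 * cache and move them to @tmplist, stopping at an index discontinuity
 * or when the batch would exceed @rsize. The starting file offset, byte
 * count and page count of the batch are returned via the out pointers.
 */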
4084 static int
4085 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4086 unsigned int rsize, struct list_head *tmplist,
4087 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4088 {
4089 struct page *page, *tpage;
4090 unsigned int expected_index;
4091 int rc;
4092 gfp_t gfp = readahead_gfp_mask(mapping);
4093
4094 INIT_LIST_HEAD(tmplist);
4095
4096 page = lru_to_page(page_list);
4097
4098 /*
4099 * Lock the page and put it in the cache. Since no one else
4100 * should have access to this page, we're safe to simply set
4101 * PG_locked without checking it first.
4102 */
4103 __SetPageLocked(page);
4104 rc = add_to_page_cache_locked(page, mapping,
4105 page->index, gfp);
4106
4107 /* give up if we can't stick it in the cache */
4108 if (rc) {
4109 __ClearPageLocked(page);
4110 return rc;
4111 }
4112
4113 /* move first page to the tmplist */
4114 *offset = (loff_t)page->index << PAGE_SHIFT;
4115 *bytes = PAGE_SIZE;
4116 *nr_pages = 1;
4117 list_move_tail(&page->lru, tmplist);
4118
4119 /* now try and add more pages onto the request */
4120 expected_index = page->index + 1;
4121 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4122 /* discontinuity ? */
4123 if (page->index != expected_index)
4124 break;
4125
4126 /* would this page push the read over the rsize? */
4127 if (*bytes + PAGE_SIZE > rsize)
4128 break;
4129
4130 __SetPageLocked(page);
4131 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4132 __ClearPageLocked(page);
4133 break;
4134 }
4135 list_move_tail(&page->lru, tmplist);
4136 (*bytes) += PAGE_SIZE;
4137 expected_index++;
4138 (*nr_pages)++;
4139 }
4140 return rc;
4141 }
4142
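/*
 * ->readpages(): satisfy as much of the request as possible from
 * fscache, then batch the remaining pages into rsize-bounded async
 * reads issued through the server's async_readv operation.
 */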
4143 static int cifs_readpages(struct file *file, struct address_space *mapping,
4144 struct list_head *page_list, unsigned num_pages)
4145 {
4146 int rc;
4147 struct list_head tmplist;
4148 struct cifsFileInfo *open_file = file->private_data;
4149 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4150 struct TCP_Server_Info *server;
4151 pid_t pid;
4152 unsigned int xid;
4153
4154 xid = get_xid();
4155 /*
4156 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4157 * immediately if the cookie is negative.
4158 *
4159 * After this point, every page in the list might have PG_fscache set,
4160 * so we will need to clear that flag from every page we don't use.
4161 */
4162 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4163 &num_pages);
4164 if (rc == 0) {
4165 free_xid(xid);
4166 return rc;
4167 }
4168
4169 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4170 pid = open_file->pid;
4171 else
4172 pid = current->tgid;
4173
4174 rc = 0;
4175 server = tlink_tcon(open_file->tlink)->ses->server;
4176
4177 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4178 __func__, file, mapping, num_pages);
4179
4180 /*
4181 * Start with the page at end of list and move it to private
4182 * list. Do the same with any following pages until we hit
4183 * the rsize limit, hit an index discontinuity, or run out of
4184 * pages. Issue the async read and then start the loop again
4185 * until the list is empty.
4186 *
4187 * Note that list order is important. The page_list is in
4188 * the order of declining indexes. When we put the pages in
4189 * the rdata->pages, then we want them in increasing order.
4190 */
4191 while (!list_empty(page_list)) {
4192 unsigned int i, nr_pages, bytes, rsize;
4193 loff_t offset;
4194 struct page *page, *tpage;
4195 struct cifs_readdata *rdata;
4196 struct cifs_credits credits_on_stack;
4197 struct cifs_credits *credits = &credits_on_stack;
4198
4199 if (open_file->invalidHandle) {
4200 rc = cifs_reopen_file(open_file, true);
4201 if (rc == -EAGAIN)
4202 continue;
4203 else if (rc)
4204 break;
4205 }
4206
4207 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4208 &rsize, credits);
4209 if (rc)
4210 break;
4211
4212 /*
4213 * Give up immediately if rsize is too small to read an entire
4214 * page. The VFS will fall back to readpage. However, we should
4215 * never reach this point, since we set ra_pages to 0 when the
4216 * rsize is smaller than a cache page.
4217 */
4218 if (unlikely(rsize < PAGE_SIZE)) {
4219 add_credits_and_wake_if(server, credits, 0);
4220 free_xid(xid);
4221 return 0;
4222 }
4223
4224 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4225 &nr_pages, &offset, &bytes);
4226 if (rc) {
4227 add_credits_and_wake_if(server, credits, 0);
4228 break;
4229 }
4230
4231 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4232 if (!rdata) {
4233 /* best to give up if we're out of mem */
4234 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4235 list_del(&page->lru);
4236 lru_cache_add_file(page);
4237 unlock_page(page);
4238 put_page(page);
4239 }
4240 rc = -ENOMEM;
4241 add_credits_and_wake_if(server, credits, 0);
4242 break;
4243 }
4244
4245 rdata->cfile = cifsFileInfo_get(open_file);
4246 rdata->mapping = mapping;
4247 rdata->offset = offset;
4248 rdata->bytes = bytes;
4249 rdata->pid = pid;
4250 rdata->pagesz = PAGE_SIZE;
4251 rdata->tailsz = PAGE_SIZE;
4252 rdata->read_into_pages = cifs_readpages_read_into_pages;
4253 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4254 rdata->credits = credits_on_stack;
4255
4256 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4257 list_del(&page->lru);
4258 rdata->pages[rdata->nr_pages++] = page;
4259 }
4260
4261 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4262
4263 if (!rc) {
4264 if (rdata->cfile->invalidHandle)
4265 rc = -EAGAIN;
4266 else
4267 rc = server->ops->async_readv(rdata);
4268 }
4269
4270 if (rc) {
4271 add_credits_and_wake_if(server, &rdata->credits, 0);
4272 for (i = 0; i < rdata->nr_pages; i++) {
4273 page = rdata->pages[i];
4274 lru_cache_add_file(page);
4275 unlock_page(page);
4276 put_page(page);
4277 }
4278 /* Fallback to the readpage in error/reconnect cases */
4279 kref_put(&rdata->refcount, cifs_readdata_release);
4280 break;
4281 }
4282
4283 kref_put(&rdata->refcount, cifs_readdata_release);
4284 }
4285
4286 /* Any pages that have been shown to fscache but didn't get added to
4287 * the pagecache must be uncached before they get returned to the
4288 * allocator.
4289 */
4290 cifs_fscache_readpages_cancel(mapping->host, page_list);
4291 free_xid(xid);
4292 return rc;
4293 }
4294
4295 /*
4296 * cifs_readpage_worker must be called with the page pinned
4297 */
4298 static int cifs_readpage_worker(struct file *file, struct page *page,
4299 loff_t *poffset)
4300 {
4301 char *read_data;
4302 int rc;
4303
4304 /* Is the page cached? */
4305 rc = cifs_readpage_from_fscache(file_inode(file), page);
4306 if (rc == 0)
4307 goto read_complete;
4308
4309 read_data = kmap(page);
4310 /* for reads over a certain size we could initiate async read ahead */
4311
4312 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4313
4314 if (rc < 0)
4315 goto io_error;
4316 else
4317 cifs_dbg(FYI, "Bytes read %d\n", rc);
4318
4319 /* we do not want atime to be less than mtime, it broke some apps */
4320 file_inode(file)->i_atime = current_time(file_inode(file));
4321 if (timespec64_compare(&(file_inode(file)->i_atime),
4322 &(file_inode(file)->i_mtime)) < 0)
4323 file_inode(file)->i_atime =
4324 file_inode(file)->i_mtime;
4325
4326 if (PAGE_SIZE > rc)
4327 memset(read_data + rc, 0, PAGE_SIZE - rc);
4328
4329 flush_dcache_page(page);
4330 SetPageUptodate(page);
4331
4332 /* send this page to the cache */
4333 cifs_readpage_to_fscache(file_inode(file), page);
4334
4335 rc = 0;
4336
4337 io_error:
4338 kunmap(page);
4339 unlock_page(page);
4340
4341 read_complete:
4342 return rc;
4343 }
4344
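/*
 * ->readpage(): read a single page synchronously, trying fscache first
 * and falling back to a sync read from the server.
 */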
4345 static int cifs_readpage(struct file *file, struct page *page)
4346 {
4347 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4348 int rc = -EACCES;
4349 unsigned int xid;
4350
4351 xid = get_xid();
4352
4353 if (file->private_data == NULL) {
4354 rc = -EBADF;
4355 free_xid(xid);
4356 return rc;
4357 }
4358
4359 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4360 page, (int)offset, (int)offset);
4361
4362 rc = cifs_readpage_worker(file, page, &offset);
4363
4364 free_xid(xid);
4365 return rc;
4366 }
4367
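/*
 * Return 1 if any handle on this inode is open for write, 0 otherwise.
 * Walks the inode's open file list under the tcon's open_file_lock.
 */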
4368 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4369 {
4370 struct cifsFileInfo *open_file;
4371 struct cifs_tcon *tcon =
4372 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4373
4374 spin_lock(&tcon->open_file_lock);
4375 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4376 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4377 spin_unlock(&tcon->open_file_lock);
4378 return 1;
4379 }
4380 }
4381 spin_unlock(&tcon->open_file_lock);
4382 return 0;
4383 }
4384
4385 /* We do not want to update the file size from the server for inodes
4386 open for write, to avoid races with writepage extending the file.
4387 In the future we could consider allowing a refresh of the inode
4388 only on increases in the file size, but this is tricky to do
4389 without racing with writebehind page caching in the current
4390 Linux kernel design. */
4391 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4392 {
4393 if (!cifsInode)
4394 return true;
4395
4396 if (is_inode_writable(cifsInode)) {
4397 /* This inode is open for write at least once */
4398 struct cifs_sb_info *cifs_sb;
4399
4400 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4401 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4402 /* since no page cache to corrupt on directio
4403 we can change size safely */
4404 return true;
4405 }
4406
4407 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4408 return true;
4409
4410 return false;
4411 } else
4412 return true;
4413 }
4414
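/*
 * ->write_begin(): grab and prepare the page being written. The read
 * from the server is skipped for full-page writes, already-uptodate
 * pages, and (when we hold a read oplock) pages beyond EOF or whose
 * existing data the write will completely overwrite; otherwise the
 * current contents are read in first.
 */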
4415 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4416 loff_t pos, unsigned len, unsigned flags,
4417 struct page **pagep, void **fsdata)
4418 {
4419 int oncethru = 0;
4420 pgoff_t index = pos >> PAGE_SHIFT;
4421 loff_t offset = pos & (PAGE_SIZE - 1);
4422 loff_t page_start = pos & PAGE_MASK;
4423 loff_t i_size;
4424 struct page *page;
4425 int rc = 0;
4426
4427 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4428
4429 start:
4430 page = grab_cache_page_write_begin(mapping, index, flags);
4431 if (!page) {
4432 rc = -ENOMEM;
4433 goto out;
4434 }
4435
4436 if (PageUptodate(page))
4437 goto out;
4438
4439 /*
4440 * If we write a full page it will be up to date, no need to read from
4441 * the server. If the write is short, we'll end up doing a sync write
4442 * instead.
4443 */
4444 if (len == PAGE_SIZE)
4445 goto out;
4446
4447 /*
4448 * optimize away the read when we have an oplock, and we're not
4449 * expecting to use any of the data we'd be reading in. That
4450 * is, when the page lies beyond the EOF, or straddles the EOF
4451 * and the write will cover all of the existing data.
4452 */
4453 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4454 i_size = i_size_read(mapping->host);
4455 if (page_start >= i_size ||
4456 (offset == 0 && (pos + len) >= i_size)) {
4457 zero_user_segments(page, 0, offset,
4458 offset + len,
4459 PAGE_SIZE);
4460 /*
4461 * PageChecked means that the parts of the page
4462 * to which we're not writing are considered up
4463 * to date. Once the data is copied to the
4464 * page, it can be set uptodate.
4465 */
4466 SetPageChecked(page);
4467 goto out;
4468 }
4469 }
4470
4471 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4472 /*
4473 * might as well read a page, it is fast enough. If we get
4474 * an error, we don't need to return it. cifs_write_end will
4475 * do a sync write instead since PG_uptodate isn't set.
4476 */
4477 cifs_readpage_worker(file, page, &page_start);
4478 put_page(page);
4479 oncethru = 1;
4480 goto start;
4481 } else {
4482 /* we could try using another file handle if there is one,
4483 but how would we lock it to prevent a close of that handle
4484 from racing with this read? In any case, this will be
4485 written out by write_end, so it is fine */
4486 }
4487 out:
4488 *pagep = page;
4489 return rc;
4490 }
4491
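/*
 * ->releasepage(): never release a page that still carries private
 * data; otherwise defer to fscache to decide whether it can be freed.
 */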
4492 static int cifs_release_page(struct page *page, gfp_t gfp)
4493 {
4494 if (PagePrivate(page))
4495 return 0;
4496
4497 return cifs_fscache_release_page(page, gfp);
4498 }
4499
4500 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4501 unsigned int length)
4502 {
4503 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4504
4505 if (offset == 0 && length == PAGE_SIZE)
4506 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4507 }
4508
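/*
 * ->launder_page(): synchronously write back a dirty page before it is
 * invalidated, then drop any copy fscache holds for it.
 */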
4509 static int cifs_launder_page(struct page *page)
4510 {
4511 int rc = 0;
4512 loff_t range_start = page_offset(page);
4513 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4514 struct writeback_control wbc = {
4515 .sync_mode = WB_SYNC_ALL,
4516 .nr_to_write = 0,
4517 .range_start = range_start,
4518 .range_end = range_end,
4519 };
4520
4521 cifs_dbg(FYI, "Launder page: %p\n", page);
4522
4523 if (clear_page_dirty_for_io(page))
4524 rc = cifs_writepage_locked(page, &wbc);
4525
4526 cifs_fscache_invalidate_page(page, page->mapping->host);
4527 return rc;
4528 }
4529
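/*
 * Work item run when the server breaks our oplock: wait for pending
 * writers, downgrade the cached oplock state, flush (and, if read
 * caching is lost, invalidate) the page cache, push cached byte-range
 * locks to the server, and acknowledge the break unless it was
 * cancelled by a reconnect.
 */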
4530 void cifs_oplock_break(struct work_struct *work)
4531 {
4532 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4533 oplock_break);
4534 struct inode *inode = d_inode(cfile->dentry);
4535 struct cifsInodeInfo *cinode = CIFS_I(inode);
4536 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4537 struct TCP_Server_Info *server = tcon->ses->server;
4538 int rc = 0;
4539
4540 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4541 TASK_UNINTERRUPTIBLE);
4542
4543 server->ops->downgrade_oplock(server, cinode,
4544 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4545
4546 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4547 cifs_has_mand_locks(cinode)) {
4548 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4549 inode);
4550 cinode->oplock = 0;
4551 }
4552
4553 if (inode && S_ISREG(inode->i_mode)) {
4554 if (CIFS_CACHE_READ(cinode))
4555 break_lease(inode, O_RDONLY);
4556 else
4557 break_lease(inode, O_WRONLY);
4558 rc = filemap_fdatawrite(inode->i_mapping);
4559 if (!CIFS_CACHE_READ(cinode)) {
4560 rc = filemap_fdatawait(inode->i_mapping);
4561 mapping_set_error(inode->i_mapping, rc);
4562 cifs_zap_mapping(inode);
4563 }
4564 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4565 }
4566
4567 rc = cifs_push_locks(cfile);
4568 if (rc)
4569 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4570
4571 /*
4572 * Releasing a stale oplock after a recent reconnect of the SMB session,
4573 * using a now-invalid file handle, is not a data integrity issue, but do
4574 * not bother sending an oplock release if the session to the server is
4575 * still disconnected, since the server has already released the oplock.
4576 */
4577 if (!cfile->oplock_break_cancelled) {
4578 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4579 cinode);
4580 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4581 }
4582 cifs_done_oplock_break(cinode);
4583 }
4584
4585 /*
4586 * The presence of cifs_direct_io() in the address space ops vector
4587 * allows opens with the O_DIRECT flag, which would have failed otherwise.
4588 *
4589 * In non-cached mode (mount with cache=none), we shunt off direct read
4590 * and write requests, so this method should never be called.
4591 *
4592 * Direct IO is not yet supported in the cached mode.
4593 */
4594 static ssize_t
4595 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4596 {
4597 /*
4598 * FIXME
4599 * Eventually need to support direct IO for non forcedirectio mounts
4600 */
4601 return -EINVAL;
4602 }
4603
4604
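/*
 * Default address space operations, used when the server's buffer size
 * is large enough for cifs_readpages (see cifs_addr_ops_smallbuf below
 * for the alternative).
 */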
4605 const struct address_space_operations cifs_addr_ops = {
4606 .readpage = cifs_readpage,
4607 .readpages = cifs_readpages,
4608 .writepage = cifs_writepage,
4609 .writepages = cifs_writepages,
4610 .write_begin = cifs_write_begin,
4611 .write_end = cifs_write_end,
4612 .set_page_dirty = __set_page_dirty_nobuffers,
4613 .releasepage = cifs_release_page,
4614 .direct_IO = cifs_direct_io,
4615 .invalidatepage = cifs_invalidate_page,
4616 .launder_page = cifs_launder_page,
4617 };
4618
4619 /*
4620 * cifs_readpages requires the server to support a buffer large enough to
4621 * contain the header plus one complete page of data. Otherwise, we need
4622 * to leave cifs_readpages out of the address space operations.
4623 */
4624 const struct address_space_operations cifs_addr_ops_smallbuf = {
4625 .readpage = cifs_readpage,
4626 .writepage = cifs_writepage,
4627 .writepages = cifs_writepages,
4628 .write_begin = cifs_write_begin,
4629 .write_end = cifs_write_end,
4630 .set_page_dirty = __set_page_dirty_nobuffers,
4631 .releasepage = cifs_release_page,
4632 .invalidatepage = cifs_invalidate_page,
4633 .launder_page = cifs_launder_page,
4634 };