1 /*
2 * fs/cifs/file.c
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
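/* Map the O_ACCMODE bits of the VFS open flags to SMB desired access flags */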
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request and
56 can cause unnecessary access-denied errors on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
59 }
60
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
64 }
65
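/* Translate VFS open flags into the SMB_O_* flags used by the POSIX open call */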
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68 u32 posix_flags = 0;
69
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
76
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
84
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
96
97 return posix_flags;
98 }
99
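/* Choose the SMB create disposition matching the O_CREAT/O_EXCL/O_TRUNC combination */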
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
112 }
113
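/*
 * Open a file using the POSIX extensions (CIFSPOSIXCreate) and, if the caller
 * requested it, fill in or refresh the corresponding inode from the returned
 * attributes.
 */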
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
125
126 cifs_dbg(FYI, "posix open %s\n", full_path);
127
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
131
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
136 }
137
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
140
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
146
147 if (rc)
148 goto posix_open_ret;
149
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
155
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
165 }
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
168 }
169
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
173 }
174
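/*
 * Open a file with the standard NT/SMB open call and refresh the inode
 * metadata from the information returned by the server.
 */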
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
179 {
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
187
188 if (!server->ops->open)
189 return -ENOSYS;
190
191 desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194 * open flag mapping table:
195 *
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
203 *
204 * Note that there is no direct match for the disposition
205 * FILE_SUPERSEDE (ie create whether or not the file exists).
206 * O_CREAT | O_TRUNC is similar, but it truncates the existing
207 * file rather than creating a new one as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on the open call).
209 *
210 * O_SYNC is a reasonable match to the CIFS writethrough flag
211 * and the read/write flags match reasonably. O_LARGEFILE
212 * is irrelevant because largefile support is always used
213 * by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
215 *********************************************************************/
216
217 disposition = cifs_get_disposition(f_flags);
218
219 /* BB pass O_SYNC flag through on file attributes .. BB */
220
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
231
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
234
235 oparms.tcon = tcon;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
241 oparms.fid = fid;
242 oparms.reconnect = false;
243
244 rc = server->ops->open(xid, &oparms, oplock, buf);
245
246 if (rc)
247 goto out;
248
249 if (tcon->unix_ext)
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 xid);
252 else
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254 xid, fid);
255
256 out:
257 kfree(buf);
258 return rc;
259 }
260
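/* Return true if any open instance of this inode holds mandatory byte-range locks */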
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264 struct cifs_fid_locks *cur;
265 bool has_locks = false;
266
267 down_read(&cinode->lock_sem);
268 list_for_each_entry(cur, &cinode->llist, llist) {
269 if (!list_empty(&cur->locks)) {
270 has_locks = true;
271 break;
272 }
273 }
274 up_read(&cinode->lock_sem);
275 return has_locks;
276 }
277
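/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle, link it
 * into the per-inode and per-tcon open file lists and apply the granted oplock.
 */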
278 struct cifsFileInfo *
279 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
280 struct tcon_link *tlink, __u32 oplock)
281 {
282 struct dentry *dentry = file_dentry(file);
283 struct inode *inode = d_inode(dentry);
284 struct cifsInodeInfo *cinode = CIFS_I(inode);
285 struct cifsFileInfo *cfile;
286 struct cifs_fid_locks *fdlocks;
287 struct cifs_tcon *tcon = tlink_tcon(tlink);
288 struct TCP_Server_Info *server = tcon->ses->server;
289
290 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
291 if (cfile == NULL)
292 return cfile;
293
294 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
295 if (!fdlocks) {
296 kfree(cfile);
297 return NULL;
298 }
299
300 INIT_LIST_HEAD(&fdlocks->locks);
301 fdlocks->cfile = cfile;
302 cfile->llist = fdlocks;
303 down_write(&cinode->lock_sem);
304 list_add(&fdlocks->llist, &cinode->llist);
305 up_write(&cinode->lock_sem);
306
307 cfile->count = 1;
308 cfile->pid = current->tgid;
309 cfile->uid = current_fsuid();
310 cfile->dentry = dget(dentry);
311 cfile->f_flags = file->f_flags;
312 cfile->invalidHandle = false;
313 cfile->tlink = cifs_get_tlink(tlink);
314 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
315 mutex_init(&cfile->fh_mutex);
316 spin_lock_init(&cfile->file_info_lock);
317
318 cifs_sb_active(inode->i_sb);
319
320 /*
321 * If the server returned a read oplock and we have mandatory brlocks,
322 * set oplock level to None.
323 */
324 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
325 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
326 oplock = 0;
327 }
328
329 spin_lock(&tcon->open_file_lock);
330 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
331 oplock = fid->pending_open->oplock;
332 list_del(&fid->pending_open->olist);
333
334 fid->purge_cache = false;
335 server->ops->set_fid(cfile, fid, oplock);
336
337 list_add(&cfile->tlist, &tcon->openFileList);
338 atomic_inc(&tcon->num_local_opens);
339
340 /* if readable file instance, put it first in the list */
341 if (file->f_mode & FMODE_READ)
342 list_add(&cfile->flist, &cinode->openFileList);
343 else
344 list_add_tail(&cfile->flist, &cinode->openFileList);
345 spin_unlock(&tcon->open_file_lock);
346
347 if (fid->purge_cache)
348 cifs_zap_mapping(inode);
349
350 file->private_data = cfile;
351 return cfile;
352 }
353
354 struct cifsFileInfo *
355 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
356 {
357 spin_lock(&cifs_file->file_info_lock);
358 cifsFileInfo_get_locked(cifs_file);
359 spin_unlock(&cifs_file->file_info_lock);
360 return cifs_file;
361 }
362
363 /*
364 * Release a reference on the file private data. This may involve closing
365 * the filehandle out on the server. Must be called without holding
366 * tcon->open_file_lock and cifs_file->file_info_lock.
367 */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370 struct inode *inode = d_inode(cifs_file->dentry);
371 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372 struct TCP_Server_Info *server = tcon->ses->server;
373 struct cifsInodeInfo *cifsi = CIFS_I(inode);
374 struct super_block *sb = inode->i_sb;
375 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376 struct cifsLockInfo *li, *tmp;
377 struct cifs_fid fid;
378 struct cifs_pending_open open;
379 bool oplock_break_cancelled;
380
381 spin_lock(&tcon->open_file_lock);
382
383 spin_lock(&cifs_file->file_info_lock);
384 if (--cifs_file->count > 0) {
385 spin_unlock(&cifs_file->file_info_lock);
386 spin_unlock(&tcon->open_file_lock);
387 return;
388 }
389 spin_unlock(&cifs_file->file_info_lock);
390
391 if (server->ops->get_lease_key)
392 server->ops->get_lease_key(inode, &fid);
393
394 /* store open in pending opens to make sure we don't miss lease break */
395 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397 /* remove it from the lists */
398 list_del(&cifs_file->flist);
399 list_del(&cifs_file->tlist);
400 atomic_dec(&tcon->num_local_opens);
401
402 if (list_empty(&cifsi->openFileList)) {
403 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404 d_inode(cifs_file->dentry));
405 /*
406 * In strict cache mode we need to invalidate the mapping on the
407 * last close because it may cause an error when we open this file
408 * again and get at least a level II oplock.
409 */
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412 cifs_set_oplock_level(cifsi, 0);
413 }
414
415 spin_unlock(&tcon->open_file_lock);
416
417 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420 struct TCP_Server_Info *server = tcon->ses->server;
421 unsigned int xid;
422
423 xid = get_xid();
424 if (server->ops->close)
425 server->ops->close(xid, tcon, &cifs_file->fid);
426 _free_xid(xid);
427 }
428
429 if (oplock_break_cancelled)
430 cifs_done_oplock_break(cifsi);
431
432 cifs_del_pending_open(&open);
433
434 /*
435 * Delete any outstanding lock records. We'll lose them when the file
436 * is closed anyway.
437 */
438 down_write(&cifsi->lock_sem);
439 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440 list_del(&li->llist);
441 cifs_del_lock_waiters(li);
442 kfree(li);
443 }
444 list_del(&cifs_file->llist->llist);
445 kfree(cifs_file->llist);
446 up_write(&cifsi->lock_sem);
447
448 cifs_put_tlink(cifs_file->tlink);
449 dput(cifs_file->dentry);
450 cifs_sb_deactive(sb);
451 kfree(cifs_file);
452 }
453
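/*
 * VFS ->open() entry point: try a POSIX open first when the server supports
 * it, otherwise fall back to a regular SMB open, then set up the cifsFileInfo.
 */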
454 int cifs_open(struct inode *inode, struct file *file)
455
456 {
457 int rc = -EACCES;
458 unsigned int xid;
459 __u32 oplock;
460 struct cifs_sb_info *cifs_sb;
461 struct TCP_Server_Info *server;
462 struct cifs_tcon *tcon;
463 struct tcon_link *tlink;
464 struct cifsFileInfo *cfile = NULL;
465 char *full_path = NULL;
466 bool posix_open_ok = false;
467 struct cifs_fid fid;
468 struct cifs_pending_open open;
469
470 xid = get_xid();
471
472 cifs_sb = CIFS_SB(inode->i_sb);
473 tlink = cifs_sb_tlink(cifs_sb);
474 if (IS_ERR(tlink)) {
475 free_xid(xid);
476 return PTR_ERR(tlink);
477 }
478 tcon = tlink_tcon(tlink);
479 server = tcon->ses->server;
480
481 full_path = build_path_from_dentry(file_dentry(file));
482 if (full_path == NULL) {
483 rc = -ENOMEM;
484 goto out;
485 }
486
487 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
488 inode, file->f_flags, full_path);
489
490 if (file->f_flags & O_DIRECT &&
491 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
492 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
493 file->f_op = &cifs_file_direct_nobrl_ops;
494 else
495 file->f_op = &cifs_file_direct_ops;
496 }
497
498 if (server->oplocks)
499 oplock = REQ_OPLOCK;
500 else
501 oplock = 0;
502
503 if (!tcon->broken_posix_open && tcon->unix_ext &&
504 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
505 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506 /* can not refresh inode info since size could be stale */
507 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
508 cifs_sb->mnt_file_mode /* ignored */,
509 file->f_flags, &oplock, &fid.netfid, xid);
510 if (rc == 0) {
511 cifs_dbg(FYI, "posix open succeeded\n");
512 posix_open_ok = true;
513 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
514 if (tcon->ses->serverNOS)
515 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
516 tcon->ses->serverName,
517 tcon->ses->serverNOS);
518 tcon->broken_posix_open = true;
519 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
520 (rc != -EOPNOTSUPP)) /* path not found or net err */
521 goto out;
522 /*
523 * Else fallthrough to retry open the old way on network i/o
524 * or DFS errors.
525 */
526 }
527
528 if (server->ops->get_lease_key)
529 server->ops->get_lease_key(inode, &fid);
530
531 cifs_add_pending_open(&fid, tlink, &open);
532
533 if (!posix_open_ok) {
534 if (server->ops->get_lease_key)
535 server->ops->get_lease_key(inode, &fid);
536
537 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538 file->f_flags, &oplock, &fid, xid);
539 if (rc) {
540 cifs_del_pending_open(&open);
541 goto out;
542 }
543 }
544
545 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546 if (cfile == NULL) {
547 if (server->ops->close)
548 server->ops->close(xid, tcon, &fid);
549 cifs_del_pending_open(&open);
550 rc = -ENOMEM;
551 goto out;
552 }
553
554 cifs_fscache_set_inode_cookie(inode, file);
555
556 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557 /*
558 * Time to set mode which we can not set earlier due to
559 * problems creating new read-only files.
560 */
561 struct cifs_unix_set_info_args args = {
562 .mode = inode->i_mode,
563 .uid = INVALID_UID, /* no change */
564 .gid = INVALID_GID, /* no change */
565 .ctime = NO_CHANGE_64,
566 .atime = NO_CHANGE_64,
567 .mtime = NO_CHANGE_64,
568 .device = 0,
569 };
570 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571 cfile->pid);
572 }
573
574 out:
575 kfree(full_path);
576 free_xid(xid);
577 cifs_put_tlink(tlink);
578 return rc;
579 }
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
583 /*
584 * Try to reacquire byte range locks that were released when session
585 * to server was lost.
586 */
587 static int
588 cifs_relock_file(struct cifsFileInfo *cfile)
589 {
590 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
591 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
592 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
593 int rc = 0;
594
595 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
596 if (cinode->can_cache_brlcks) {
597 /* can cache locks - no need to relock */
598 up_read(&cinode->lock_sem);
599 return rc;
600 }
601
602 if (cap_unix(tcon->ses) &&
603 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
604 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
605 rc = cifs_push_posix_locks(cfile);
606 else
607 rc = tcon->ses->server->ops->push_mand_locks(cfile);
608
609 up_read(&cinode->lock_sem);
610 return rc;
611 }
612
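/*
 * Reopen a file handle that was invalidated (e.g. by reconnect), optionally
 * flushing cached data and refreshing the inode, then restore byte-range locks.
 */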
613 static int
614 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
615 {
616 int rc = -EACCES;
617 unsigned int xid;
618 __u32 oplock;
619 struct cifs_sb_info *cifs_sb;
620 struct cifs_tcon *tcon;
621 struct TCP_Server_Info *server;
622 struct cifsInodeInfo *cinode;
623 struct inode *inode;
624 char *full_path = NULL;
625 int desired_access;
626 int disposition = FILE_OPEN;
627 int create_options = CREATE_NOT_DIR;
628 struct cifs_open_parms oparms;
629
630 xid = get_xid();
631 mutex_lock(&cfile->fh_mutex);
632 if (!cfile->invalidHandle) {
633 mutex_unlock(&cfile->fh_mutex);
634 rc = 0;
635 free_xid(xid);
636 return rc;
637 }
638
639 inode = d_inode(cfile->dentry);
640 cifs_sb = CIFS_SB(inode->i_sb);
641 tcon = tlink_tcon(cfile->tlink);
642 server = tcon->ses->server;
643
644 /*
645 * Cannot grab the rename sem here: various ops, including those
646 * that already hold the rename sem, can end up causing writepage to
647 * be called, and if the server was down we end up here; we can never
648 * tell whether the caller already holds the rename_sem.
649 */
650 full_path = build_path_from_dentry(cfile->dentry);
651 if (full_path == NULL) {
652 rc = -ENOMEM;
653 mutex_unlock(&cfile->fh_mutex);
654 free_xid(xid);
655 return rc;
656 }
657
658 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
659 inode, cfile->f_flags, full_path);
660
661 if (tcon->ses->server->oplocks)
662 oplock = REQ_OPLOCK;
663 else
664 oplock = 0;
665
666 if (tcon->unix_ext && cap_unix(tcon->ses) &&
667 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
668 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
669 /*
670 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
671 * original open. Must mask them off for a reopen.
672 */
673 unsigned int oflags = cfile->f_flags &
674 ~(O_CREAT | O_EXCL | O_TRUNC);
675
676 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
677 cifs_sb->mnt_file_mode /* ignored */,
678 oflags, &oplock, &cfile->fid.netfid, xid);
679 if (rc == 0) {
680 cifs_dbg(FYI, "posix reopen succeeded\n");
681 oparms.reconnect = true;
682 goto reopen_success;
683 }
684 /*
685 * Fall through to retry the open the old way on errors; especially
686 * in the reconnect path it is important to retry hard.
687 */
688 }
689
690 desired_access = cifs_convert_flags(cfile->f_flags);
691
692 if (backup_cred(cifs_sb))
693 create_options |= CREATE_OPEN_BACKUP_INTENT;
694
695 if (server->ops->get_lease_key)
696 server->ops->get_lease_key(inode, &cfile->fid);
697
698 oparms.tcon = tcon;
699 oparms.cifs_sb = cifs_sb;
700 oparms.desired_access = desired_access;
701 oparms.create_options = create_options;
702 oparms.disposition = disposition;
703 oparms.path = full_path;
704 oparms.fid = &cfile->fid;
705 oparms.reconnect = true;
706
707 /*
708 * Cannot refresh the inode by passing in a file_info buf to be
709 * returned by ops->open and then calling get_inode_info with the
710 * returned buf, since the file might have write-behind data that needs
711 * to be flushed and the server's version of the file size can be stale.
712 * If we knew for sure that the inode was not dirty locally we could do this.
713 */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 if (rc == -ENOENT && oparms.reconnect == false) {
716 /* durable handle timeout is expired - open the file again */
717 rc = server->ops->open(xid, &oparms, &oplock, NULL);
718 /* indicate that we need to relock the file */
719 oparms.reconnect = true;
720 }
721
722 if (rc) {
723 mutex_unlock(&cfile->fh_mutex);
724 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
725 cifs_dbg(FYI, "oplock: %d\n", oplock);
726 goto reopen_error_exit;
727 }
728
729 reopen_success:
730 cfile->invalidHandle = false;
731 mutex_unlock(&cfile->fh_mutex);
732 cinode = CIFS_I(inode);
733
734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping);
736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
738
739 if (tcon->unix_ext)
740 rc = cifs_get_inode_info_unix(&inode, full_path,
741 inode->i_sb, xid);
742 else
743 rc = cifs_get_inode_info(&inode, full_path, NULL,
744 inode->i_sb, xid, NULL);
745 }
746 /*
747 * Otherwise we are already writing data out to the server and could
748 * deadlock if we tried to flush it; and since we do not know whether we
749 * have data that would invalidate the current end of file on the
750 * server, we cannot go to the server to get the new inode info.
751 */
752
753 /*
754 * If the server returned a read oplock and we have mandatory brlocks,
755 * set oplock level to None.
756 */
757 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
758 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
759 oplock = 0;
760 }
761
762 server->ops->set_fid(cfile, &cfile->fid, oplock);
763 if (oparms.reconnect)
764 cifs_relock_file(cfile);
765
766 reopen_error_exit:
767 kfree(full_path);
768 free_xid(xid);
769 return rc;
770 }
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774 if (file->private_data != NULL) {
775 cifsFileInfo_put(file->private_data);
776 file->private_data = NULL;
777 }
778
779 /* return code from the ->release op is always ignored */
780 return 0;
781 }
782
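/* Walk the tree connection's open file list and reopen any invalidated persistent handles */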
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786 struct cifsFileInfo *open_file;
787 struct list_head *tmp;
788 struct list_head *tmp1;
789 struct list_head tmp_list;
790
791 if (!tcon->use_persistent || !tcon->need_reopen_files)
792 return;
793
794 tcon->need_reopen_files = false;
795
796 cifs_dbg(FYI, "Reopen persistent handles\n");
797 INIT_LIST_HEAD(&tmp_list);
798
799 /* list all files open on tree connection, reopen resilient handles */
800 spin_lock(&tcon->open_file_lock);
801 list_for_each(tmp, &tcon->openFileList) {
802 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803 if (!open_file->invalidHandle)
804 continue;
805 cifsFileInfo_get(open_file);
806 list_add_tail(&open_file->rlist, &tmp_list);
807 }
808 spin_unlock(&tcon->open_file_lock);
809
810 list_for_each_safe(tmp, tmp1, &tmp_list) {
811 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812 if (cifs_reopen_file(open_file, false /* do not flush */))
813 tcon->need_reopen_files = true;
814 list_del_init(&open_file->rlist);
815 cifsFileInfo_put(open_file);
816 }
817 }
818
819 int cifs_closedir(struct inode *inode, struct file *file)
820 {
821 int rc = 0;
822 unsigned int xid;
823 struct cifsFileInfo *cfile = file->private_data;
824 struct cifs_tcon *tcon;
825 struct TCP_Server_Info *server;
826 char *buf;
827
828 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
829
830 if (cfile == NULL)
831 return rc;
832
833 xid = get_xid();
834 tcon = tlink_tcon(cfile->tlink);
835 server = tcon->ses->server;
836
837 cifs_dbg(FYI, "Freeing private data in close dir\n");
838 spin_lock(&cfile->file_info_lock);
839 if (server->ops->dir_needs_close(cfile)) {
840 cfile->invalidHandle = true;
841 spin_unlock(&cfile->file_info_lock);
842 if (server->ops->close_dir)
843 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
844 else
845 rc = -ENOSYS;
846 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
847 /* not much we can do if it fails anyway, ignore rc */
848 rc = 0;
849 } else
850 spin_unlock(&cfile->file_info_lock);
851
852 buf = cfile->srch_inf.ntwrk_buf_start;
853 if (buf) {
854 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
855 cfile->srch_inf.ntwrk_buf_start = NULL;
856 if (cfile->srch_inf.smallBuf)
857 cifs_small_buf_release(buf);
858 else
859 cifs_buf_release(buf);
860 }
861
862 cifs_put_tlink(cfile->tlink);
863 kfree(file->private_data);
864 file->private_data = NULL;
865 /* BB can we lock the filestruct while this is going on? */
866 free_xid(xid);
867 return rc;
868 }
869
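/* Allocate and initialize a cifsLockInfo describing one byte-range lock */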
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873 struct cifsLockInfo *lock =
874 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875 if (!lock)
876 return lock;
877 lock->offset = offset;
878 lock->length = length;
879 lock->type = type;
880 lock->pid = current->tgid;
881 lock->flags = flags;
882 INIT_LIST_HEAD(&lock->blist);
883 init_waitqueue_head(&lock->block_q);
884 return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890 struct cifsLockInfo *li, *tmp;
891 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892 list_del_init(&li->blist);
893 wake_up(&li->block_q);
894 }
895 }
896
897 #define CIFS_LOCK_OP 0
898 #define CIFS_READ_OP 1
899 #define CIFS_WRITE_OP 2
900
901 /* @rw_check : 0 - no op, 1 - read, 2 - write */
902 static bool
903 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
904 __u64 length, __u8 type, __u16 flags,
905 struct cifsFileInfo *cfile,
906 struct cifsLockInfo **conf_lock, int rw_check)
907 {
908 struct cifsLockInfo *li;
909 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
910 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
911
912 list_for_each_entry(li, &fdlocks->locks, llist) {
913 if (offset + length <= li->offset ||
914 offset >= li->offset + li->length)
915 continue;
916 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
917 server->ops->compare_fids(cfile, cur_cfile)) {
918 /* shared lock prevents write op through the same fid */
919 if (!(li->type & server->vals->shared_lock_type) ||
920 rw_check != CIFS_WRITE_OP)
921 continue;
922 }
923 if ((type & server->vals->shared_lock_type) &&
924 ((server->ops->compare_fids(cfile, cur_cfile) &&
925 current->tgid == li->pid) || type == li->type))
926 continue;
927 if (rw_check == CIFS_LOCK_OP &&
928 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
929 server->ops->compare_fids(cfile, cur_cfile))
930 continue;
931 if (conf_lock)
932 *conf_lock = li;
933 return true;
934 }
935 return false;
936 }
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940 __u8 type, __u16 flags,
941 struct cifsLockInfo **conf_lock, int rw_check)
942 {
943 bool rc = false;
944 struct cifs_fid_locks *cur;
945 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947 list_for_each_entry(cur, &cinode->llist, llist) {
948 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949 flags, cfile, conf_lock,
950 rw_check);
951 if (rc)
952 break;
953 }
954
955 return rc;
956 }
957
958 /*
959 * Check if there is another lock that prevents us from setting the lock
960 * (mandatory style). If such a lock exists, update the flock structure with
961 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
962 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
963 * send a request to the server, or 1 otherwise.
964 */
965 static int
966 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
967 __u8 type, struct file_lock *flock)
968 {
969 int rc = 0;
970 struct cifsLockInfo *conf_lock;
971 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
972 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
973 bool exist;
974
975 down_read(&cinode->lock_sem);
976
977 exist = cifs_find_lock_conflict(cfile, offset, length, type,
978 flock->fl_flags, &conf_lock,
979 CIFS_LOCK_OP);
980 if (exist) {
981 flock->fl_start = conf_lock->offset;
982 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
983 flock->fl_pid = conf_lock->pid;
984 if (conf_lock->type & server->vals->shared_lock_type)
985 flock->fl_type = F_RDLCK;
986 else
987 flock->fl_type = F_WRLCK;
988 } else if (!cinode->can_cache_brlcks)
989 rc = 1;
990 else
991 flock->fl_type = F_UNLCK;
992
993 up_read(&cinode->lock_sem);
994 return rc;
995 }
996
997 static void
998 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999 {
1000 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001 down_write(&cinode->lock_sem);
1002 list_add_tail(&lock->llist, &cfile->llist->locks);
1003 up_write(&cinode->lock_sem);
1004 }
1005
1006 /*
1007 * Set the byte-range lock (mandatory style). Returns:
1008 * 1) 0, if we set the lock and don't need to request to the server;
1009 * 2) 1, if no locks prevent us but we need to request to the server;
1010 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011 */
1012 static int
1013 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014 bool wait)
1015 {
1016 struct cifsLockInfo *conf_lock;
1017 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018 bool exist;
1019 int rc = 0;
1020
1021 try_again:
1022 exist = false;
1023 down_write(&cinode->lock_sem);
1024
1025 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026 lock->type, lock->flags, &conf_lock,
1027 CIFS_LOCK_OP);
1028 if (!exist && cinode->can_cache_brlcks) {
1029 list_add_tail(&lock->llist, &cfile->llist->locks);
1030 up_write(&cinode->lock_sem);
1031 return rc;
1032 }
1033
1034 if (!exist)
1035 rc = 1;
1036 else if (!wait)
1037 rc = -EACCES;
1038 else {
1039 list_add_tail(&lock->blist, &conf_lock->blist);
1040 up_write(&cinode->lock_sem);
1041 rc = wait_event_interruptible(lock->block_q,
1042 (lock->blist.prev == &lock->blist) &&
1043 (lock->blist.next == &lock->blist));
1044 if (!rc)
1045 goto try_again;
1046 down_write(&cinode->lock_sem);
1047 list_del_init(&lock->blist);
1048 }
1049
1050 up_write(&cinode->lock_sem);
1051 return rc;
1052 }
1053
1054 /*
1055 * Check if there is another lock that prevents us from setting the lock
1056 * (posix style). If such a lock exists, update the flock structure with
1057 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1058 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1059 * send a request to the server, or 1 otherwise.
1060 */
1061 static int
1062 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1063 {
1064 int rc = 0;
1065 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066 unsigned char saved_type = flock->fl_type;
1067
1068 if ((flock->fl_flags & FL_POSIX) == 0)
1069 return 1;
1070
1071 down_read(&cinode->lock_sem);
1072 posix_test_lock(file, flock);
1073
1074 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1075 flock->fl_type = saved_type;
1076 rc = 1;
1077 }
1078
1079 up_read(&cinode->lock_sem);
1080 return rc;
1081 }
1082
1083 /*
1084 * Set the byte-range lock (posix style). Returns:
1085 * 1) 0, if we set the lock and don't need to request to the server;
1086 * 2) 1, if we need to request to the server;
1087 * 3) <0, if an error occurs while setting the lock.
1088 */
1089 static int
1090 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1091 {
1092 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093 int rc = 1;
1094
1095 if ((flock->fl_flags & FL_POSIX) == 0)
1096 return rc;
1097
1098 try_again:
1099 down_write(&cinode->lock_sem);
1100 if (!cinode->can_cache_brlcks) {
1101 up_write(&cinode->lock_sem);
1102 return rc;
1103 }
1104
1105 rc = posix_lock_file(file, flock, NULL);
1106 up_write(&cinode->lock_sem);
1107 if (rc == FILE_LOCK_DEFERRED) {
1108 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109 if (!rc)
1110 goto try_again;
1111 locks_delete_block(flock);
1112 }
1113 return rc;
1114 }
1115
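/* Send all cached byte-range locks for this open file to the server in LOCKING_ANDX batches */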
1116 int
1117 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118 {
1119 unsigned int xid;
1120 int rc = 0, stored_rc;
1121 struct cifsLockInfo *li, *tmp;
1122 struct cifs_tcon *tcon;
1123 unsigned int num, max_num, max_buf;
1124 LOCKING_ANDX_RANGE *buf, *cur;
1125 static const int types[] = {
1126 LOCKING_ANDX_LARGE_FILES,
1127 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1128 };
1129 int i;
1130
1131 xid = get_xid();
1132 tcon = tlink_tcon(cfile->tlink);
1133
1134 /*
1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136 * and check it before using.
1137 */
1138 max_buf = tcon->ses->server->maxBuf;
1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140 free_xid(xid);
1141 return -EINVAL;
1142 }
1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1149 sizeof(LOCKING_ANDX_RANGE);
1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151 if (!buf) {
1152 free_xid(xid);
1153 return -ENOMEM;
1154 }
1155
1156 for (i = 0; i < 2; i++) {
1157 cur = buf;
1158 num = 0;
1159 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160 if (li->type != types[i])
1161 continue;
1162 cur->Pid = cpu_to_le16(li->pid);
1163 cur->LengthLow = cpu_to_le32((u32)li->length);
1164 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1165 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1166 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1167 if (++num == max_num) {
1168 stored_rc = cifs_lockv(xid, tcon,
1169 cfile->fid.netfid,
1170 (__u8)li->type, 0, num,
1171 buf);
1172 if (stored_rc)
1173 rc = stored_rc;
1174 cur = buf;
1175 num = 0;
1176 } else
1177 cur++;
1178 }
1179
1180 if (num) {
1181 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182 (__u8)types[i], 0, num, buf);
1183 if (stored_rc)
1184 rc = stored_rc;
1185 }
1186 }
1187
1188 kfree(buf);
1189 free_xid(xid);
1190 return rc;
1191 }
1192
1193 static __u32
1194 hash_lockowner(fl_owner_t owner)
1195 {
1196 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1197 }
1198
1199 struct lock_to_push {
1200 struct list_head llist;
1201 __u64 offset;
1202 __u64 length;
1203 __u32 pid;
1204 __u16 netfid;
1205 __u8 type;
1206 };
1207
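/* Push all cached POSIX (fcntl) byte-range locks for this file out to the server */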
1208 static int
1209 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210 {
1211 struct inode *inode = d_inode(cfile->dentry);
1212 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213 struct file_lock *flock;
1214 struct file_lock_context *flctx = inode->i_flctx;
1215 unsigned int count = 0, i;
1216 int rc = 0, xid, type;
1217 struct list_head locks_to_send, *el;
1218 struct lock_to_push *lck, *tmp;
1219 __u64 length;
1220
1221 xid = get_xid();
1222
1223 if (!flctx)
1224 goto out;
1225
1226 spin_lock(&flctx->flc_lock);
1227 list_for_each(el, &flctx->flc_posix) {
1228 count++;
1229 }
1230 spin_unlock(&flctx->flc_lock);
1231
1232 INIT_LIST_HEAD(&locks_to_send);
1233
1234 /*
1235 * Allocating count locks is enough because no FL_POSIX locks can be
1236 * added to the list while we are holding cinode->lock_sem that
1237 * protects locking operations of this inode.
1238 */
1239 for (i = 0; i < count; i++) {
1240 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1241 if (!lck) {
1242 rc = -ENOMEM;
1243 goto err_out;
1244 }
1245 list_add_tail(&lck->llist, &locks_to_send);
1246 }
1247
1248 el = locks_to_send.next;
1249 spin_lock(&flctx->flc_lock);
1250 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251 if (el == &locks_to_send) {
1252 /*
1253 * The list ended. We don't have enough allocated
1254 * structures - something is really wrong.
1255 */
1256 cifs_dbg(VFS, "Can't push all brlocks!\n");
1257 break;
1258 }
1259 length = 1 + flock->fl_end - flock->fl_start;
1260 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1261 type = CIFS_RDLCK;
1262 else
1263 type = CIFS_WRLCK;
1264 lck = list_entry(el, struct lock_to_push, llist);
1265 lck->pid = hash_lockowner(flock->fl_owner);
1266 lck->netfid = cfile->fid.netfid;
1267 lck->length = length;
1268 lck->type = type;
1269 lck->offset = flock->fl_start;
1270 }
1271 spin_unlock(&flctx->flc_lock);
1272
1273 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1274 int stored_rc;
1275
1276 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277 lck->offset, lck->length, NULL,
1278 lck->type, 0);
1279 if (stored_rc)
1280 rc = stored_rc;
1281 list_del(&lck->llist);
1282 kfree(lck);
1283 }
1284
1285 out:
1286 free_xid(xid);
1287 return rc;
1288 err_out:
1289 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1290 list_del(&lck->llist);
1291 kfree(lck);
1292 }
1293 goto out;
1294 }
1295
1296 static int
1297 cifs_push_locks(struct cifsFileInfo *cfile)
1298 {
1299 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302 int rc = 0;
1303
1304 /* we are going to update can_cache_brlcks here - need a write access */
1305 down_write(&cinode->lock_sem);
1306 if (!cinode->can_cache_brlcks) {
1307 up_write(&cinode->lock_sem);
1308 return rc;
1309 }
1310
1311 if (cap_unix(tcon->ses) &&
1312 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1313 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314 rc = cifs_push_posix_locks(cfile);
1315 else
1316 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317
1318 cinode->can_cache_brlcks = false;
1319 up_write(&cinode->lock_sem);
1320 return rc;
1321 }
1322
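/* Decode a struct file_lock into the CIFS lock type and lock/unlock/wait flags */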
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327 if (flock->fl_flags & FL_POSIX)
1328 cifs_dbg(FYI, "Posix\n");
1329 if (flock->fl_flags & FL_FLOCK)
1330 cifs_dbg(FYI, "Flock\n");
1331 if (flock->fl_flags & FL_SLEEP) {
1332 cifs_dbg(FYI, "Blocking lock\n");
1333 *wait_flag = true;
1334 }
1335 if (flock->fl_flags & FL_ACCESS)
1336 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337 if (flock->fl_flags & FL_LEASE)
1338 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339 if (flock->fl_flags &
1340 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344 *type = server->vals->large_lock_type;
1345 if (flock->fl_type == F_WRLCK) {
1346 cifs_dbg(FYI, "F_WRLCK\n");
1347 *type |= server->vals->exclusive_lock_type;
1348 *lock = 1;
1349 } else if (flock->fl_type == F_UNLCK) {
1350 cifs_dbg(FYI, "F_UNLCK\n");
1351 *type |= server->vals->unlock_lock_type;
1352 *unlock = 1;
1353 /* Check if unlock includes more than one lock range */
1354 } else if (flock->fl_type == F_RDLCK) {
1355 cifs_dbg(FYI, "F_RDLCK\n");
1356 *type |= server->vals->shared_lock_type;
1357 *lock = 1;
1358 } else if (flock->fl_type == F_EXLCK) {
1359 cifs_dbg(FYI, "F_EXLCK\n");
1360 *type |= server->vals->exclusive_lock_type;
1361 *lock = 1;
1362 } else if (flock->fl_type == F_SHLCK) {
1363 cifs_dbg(FYI, "F_SHLCK\n");
1364 *type |= server->vals->shared_lock_type;
1365 *lock = 1;
1366 } else
1367 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
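/* Handle F_GETLK: test for a conflicting lock locally and, if needed, on the server */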
1370 static int
1371 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372 bool wait_flag, bool posix_lck, unsigned int xid)
1373 {
1374 int rc = 0;
1375 __u64 length = 1 + flock->fl_end - flock->fl_start;
1376 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378 struct TCP_Server_Info *server = tcon->ses->server;
1379 __u16 netfid = cfile->fid.netfid;
1380
1381 if (posix_lck) {
1382 int posix_lock_type;
1383
1384 rc = cifs_posix_lock_test(file, flock);
1385 if (!rc)
1386 return rc;
1387
1388 if (type & server->vals->shared_lock_type)
1389 posix_lock_type = CIFS_RDLCK;
1390 else
1391 posix_lock_type = CIFS_WRLCK;
1392 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1393 hash_lockowner(flock->fl_owner),
1394 flock->fl_start, length, flock,
1395 posix_lock_type, wait_flag);
1396 return rc;
1397 }
1398
1399 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400 if (!rc)
1401 return rc;
1402
1403 /* BB we could chain these into one lock request BB */
1404 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1405 1, 0, false);
1406 if (rc == 0) {
1407 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1408 type, 0, 1, false);
1409 flock->fl_type = F_UNLCK;
1410 if (rc != 0)
1411 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1412 rc);
1413 return 0;
1414 }
1415
1416 if (type & server->vals->shared_lock_type) {
1417 flock->fl_type = F_WRLCK;
1418 return 0;
1419 }
1420
1421 type &= ~server->vals->exclusive_lock_type;
1422
1423 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1424 type | server->vals->shared_lock_type,
1425 1, 0, false);
1426 if (rc == 0) {
1427 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1428 type | server->vals->shared_lock_type, 0, 1, false);
1429 flock->fl_type = F_RDLCK;
1430 if (rc != 0)
1431 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1432 rc);
1433 } else
1434 flock->fl_type = F_WRLCK;
1435
1436 return 0;
1437 }
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442 struct list_head *li, *tmp;
1443 list_for_each_safe(li, tmp, source)
1444 list_move(li, dest);
1445 }
1446
1447 void
1448 cifs_free_llist(struct list_head *llist)
1449 {
1450 struct cifsLockInfo *li, *tmp;
1451 list_for_each_entry_safe(li, tmp, llist, llist) {
1452 cifs_del_lock_waiters(li);
1453 list_del(&li->llist);
1454 kfree(li);
1455 }
1456 }
1457
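/* Remove cached locks covered by an unlock request and send the unlocks to the server */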
1458 int
1459 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1460 unsigned int xid)
1461 {
1462 int rc = 0, stored_rc;
1463 static const int types[] = {
1464 LOCKING_ANDX_LARGE_FILES,
1465 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1466 };
1467 unsigned int i;
1468 unsigned int max_num, num, max_buf;
1469 LOCKING_ANDX_RANGE *buf, *cur;
1470 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472 struct cifsLockInfo *li, *tmp;
1473 __u64 length = 1 + flock->fl_end - flock->fl_start;
1474 struct list_head tmp_llist;
1475
1476 INIT_LIST_HEAD(&tmp_llist);
1477
1478 /*
1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480 * and check it before using.
1481 */
1482 max_buf = tcon->ses->server->maxBuf;
1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484 return -EINVAL;
1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1491 sizeof(LOCKING_ANDX_RANGE);
1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493 if (!buf)
1494 return -ENOMEM;
1495
1496 down_write(&cinode->lock_sem);
1497 for (i = 0; i < 2; i++) {
1498 cur = buf;
1499 num = 0;
1500 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501 if (flock->fl_start > li->offset ||
1502 (flock->fl_start + length) <
1503 (li->offset + li->length))
1504 continue;
1505 if (current->tgid != li->pid)
1506 continue;
1507 if (types[i] != li->type)
1508 continue;
1509 if (cinode->can_cache_brlcks) {
1510 /*
1511 * We can cache brlock requests - simply remove
1512 * a lock from the file's list.
1513 */
1514 list_del(&li->llist);
1515 cifs_del_lock_waiters(li);
1516 kfree(li);
1517 continue;
1518 }
1519 cur->Pid = cpu_to_le16(li->pid);
1520 cur->LengthLow = cpu_to_le32((u32)li->length);
1521 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1522 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1523 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1524 /*
1525 * We need to save a lock here to let us add it again to
1526 * the file's list if the unlock range request fails on
1527 * the server.
1528 */
1529 list_move(&li->llist, &tmp_llist);
1530 if (++num == max_num) {
1531 stored_rc = cifs_lockv(xid, tcon,
1532 cfile->fid.netfid,
1533 li->type, num, 0, buf);
1534 if (stored_rc) {
1535 /*
1536 * We failed on the unlock range
1537 * request - add all locks from the tmp
1538 * list to the head of the file's list.
1539 */
1540 cifs_move_llist(&tmp_llist,
1541 &cfile->llist->locks);
1542 rc = stored_rc;
1543 } else
1544 /*
1545 * The unlock range request succeeded -
1546 * free the tmp list.
1547 */
1548 cifs_free_llist(&tmp_llist);
1549 cur = buf;
1550 num = 0;
1551 } else
1552 cur++;
1553 }
1554 if (num) {
1555 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556 types[i], num, 0, buf);
1557 if (stored_rc) {
1558 cifs_move_llist(&tmp_llist,
1559 &cfile->llist->locks);
1560 rc = stored_rc;
1561 } else
1562 cifs_free_llist(&tmp_llist);
1563 }
1564 }
1565
1566 up_write(&cinode->lock_sem);
1567 kfree(buf);
1568 return rc;
1569 }
1570
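/* Handle F_SETLK/F_SETLKW: set or clear a byte-range lock, POSIX or mandatory style */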
1571 static int
1572 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573 bool wait_flag, bool posix_lck, int lock, int unlock,
1574 unsigned int xid)
1575 {
1576 int rc = 0;
1577 __u64 length = 1 + flock->fl_end - flock->fl_start;
1578 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1579 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580 struct TCP_Server_Info *server = tcon->ses->server;
1581 struct inode *inode = d_inode(cfile->dentry);
1582
1583 if (posix_lck) {
1584 int posix_lock_type;
1585
1586 rc = cifs_posix_lock_set(file, flock);
1587 if (!rc || rc < 0)
1588 return rc;
1589
1590 if (type & server->vals->shared_lock_type)
1591 posix_lock_type = CIFS_RDLCK;
1592 else
1593 posix_lock_type = CIFS_WRLCK;
1594
1595 if (unlock == 1)
1596 posix_lock_type = CIFS_UNLCK;
1597
1598 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599 hash_lockowner(flock->fl_owner),
1600 flock->fl_start, length,
1601 NULL, posix_lock_type, wait_flag);
1602 goto out;
1603 }
1604
1605 if (lock) {
1606 struct cifsLockInfo *lock;
1607
1608 lock = cifs_lock_init(flock->fl_start, length, type,
1609 flock->fl_flags);
1610 if (!lock)
1611 return -ENOMEM;
1612
1613 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614 if (rc < 0) {
1615 kfree(lock);
1616 return rc;
1617 }
1618 if (!rc)
1619 goto out;
1620
1621 /*
1622 * Windows 7 server can delay breaking lease from read to None
1623 * if we set a byte-range lock on a file - break it explicitly
1624 * before sending the lock to the server to be sure the next
1625 * read won't conflict with non-overlapping locks due to
1626 * page reading.
1627 */
1628 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1629 CIFS_CACHE_READ(CIFS_I(inode))) {
1630 cifs_zap_mapping(inode);
1631 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1632 inode);
1633 CIFS_I(inode)->oplock = 0;
1634 }
1635
1636 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637 type, 1, 0, wait_flag);
1638 if (rc) {
1639 kfree(lock);
1640 return rc;
1641 }
1642
1643 cifs_lock_add(cfile, lock);
1644 } else if (unlock)
1645 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646
1647 out:
1648 if (flock->fl_flags & FL_POSIX && !rc)
1649 rc = locks_lock_file_wait(file, flock);
1650 return rc;
1651 }
1652
1653 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1654 {
1655 int rc, xid;
1656 int lock = 0, unlock = 0;
1657 bool wait_flag = false;
1658 bool posix_lck = false;
1659 struct cifs_sb_info *cifs_sb;
1660 struct cifs_tcon *tcon;
1661 struct cifsInodeInfo *cinode;
1662 struct cifsFileInfo *cfile;
1663 __u16 netfid;
1664 __u32 type;
1665
1666 rc = -EACCES;
1667 xid = get_xid();
1668
1669 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1670 cmd, flock->fl_flags, flock->fl_type,
1671 flock->fl_start, flock->fl_end);
1672
1673 cfile = (struct cifsFileInfo *)file->private_data;
1674 tcon = tlink_tcon(cfile->tlink);
1675
1676 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1677 tcon->ses->server);
1678 cifs_sb = CIFS_FILE_SB(file);
1679 netfid = cfile->fid.netfid;
1680 cinode = CIFS_I(file_inode(file));
1681
1682 if (cap_unix(tcon->ses) &&
1683 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1684 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1685 posix_lck = true;
1686 /*
1687 * BB add code here to normalize offset and length to account for
1688 * negative length which we can not accept over the wire.
1689 */
1690 if (IS_GETLK(cmd)) {
1691 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1692 free_xid(xid);
1693 return rc;
1694 }
1695
1696 if (!lock && !unlock) {
1697 /*
1698 * if no lock or unlock then nothing to do since we do not
1699 * know what it is
1700 */
1701 free_xid(xid);
1702 return -EOPNOTSUPP;
1703 }
1704
1705 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1706 xid);
1707 free_xid(xid);
1708 return rc;
1709 }
1710
1711 /*
1712 * update the file size (if needed) after a write. Should be called with
1713 * the inode->i_lock held
1714 */
1715 void
1716 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1717 unsigned int bytes_written)
1718 {
1719 loff_t end_of_write = offset + bytes_written;
1720
1721 if (end_of_write > cifsi->server_eof)
1722 cifsi->server_eof = end_of_write;
1723 }
1724
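/*
 * Write data synchronously through an open file handle, retrying after
 * reconnect and updating the cached file size as bytes are written.
 */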
1725 static ssize_t
1726 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1727 size_t write_size, loff_t *offset)
1728 {
1729 int rc = 0;
1730 unsigned int bytes_written = 0;
1731 unsigned int total_written;
1732 struct cifs_sb_info *cifs_sb;
1733 struct cifs_tcon *tcon;
1734 struct TCP_Server_Info *server;
1735 unsigned int xid;
1736 struct dentry *dentry = open_file->dentry;
1737 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1738 struct cifs_io_parms io_parms;
1739
1740 cifs_sb = CIFS_SB(dentry->d_sb);
1741
1742 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1743 write_size, *offset, dentry);
1744
1745 tcon = tlink_tcon(open_file->tlink);
1746 server = tcon->ses->server;
1747
1748 if (!server->ops->sync_write)
1749 return -ENOSYS;
1750
1751 xid = get_xid();
1752
1753 for (total_written = 0; write_size > total_written;
1754 total_written += bytes_written) {
1755 rc = -EAGAIN;
1756 while (rc == -EAGAIN) {
1757 struct kvec iov[2];
1758 unsigned int len;
1759
1760 if (open_file->invalidHandle) {
1761 /* we could deadlock if we called
1762 filemap_fdatawait from here so tell
1763 reopen_file not to flush data to
1764 server now */
1765 rc = cifs_reopen_file(open_file, false);
1766 if (rc != 0)
1767 break;
1768 }
1769
1770 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1771 (unsigned int)write_size - total_written);
1772 /* iov[0] is reserved for smb header */
1773 iov[1].iov_base = (char *)write_data + total_written;
1774 iov[1].iov_len = len;
1775 io_parms.pid = pid;
1776 io_parms.tcon = tcon;
1777 io_parms.offset = *offset;
1778 io_parms.length = len;
1779 rc = server->ops->sync_write(xid, &open_file->fid,
1780 &io_parms, &bytes_written, iov, 1);
1781 }
1782 if (rc || (bytes_written == 0)) {
1783 if (total_written)
1784 break;
1785 else {
1786 free_xid(xid);
1787 return rc;
1788 }
1789 } else {
1790 spin_lock(&d_inode(dentry)->i_lock);
1791 cifs_update_eof(cifsi, *offset, bytes_written);
1792 spin_unlock(&d_inode(dentry)->i_lock);
1793 *offset += bytes_written;
1794 }
1795 }
1796
1797 cifs_stats_bytes_written(tcon, total_written);
1798
1799 if (total_written > 0) {
1800 spin_lock(&d_inode(dentry)->i_lock);
1801 if (*offset > d_inode(dentry)->i_size)
1802 i_size_write(d_inode(dentry), *offset);
1803 spin_unlock(&d_inode(dentry)->i_lock);
1804 }
1805 mark_inode_dirty_sync(d_inode(dentry));
1806 free_xid(xid);
1807 return total_written;
1808 }
1809
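/* Find an open file instance on this inode that can be used for reading */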
1810 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1811 bool fsuid_only)
1812 {
1813 struct cifsFileInfo *open_file = NULL;
1814 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1815 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1816
1817 /* only filter by fsuid on multiuser mounts */
1818 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1819 fsuid_only = false;
1820
1821 spin_lock(&tcon->open_file_lock);
1822 /* we could simply get the first_list_entry since write-only entries
1823 are always at the end of the list but since the first entry might
1824 have a close pending, we go through the whole list */
1825 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1826 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1827 continue;
1828 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1829 if (!open_file->invalidHandle) {
1830 /* found a good file */
1831 /* lock it so it will not be closed on us */
1832 cifsFileInfo_get(open_file);
1833 spin_unlock(&tcon->open_file_lock);
1834 return open_file;
1835 } /* else might as well continue, and look for
1836 another, or simply have the caller reopen it
1837 again rather than trying to fix this handle */
1838 } else /* write only file */
1839 break; /* write only files are last so must be done */
1840 }
1841 spin_unlock(&tcon->open_file_lock);
1842 return NULL;
1843 }
1844
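/* Find (and if necessary reopen) an open file instance usable for writing to this inode */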
1845 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1846 bool fsuid_only)
1847 {
1848 struct cifsFileInfo *open_file, *inv_file = NULL;
1849 struct cifs_sb_info *cifs_sb;
1850 struct cifs_tcon *tcon;
1851 bool any_available = false;
1852 int rc;
1853 unsigned int refind = 0;
1854
1855 /* Having a null inode here (because mapping->host was set to zero by
1856 the VFS or MM) should not happen, but we had reports of an oops (due to
1857 it being zero) during stress test cases, so we need to check for it */
1858
1859 if (cifs_inode == NULL) {
1860 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1861 dump_stack();
1862 return NULL;
1863 }
1864
1865 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1866 tcon = cifs_sb_master_tcon(cifs_sb);
1867
1868 /* only filter by fsuid on multiuser mounts */
1869 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1870 fsuid_only = false;
1871
1872 spin_lock(&tcon->open_file_lock);
1873 refind_writable:
1874 if (refind > MAX_REOPEN_ATT) {
1875 spin_unlock(&tcon->open_file_lock);
1876 return NULL;
1877 }
1878 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1879 if (!any_available && open_file->pid != current->tgid)
1880 continue;
1881 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1882 continue;
1883 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1884 if (!open_file->invalidHandle) {
1885 /* found a good writable file */
1886 cifsFileInfo_get(open_file);
1887 spin_unlock(&tcon->open_file_lock);
1888 return open_file;
1889 } else {
1890 if (!inv_file)
1891 inv_file = open_file;
1892 }
1893 }
1894 }
1895 /* couldn't find a usable FH with the same pid, try any available */
1896 if (!any_available) {
1897 any_available = true;
1898 goto refind_writable;
1899 }
1900
1901 if (inv_file) {
1902 any_available = false;
1903 cifsFileInfo_get(inv_file);
1904 }
1905
1906 spin_unlock(&tcon->open_file_lock);
1907
1908 if (inv_file) {
1909 rc = cifs_reopen_file(inv_file, false);
1910 if (!rc)
1911 return inv_file;
1912 else {
1913 spin_lock(&tcon->open_file_lock);
1914 list_move_tail(&inv_file->flist,
1915 &cifs_inode->openFileList);
1916 spin_unlock(&tcon->open_file_lock);
1917 cifsFileInfo_put(inv_file);
1918 ++refind;
1919 inv_file = NULL;
1920 spin_lock(&tcon->open_file_lock);
1921 goto refind_writable;
1922 }
1923 }
1924
1925 return NULL;
1926 }
1927
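/*
 * Write the bytes between "from" and "to" of a cached page back to the
 * server through any writable handle for the inode, clamping the range
 * so the write never extends the file.
 */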
1928 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1929 {
1930 struct address_space *mapping = page->mapping;
1931 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1932 char *write_data;
1933 int rc = -EFAULT;
1934 int bytes_written = 0;
1935 struct inode *inode;
1936 struct cifsFileInfo *open_file;
1937
1938 if (!mapping || !mapping->host)
1939 return -EFAULT;
1940
1941 inode = page->mapping->host;
1942
1943 offset += (loff_t)from;
1944 write_data = kmap(page);
1945 write_data += from;
1946
1947 if ((to > PAGE_SIZE) || (from > to)) {
1948 kunmap(page);
1949 return -EIO;
1950 }
1951
1952 /* racing with truncate? */
1953 if (offset > mapping->host->i_size) {
1954 kunmap(page);
1955 return 0; /* don't care */
1956 }
1957
1958 /* check to make sure that we are not extending the file */
1959 if (mapping->host->i_size - offset < (loff_t)to)
1960 to = (unsigned)(mapping->host->i_size - offset);
1961
1962 open_file = find_writable_file(CIFS_I(mapping->host), false);
1963 if (open_file) {
1964 bytes_written = cifs_write(open_file, open_file->pid,
1965 write_data, to - from, &offset);
1966 cifsFileInfo_put(open_file);
1967 /* Does mm or vfs already set times? */
1968 inode->i_atime = inode->i_mtime = current_time(inode);
1969 if ((bytes_written > 0) && (offset))
1970 rc = 0;
1971 else if (bytes_written < 0)
1972 rc = bytes_written;
1973 } else {
1974 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1975 rc = -EIO;
1976 }
1977
1978 kunmap(page);
1979 return rc;
1980 }
1981
1982 static struct cifs_writedata *
1983 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1984 pgoff_t end, pgoff_t *index,
1985 unsigned int *found_pages)
1986 {
1987 struct cifs_writedata *wdata;
1988
1989 wdata = cifs_writedata_alloc((unsigned int)tofind,
1990 cifs_writev_complete);
1991 if (!wdata)
1992 return NULL;
1993
1994 *found_pages = find_get_pages_range_tag(mapping, index, end,
1995 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1996 return wdata;
1997 }
1998
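/*
 * Lock and mark for writeback as many of the found dirty pages as form
 * one consecutive run, stopping at the first page that was truncated,
 * is no longer dirty, is not consecutive, or lies beyond EOF. Pages
 * that will not be used are released. Returns the number of pages kept
 * in wdata->pages.
 */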
1999 static unsigned int
2000 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2001 struct address_space *mapping,
2002 struct writeback_control *wbc,
2003 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2004 {
2005 unsigned int nr_pages = 0, i;
2006 struct page *page;
2007
2008 for (i = 0; i < found_pages; i++) {
2009 page = wdata->pages[i];
2010 /*
2011 * At this point we hold neither the i_pages lock nor the
2012 * page lock: the page may be truncated or invalidated
2013 * (changing page->mapping to NULL), or even swizzled
2014 * back from swapper_space to tmpfs file mapping
2015 */
2016
2017 if (nr_pages == 0)
2018 lock_page(page);
2019 else if (!trylock_page(page))
2020 break;
2021
2022 if (unlikely(page->mapping != mapping)) {
2023 unlock_page(page);
2024 break;
2025 }
2026
2027 if (!wbc->range_cyclic && page->index > end) {
2028 *done = true;
2029 unlock_page(page);
2030 break;
2031 }
2032
2033 if (*next && (page->index != *next)) {
2034 /* Not next consecutive page */
2035 unlock_page(page);
2036 break;
2037 }
2038
2039 if (wbc->sync_mode != WB_SYNC_NONE)
2040 wait_on_page_writeback(page);
2041
2042 if (PageWriteback(page) ||
2043 !clear_page_dirty_for_io(page)) {
2044 unlock_page(page);
2045 break;
2046 }
2047
2048 /*
2049 * This actually clears the dirty bit in the radix tree.
2050 * See cifs_writepage() for more commentary.
2051 */
2052 set_page_writeback(page);
2053 if (page_offset(page) >= i_size_read(mapping->host)) {
2054 *done = true;
2055 unlock_page(page);
2056 end_page_writeback(page);
2057 break;
2058 }
2059
2060 wdata->pages[i] = page;
2061 *next = page->index + 1;
2062 ++nr_pages;
2063 }
2064
2065 /* reset index to refind any pages skipped */
2066 if (nr_pages == 0)
2067 *index = wdata->pages[0]->index + 1;
2068
2069 /* put any pages we aren't going to use */
2070 for (i = nr_pages; i < found_pages; i++) {
2071 put_page(wdata->pages[i]);
2072 wdata->pages[i] = NULL;
2073 }
2074
2075 return nr_pages;
2076 }
2077
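/*
 * Fill in the remaining fields of the wdata (offset, sizes, target
 * handle) and issue the asynchronous write for the prepared pages,
 * unlocking the pages before returning.
 */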
2078 static int
2079 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2080 struct address_space *mapping, struct writeback_control *wbc)
2081 {
2082 int rc = 0;
2083 struct TCP_Server_Info *server;
2084 unsigned int i;
2085
2086 wdata->sync_mode = wbc->sync_mode;
2087 wdata->nr_pages = nr_pages;
2088 wdata->offset = page_offset(wdata->pages[0]);
2089 wdata->pagesz = PAGE_SIZE;
2090 wdata->tailsz = min(i_size_read(mapping->host) -
2091 page_offset(wdata->pages[nr_pages - 1]),
2092 (loff_t)PAGE_SIZE);
2093 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2094
2095 if (wdata->cfile != NULL)
2096 cifsFileInfo_put(wdata->cfile);
2097 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2098 if (!wdata->cfile) {
2099 cifs_dbg(VFS, "No writable handles for inode\n");
2100 rc = -EBADF;
2101 } else {
2102 wdata->pid = wdata->cfile->pid;
2103 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2104 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2105 }
2106
2107 for (i = 0; i < nr_pages; ++i)
2108 unlock_page(wdata->pages[i]);
2109
2110 return rc;
2111 }
2112
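/*
 * Writeback for the whole address space: gather runs of contiguous
 * dirty pages, wrap each run in a cifs_writedata and send it to the
 * server asynchronously, retrying the run on -EAGAIN when doing
 * WB_SYNC_ALL writeback.
 */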
2113 static int cifs_writepages(struct address_space *mapping,
2114 struct writeback_control *wbc)
2115 {
2116 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2117 struct TCP_Server_Info *server;
2118 bool done = false, scanned = false, range_whole = false;
2119 pgoff_t end, index;
2120 struct cifs_writedata *wdata;
2121 int rc = 0;
2122 int saved_rc = 0;
2123 unsigned int xid;
2124
2125 /*
2126 * If wsize is smaller than the page cache size, default to writing
2127 * one page at a time via cifs_writepage
2128 */
2129 if (cifs_sb->wsize < PAGE_SIZE)
2130 return generic_writepages(mapping, wbc);
2131
2132 xid = get_xid();
2133 if (wbc->range_cyclic) {
2134 index = mapping->writeback_index; /* Start from prev offset */
2135 end = -1;
2136 } else {
2137 index = wbc->range_start >> PAGE_SHIFT;
2138 end = wbc->range_end >> PAGE_SHIFT;
2139 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2140 range_whole = true;
2141 scanned = true;
2142 }
2143 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2144 retry:
2145 while (!done && index <= end) {
2146 unsigned int i, nr_pages, found_pages, wsize, credits;
2147 pgoff_t next = 0, tofind, saved_index = index;
2148
2149 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2150 &wsize, &credits);
2151 if (rc != 0) {
2152 done = true;
2153 break;
2154 }
2155
2156 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2157
2158 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2159 &found_pages);
2160 if (!wdata) {
2161 rc = -ENOMEM;
2162 done = true;
2163 add_credits_and_wake_if(server, credits, 0);
2164 break;
2165 }
2166
2167 if (found_pages == 0) {
2168 kref_put(&wdata->refcount, cifs_writedata_release);
2169 add_credits_and_wake_if(server, credits, 0);
2170 break;
2171 }
2172
2173 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2174 end, &index, &next, &done);
2175
2176 /* nothing to write? */
2177 if (nr_pages == 0) {
2178 kref_put(&wdata->refcount, cifs_writedata_release);
2179 add_credits_and_wake_if(server, credits, 0);
2180 continue;
2181 }
2182
2183 wdata->credits = credits;
2184
2185 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2186
2187 /* send failure -- clean up the mess */
2188 if (rc != 0) {
2189 add_credits_and_wake_if(server, wdata->credits, 0);
2190 for (i = 0; i < nr_pages; ++i) {
2191 if (is_retryable_error(rc))
2192 redirty_page_for_writepage(wbc,
2193 wdata->pages[i]);
2194 else
2195 SetPageError(wdata->pages[i]);
2196 end_page_writeback(wdata->pages[i]);
2197 put_page(wdata->pages[i]);
2198 }
2199 if (!is_retryable_error(rc))
2200 mapping_set_error(mapping, rc);
2201 }
2202 kref_put(&wdata->refcount, cifs_writedata_release);
2203
2204 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2205 index = saved_index;
2206 continue;
2207 }
2208
2209 /* Return immediately if we received a signal during writing */
2210 if (is_interrupt_error(rc)) {
2211 done = true;
2212 break;
2213 }
2214
2215 if (rc != 0 && saved_rc == 0)
2216 saved_rc = rc;
2217
2218 wbc->nr_to_write -= nr_pages;
2219 if (wbc->nr_to_write <= 0)
2220 done = true;
2221
2222 index = next;
2223 }
2224
2225 if (!scanned && !done) {
2226 /*
2227 * We hit the last page and there is more work to be done: wrap
2228 * back to the start of the file
2229 */
2230 scanned = true;
2231 index = 0;
2232 goto retry;
2233 }
2234
2235 if (saved_rc != 0)
2236 rc = saved_rc;
2237
2238 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2239 mapping->writeback_index = index;
2240
2241 free_xid(xid);
2242 return rc;
2243 }
2244
2245 static int
2246 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2247 {
2248 int rc;
2249 unsigned int xid;
2250
2251 xid = get_xid();
2252 /* BB add check for wbc flags */
2253 get_page(page);
2254 if (!PageUptodate(page))
2255 cifs_dbg(FYI, "ppw - page not up to date\n");
2256
2257 /*
2258 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2259 *
2260 * A writepage() implementation always needs to do either this,
2261 * or re-dirty the page with "redirty_page_for_writepage()" in
2262 * the case of a failure.
2263 *
2264 * Just unlocking the page will cause the radix tree tag-bits
2265 * to fail to update with the state of the page correctly.
2266 */
2267 set_page_writeback(page);
2268 retry_write:
2269 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2270 if (is_retryable_error(rc)) {
2271 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2272 goto retry_write;
2273 redirty_page_for_writepage(wbc, page);
2274 } else if (rc != 0) {
2275 SetPageError(page);
2276 mapping_set_error(page->mapping, rc);
2277 } else {
2278 SetPageUptodate(page);
2279 }
2280 end_page_writeback(page);
2281 put_page(page);
2282 free_xid(xid);
2283 return rc;
2284 }
2285
2286 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2287 {
2288 int rc = cifs_writepage_locked(page, wbc);
2289 unlock_page(page);
2290 return rc;
2291 }
2292
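/*
 * Called after data has been copied into a page by the generic write
 * path: if the page is not uptodate, push the copied bytes straight to
 * the server; otherwise just mark the page dirty. Updates i_size if
 * the write extended the file.
 */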
2293 static int cifs_write_end(struct file *file, struct address_space *mapping,
2294 loff_t pos, unsigned len, unsigned copied,
2295 struct page *page, void *fsdata)
2296 {
2297 int rc;
2298 struct inode *inode = mapping->host;
2299 struct cifsFileInfo *cfile = file->private_data;
2300 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2301 __u32 pid;
2302
2303 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2304 pid = cfile->pid;
2305 else
2306 pid = current->tgid;
2307
2308 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2309 page, pos, copied);
2310
2311 if (PageChecked(page)) {
2312 if (copied == len)
2313 SetPageUptodate(page);
2314 ClearPageChecked(page);
2315 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2316 SetPageUptodate(page);
2317
2318 if (!PageUptodate(page)) {
2319 char *page_data;
2320 unsigned offset = pos & (PAGE_SIZE - 1);
2321 unsigned int xid;
2322
2323 xid = get_xid();
2324 /* this is probably better than directly calling
2325 cifs_partialpagewrite() since in this function the file handle
2326 is known, which we might as well leverage */
2327 /* BB check if anything else missing out of ppw
2328 such as updating last write time */
2329 page_data = kmap(page);
2330 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2331 /* if (rc < 0) should we set writebehind rc? */
2332 kunmap(page);
2333
2334 free_xid(xid);
2335 } else {
2336 rc = copied;
2337 pos += copied;
2338 set_page_dirty(page);
2339 }
2340
2341 if (rc > 0) {
2342 spin_lock(&inode->i_lock);
2343 if (pos > inode->i_size)
2344 i_size_write(inode, pos);
2345 spin_unlock(&inode->i_lock);
2346 }
2347
2348 unlock_page(page);
2349 put_page(page);
2350
2351 return rc;
2352 }
2353
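/*
 * fsync for strict cache mode: flush dirty pages, invalidate the page
 * cache if we no longer hold a read oplock/lease, and ask the server to
 * flush the file handle unless CIFS_MOUNT_NOSSYNC is set.
 */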
2354 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2355 int datasync)
2356 {
2357 unsigned int xid;
2358 int rc = 0;
2359 struct cifs_tcon *tcon;
2360 struct TCP_Server_Info *server;
2361 struct cifsFileInfo *smbfile = file->private_data;
2362 struct inode *inode = file_inode(file);
2363 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2364
2365 rc = file_write_and_wait_range(file, start, end);
2366 if (rc)
2367 return rc;
2368 inode_lock(inode);
2369
2370 xid = get_xid();
2371
2372 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2373 file, datasync);
2374
2375 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2376 rc = cifs_zap_mapping(inode);
2377 if (rc) {
2378 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2379 rc = 0; /* don't care about it in fsync */
2380 }
2381 }
2382
2383 tcon = tlink_tcon(smbfile->tlink);
2384 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2385 server = tcon->ses->server;
2386 if (server->ops->flush)
2387 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2388 else
2389 rc = -ENOSYS;
2390 }
2391
2392 free_xid(xid);
2393 inode_unlock(inode);
2394 return rc;
2395 }
2396
2397 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2398 {
2399 unsigned int xid;
2400 int rc = 0;
2401 struct cifs_tcon *tcon;
2402 struct TCP_Server_Info *server;
2403 struct cifsFileInfo *smbfile = file->private_data;
2404 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2405 struct inode *inode = file->f_mapping->host;
2406
2407 rc = file_write_and_wait_range(file, start, end);
2408 if (rc)
2409 return rc;
2410 inode_lock(inode);
2411
2412 xid = get_xid();
2413
2414 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2415 file, datasync);
2416
2417 tcon = tlink_tcon(smbfile->tlink);
2418 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2419 server = tcon->ses->server;
2420 if (server->ops->flush)
2421 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2422 else
2423 rc = -ENOSYS;
2424 }
2425
2426 free_xid(xid);
2427 inode_unlock(inode);
2428 return rc;
2429 }
2430
2431 /*
2432 * As the file closes, flush all cached write data for this inode,
2433 * checking for write-behind errors.
2434 */
2435 int cifs_flush(struct file *file, fl_owner_t id)
2436 {
2437 struct inode *inode = file_inode(file);
2438 int rc = 0;
2439
2440 if (file->f_mode & FMODE_WRITE)
2441 rc = filemap_write_and_wait(inode->i_mapping);
2442
2443 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2444
2445 return rc;
2446 }
2447
2448 static int
2449 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2450 {
2451 int rc = 0;
2452 unsigned long i;
2453
2454 for (i = 0; i < num_pages; i++) {
2455 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2456 if (!pages[i]) {
2457 /*
2458 * save number of pages we have already allocated and
2459 * return with ENOMEM error
2460 */
2461 num_pages = i;
2462 rc = -ENOMEM;
2463 break;
2464 }
2465 }
2466
2467 if (rc) {
2468 for (i = 0; i < num_pages; i++)
2469 put_page(pages[i]);
2470 }
2471 return rc;
2472 }
2473
2474 static inline
2475 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2476 {
2477 size_t num_pages;
2478 size_t clen;
2479
2480 clen = min_t(const size_t, len, wsize);
2481 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2482
2483 if (cur_len)
2484 *cur_len = clen;
2485
2486 return num_pages;
2487 }
2488
2489 static void
2490 cifs_uncached_writedata_release(struct kref *refcount)
2491 {
2492 int i;
2493 struct cifs_writedata *wdata = container_of(refcount,
2494 struct cifs_writedata, refcount);
2495
2496 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2497 for (i = 0; i < wdata->nr_pages; i++)
2498 put_page(wdata->pages[i]);
2499 cifs_writedata_release(refcount);
2500 }
2501
2502 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2503
2504 static void
2505 cifs_uncached_writev_complete(struct work_struct *work)
2506 {
2507 struct cifs_writedata *wdata = container_of(work,
2508 struct cifs_writedata, work);
2509 struct inode *inode = d_inode(wdata->cfile->dentry);
2510 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2511
2512 spin_lock(&inode->i_lock);
2513 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2514 if (cifsi->server_eof > inode->i_size)
2515 i_size_write(inode, cifsi->server_eof);
2516 spin_unlock(&inode->i_lock);
2517
2518 complete(&wdata->done);
2519 collect_uncached_write_data(wdata->ctx);
2520 /* the below call can possibly free the last ref to aio ctx */
2521 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2522 }
2523
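/*
 * Copy up to *len bytes of user data from the iov_iter into the wdata
 * pages. On return *len holds the number of bytes actually copied and
 * *num_pages the number of pages used; -EFAULT is returned if nothing
 * could be copied at all.
 */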
2524 static int
2525 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2526 size_t *len, unsigned long *num_pages)
2527 {
2528 size_t save_len, copied, bytes, cur_len = *len;
2529 unsigned long i, nr_pages = *num_pages;
2530
2531 save_len = cur_len;
2532 for (i = 0; i < nr_pages; i++) {
2533 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2534 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2535 cur_len -= copied;
2536 /*
2537 * If we didn't copy as much as we expected, then that
2538 * may mean we trod into an unmapped area. Stop copying
2539 * at that point. On the next pass through the big
2540 * loop, we'll likely end up getting a zero-length
2541 * write and bailing out of it.
2542 */
2543 if (copied < bytes)
2544 break;
2545 }
2546 cur_len = save_len - cur_len;
2547 *len = cur_len;
2548
2549 /*
2550 * If we have no data to send, then that probably means that
2551 * the copy above failed altogether. That's most likely because
2552 * the address in the iovec was bogus. Return -EFAULT and let
2553 * the caller free anything we allocated and bail out.
2554 */
2555 if (!cur_len)
2556 return -EFAULT;
2557
2558 /*
2559 * i + 1 now represents the number of pages we actually used in
2560 * the copy phase above.
2561 */
2562 *num_pages = i + 1;
2563 return 0;
2564 }
2565
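/*
 * Resend a whole uncached wdata after a reconnect: wait until enough
 * credits are available to cover the full request, reopen the file
 * handle if it was invalidated, and reissue the asynchronous write.
 */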
2566 static int
2567 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2568 struct cifs_aio_ctx *ctx)
2569 {
2570 unsigned int wsize, credits;
2571 int rc;
2572 struct TCP_Server_Info *server =
2573 tlink_tcon(wdata->cfile->tlink)->ses->server;
2574
2575 /*
2576 * Wait for credits to resend this wdata.
2577 * Note: we are attempting to resend the whole wdata rather than in segments
2578 */
2579 do {
2580 rc = server->ops->wait_mtu_credits(
2581 server, wdata->bytes, &wsize, &credits);
2582
2583 if (rc)
2584 goto out;
2585
2586 if (wsize < wdata->bytes) {
2587 add_credits_and_wake_if(server, credits, 0);
2588 msleep(1000);
2589 }
2590 } while (wsize < wdata->bytes);
2591
2592 rc = -EAGAIN;
2593 while (rc == -EAGAIN) {
2594 rc = 0;
2595 if (wdata->cfile->invalidHandle)
2596 rc = cifs_reopen_file(wdata->cfile, false);
2597 if (!rc)
2598 rc = server->ops->async_writev(wdata,
2599 cifs_uncached_writedata_release);
2600 }
2601
2602 if (!rc) {
2603 list_add_tail(&wdata->list, wdata_list);
2604 return 0;
2605 }
2606
2607 add_credits_and_wake_if(server, wdata->credits, 0);
2608 out:
2609 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2610
2611 return rc;
2612 }
2613
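/*
 * Split an uncached or direct write into wsize-limited cifs_writedata
 * requests and send them asynchronously. For direct I/O the user pages
 * are pinned with iov_iter_get_pages_alloc(); otherwise the data is
 * copied into freshly allocated pages. Each request that is sent
 * successfully is queued on wdata_list for later collection.
 */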
2614 static int
2615 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2616 struct cifsFileInfo *open_file,
2617 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2618 struct cifs_aio_ctx *ctx)
2619 {
2620 int rc = 0;
2621 size_t cur_len;
2622 unsigned long nr_pages, num_pages, i;
2623 struct cifs_writedata *wdata;
2624 struct iov_iter saved_from = *from;
2625 loff_t saved_offset = offset;
2626 pid_t pid;
2627 struct TCP_Server_Info *server;
2628 struct page **pagevec;
2629 size_t start;
2630
2631 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2632 pid = open_file->pid;
2633 else
2634 pid = current->tgid;
2635
2636 server = tlink_tcon(open_file->tlink)->ses->server;
2637
2638 do {
2639 unsigned int wsize, credits;
2640
2641 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2642 &wsize, &credits);
2643 if (rc)
2644 break;
2645
2646 cur_len = min_t(const size_t, len, wsize);
2647
2648 if (ctx->direct_io) {
2649 ssize_t result;
2650
2651 result = iov_iter_get_pages_alloc(
2652 from, &pagevec, cur_len, &start);
2653 if (result < 0) {
2654 cifs_dbg(VFS,
2655 "direct_writev couldn't get user pages "
2656 "(rc=%zd) iter type %d iov_offset %zd "
2657 "count %zd\n",
2658 result, from->type,
2659 from->iov_offset, from->count);
2660 dump_stack();
2661
2662 rc = result;
2663 add_credits_and_wake_if(server, credits, 0);
2664 break;
2665 }
2666 cur_len = (size_t)result;
2667 iov_iter_advance(from, cur_len);
2668
2669 nr_pages =
2670 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2671
2672 wdata = cifs_writedata_direct_alloc(pagevec,
2673 cifs_uncached_writev_complete);
2674 if (!wdata) {
2675 rc = -ENOMEM;
2676 add_credits_and_wake_if(server, credits, 0);
2677 break;
2678 }
2679
2680
2681 wdata->page_offset = start;
2682 wdata->tailsz =
2683 nr_pages > 1 ?
2684 cur_len - (PAGE_SIZE - start) -
2685 (nr_pages - 2) * PAGE_SIZE :
2686 cur_len;
2687 } else {
2688 nr_pages = get_numpages(wsize, len, &cur_len);
2689 wdata = cifs_writedata_alloc(nr_pages,
2690 cifs_uncached_writev_complete);
2691 if (!wdata) {
2692 rc = -ENOMEM;
2693 add_credits_and_wake_if(server, credits, 0);
2694 break;
2695 }
2696
2697 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2698 if (rc) {
2699 kvfree(wdata->pages);
2700 kfree(wdata);
2701 add_credits_and_wake_if(server, credits, 0);
2702 break;
2703 }
2704
2705 num_pages = nr_pages;
2706 rc = wdata_fill_from_iovec(
2707 wdata, from, &cur_len, &num_pages);
2708 if (rc) {
2709 for (i = 0; i < nr_pages; i++)
2710 put_page(wdata->pages[i]);
2711 kvfree(wdata->pages);
2712 kfree(wdata);
2713 add_credits_and_wake_if(server, credits, 0);
2714 break;
2715 }
2716
2717 /*
2718 * Bring nr_pages down to the number of pages we
2719 * actually used, and free any pages that we didn't use.
2720 */
2721 for ( ; nr_pages > num_pages; nr_pages--)
2722 put_page(wdata->pages[nr_pages - 1]);
2723
2724 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2725 }
2726
2727 wdata->sync_mode = WB_SYNC_ALL;
2728 wdata->nr_pages = nr_pages;
2729 wdata->offset = (__u64)offset;
2730 wdata->cfile = cifsFileInfo_get(open_file);
2731 wdata->pid = pid;
2732 wdata->bytes = cur_len;
2733 wdata->pagesz = PAGE_SIZE;
2734 wdata->credits = credits;
2735 wdata->ctx = ctx;
2736 kref_get(&ctx->refcount);
2737
2738 if (!wdata->cfile->invalidHandle ||
2739 !(rc = cifs_reopen_file(wdata->cfile, false)))
2740 rc = server->ops->async_writev(wdata,
2741 cifs_uncached_writedata_release);
2742 if (rc) {
2743 add_credits_and_wake_if(server, wdata->credits, 0);
2744 kref_put(&wdata->refcount,
2745 cifs_uncached_writedata_release);
2746 if (rc == -EAGAIN) {
2747 *from = saved_from;
2748 iov_iter_advance(from, offset - saved_offset);
2749 continue;
2750 }
2751 break;
2752 }
2753
2754 list_add_tail(&wdata->list, wdata_list);
2755 offset += cur_len;
2756 len -= cur_len;
2757 } while (len > 0);
2758
2759 return rc;
2760 }
2761
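/*
 * Wait for the outstanding uncached writes of an aio context to
 * complete, resending any request that failed with -EAGAIN, and then
 * report the total number of bytes written (or the first error) to the
 * caller or to the aio completion.
 */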
2762 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2763 {
2764 struct cifs_writedata *wdata, *tmp;
2765 struct cifs_tcon *tcon;
2766 struct cifs_sb_info *cifs_sb;
2767 struct dentry *dentry = ctx->cfile->dentry;
2768 unsigned int i;
2769 int rc;
2770
2771 tcon = tlink_tcon(ctx->cfile->tlink);
2772 cifs_sb = CIFS_SB(dentry->d_sb);
2773
2774 mutex_lock(&ctx->aio_mutex);
2775
2776 if (list_empty(&ctx->list)) {
2777 mutex_unlock(&ctx->aio_mutex);
2778 return;
2779 }
2780
2781 rc = ctx->rc;
2782 /*
2783 * Wait for and collect replies for any successful sends in order of
2784 * increasing offset. Once an error is hit, then return without waiting
2785 * for any more replies.
2786 */
2787 restart_loop:
2788 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2789 if (!rc) {
2790 if (!try_wait_for_completion(&wdata->done)) {
2791 mutex_unlock(&ctx->aio_mutex);
2792 return;
2793 }
2794
2795 if (wdata->result)
2796 rc = wdata->result;
2797 else
2798 ctx->total_len += wdata->bytes;
2799
2800 /* resend call if it's a retryable error */
2801 if (rc == -EAGAIN) {
2802 struct list_head tmp_list;
2803 struct iov_iter tmp_from = ctx->iter;
2804
2805 INIT_LIST_HEAD(&tmp_list);
2806 list_del_init(&wdata->list);
2807
2808 if (ctx->direct_io)
2809 rc = cifs_resend_wdata(
2810 wdata, &tmp_list, ctx);
2811 else {
2812 iov_iter_advance(&tmp_from,
2813 wdata->offset - ctx->pos);
2814
2815 rc = cifs_write_from_iter(wdata->offset,
2816 wdata->bytes, &tmp_from,
2817 ctx->cfile, cifs_sb, &tmp_list,
2818 ctx);
2819 }
2820
2821 list_splice(&tmp_list, &ctx->list);
2822
2823 kref_put(&wdata->refcount,
2824 cifs_uncached_writedata_release);
2825 goto restart_loop;
2826 }
2827 }
2828 list_del_init(&wdata->list);
2829 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2830 }
2831
2832 if (!ctx->direct_io)
2833 for (i = 0; i < ctx->npages; i++)
2834 put_page(ctx->bv[i].bv_page);
2835
2836 cifs_stats_bytes_written(tcon, ctx->total_len);
2837 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2838
2839 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2840
2841 mutex_unlock(&ctx->aio_mutex);
2842
2843 if (ctx->iocb && ctx->iocb->ki_complete)
2844 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2845 else
2846 complete(&ctx->done);
2847 }
2848
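/*
 * Common implementation of the uncached and direct write paths: set up
 * an aio context, fan the request out via cifs_write_from_iter(), and
 * either return -EIOCBQUEUED for async callers or wait for completion
 * and return the number of bytes written.
 */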
2849 static ssize_t __cifs_writev(
2850 struct kiocb *iocb, struct iov_iter *from, bool direct)
2851 {
2852 struct file *file = iocb->ki_filp;
2853 ssize_t total_written = 0;
2854 struct cifsFileInfo *cfile;
2855 struct cifs_tcon *tcon;
2856 struct cifs_sb_info *cifs_sb;
2857 struct cifs_aio_ctx *ctx;
2858 struct iov_iter saved_from = *from;
2859 size_t len = iov_iter_count(from);
2860 int rc;
2861
2862 /*
2863 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2864 * In this case, fall back to the non-direct write function.
2865 * This could be improved by getting pages directly in ITER_KVEC.
2866 */
2867 if (direct && from->type & ITER_KVEC) {
2868 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2869 direct = false;
2870 }
2871
2872 rc = generic_write_checks(iocb, from);
2873 if (rc <= 0)
2874 return rc;
2875
2876 cifs_sb = CIFS_FILE_SB(file);
2877 cfile = file->private_data;
2878 tcon = tlink_tcon(cfile->tlink);
2879
2880 if (!tcon->ses->server->ops->async_writev)
2881 return -ENOSYS;
2882
2883 ctx = cifs_aio_ctx_alloc();
2884 if (!ctx)
2885 return -ENOMEM;
2886
2887 ctx->cfile = cifsFileInfo_get(cfile);
2888
2889 if (!is_sync_kiocb(iocb))
2890 ctx->iocb = iocb;
2891
2892 ctx->pos = iocb->ki_pos;
2893
2894 if (direct) {
2895 ctx->direct_io = true;
2896 ctx->iter = *from;
2897 ctx->len = len;
2898 } else {
2899 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2900 if (rc) {
2901 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2902 return rc;
2903 }
2904 }
2905
2906 /* grab a lock here because the write response handlers can access ctx */
2907 mutex_lock(&ctx->aio_mutex);
2908
2909 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2910 cfile, cifs_sb, &ctx->list, ctx);
2911
2912 /*
2913 * If at least one write was successfully sent, then discard any rc
2914 * value from the later writes. If the other writes succeed, we'll
2915 * end up returning whatever was written. If they fail, we'll get a
2916 * new rc value from them.
2917 */
2918 if (!list_empty(&ctx->list))
2919 rc = 0;
2920
2921 mutex_unlock(&ctx->aio_mutex);
2922
2923 if (rc) {
2924 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2925 return rc;
2926 }
2927
2928 if (!is_sync_kiocb(iocb)) {
2929 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2930 return -EIOCBQUEUED;
2931 }
2932
2933 rc = wait_for_completion_killable(&ctx->done);
2934 if (rc) {
2935 mutex_lock(&ctx->aio_mutex);
2936 ctx->rc = rc = -EINTR;
2937 total_written = ctx->total_len;
2938 mutex_unlock(&ctx->aio_mutex);
2939 } else {
2940 rc = ctx->rc;
2941 total_written = ctx->total_len;
2942 }
2943
2944 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2945
2946 if (unlikely(!total_written))
2947 return rc;
2948
2949 iocb->ki_pos += total_written;
2950 return total_written;
2951 }
2952
2953 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2954 {
2955 return __cifs_writev(iocb, from, true);
2956 }
2957
2958 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2959 {
2960 return __cifs_writev(iocb, from, false);
2961 }
2962
2963 static ssize_t
2964 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2965 {
2966 struct file *file = iocb->ki_filp;
2967 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2968 struct inode *inode = file->f_mapping->host;
2969 struct cifsInodeInfo *cinode = CIFS_I(inode);
2970 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2971 ssize_t rc;
2972
2973 inode_lock(inode);
2974 /*
2975 * We need to hold the sem to be sure nobody modifies the lock list
2976 * with a brlock that prevents writing.
2977 */
2978 down_read(&cinode->lock_sem);
2979
2980 rc = generic_write_checks(iocb, from);
2981 if (rc <= 0)
2982 goto out;
2983
2984 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2985 server->vals->exclusive_lock_type, 0,
2986 NULL, CIFS_WRITE_OP))
2987 rc = __generic_file_write_iter(iocb, from);
2988 else
2989 rc = -EACCES;
2990 out:
2991 up_read(&cinode->lock_sem);
2992 inode_unlock(inode);
2993
2994 if (rc > 0)
2995 rc = generic_write_sync(iocb, rc);
2996 return rc;
2997 }
2998
2999 ssize_t
3000 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3001 {
3002 struct inode *inode = file_inode(iocb->ki_filp);
3003 struct cifsInodeInfo *cinode = CIFS_I(inode);
3004 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3005 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3006 iocb->ki_filp->private_data;
3007 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3008 ssize_t written;
3009
3010 written = cifs_get_writer(cinode);
3011 if (written)
3012 return written;
3013
3014 if (CIFS_CACHE_WRITE(cinode)) {
3015 if (cap_unix(tcon->ses) &&
3016 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3017 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3018 written = generic_file_write_iter(iocb, from);
3019 goto out;
3020 }
3021 written = cifs_writev(iocb, from);
3022 goto out;
3023 }
3024 /*
3025 * For non-oplocked files in strict cache mode we need to write the data
3026 * to the server exactly from pos to pos+len-1 rather than flush all
3027 * affected pages because it may cause an error with mandatory locks on
3028 * these pages but not on the region from pos to pos+len-1.
3029 */
3030 written = cifs_user_writev(iocb, from);
3031 if (written > 0 && CIFS_CACHE_READ(cinode)) {
3032 /*
3033 * A Windows 7 server can delay breaking a level2 oplock when a write
3034 * request comes in - break it on the client to prevent reading
3035 * stale data.
3036 */
3037 cifs_zap_mapping(inode);
3038 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
3039 inode);
3040 cinode->oplock = 0;
3041 }
3042 out:
3043 cifs_put_writer(cinode);
3044 return written;
3045 }
3046
3047 static struct cifs_readdata *
3048 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3049 {
3050 struct cifs_readdata *rdata;
3051
3052 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3053 if (rdata != NULL) {
3054 rdata->pages = pages;
3055 kref_init(&rdata->refcount);
3056 INIT_LIST_HEAD(&rdata->list);
3057 init_completion(&rdata->done);
3058 INIT_WORK(&rdata->work, complete);
3059 }
3060
3061 return rdata;
3062 }
3063
3064 static struct cifs_readdata *
3065 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3066 {
3067 struct page **pages =
3068 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3069 struct cifs_readdata *ret = NULL;
3070
3071 if (pages) {
3072 ret = cifs_readdata_direct_alloc(pages, complete);
3073 if (!ret)
3074 kfree(pages);
3075 }
3076
3077 return ret;
3078 }
3079
3080 void
3081 cifs_readdata_release(struct kref *refcount)
3082 {
3083 struct cifs_readdata *rdata = container_of(refcount,
3084 struct cifs_readdata, refcount);
3085 #ifdef CONFIG_CIFS_SMB_DIRECT
3086 if (rdata->mr) {
3087 smbd_deregister_mr(rdata->mr);
3088 rdata->mr = NULL;
3089 }
3090 #endif
3091 if (rdata->cfile)
3092 cifsFileInfo_put(rdata->cfile);
3093
3094 kvfree(rdata->pages);
3095 kfree(rdata);
3096 }
3097
3098 static int
3099 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3100 {
3101 int rc = 0;
3102 struct page *page;
3103 unsigned int i;
3104
3105 for (i = 0; i < nr_pages; i++) {
3106 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3107 if (!page) {
3108 rc = -ENOMEM;
3109 break;
3110 }
3111 rdata->pages[i] = page;
3112 }
3113
3114 if (rc) {
3115 for (i = 0; i < nr_pages; i++) {
3116 put_page(rdata->pages[i]);
3117 rdata->pages[i] = NULL;
3118 }
3119 }
3120 return rc;
3121 }
3122
3123 static void
3124 cifs_uncached_readdata_release(struct kref *refcount)
3125 {
3126 struct cifs_readdata *rdata = container_of(refcount,
3127 struct cifs_readdata, refcount);
3128 unsigned int i;
3129
3130 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3131 for (i = 0; i < rdata->nr_pages; i++) {
3132 put_page(rdata->pages[i]);
3133 }
3134 cifs_readdata_release(refcount);
3135 }
3136
3137 /**
3138 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3139 * @rdata: the readdata response with list of pages holding data
3140 * @iter: destination for our data
3141 *
3142 * This function copies data from a list of pages in a readdata response into
3143 * an array of iovecs. It will first calculate where the data should go
3144 * based on the info in the readdata and then copy the data into that spot.
3145 */
3146 static int
3147 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3148 {
3149 size_t remaining = rdata->got_bytes;
3150 unsigned int i;
3151
3152 for (i = 0; i < rdata->nr_pages; i++) {
3153 struct page *page = rdata->pages[i];
3154 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3155 size_t written;
3156
3157 if (unlikely(iov_iter_is_pipe(iter))) {
3158 void *addr = kmap_atomic(page);
3159
3160 written = copy_to_iter(addr, copy, iter);
3161 kunmap_atomic(addr);
3162 } else
3163 written = copy_page_to_iter(page, 0, copy, iter);
3164 remaining -= written;
3165 if (written < copy && iov_iter_count(iter) > 0)
3166 break;
3167 }
3168 return remaining ? -EFAULT : 0;
3169 }
3170
3171 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3172
3173 static void
3174 cifs_uncached_readv_complete(struct work_struct *work)
3175 {
3176 struct cifs_readdata *rdata = container_of(work,
3177 struct cifs_readdata, work);
3178
3179 complete(&rdata->done);
3180 collect_uncached_read_data(rdata->ctx);
3181 /* the below call can possibly free the last ref to aio ctx */
3182 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3183 }
3184
3185 static int
3186 uncached_fill_pages(struct TCP_Server_Info *server,
3187 struct cifs_readdata *rdata, struct iov_iter *iter,
3188 unsigned int len)
3189 {
3190 int result = 0;
3191 unsigned int i;
3192 unsigned int nr_pages = rdata->nr_pages;
3193 unsigned int page_offset = rdata->page_offset;
3194
3195 rdata->got_bytes = 0;
3196 rdata->tailsz = PAGE_SIZE;
3197 for (i = 0; i < nr_pages; i++) {
3198 struct page *page = rdata->pages[i];
3199 size_t n;
3200 unsigned int segment_size = rdata->pagesz;
3201
3202 if (i == 0)
3203 segment_size -= page_offset;
3204 else
3205 page_offset = 0;
3206
3207
3208 if (len <= 0) {
3209 /* no need to hold page hostage */
3210 rdata->pages[i] = NULL;
3211 rdata->nr_pages--;
3212 put_page(page);
3213 continue;
3214 }
3215
3216 n = len;
3217 if (len >= segment_size)
3218 /* enough data to fill the page */
3219 n = segment_size;
3220 else
3221 rdata->tailsz = len;
3222 len -= n;
3223
3224 if (iter)
3225 result = copy_page_from_iter(
3226 page, page_offset, n, iter);
3227 #ifdef CONFIG_CIFS_SMB_DIRECT
3228 else if (rdata->mr)
3229 result = n;
3230 #endif
3231 else
3232 result = cifs_read_page_from_socket(
3233 server, page, page_offset, n);
3234 if (result < 0)
3235 break;
3236
3237 rdata->got_bytes += result;
3238 }
3239
3240 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3241 rdata->got_bytes : result;
3242 }
3243
3244 static int
3245 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3246 struct cifs_readdata *rdata, unsigned int len)
3247 {
3248 return uncached_fill_pages(server, rdata, NULL, len);
3249 }
3250
3251 static int
3252 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3253 struct cifs_readdata *rdata,
3254 struct iov_iter *iter)
3255 {
3256 return uncached_fill_pages(server, rdata, iter, iter->count);
3257 }
3258
3259 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3260 struct list_head *rdata_list,
3261 struct cifs_aio_ctx *ctx)
3262 {
3263 unsigned int rsize, credits;
3264 int rc;
3265 struct TCP_Server_Info *server =
3266 tlink_tcon(rdata->cfile->tlink)->ses->server;
3267
3268 /*
3269 * Wait for credits to resend this rdata.
3270 * Note: we are attempting to resend the whole rdata rather than in segments
3271 */
3272 do {
3273 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3274 &rsize, &credits);
3275
3276 if (rc)
3277 goto out;
3278
3279 if (rsize < rdata->bytes) {
3280 add_credits_and_wake_if(server, credits, 0);
3281 msleep(1000);
3282 }
3283 } while (rsize < rdata->bytes);
3284
3285 rc = -EAGAIN;
3286 while (rc == -EAGAIN) {
3287 rc = 0;
3288 if (rdata->cfile->invalidHandle)
3289 rc = cifs_reopen_file(rdata->cfile, true);
3290 if (!rc)
3291 rc = server->ops->async_readv(rdata);
3292 }
3293
3294 if (!rc) {
3295 /* Add to aio pending list */
3296 list_add_tail(&rdata->list, rdata_list);
3297 return 0;
3298 }
3299
3300 add_credits_and_wake_if(server, rdata->credits, 0);
3301 out:
3302 kref_put(&rdata->refcount,
3303 cifs_uncached_readdata_release);
3304
3305 return rc;
3306 }
3307
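/*
 * Split an uncached or direct read into rsize-limited cifs_readdata
 * requests and issue them asynchronously. For direct I/O the user pages
 * are pinned directly; otherwise pages are allocated to receive the
 * data. Each issued request is queued on rdata_list for collection.
 */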
3308 static int
3309 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3310 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3311 struct cifs_aio_ctx *ctx)
3312 {
3313 struct cifs_readdata *rdata;
3314 unsigned int npages, rsize, credits;
3315 size_t cur_len;
3316 int rc;
3317 pid_t pid;
3318 struct TCP_Server_Info *server;
3319 struct page **pagevec;
3320 size_t start;
3321 struct iov_iter direct_iov = ctx->iter;
3322
3323 server = tlink_tcon(open_file->tlink)->ses->server;
3324
3325 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3326 pid = open_file->pid;
3327 else
3328 pid = current->tgid;
3329
3330 if (ctx->direct_io)
3331 iov_iter_advance(&direct_iov, offset - ctx->pos);
3332
3333 do {
3334 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3335 &rsize, &credits);
3336 if (rc)
3337 break;
3338
3339 cur_len = min_t(const size_t, len, rsize);
3340
3341 if (ctx->direct_io) {
3342 ssize_t result;
3343
3344 result = iov_iter_get_pages_alloc(
3345 &direct_iov, &pagevec,
3346 cur_len, &start);
3347 if (result < 0) {
3348 cifs_dbg(VFS,
3349 "couldn't get user pages (rc=%zd)"
3350 " iter type %d"
3351 " iov_offset %zd count %zd\n",
3352 result, direct_iov.type,
3353 direct_iov.iov_offset,
3354 direct_iov.count);
3355 dump_stack();
3356
3357 rc = result;
3358 add_credits_and_wake_if(server, credits, 0);
3359 break;
3360 }
3361 cur_len = (size_t)result;
3362 iov_iter_advance(&direct_iov, cur_len);
3363
3364 rdata = cifs_readdata_direct_alloc(
3365 pagevec, cifs_uncached_readv_complete);
3366 if (!rdata) {
3367 add_credits_and_wake_if(server, credits, 0);
3368 rc = -ENOMEM;
3369 break;
3370 }
3371
3372 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3373 rdata->page_offset = start;
3374 rdata->tailsz = npages > 1 ?
3375 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3376 cur_len;
3377
3378 } else {
3379
3380 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3381 /* allocate a readdata struct */
3382 rdata = cifs_readdata_alloc(npages,
3383 cifs_uncached_readv_complete);
3384 if (!rdata) {
3385 add_credits_and_wake_if(server, credits, 0);
3386 rc = -ENOMEM;
3387 break;
3388 }
3389
3390 rc = cifs_read_allocate_pages(rdata, npages);
3391 if (rc) {
3392 kvfree(rdata->pages);
3393 kfree(rdata);
3394 add_credits_and_wake_if(server, credits, 0);
3395 break;
3396 }
3397
3398 rdata->tailsz = PAGE_SIZE;
3399 }
3400
3401 rdata->cfile = cifsFileInfo_get(open_file);
3402 rdata->nr_pages = npages;
3403 rdata->offset = offset;
3404 rdata->bytes = cur_len;
3405 rdata->pid = pid;
3406 rdata->pagesz = PAGE_SIZE;
3407 rdata->read_into_pages = cifs_uncached_read_into_pages;
3408 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3409 rdata->credits = credits;
3410 rdata->ctx = ctx;
3411 kref_get(&ctx->refcount);
3412
3413 if (!rdata->cfile->invalidHandle ||
3414 !(rc = cifs_reopen_file(rdata->cfile, true)))
3415 rc = server->ops->async_readv(rdata);
3416 if (rc) {
3417 add_credits_and_wake_if(server, rdata->credits, 0);
3418 kref_put(&rdata->refcount,
3419 cifs_uncached_readdata_release);
3420 if (rc == -EAGAIN) {
3421 iov_iter_revert(&direct_iov, cur_len);
3422 continue;
3423 }
3424 break;
3425 }
3426
3427 list_add_tail(&rdata->list, rdata_list);
3428 offset += cur_len;
3429 len -= cur_len;
3430 } while (len > 0);
3431
3432 return rc;
3433 }
3434
3435 static void
3436 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3437 {
3438 struct cifs_readdata *rdata, *tmp;
3439 struct iov_iter *to = &ctx->iter;
3440 struct cifs_sb_info *cifs_sb;
3441 struct cifs_tcon *tcon;
3442 unsigned int i;
3443 int rc;
3444
3445 tcon = tlink_tcon(ctx->cfile->tlink);
3446 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3447
3448 mutex_lock(&ctx->aio_mutex);
3449
3450 if (list_empty(&ctx->list)) {
3451 mutex_unlock(&ctx->aio_mutex);
3452 return;
3453 }
3454
3455 rc = ctx->rc;
3456 /* the loop below should proceed in the order of increasing offsets */
3457 again:
3458 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3459 if (!rc) {
3460 if (!try_wait_for_completion(&rdata->done)) {
3461 mutex_unlock(&ctx->aio_mutex);
3462 return;
3463 }
3464
3465 if (rdata->result == -EAGAIN) {
3466 /* resend call if it's a retryable error */
3467 struct list_head tmp_list;
3468 unsigned int got_bytes = rdata->got_bytes;
3469
3470 list_del_init(&rdata->list);
3471 INIT_LIST_HEAD(&tmp_list);
3472
3473 /*
3474 * Got part of the data and then a reconnect
3475 * happened -- fill the buffer and continue
3476 * reading.
3477 */
3478 if (got_bytes && got_bytes < rdata->bytes) {
3479 rc = 0;
3480 if (!ctx->direct_io)
3481 rc = cifs_readdata_to_iov(rdata, to);
3482 if (rc) {
3483 kref_put(&rdata->refcount,
3484 cifs_uncached_readdata_release);
3485 continue;
3486 }
3487 }
3488
3489 if (ctx->direct_io) {
3490 /*
3491 * Re-use rdata as this is a
3492 * direct I/O
3493 */
3494 rc = cifs_resend_rdata(
3495 rdata,
3496 &tmp_list, ctx);
3497 } else {
3498 rc = cifs_send_async_read(
3499 rdata->offset + got_bytes,
3500 rdata->bytes - got_bytes,
3501 rdata->cfile, cifs_sb,
3502 &tmp_list, ctx);
3503
3504 kref_put(&rdata->refcount,
3505 cifs_uncached_readdata_release);
3506 }
3507
3508 list_splice(&tmp_list, &ctx->list);
3509
3510 goto again;
3511 } else if (rdata->result)
3512 rc = rdata->result;
3513 else if (!ctx->direct_io)
3514 rc = cifs_readdata_to_iov(rdata, to);
3515
3516 /* if there was a short read -- discard anything left */
3517 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3518 rc = -ENODATA;
3519
3520 ctx->total_len += rdata->got_bytes;
3521 }
3522 list_del_init(&rdata->list);
3523 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3524 }
3525
3526 if (!ctx->direct_io) {
3527 for (i = 0; i < ctx->npages; i++) {
3528 if (ctx->should_dirty)
3529 set_page_dirty(ctx->bv[i].bv_page);
3530 put_page(ctx->bv[i].bv_page);
3531 }
3532
3533 ctx->total_len = ctx->len - iov_iter_count(to);
3534 }
3535
3536 cifs_stats_bytes_read(tcon, ctx->total_len);
3537
3538 /* mask nodata case */
3539 if (rc == -ENODATA)
3540 rc = 0;
3541
3542 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3543
3544 mutex_unlock(&ctx->aio_mutex);
3545
3546 if (ctx->iocb && ctx->iocb->ki_complete)
3547 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3548 else
3549 complete(&ctx->done);
3550 }
3551
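/*
 * Common implementation of the uncached and direct read paths: set up
 * an aio context, issue the reads via cifs_send_async_read(), and
 * either return -EIOCBQUEUED for async callers or wait for completion
 * and return the number of bytes read.
 */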
3552 static ssize_t __cifs_readv(
3553 struct kiocb *iocb, struct iov_iter *to, bool direct)
3554 {
3555 size_t len;
3556 struct file *file = iocb->ki_filp;
3557 struct cifs_sb_info *cifs_sb;
3558 struct cifsFileInfo *cfile;
3559 struct cifs_tcon *tcon;
3560 ssize_t rc, total_read = 0;
3561 loff_t offset = iocb->ki_pos;
3562 struct cifs_aio_ctx *ctx;
3563
3564 /*
3565 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3566 * so fall back to the data-copy read path.
3567 * This could be improved by getting pages directly in ITER_KVEC.
3568 */
3569 if (direct && to->type & ITER_KVEC) {
3570 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3571 direct = false;
3572 }
3573
3574 len = iov_iter_count(to);
3575 if (!len)
3576 return 0;
3577
3578 cifs_sb = CIFS_FILE_SB(file);
3579 cfile = file->private_data;
3580 tcon = tlink_tcon(cfile->tlink);
3581
3582 if (!tcon->ses->server->ops->async_readv)
3583 return -ENOSYS;
3584
3585 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3586 cifs_dbg(FYI, "attempting read on write only file instance\n");
3587
3588 ctx = cifs_aio_ctx_alloc();
3589 if (!ctx)
3590 return -ENOMEM;
3591
3592 ctx->cfile = cifsFileInfo_get(cfile);
3593
3594 if (!is_sync_kiocb(iocb))
3595 ctx->iocb = iocb;
3596
3597 if (iter_is_iovec(to))
3598 ctx->should_dirty = true;
3599
3600 if (direct) {
3601 ctx->pos = offset;
3602 ctx->direct_io = true;
3603 ctx->iter = *to;
3604 ctx->len = len;
3605 } else {
3606 rc = setup_aio_ctx_iter(ctx, to, READ);
3607 if (rc) {
3608 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3609 return rc;
3610 }
3611 len = ctx->len;
3612 }
3613
3614 /* grab a lock here because the read response handlers can access ctx */
3615 mutex_lock(&ctx->aio_mutex);
3616
3617 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3618
3619 /* if at least one read request send succeeded, then reset rc */
3620 if (!list_empty(&ctx->list))
3621 rc = 0;
3622
3623 mutex_unlock(&ctx->aio_mutex);
3624
3625 if (rc) {
3626 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3627 return rc;
3628 }
3629
3630 if (!is_sync_kiocb(iocb)) {
3631 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3632 return -EIOCBQUEUED;
3633 }
3634
3635 rc = wait_for_completion_killable(&ctx->done);
3636 if (rc) {
3637 mutex_lock(&ctx->aio_mutex);
3638 ctx->rc = rc = -EINTR;
3639 total_read = ctx->total_len;
3640 mutex_unlock(&ctx->aio_mutex);
3641 } else {
3642 rc = ctx->rc;
3643 total_read = ctx->total_len;
3644 }
3645
3646 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3647
3648 if (total_read) {
3649 iocb->ki_pos += total_read;
3650 return total_read;
3651 }
3652 return rc;
3653 }
3654
3655 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3656 {
3657 return __cifs_readv(iocb, to, true);
3658 }
3659
3660 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3661 {
3662 return __cifs_readv(iocb, to, false);
3663 }
3664
3665 ssize_t
3666 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3667 {
3668 struct inode *inode = file_inode(iocb->ki_filp);
3669 struct cifsInodeInfo *cinode = CIFS_I(inode);
3670 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3671 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3672 iocb->ki_filp->private_data;
3673 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3674 int rc = -EACCES;
3675
3676 /*
3677 * In strict cache mode we need to read from the server all the time
3678 * if we don't have a level II oplock because the server can delay mtime
3679 * changes - so we can't make a decision about invalidating the inode.
3680 * And we can also fail reading pages if there are mandatory locks
3681 * on pages affected by this read but not on the region from pos to
3682 * pos+len-1.
3683 */
3684 if (!CIFS_CACHE_READ(cinode))
3685 return cifs_user_readv(iocb, to);
3686
3687 if (cap_unix(tcon->ses) &&
3688 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3689 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3690 return generic_file_read_iter(iocb, to);
3691
3692 /*
3693 * We need to hold the sem to be sure nobody modifies the lock list
3694 * with a brlock that prevents reading.
3695 */
3696 down_read(&cinode->lock_sem);
3697 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3698 tcon->ses->server->vals->shared_lock_type,
3699 0, NULL, CIFS_READ_OP))
3700 rc = generic_file_read_iter(iocb, to);
3701 up_read(&cinode->lock_sem);
3702 return rc;
3703 }
3704
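/*
 * Synchronous read path: issue SMB reads of at most rsize bytes at a
 * time into the caller's buffer, reopening the file handle and retrying
 * on -EAGAIN, until read_size bytes have been read or an error occurs.
 */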
3705 static ssize_t
3706 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3707 {
3708 int rc = -EACCES;
3709 unsigned int bytes_read = 0;
3710 unsigned int total_read;
3711 unsigned int current_read_size;
3712 unsigned int rsize;
3713 struct cifs_sb_info *cifs_sb;
3714 struct cifs_tcon *tcon;
3715 struct TCP_Server_Info *server;
3716 unsigned int xid;
3717 char *cur_offset;
3718 struct cifsFileInfo *open_file;
3719 struct cifs_io_parms io_parms;
3720 int buf_type = CIFS_NO_BUFFER;
3721 __u32 pid;
3722
3723 xid = get_xid();
3724 cifs_sb = CIFS_FILE_SB(file);
3725
3726 /* FIXME: set up handlers for larger reads and/or convert to async */
3727 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3728
3729 if (file->private_data == NULL) {
3730 rc = -EBADF;
3731 free_xid(xid);
3732 return rc;
3733 }
3734 open_file = file->private_data;
3735 tcon = tlink_tcon(open_file->tlink);
3736 server = tcon->ses->server;
3737
3738 if (!server->ops->sync_read) {
3739 free_xid(xid);
3740 return -ENOSYS;
3741 }
3742
3743 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3744 pid = open_file->pid;
3745 else
3746 pid = current->tgid;
3747
3748 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3749 cifs_dbg(FYI, "attempting read on write only file instance\n");
3750
3751 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3752 total_read += bytes_read, cur_offset += bytes_read) {
3753 do {
3754 current_read_size = min_t(uint, read_size - total_read,
3755 rsize);
3756 /*
3757 * For Windows ME and 9x we do not want to request more
3758 * than was negotiated since the server will refuse the
3759 * read then.
3760 */
3761 if ((tcon->ses) && !(tcon->ses->capabilities &
3762 tcon->ses->server->vals->cap_large_files)) {
3763 current_read_size = min_t(uint,
3764 current_read_size, CIFSMaxBufSize);
3765 }
3766 if (open_file->invalidHandle) {
3767 rc = cifs_reopen_file(open_file, true);
3768 if (rc != 0)
3769 break;
3770 }
3771 io_parms.pid = pid;
3772 io_parms.tcon = tcon;
3773 io_parms.offset = *offset;
3774 io_parms.length = current_read_size;
3775 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3776 &bytes_read, &cur_offset,
3777 &buf_type);
3778 } while (rc == -EAGAIN);
3779
3780 if (rc || (bytes_read == 0)) {
3781 if (total_read) {
3782 break;
3783 } else {
3784 free_xid(xid);
3785 return rc;
3786 }
3787 } else {
3788 cifs_stats_bytes_read(tcon, total_read);
3789 *offset += bytes_read;
3790 }
3791 }
3792 free_xid(xid);
3793 return total_read;
3794 }
3795
3796 /*
3797 * If the page is mmap'ed into a process' page tables, then we need to make
3798 * sure that it doesn't change while being written back.
3799 */
3800 static vm_fault_t
3801 cifs_page_mkwrite(struct vm_fault *vmf)
3802 {
3803 struct page *page = vmf->page;
3804
3805 lock_page(page);
3806 return VM_FAULT_LOCKED;
3807 }
3808
3809 static const struct vm_operations_struct cifs_file_vm_ops = {
3810 .fault = filemap_fault,
3811 .map_pages = filemap_map_pages,
3812 .page_mkwrite = cifs_page_mkwrite,
3813 };
3814
3815 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3816 {
3817 int xid, rc = 0;
3818 struct inode *inode = file_inode(file);
3819
3820 xid = get_xid();
3821
3822 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3823 rc = cifs_zap_mapping(inode);
3824 if (!rc)
3825 rc = generic_file_mmap(file, vma);
3826 if (!rc)
3827 vma->vm_ops = &cifs_file_vm_ops;
3828
3829 free_xid(xid);
3830 return rc;
3831 }
3832
3833 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3834 {
3835 int rc, xid;
3836
3837 xid = get_xid();
3838
3839 rc = cifs_revalidate_file(file);
3840 if (rc)
3841 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3842 rc);
3843 if (!rc)
3844 rc = generic_file_mmap(file, vma);
3845 if (!rc)
3846 vma->vm_ops = &cifs_file_vm_ops;
3847
3848 free_xid(xid);
3849 return rc;
3850 }
3851
3852 static void
3853 cifs_readv_complete(struct work_struct *work)
3854 {
3855 unsigned int i, got_bytes;
3856 struct cifs_readdata *rdata = container_of(work,
3857 struct cifs_readdata, work);
3858
3859 got_bytes = rdata->got_bytes;
3860 for (i = 0; i < rdata->nr_pages; i++) {
3861 struct page *page = rdata->pages[i];
3862
3863 lru_cache_add_file(page);
3864
3865 if (rdata->result == 0 ||
3866 (rdata->result == -EAGAIN && got_bytes)) {
3867 flush_dcache_page(page);
3868 SetPageUptodate(page);
3869 }
3870
3871 unlock_page(page);
3872
3873 if (rdata->result == 0 ||
3874 (rdata->result == -EAGAIN && got_bytes))
3875 cifs_readpage_to_fscache(rdata->mapping->host, page);
3876
3877 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3878
3879 put_page(page);
3880 rdata->pages[i] = NULL;
3881 }
3882 kref_put(&rdata->refcount, cifs_readdata_release);
3883 }
3884
3885 static int
3886 readpages_fill_pages(struct TCP_Server_Info *server,
3887 struct cifs_readdata *rdata, struct iov_iter *iter,
3888 unsigned int len)
3889 {
3890 int result = 0;
3891 unsigned int i;
3892 u64 eof;
3893 pgoff_t eof_index;
3894 unsigned int nr_pages = rdata->nr_pages;
3895 unsigned int page_offset = rdata->page_offset;
3896
3897 /* determine the eof that the server (probably) has */
3898 eof = CIFS_I(rdata->mapping->host)->server_eof;
3899 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3900 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3901
3902 rdata->got_bytes = 0;
3903 rdata->tailsz = PAGE_SIZE;
3904 for (i = 0; i < nr_pages; i++) {
3905 struct page *page = rdata->pages[i];
3906 unsigned int to_read = rdata->pagesz;
3907 size_t n;
3908
3909 if (i == 0)
3910 to_read -= page_offset;
3911 else
3912 page_offset = 0;
3913
3914 n = to_read;
3915
3916 if (len >= to_read) {
3917 len -= to_read;
3918 } else if (len > 0) {
3919 /* enough for partial page, fill and zero the rest */
3920 zero_user(page, len + page_offset, to_read - len);
3921 n = rdata->tailsz = len;
3922 len = 0;
3923 } else if (page->index > eof_index) {
3924 /*
3925 * The VFS will not try to do readahead past the
3926 * i_size, but it's possible that we have outstanding
3927 * writes with gaps in the middle and the i_size hasn't
3928 * caught up yet. Populate those with zeroed out pages
3929 * to prevent the VFS from repeatedly attempting to
3930 * fill them until the writes are flushed.
3931 */
3932 zero_user(page, 0, PAGE_SIZE);
3933 lru_cache_add_file(page);
3934 flush_dcache_page(page);
3935 SetPageUptodate(page);
3936 unlock_page(page);
3937 put_page(page);
3938 rdata->pages[i] = NULL;
3939 rdata->nr_pages--;
3940 continue;
3941 } else {
3942 /* no need to hold page hostage */
3943 lru_cache_add_file(page);
3944 unlock_page(page);
3945 put_page(page);
3946 rdata->pages[i] = NULL;
3947 rdata->nr_pages--;
3948 continue;
3949 }
3950
3951 if (iter)
3952 result = copy_page_from_iter(
3953 page, page_offset, n, iter);
3954 #ifdef CONFIG_CIFS_SMB_DIRECT
3955 else if (rdata->mr)
3956 result = n;
3957 #endif
3958 else
3959 result = cifs_read_page_from_socket(
3960 server, page, page_offset, n);
3961 if (result < 0)
3962 break;
3963
3964 rdata->got_bytes += result;
3965 }
3966
3967 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3968 rdata->got_bytes : result;
3969 }
3970
3971 static int
3972 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3973 struct cifs_readdata *rdata, unsigned int len)
3974 {
3975 return readpages_fill_pages(server, rdata, NULL, len);
3976 }
3977
3978 static int
3979 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3980 struct cifs_readdata *rdata,
3981 struct iov_iter *iter)
3982 {
3983 return readpages_fill_pages(server, rdata, iter, iter->count);
3984 }
3985
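/*
 * Peel a run of consecutive pages off the readahead list, add them to
 * the page cache, and compute the file offset and byte count for a
 * single asynchronous read covering the run (bounded by rsize).
 */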
3986 static int
3987 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3988 unsigned int rsize, struct list_head *tmplist,
3989 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3990 {
3991 struct page *page, *tpage;
3992 unsigned int expected_index;
3993 int rc;
3994 gfp_t gfp = readahead_gfp_mask(mapping);
3995
3996 INIT_LIST_HEAD(tmplist);
3997
3998 page = lru_to_page(page_list);
3999
4000 /*
4001 * Lock the page and put it in the cache. Since no one else
4002 * should have access to this page, we're safe to simply set
4003 * PG_locked without checking it first.
4004 */
4005 __SetPageLocked(page);
4006 rc = add_to_page_cache_locked(page, mapping,
4007 page->index, gfp);
4008
4009 /* give up if we can't stick it in the cache */
4010 if (rc) {
4011 __ClearPageLocked(page);
4012 return rc;
4013 }
4014
4015 /* move first page to the tmplist */
4016 *offset = (loff_t)page->index << PAGE_SHIFT;
4017 *bytes = PAGE_SIZE;
4018 *nr_pages = 1;
4019 list_move_tail(&page->lru, tmplist);
4020
4021 /* now try and add more pages onto the request */
4022 expected_index = page->index + 1;
4023 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4024 /* discontinuity ? */
4025 if (page->index != expected_index)
4026 break;
4027
4028 /* would this page push the read over the rsize? */
4029 if (*bytes + PAGE_SIZE > rsize)
4030 break;
4031
4032 __SetPageLocked(page);
4033 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4034 __ClearPageLocked(page);
4035 break;
4036 }
4037 list_move_tail(&page->lru, tmplist);
4038 (*bytes) += PAGE_SIZE;
4039 expected_index++;
4040 (*nr_pages)++;
4041 }
4042 return rc;
4043 }
4044
4045 static int cifs_readpages(struct file *file, struct address_space *mapping,
4046 struct list_head *page_list, unsigned num_pages)
4047 {
4048 int rc;
4049 struct list_head tmplist;
4050 struct cifsFileInfo *open_file = file->private_data;
4051 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4052 struct TCP_Server_Info *server;
4053 pid_t pid;
4054 unsigned int xid;
4055
4056 xid = get_xid();
4057 /*
4058 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4059 * immediately if the cookie is negative.
4060 *
4061 * After this point, every page in the list might have PG_fscache set,
4062 * so we will need to clean that up from every page we don't use.
4063 */
4064 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4065 &num_pages);
4066 if (rc == 0) {
4067 free_xid(xid);
4068 return rc;
4069 }
4070
4071 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4072 pid = open_file->pid;
4073 else
4074 pid = current->tgid;
4075
4076 rc = 0;
4077 server = tlink_tcon(open_file->tlink)->ses->server;
4078
4079 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4080 __func__, file, mapping, num_pages);
4081
4082 /*
4083 * Start with the page at end of list and move it to private
4084 * list. Do the same with any following pages until we hit
4085 * the rsize limit, hit an index discontinuity, or run out of
4086 * pages. Issue the async read and then start the loop again
4087 * until the list is empty.
4088 *
4089 * Note that list order is important. The page_list is in
4090 * the order of declining indexes. When we put the pages in
4091 * the rdata->pages, then we want them in increasing order.
4092 */
4093 while (!list_empty(page_list)) {
4094 unsigned int i, nr_pages, bytes, rsize;
4095 loff_t offset;
4096 struct page *page, *tpage;
4097 struct cifs_readdata *rdata;
4098 unsigned credits;
4099
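/*
 * Wait for credits for this read; the server's wait_mtu_credits op may
 * also trim rsize down to what the granted credits actually cover. If
 * anything fails below, the credits are handed back with
 * add_credits_and_wake_if().
 */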
4100 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4101 &rsize, &credits);
4102 if (rc)
4103 break;
4104
4105 /*
4106 * Give up immediately if rsize is too small to read an entire
4107 * page. The VFS will fall back to readpage. However, we should
4108 * never reach this point since we set ra_pages to 0 when the
4109 * rsize is smaller than a cache page.
4110 */
4111 if (unlikely(rsize < PAGE_SIZE)) {
4112 add_credits_and_wake_if(server, credits, 0);
4113 free_xid(xid);
4114 return 0;
4115 }
4116
4117 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4118 &nr_pages, &offset, &bytes);
4119 if (rc) {
4120 add_credits_and_wake_if(server, credits, 0);
4121 break;
4122 }
4123
4124 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4125 if (!rdata) {
4126 /* best to give up if we're out of mem */
4127 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4128 list_del(&page->lru);
4129 lru_cache_add_file(page);
4130 unlock_page(page);
4131 put_page(page);
4132 }
4133 rc = -ENOMEM;
4134 add_credits_and_wake_if(server, credits, 0);
4135 break;
4136 }
4137
4138 rdata->cfile = cifsFileInfo_get(open_file);
4139 rdata->mapping = mapping;
4140 rdata->offset = offset;
4141 rdata->bytes = bytes;
4142 rdata->pid = pid;
4143 rdata->pagesz = PAGE_SIZE;
4144 rdata->tailsz = PAGE_SIZE;
4145 rdata->read_into_pages = cifs_readpages_read_into_pages;
4146 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4147 rdata->credits = credits;
4148
4149 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4150 list_del(&page->lru);
4151 rdata->pages[rdata->nr_pages++] = page;
4152 }
4153
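/*
 * If the file handle has gone stale, try to reopen it before issuing
 * the async read. On failure, release the credits, put the pages back
 * on the LRU and drop the rdata; the VFS can then fall back to
 * ->readpage for these pages.
 */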
4154 if (!rdata->cfile->invalidHandle ||
4155 !(rc = cifs_reopen_file(rdata->cfile, true)))
4156 rc = server->ops->async_readv(rdata);
4157 if (rc) {
4158 add_credits_and_wake_if(server, rdata->credits, 0);
4159 for (i = 0; i < rdata->nr_pages; i++) {
4160 page = rdata->pages[i];
4161 lru_cache_add_file(page);
4162 unlock_page(page);
4163 put_page(page);
4164 }
4165 /* Fall back to readpage in error/reconnect cases */
4166 kref_put(&rdata->refcount, cifs_readdata_release);
4167 break;
4168 }
4169
4170 kref_put(&rdata->refcount, cifs_readdata_release);
4171 }
4172
4173 /* Any pages that have been shown to fscache but didn't get added to
4174 * the pagecache must be uncached before they get returned to the
4175 * allocator.
4176 */
4177 cifs_fscache_readpages_cancel(mapping->host, page_list);
4178 free_xid(xid);
4179 return rc;
4180 }
4181
4182 /*
4183 * cifs_readpage_worker must be called with the page pinned
4184 */
4185 static int cifs_readpage_worker(struct file *file, struct page *page,
4186 loff_t *poffset)
4187 {
4188 char *read_data;
4189 int rc;
4190
4191 /* Is the page cached? */
4192 rc = cifs_readpage_from_fscache(file_inode(file), page);
4193 if (rc == 0)
4194 goto read_complete;
4195
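/*
 * Not in fscache (or the fscache read failed), so do a synchronous
 * read from the server into the kmapped page.
 */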
4196 read_data = kmap(page);
4197 /* for reads over a certain size could initiate async read ahead */
4198
4199 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4200
4201 if (rc < 0)
4202 goto io_error;
4203 else
4204 cifs_dbg(FYI, "Bytes read %d\n", rc);
4205
4206 /* we do not want atime to be less than mtime, it broke some apps */
4207 file_inode(file)->i_atime = current_time(file_inode(file));
4208 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4209 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4210 else
4211 file_inode(file)->i_atime = current_time(file_inode(file));
4212
4213 if (PAGE_SIZE > rc)
4214 memset(read_data + rc, 0, PAGE_SIZE - rc);
4215
4216 flush_dcache_page(page);
4217 SetPageUptodate(page);
4218
4219 /* send this page to the cache */
4220 cifs_readpage_to_fscache(file_inode(file), page);
4221
4222 rc = 0;
4223
4224 io_error:
4225 kunmap(page);
4226 unlock_page(page);
4227
4228 read_complete:
4229 return rc;
4230 }
4231
4232 static int cifs_readpage(struct file *file, struct page *page)
4233 {
4234 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4235 int rc = -EACCES;
4236 unsigned int xid;
4237
4238 xid = get_xid();
4239
4240 if (file->private_data == NULL) {
4241 rc = -EBADF;
4242 free_xid(xid);
4243 return rc;
4244 }
4245
4246 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4247 page, (int)offset, (int)offset);
4248
4249 rc = cifs_readpage_worker(file, page, &offset);
4250
4251 free_xid(xid);
4252 return rc;
4253 }
4254
4255 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4256 {
4257 struct cifsFileInfo *open_file;
4258 struct cifs_tcon *tcon =
4259 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4260
4261 spin_lock(&tcon->open_file_lock);
4262 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4263 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4264 spin_unlock(&tcon->open_file_lock);
4265 return 1;
4266 }
4267 }
4268 spin_unlock(&tcon->open_file_lock);
4269 return 0;
4270 }
4271
4272 /* We do not want to update the file size from the server for inodes
4273 open for write, to avoid races with writepage extending the file.
4274 In the future we could consider refreshing the inode only on
4275 increases in the file size, but this is tricky to do without
4276 racing with writebehind page caching in the current Linux
4277 kernel design */
4278 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4279 {
4280 if (!cifsInode)
4281 return true;
4282
4283 if (is_inode_writable(cifsInode)) {
4284 /* This inode is open for write at least once */
4285 struct cifs_sb_info *cifs_sb;
4286
4287 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4288 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4289 /* since no page cache to corrupt on directio
4290 we can change size safely */
4291 return true;
4292 }
4293
4294 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4295 return true;
4296
4297 return false;
4298 } else
4299 return true;
4300 }
4301
4302 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4303 loff_t pos, unsigned len, unsigned flags,
4304 struct page **pagep, void **fsdata)
4305 {
4306 int oncethru = 0;
4307 pgoff_t index = pos >> PAGE_SHIFT;
4308 loff_t offset = pos & (PAGE_SIZE - 1);
4309 loff_t page_start = pos & PAGE_MASK;
4310 loff_t i_size;
4311 struct page *page;
4312 int rc = 0;
4313
4314 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4315
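/*
 * Grab and lock the page we are about to write to. If it is not already
 * uptodate and none of the shortcuts below apply, read it in once via
 * cifs_readpage_worker() and retry from here; 'oncethru' ensures we only
 * attempt that read a single time.
 */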
4316 start:
4317 page = grab_cache_page_write_begin(mapping, index, flags);
4318 if (!page) {
4319 rc = -ENOMEM;
4320 goto out;
4321 }
4322
4323 if (PageUptodate(page))
4324 goto out;
4325
4326 /*
4327 * If we write a full page it will be up to date, no need to read from
4328 * the server. If the write is short, we'll end up doing a sync write
4329 * instead.
4330 */
4331 if (len == PAGE_SIZE)
4332 goto out;
4333
4334 /*
4335 * optimize away the read when we have an oplock, and we're not
4336 * expecting to use any of the data we'd be reading in. That
4337 * is, when the page lies beyond the EOF, or straddles the EOF
4338 * and the write will cover all of the existing data.
4339 */
4340 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4341 i_size = i_size_read(mapping->host);
4342 if (page_start >= i_size ||
4343 (offset == 0 && (pos + len) >= i_size)) {
4344 zero_user_segments(page, 0, offset,
4345 offset + len,
4346 PAGE_SIZE);
4347 /*
4348 * PageChecked means that the parts of the page
4349 * to which we're not writing are considered up
4350 * to date. Once the data is copied to the
4351 * page, it can be set uptodate.
4352 */
4353 SetPageChecked(page);
4354 goto out;
4355 }
4356 }
4357
4358 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4359 /*
4360 * might as well read a page, it is fast enough. If we get
4361 * an error, we don't need to return it. cifs_write_end will
4362 * do a sync write instead since PG_uptodate isn't set.
4363 */
4364 cifs_readpage_worker(file, page, &page_start);
4365 put_page(page);
4366 oncethru = 1;
4367 goto start;
4368 } else {
4369 /* we could try using another file handle if there is one, but
4370 how would we lock it to prevent a close of that handle from
4371 racing with this read? In any case this will be written out
4372 by write_end, so it is fine */
4373 }
4374 out:
4375 *pagep = page;
4376 return rc;
4377 }
4378
4379 static int cifs_release_page(struct page *page, gfp_t gfp)
4380 {
4381 if (PagePrivate(page))
4382 return 0;
4383
4384 return cifs_fscache_release_page(page, gfp);
4385 }
4386
4387 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4388 unsigned int length)
4389 {
4390 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4391
4392 if (offset == 0 && length == PAGE_SIZE)
4393 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4394 }
4395
4396 static int cifs_launder_page(struct page *page)
4397 {
4398 int rc = 0;
4399 loff_t range_start = page_offset(page);
4400 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4401 struct writeback_control wbc = {
4402 .sync_mode = WB_SYNC_ALL,
4403 .nr_to_write = 0,
4404 .range_start = range_start,
4405 .range_end = range_end,
4406 };
4407
4408 cifs_dbg(FYI, "Launder page: %p\n", page);
4409
4410 if (clear_page_dirty_for_io(page))
4411 rc = cifs_writepage_locked(page, &wbc);
4412
4413 cifs_fscache_invalidate_page(page, page->mapping->host);
4414 return rc;
4415 }
4416
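/*
 * Worker run when the server breaks an oplock or lease. It waits for any
 * pending writers, downgrades the cached oplock state (dropping caching
 * entirely if mandatory byte-range locks are held), flushes and, when read
 * caching is lost, invalidates the page cache, pushes cached byte-range
 * locks to the server, and finally acknowledges the break unless it was
 * cancelled.
 */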
4417 void cifs_oplock_break(struct work_struct *work)
4418 {
4419 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4420 oplock_break);
4421 struct inode *inode = d_inode(cfile->dentry);
4422 struct cifsInodeInfo *cinode = CIFS_I(inode);
4423 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4424 struct TCP_Server_Info *server = tcon->ses->server;
4425 int rc = 0;
4426
4427 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4428 TASK_UNINTERRUPTIBLE);
4429
4430 server->ops->downgrade_oplock(server, cinode,
4431 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4432
4433 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4434 cifs_has_mand_locks(cinode)) {
4435 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4436 inode);
4437 cinode->oplock = 0;
4438 }
4439
4440 if (inode && S_ISREG(inode->i_mode)) {
4441 if (CIFS_CACHE_READ(cinode))
4442 break_lease(inode, O_RDONLY);
4443 else
4444 break_lease(inode, O_WRONLY);
4445 rc = filemap_fdatawrite(inode->i_mapping);
4446 if (!CIFS_CACHE_READ(cinode)) {
4447 rc = filemap_fdatawait(inode->i_mapping);
4448 mapping_set_error(inode->i_mapping, rc);
4449 cifs_zap_mapping(inode);
4450 }
4451 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4452 }
4453
4454 rc = cifs_push_locks(cfile);
4455 if (rc)
4456 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4457
4458 /*
4459 * Releasing a stale oplock after a recent reconnect of the SMB session,
4460 * using a now-incorrect file handle, is not a data integrity issue. But
4461 * do not bother sending an oplock release if the session to the server
4462 * is still disconnected, since the server has already released the oplock.
4463 */
4464 if (!cfile->oplock_break_cancelled) {
4465 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4466 cinode);
4467 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4468 }
4469 cifs_done_oplock_break(cinode);
4470 }
4471
4472 /*
4473 * The presence of cifs_direct_io() in the address space ops vector
4474 * allows the open() O_DIRECT flag, which would have failed otherwise.
4475 *
4476 * In the non-cached mode (mount with cache=none), we shunt off direct
4477 * read and write requests, so this method should never be called.
4478 *
4479 * Direct IO is not yet supported in the cached mode.
4480 */
4481 static ssize_t
4482 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4483 {
4484 /*
4485 * FIXME
4486 * Eventually need to support direct IO for non forcedirectio mounts
4487 */
4488 return -EINVAL;
4489 }
4490
4491
4492 const struct address_space_operations cifs_addr_ops = {
4493 .readpage = cifs_readpage,
4494 .readpages = cifs_readpages,
4495 .writepage = cifs_writepage,
4496 .writepages = cifs_writepages,
4497 .write_begin = cifs_write_begin,
4498 .write_end = cifs_write_end,
4499 .set_page_dirty = __set_page_dirty_nobuffers,
4500 .releasepage = cifs_release_page,
4501 .direct_IO = cifs_direct_io,
4502 .invalidatepage = cifs_invalidate_page,
4503 .launder_page = cifs_launder_page,
4504 };
4505
4506 /*
4507 * cifs_readpages requires the server to support a buffer large enough to
4508 * contain the header plus one complete page of data. Otherwise, we need
4509 * to leave cifs_readpages out of the address space operations.
4510 */
4511 const struct address_space_operations cifs_addr_ops_smallbuf = {
4512 .readpage = cifs_readpage,
4513 .writepage = cifs_writepage,
4514 .writepages = cifs_writepages,
4515 .write_begin = cifs_write_begin,
4516 .write_end = cifs_write_end,
4517 .set_page_dirty = __set_page_dirty_nobuffers,
4518 .releasepage = cifs_release_page,
4519 .invalidatepage = cifs_invalidate_page,
4520 .launder_page = cifs_launder_page,
4521 };