1 /*
2 * fs/cifs/file.c
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
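/*
 * Convert the VFS open flags (O_ACCMODE bits) into the desired access
 * flags requested in the SMB/CIFS open.
 */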
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request; it can
56 cause unnecessary access-denied errors on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
59 }
60
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
64 }
65
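/*
 * Map VFS open flags to the SMB_O_* flags used by the POSIX open call
 * (CIFS Unix/POSIX extensions).
 */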
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68 u32 posix_flags = 0;
69
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
76
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
84
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
96
97 return posix_flags;
98 }
99
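/*
 * Choose the CIFS create disposition (FILE_CREATE, FILE_OVERWRITE_IF,
 * FILE_OPEN_IF, ...) from the O_CREAT/O_EXCL/O_TRUNC flags.
 */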
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
112 }
113
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
125
126 cifs_dbg(FYI, "posix open %s\n", full_path);
127
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
131
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
136 }
137
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
140
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
146
147 if (rc)
148 goto posix_open_ret;
149
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
155
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
165 }
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
168 }
169
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
173 }
174
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
179 {
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
187
188 if (!server->ops->open)
189 return -ENOSYS;
190
191 desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194 * open flag mapping table:
195 *
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
203 *
204 * Note that there is no POSIX open flag that maps directly to the
205 * FILE_SUPERSEDE disposition (i.e. create whether or not the file
206 * exists); O_CREAT | O_TRUNC is similar, but it truncates an existing
207 * file rather than recreating it as FILE_SUPERSEDE does (which uses
208 * the attributes / metadata passed in on the open call).
209 *
210 * O_SYNC is a reasonable match to the CIFS writethrough flag
211 * and the read/write flags match reasonably. O_LARGEFILE
212 * is irrelevant because largefile support is always used
213 * by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
216
217 disposition = cifs_get_disposition(f_flags);
218
219 /* BB pass O_SYNC flag through on file attributes .. BB */
220
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
231
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
234
235 oparms.tcon = tcon;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
241 oparms.fid = fid;
242 oparms.reconnect = false;
243
244 rc = server->ops->open(xid, &oparms, oplock, buf);
245
246 if (rc)
247 goto out;
248
249 if (tcon->unix_ext)
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 xid);
252 else
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254 xid, fid);
255
256 out:
257 kfree(buf);
258 return rc;
259 }
260
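/*
 * Return true if any open file instance on this inode holds cached
 * mandatory byte-range locks.
 */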
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264 struct cifs_fid_locks *cur;
265 bool has_locks = false;
266
267 down_read(&cinode->lock_sem);
268 list_for_each_entry(cur, &cinode->llist, llist) {
269 if (!list_empty(&cur->locks)) {
270 has_locks = true;
271 break;
272 }
273 }
274 up_read(&cinode->lock_sem);
275 return has_locks;
276 }
277
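/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * tcon and inode open-file lists, and record the oplock state granted
 * by the server for this handle.
 */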
278 struct cifsFileInfo *
279 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
280 struct tcon_link *tlink, __u32 oplock)
281 {
282 struct dentry *dentry = file_dentry(file);
283 struct inode *inode = d_inode(dentry);
284 struct cifsInodeInfo *cinode = CIFS_I(inode);
285 struct cifsFileInfo *cfile;
286 struct cifs_fid_locks *fdlocks;
287 struct cifs_tcon *tcon = tlink_tcon(tlink);
288 struct TCP_Server_Info *server = tcon->ses->server;
289
290 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
291 if (cfile == NULL)
292 return cfile;
293
294 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
295 if (!fdlocks) {
296 kfree(cfile);
297 return NULL;
298 }
299
300 INIT_LIST_HEAD(&fdlocks->locks);
301 fdlocks->cfile = cfile;
302 cfile->llist = fdlocks;
303 down_write(&cinode->lock_sem);
304 list_add(&fdlocks->llist, &cinode->llist);
305 up_write(&cinode->lock_sem);
306
307 cfile->count = 1;
308 cfile->pid = current->tgid;
309 cfile->uid = current_fsuid();
310 cfile->dentry = dget(dentry);
311 cfile->f_flags = file->f_flags;
312 cfile->invalidHandle = false;
313 cfile->tlink = cifs_get_tlink(tlink);
314 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
315 mutex_init(&cfile->fh_mutex);
316 spin_lock_init(&cfile->file_info_lock);
317
318 cifs_sb_active(inode->i_sb);
319
320 /*
321 * If the server returned a read oplock and we have mandatory brlocks,
322 * set oplock level to None.
323 */
324 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
325 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
326 oplock = 0;
327 }
328
329 spin_lock(&tcon->open_file_lock);
330 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
331 oplock = fid->pending_open->oplock;
332 list_del(&fid->pending_open->olist);
333
334 fid->purge_cache = false;
335 server->ops->set_fid(cfile, fid, oplock);
336
337 list_add(&cfile->tlist, &tcon->openFileList);
338 atomic_inc(&tcon->num_local_opens);
339
340 /* if readable file instance, put it first in the list */
341 if (file->f_mode & FMODE_READ)
342 list_add(&cfile->flist, &cinode->openFileList);
343 else
344 list_add_tail(&cfile->flist, &cinode->openFileList);
345 spin_unlock(&tcon->open_file_lock);
346
347 if (fid->purge_cache)
348 cifs_zap_mapping(inode);
349
350 file->private_data = cfile;
351 return cfile;
352 }
353
354 struct cifsFileInfo *
355 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
356 {
357 spin_lock(&cifs_file->file_info_lock);
358 cifsFileInfo_get_locked(cifs_file);
359 spin_unlock(&cifs_file->file_info_lock);
360 return cifs_file;
361 }
362
363 /*
364 * Release a reference on the file private data. This may involve closing
365 * the filehandle out on the server. Must be called without holding
366 * tcon->open_file_lock and cifs_file->file_info_lock.
367 */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370 struct inode *inode = d_inode(cifs_file->dentry);
371 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372 struct TCP_Server_Info *server = tcon->ses->server;
373 struct cifsInodeInfo *cifsi = CIFS_I(inode);
374 struct super_block *sb = inode->i_sb;
375 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376 struct cifsLockInfo *li, *tmp;
377 struct cifs_fid fid;
378 struct cifs_pending_open open;
379 bool oplock_break_cancelled;
380
381 spin_lock(&tcon->open_file_lock);
382
383 spin_lock(&cifs_file->file_info_lock);
384 if (--cifs_file->count > 0) {
385 spin_unlock(&cifs_file->file_info_lock);
386 spin_unlock(&tcon->open_file_lock);
387 return;
388 }
389 spin_unlock(&cifs_file->file_info_lock);
390
391 if (server->ops->get_lease_key)
392 server->ops->get_lease_key(inode, &fid);
393
394 /* store open in pending opens to make sure we don't miss lease break */
395 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397 /* remove it from the lists */
398 list_del(&cifs_file->flist);
399 list_del(&cifs_file->tlist);
400 atomic_dec(&tcon->num_local_opens);
401
402 if (list_empty(&cifsi->openFileList)) {
403 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404 d_inode(cifs_file->dentry));
405 /*
406 * In strict cache mode we need to invalidate the mapping on the last
407 * close because it may cause an error when we open this file
408 * again and get at least a level II oplock.
409 */
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412 cifs_set_oplock_level(cifsi, 0);
413 }
414
415 spin_unlock(&tcon->open_file_lock);
416
417 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420 struct TCP_Server_Info *server = tcon->ses->server;
421 unsigned int xid;
422
423 xid = get_xid();
424 if (server->ops->close)
425 server->ops->close(xid, tcon, &cifs_file->fid);
426 _free_xid(xid);
427 }
428
429 if (oplock_break_cancelled)
430 cifs_done_oplock_break(cifsi);
431
432 cifs_del_pending_open(&open);
433
434 /*
435 * Delete any outstanding lock records. We'll lose them when the file
436 * is closed anyway.
437 */
438 down_write(&cifsi->lock_sem);
439 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440 list_del(&li->llist);
441 cifs_del_lock_waiters(li);
442 kfree(li);
443 }
444 list_del(&cifs_file->llist->llist);
445 kfree(cifs_file->llist);
446 up_write(&cifsi->lock_sem);
447
448 cifs_put_tlink(cifs_file->tlink);
449 dput(cifs_file->dentry);
450 cifs_sb_deactive(sb);
451 kfree(cifs_file);
452 }
453
454 int cifs_open(struct inode *inode, struct file *file)
455
456 {
457 int rc = -EACCES;
458 unsigned int xid;
459 __u32 oplock;
460 struct cifs_sb_info *cifs_sb;
461 struct TCP_Server_Info *server;
462 struct cifs_tcon *tcon;
463 struct tcon_link *tlink;
464 struct cifsFileInfo *cfile = NULL;
465 char *full_path = NULL;
466 bool posix_open_ok = false;
467 struct cifs_fid fid;
468 struct cifs_pending_open open;
469
470 xid = get_xid();
471
472 cifs_sb = CIFS_SB(inode->i_sb);
473 tlink = cifs_sb_tlink(cifs_sb);
474 if (IS_ERR(tlink)) {
475 free_xid(xid);
476 return PTR_ERR(tlink);
477 }
478 tcon = tlink_tcon(tlink);
479 server = tcon->ses->server;
480
481 full_path = build_path_from_dentry(file_dentry(file));
482 if (full_path == NULL) {
483 rc = -ENOMEM;
484 goto out;
485 }
486
487 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
488 inode, file->f_flags, full_path);
489
490 if (file->f_flags & O_DIRECT &&
491 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
492 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
493 file->f_op = &cifs_file_direct_nobrl_ops;
494 else
495 file->f_op = &cifs_file_direct_ops;
496 }
497
498 if (server->oplocks)
499 oplock = REQ_OPLOCK;
500 else
501 oplock = 0;
502
503 if (!tcon->broken_posix_open && tcon->unix_ext &&
504 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
505 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506 /* can not refresh inode info since size could be stale */
507 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
508 cifs_sb->mnt_file_mode /* ignored */,
509 file->f_flags, &oplock, &fid.netfid, xid);
510 if (rc == 0) {
511 cifs_dbg(FYI, "posix open succeeded\n");
512 posix_open_ok = true;
513 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
514 if (tcon->ses->serverNOS)
515 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
516 tcon->ses->serverName,
517 tcon->ses->serverNOS);
518 tcon->broken_posix_open = true;
519 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
520 (rc != -EOPNOTSUPP)) /* path not found or net err */
521 goto out;
522 /*
523 * Else fall through to retry the open the old way on network I/O
524 * or DFS errors.
525 */
526 }
527
528 if (server->ops->get_lease_key)
529 server->ops->get_lease_key(inode, &fid);
530
531 cifs_add_pending_open(&fid, tlink, &open);
532
533 if (!posix_open_ok) {
534 if (server->ops->get_lease_key)
535 server->ops->get_lease_key(inode, &fid);
536
537 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538 file->f_flags, &oplock, &fid, xid);
539 if (rc) {
540 cifs_del_pending_open(&open);
541 goto out;
542 }
543 }
544
545 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546 if (cfile == NULL) {
547 if (server->ops->close)
548 server->ops->close(xid, tcon, &fid);
549 cifs_del_pending_open(&open);
550 rc = -ENOMEM;
551 goto out;
552 }
553
554 cifs_fscache_set_inode_cookie(inode, file);
555
556 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557 /*
558 * Time to set mode which we can not set earlier due to
559 * problems creating new read-only files.
560 */
561 struct cifs_unix_set_info_args args = {
562 .mode = inode->i_mode,
563 .uid = INVALID_UID, /* no change */
564 .gid = INVALID_GID, /* no change */
565 .ctime = NO_CHANGE_64,
566 .atime = NO_CHANGE_64,
567 .mtime = NO_CHANGE_64,
568 .device = 0,
569 };
570 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571 cfile->pid);
572 }
573
574 out:
575 kfree(full_path);
576 free_xid(xid);
577 cifs_put_tlink(tlink);
578 return rc;
579 }
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
583 /*
584 * Try to reacquire byte range locks that were released when session
585 * to server was lost.
586 */
587 static int
588 cifs_relock_file(struct cifsFileInfo *cfile)
589 {
590 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
591 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
592 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
593 int rc = 0;
594
595 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
596 if (cinode->can_cache_brlcks) {
597 /* can cache locks - no need to relock */
598 up_read(&cinode->lock_sem);
599 return rc;
600 }
601
602 if (cap_unix(tcon->ses) &&
603 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
604 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
605 rc = cifs_push_posix_locks(cfile);
606 else
607 rc = tcon->ses->server->ops->push_mand_locks(cfile);
608
609 up_read(&cinode->lock_sem);
610 return rc;
611 }
612
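/*
 * Reopen a file handle that has been invalidated (e.g. after a
 * reconnect); when can_flush is set, flush dirty pages and refresh the
 * inode info from the server.
 */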
613 static int
614 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
615 {
616 int rc = -EACCES;
617 unsigned int xid;
618 __u32 oplock;
619 struct cifs_sb_info *cifs_sb;
620 struct cifs_tcon *tcon;
621 struct TCP_Server_Info *server;
622 struct cifsInodeInfo *cinode;
623 struct inode *inode;
624 char *full_path = NULL;
625 int desired_access;
626 int disposition = FILE_OPEN;
627 int create_options = CREATE_NOT_DIR;
628 struct cifs_open_parms oparms;
629
630 xid = get_xid();
631 mutex_lock(&cfile->fh_mutex);
632 if (!cfile->invalidHandle) {
633 mutex_unlock(&cfile->fh_mutex);
634 rc = 0;
635 free_xid(xid);
636 return rc;
637 }
638
639 inode = d_inode(cfile->dentry);
640 cifs_sb = CIFS_SB(inode->i_sb);
641 tcon = tlink_tcon(cfile->tlink);
642 server = tcon->ses->server;
643
644 /*
645 * Cannot grab the rename sem here because various ops, including those
646 * that already hold the rename sem, can end up causing writepage to get
647 * called; if the server was down, that means we end up here, and we
648 * can never tell whether the caller already holds the rename_sem.
649 */
650 full_path = build_path_from_dentry(cfile->dentry);
651 if (full_path == NULL) {
652 rc = -ENOMEM;
653 mutex_unlock(&cfile->fh_mutex);
654 free_xid(xid);
655 return rc;
656 }
657
658 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
659 inode, cfile->f_flags, full_path);
660
661 if (tcon->ses->server->oplocks)
662 oplock = REQ_OPLOCK;
663 else
664 oplock = 0;
665
666 if (tcon->unix_ext && cap_unix(tcon->ses) &&
667 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
668 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
669 /*
670 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
671 * original open. Must mask them off for a reopen.
672 */
673 unsigned int oflags = cfile->f_flags &
674 ~(O_CREAT | O_EXCL | O_TRUNC);
675
676 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
677 cifs_sb->mnt_file_mode /* ignored */,
678 oflags, &oplock, &cfile->fid.netfid, xid);
679 if (rc == 0) {
680 cifs_dbg(FYI, "posix reopen succeeded\n");
681 oparms.reconnect = true;
682 goto reopen_success;
683 }
684 /*
685 * Fall through to retry the open the old way on errors; in the
686 * reconnect path especially, it is important to retry hard.
687 */
688 }
689
690 desired_access = cifs_convert_flags(cfile->f_flags);
691
692 if (backup_cred(cifs_sb))
693 create_options |= CREATE_OPEN_BACKUP_INTENT;
694
695 if (server->ops->get_lease_key)
696 server->ops->get_lease_key(inode, &cfile->fid);
697
698 oparms.tcon = tcon;
699 oparms.cifs_sb = cifs_sb;
700 oparms.desired_access = desired_access;
701 oparms.create_options = create_options;
702 oparms.disposition = disposition;
703 oparms.path = full_path;
704 oparms.fid = &cfile->fid;
705 oparms.reconnect = true;
706
707 /*
708 * Cannot refresh the inode by passing in a file_info buf to be returned
709 * by ops->open and then calling get_inode_info with the returned buf,
710 * since the file might have write-behind data that needs to be flushed
711 * and the server's version of the file size can be stale. If we knew for
712 * sure that the inode was not dirty locally, we could do this.
713 */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 if (rc == -ENOENT && oparms.reconnect == false) {
716 /* durable handle timeout is expired - open the file again */
717 rc = server->ops->open(xid, &oparms, &oplock, NULL);
718 /* indicate that we need to relock the file */
719 oparms.reconnect = true;
720 }
721
722 if (rc) {
723 mutex_unlock(&cfile->fh_mutex);
724 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
725 cifs_dbg(FYI, "oplock: %d\n", oplock);
726 goto reopen_error_exit;
727 }
728
729 reopen_success:
730 cfile->invalidHandle = false;
731 mutex_unlock(&cfile->fh_mutex);
732 cinode = CIFS_I(inode);
733
734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping);
736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
738
739 if (tcon->unix_ext)
740 rc = cifs_get_inode_info_unix(&inode, full_path,
741 inode->i_sb, xid);
742 else
743 rc = cifs_get_inode_info(&inode, full_path, NULL,
744 inode->i_sb, xid, NULL);
745 }
746 /*
747 * Else we are already writing out data to the server and could deadlock
748 * if we tried to flush data; since we do not know whether we have data
749 * that would invalidate the current end of file on the server, we cannot
750 * go to the server to get the new inode info.
751 */
752
753 /*
754 * If the server returned a read oplock and we have mandatory brlocks,
755 * set oplock level to None.
756 */
757 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
758 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
759 oplock = 0;
760 }
761
762 server->ops->set_fid(cfile, &cfile->fid, oplock);
763 if (oparms.reconnect)
764 cifs_relock_file(cfile);
765
766 reopen_error_exit:
767 kfree(full_path);
768 free_xid(xid);
769 return rc;
770 }
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774 if (file->private_data != NULL) {
775 cifsFileInfo_put(file->private_data);
776 file->private_data = NULL;
777 }
778
779 /* return code from the ->release op is always ignored */
780 return 0;
781 }
782
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786 struct cifsFileInfo *open_file;
787 struct list_head *tmp;
788 struct list_head *tmp1;
789 struct list_head tmp_list;
790
791 if (!tcon->use_persistent || !tcon->need_reopen_files)
792 return;
793
794 tcon->need_reopen_files = false;
795
796 cifs_dbg(FYI, "Reopen persistent handles\n");
797 INIT_LIST_HEAD(&tmp_list);
798
799 /* list all files open on tree connection, reopen persistent handles */
800 spin_lock(&tcon->open_file_lock);
801 list_for_each(tmp, &tcon->openFileList) {
802 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803 if (!open_file->invalidHandle)
804 continue;
805 cifsFileInfo_get(open_file);
806 list_add_tail(&open_file->rlist, &tmp_list);
807 }
808 spin_unlock(&tcon->open_file_lock);
809
810 list_for_each_safe(tmp, tmp1, &tmp_list) {
811 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812 if (cifs_reopen_file(open_file, false /* do not flush */))
813 tcon->need_reopen_files = true;
814 list_del_init(&open_file->rlist);
815 cifsFileInfo_put(open_file);
816 }
817 }
818
819 int cifs_closedir(struct inode *inode, struct file *file)
820 {
821 int rc = 0;
822 unsigned int xid;
823 struct cifsFileInfo *cfile = file->private_data;
824 struct cifs_tcon *tcon;
825 struct TCP_Server_Info *server;
826 char *buf;
827
828 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
829
830 if (cfile == NULL)
831 return rc;
832
833 xid = get_xid();
834 tcon = tlink_tcon(cfile->tlink);
835 server = tcon->ses->server;
836
837 cifs_dbg(FYI, "Freeing private data in close dir\n");
838 spin_lock(&cfile->file_info_lock);
839 if (server->ops->dir_needs_close(cfile)) {
840 cfile->invalidHandle = true;
841 spin_unlock(&cfile->file_info_lock);
842 if (server->ops->close_dir)
843 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
844 else
845 rc = -ENOSYS;
846 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
847 /* not much we can do if it fails anyway, ignore rc */
848 rc = 0;
849 } else
850 spin_unlock(&cfile->file_info_lock);
851
852 buf = cfile->srch_inf.ntwrk_buf_start;
853 if (buf) {
854 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
855 cfile->srch_inf.ntwrk_buf_start = NULL;
856 if (cfile->srch_inf.smallBuf)
857 cifs_small_buf_release(buf);
858 else
859 cifs_buf_release(buf);
860 }
861
862 cifs_put_tlink(cfile->tlink);
863 kfree(file->private_data);
864 file->private_data = NULL;
865 /* BB can we lock the filestruct while this is going on? */
866 free_xid(xid);
867 return rc;
868 }
869
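/* Allocate and initialize a byte-range lock record for the given range. */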
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873 struct cifsLockInfo *lock =
874 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875 if (!lock)
876 return lock;
877 lock->offset = offset;
878 lock->length = length;
879 lock->type = type;
880 lock->pid = current->tgid;
881 lock->flags = flags;
882 INIT_LIST_HEAD(&lock->blist);
883 init_waitqueue_head(&lock->block_q);
884 return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890 struct cifsLockInfo *li, *tmp;
891 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892 list_del_init(&li->blist);
893 wake_up(&li->block_q);
894 }
895 }
896
897 #define CIFS_LOCK_OP 0
898 #define CIFS_READ_OP 1
899 #define CIFS_WRITE_OP 2
900
901 /* @rw_check : 0 - no op, 1 - read, 2 - write */
902 static bool
903 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
904 __u64 length, __u8 type, __u16 flags,
905 struct cifsFileInfo *cfile,
906 struct cifsLockInfo **conf_lock, int rw_check)
907 {
908 struct cifsLockInfo *li;
909 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
910 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
911
912 list_for_each_entry(li, &fdlocks->locks, llist) {
913 if (offset + length <= li->offset ||
914 offset >= li->offset + li->length)
915 continue;
916 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
917 server->ops->compare_fids(cfile, cur_cfile)) {
918 /* shared lock prevents write op through the same fid */
919 if (!(li->type & server->vals->shared_lock_type) ||
920 rw_check != CIFS_WRITE_OP)
921 continue;
922 }
923 if ((type & server->vals->shared_lock_type) &&
924 ((server->ops->compare_fids(cfile, cur_cfile) &&
925 current->tgid == li->pid) || type == li->type))
926 continue;
927 if (rw_check == CIFS_LOCK_OP &&
928 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
929 server->ops->compare_fids(cfile, cur_cfile))
930 continue;
931 if (conf_lock)
932 *conf_lock = li;
933 return true;
934 }
935 return false;
936 }
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940 __u8 type, __u16 flags,
941 struct cifsLockInfo **conf_lock, int rw_check)
942 {
943 bool rc = false;
944 struct cifs_fid_locks *cur;
945 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947 list_for_each_entry(cur, &cinode->llist, llist) {
948 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949 flags, cfile, conf_lock,
950 rw_check);
951 if (rc)
952 break;
953 }
954
955 return rc;
956 }
957
958 /*
959 * Check if there is another lock that prevents us from setting the lock
960 * (mandatory style). If such a lock exists, update the flock structure
961 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
962 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
963 * need to send a request to the server, or 1 otherwise.
964 */
965 static int
966 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
967 __u8 type, struct file_lock *flock)
968 {
969 int rc = 0;
970 struct cifsLockInfo *conf_lock;
971 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
972 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
973 bool exist;
974
975 down_read(&cinode->lock_sem);
976
977 exist = cifs_find_lock_conflict(cfile, offset, length, type,
978 flock->fl_flags, &conf_lock,
979 CIFS_LOCK_OP);
980 if (exist) {
981 flock->fl_start = conf_lock->offset;
982 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
983 flock->fl_pid = conf_lock->pid;
984 if (conf_lock->type & server->vals->shared_lock_type)
985 flock->fl_type = F_RDLCK;
986 else
987 flock->fl_type = F_WRLCK;
988 } else if (!cinode->can_cache_brlcks)
989 rc = 1;
990 else
991 flock->fl_type = F_UNLCK;
992
993 up_read(&cinode->lock_sem);
994 return rc;
995 }
996
997 static void
998 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999 {
1000 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001 down_write(&cinode->lock_sem);
1002 list_add_tail(&lock->llist, &cfile->llist->locks);
1003 up_write(&cinode->lock_sem);
1004 }
1005
1006 /*
1007 * Set the byte-range lock (mandatory style). Returns:
1008 * 1) 0, if we set the lock and don't need to send a request to the server;
1009 * 2) 1, if no locks prevent us but we need to send a request to the server;
1010 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011 */
1012 static int
1013 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014 bool wait)
1015 {
1016 struct cifsLockInfo *conf_lock;
1017 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018 bool exist;
1019 int rc = 0;
1020
1021 try_again:
1022 exist = false;
1023 down_write(&cinode->lock_sem);
1024
1025 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026 lock->type, lock->flags, &conf_lock,
1027 CIFS_LOCK_OP);
1028 if (!exist && cinode->can_cache_brlcks) {
1029 list_add_tail(&lock->llist, &cfile->llist->locks);
1030 up_write(&cinode->lock_sem);
1031 return rc;
1032 }
1033
1034 if (!exist)
1035 rc = 1;
1036 else if (!wait)
1037 rc = -EACCES;
1038 else {
1039 list_add_tail(&lock->blist, &conf_lock->blist);
1040 up_write(&cinode->lock_sem);
1041 rc = wait_event_interruptible(lock->block_q,
1042 (lock->blist.prev == &lock->blist) &&
1043 (lock->blist.next == &lock->blist));
1044 if (!rc)
1045 goto try_again;
1046 down_write(&cinode->lock_sem);
1047 list_del_init(&lock->blist);
1048 }
1049
1050 up_write(&cinode->lock_sem);
1051 return rc;
1052 }
1053
1054 /*
1055 * Check if there is another lock that prevents us from setting the lock
1056 * (posix style). If such a lock exists, update the flock structure with
1057 * its properties. Otherwise, set the flock type to F_UNLCK if we can
1058 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
1059 * need to send a request to the server, or 1 otherwise.
1060 */
1061 static int
1062 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1063 {
1064 int rc = 0;
1065 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066 unsigned char saved_type = flock->fl_type;
1067
1068 if ((flock->fl_flags & FL_POSIX) == 0)
1069 return 1;
1070
1071 down_read(&cinode->lock_sem);
1072 posix_test_lock(file, flock);
1073
1074 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1075 flock->fl_type = saved_type;
1076 rc = 1;
1077 }
1078
1079 up_read(&cinode->lock_sem);
1080 return rc;
1081 }
1082
1083 /*
1084 * Set the byte-range lock (posix style). Returns:
1085 * 1) 0, if we set the lock and don't need to send a request to the server;
1086 * 2) 1, if we need to send a request to the server;
1087 * 3) <0, if an error occurs while setting the lock.
1088 */
1089 static int
1090 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1091 {
1092 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093 int rc = 1;
1094
1095 if ((flock->fl_flags & FL_POSIX) == 0)
1096 return rc;
1097
1098 try_again:
1099 down_write(&cinode->lock_sem);
1100 if (!cinode->can_cache_brlcks) {
1101 up_write(&cinode->lock_sem);
1102 return rc;
1103 }
1104
1105 rc = posix_lock_file(file, flock, NULL);
1106 up_write(&cinode->lock_sem);
1107 if (rc == FILE_LOCK_DEFERRED) {
1108 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109 if (!rc)
1110 goto try_again;
1111 locks_delete_block(flock);
1112 }
1113 return rc;
1114 }
1115
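/*
 * Push all cached byte-range locks for this open file to the server as
 * mandatory (LOCKING_ANDX) lock requests, batching as many ranges per
 * request as the negotiated buffer size allows.
 */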
1116 int
1117 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118 {
1119 unsigned int xid;
1120 int rc = 0, stored_rc;
1121 struct cifsLockInfo *li, *tmp;
1122 struct cifs_tcon *tcon;
1123 unsigned int num, max_num, max_buf;
1124 LOCKING_ANDX_RANGE *buf, *cur;
1125 static const int types[] = {
1126 LOCKING_ANDX_LARGE_FILES,
1127 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1128 };
1129 int i;
1130
1131 xid = get_xid();
1132 tcon = tlink_tcon(cfile->tlink);
1133
1134 /*
1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136 * and check it before using.
1137 */
1138 max_buf = tcon->ses->server->maxBuf;
1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140 free_xid(xid);
1141 return -EINVAL;
1142 }
1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1149 sizeof(LOCKING_ANDX_RANGE);
1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151 if (!buf) {
1152 free_xid(xid);
1153 return -ENOMEM;
1154 }
1155
1156 for (i = 0; i < 2; i++) {
1157 cur = buf;
1158 num = 0;
1159 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160 if (li->type != types[i])
1161 continue;
1162 cur->Pid = cpu_to_le16(li->pid);
1163 cur->LengthLow = cpu_to_le32((u32)li->length);
1164 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1165 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1166 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1167 if (++num == max_num) {
1168 stored_rc = cifs_lockv(xid, tcon,
1169 cfile->fid.netfid,
1170 (__u8)li->type, 0, num,
1171 buf);
1172 if (stored_rc)
1173 rc = stored_rc;
1174 cur = buf;
1175 num = 0;
1176 } else
1177 cur++;
1178 }
1179
1180 if (num) {
1181 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182 (__u8)types[i], 0, num, buf);
1183 if (stored_rc)
1184 rc = stored_rc;
1185 }
1186 }
1187
1188 kfree(buf);
1189 free_xid(xid);
1190 return rc;
1191 }
1192
1193 static __u32
1194 hash_lockowner(fl_owner_t owner)
1195 {
1196 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1197 }
1198
1199 struct lock_to_push {
1200 struct list_head llist;
1201 __u64 offset;
1202 __u64 length;
1203 __u32 pid;
1204 __u16 netfid;
1205 __u8 type;
1206 };
1207
1208 static int
1209 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210 {
1211 struct inode *inode = d_inode(cfile->dentry);
1212 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213 struct file_lock *flock;
1214 struct file_lock_context *flctx = inode->i_flctx;
1215 unsigned int count = 0, i;
1216 int rc = 0, xid, type;
1217 struct list_head locks_to_send, *el;
1218 struct lock_to_push *lck, *tmp;
1219 __u64 length;
1220
1221 xid = get_xid();
1222
1223 if (!flctx)
1224 goto out;
1225
1226 spin_lock(&flctx->flc_lock);
1227 list_for_each(el, &flctx->flc_posix) {
1228 count++;
1229 }
1230 spin_unlock(&flctx->flc_lock);
1231
1232 INIT_LIST_HEAD(&locks_to_send);
1233
1234 /*
1235 * Allocating count locks is enough because no FL_POSIX locks can be
1236 * added to the list while we are holding cinode->lock_sem that
1237 * protects locking operations of this inode.
1238 */
1239 for (i = 0; i < count; i++) {
1240 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1241 if (!lck) {
1242 rc = -ENOMEM;
1243 goto err_out;
1244 }
1245 list_add_tail(&lck->llist, &locks_to_send);
1246 }
1247
1248 el = locks_to_send.next;
1249 spin_lock(&flctx->flc_lock);
1250 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251 if (el == &locks_to_send) {
1252 /*
1253 * The list ended. We don't have enough allocated
1254 * structures - something is really wrong.
1255 */
1256 cifs_dbg(VFS, "Can't push all brlocks!\n");
1257 break;
1258 }
1259 length = 1 + flock->fl_end - flock->fl_start;
1260 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1261 type = CIFS_RDLCK;
1262 else
1263 type = CIFS_WRLCK;
1264 lck = list_entry(el, struct lock_to_push, llist);
1265 lck->pid = hash_lockowner(flock->fl_owner);
1266 lck->netfid = cfile->fid.netfid;
1267 lck->length = length;
1268 lck->type = type;
1269 lck->offset = flock->fl_start;
1270 }
1271 spin_unlock(&flctx->flc_lock);
1272
1273 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1274 int stored_rc;
1275
1276 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277 lck->offset, lck->length, NULL,
1278 lck->type, 0);
1279 if (stored_rc)
1280 rc = stored_rc;
1281 list_del(&lck->llist);
1282 kfree(lck);
1283 }
1284
1285 out:
1286 free_xid(xid);
1287 return rc;
1288 err_out:
1289 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1290 list_del(&lck->llist);
1291 kfree(lck);
1292 }
1293 goto out;
1294 }
1295
1296 static int
1297 cifs_push_locks(struct cifsFileInfo *cfile)
1298 {
1299 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302 int rc = 0;
1303
1304 /* we are going to update can_cache_brlcks here - need write access */
1305 down_write(&cinode->lock_sem);
1306 if (!cinode->can_cache_brlcks) {
1307 up_write(&cinode->lock_sem);
1308 return rc;
1309 }
1310
1311 if (cap_unix(tcon->ses) &&
1312 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1313 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314 rc = cifs_push_posix_locks(cfile);
1315 else
1316 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317
1318 cinode->can_cache_brlcks = false;
1319 up_write(&cinode->lock_sem);
1320 return rc;
1321 }
1322
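/*
 * Decode the VFS file_lock into the server lock type and determine
 * whether this is a lock, an unlock, and/or a blocking (wait) request.
 */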
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327 if (flock->fl_flags & FL_POSIX)
1328 cifs_dbg(FYI, "Posix\n");
1329 if (flock->fl_flags & FL_FLOCK)
1330 cifs_dbg(FYI, "Flock\n");
1331 if (flock->fl_flags & FL_SLEEP) {
1332 cifs_dbg(FYI, "Blocking lock\n");
1333 *wait_flag = true;
1334 }
1335 if (flock->fl_flags & FL_ACCESS)
1336 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337 if (flock->fl_flags & FL_LEASE)
1338 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339 if (flock->fl_flags &
1340 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344 *type = server->vals->large_lock_type;
1345 if (flock->fl_type == F_WRLCK) {
1346 cifs_dbg(FYI, "F_WRLCK\n");
1347 *type |= server->vals->exclusive_lock_type;
1348 *lock = 1;
1349 } else if (flock->fl_type == F_UNLCK) {
1350 cifs_dbg(FYI, "F_UNLCK\n");
1351 *type |= server->vals->unlock_lock_type;
1352 *unlock = 1;
1353 /* Check if unlock includes more than one lock range */
1354 } else if (flock->fl_type == F_RDLCK) {
1355 cifs_dbg(FYI, "F_RDLCK\n");
1356 *type |= server->vals->shared_lock_type;
1357 *lock = 1;
1358 } else if (flock->fl_type == F_EXLCK) {
1359 cifs_dbg(FYI, "F_EXLCK\n");
1360 *type |= server->vals->exclusive_lock_type;
1361 *lock = 1;
1362 } else if (flock->fl_type == F_SHLCK) {
1363 cifs_dbg(FYI, "F_SHLCK\n");
1364 *type |= server->vals->shared_lock_type;
1365 *lock = 1;
1366 } else
1367 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
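/*
 * Handle a lock test (F_GETLK): check for conflicts locally and, when the
 * result is not cached, by trying a lock/unlock pair on the server.
 */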
1370 static int
1371 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372 bool wait_flag, bool posix_lck, unsigned int xid)
1373 {
1374 int rc = 0;
1375 __u64 length = 1 + flock->fl_end - flock->fl_start;
1376 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378 struct TCP_Server_Info *server = tcon->ses->server;
1379 __u16 netfid = cfile->fid.netfid;
1380
1381 if (posix_lck) {
1382 int posix_lock_type;
1383
1384 rc = cifs_posix_lock_test(file, flock);
1385 if (!rc)
1386 return rc;
1387
1388 if (type & server->vals->shared_lock_type)
1389 posix_lock_type = CIFS_RDLCK;
1390 else
1391 posix_lock_type = CIFS_WRLCK;
1392 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1393 hash_lockowner(flock->fl_owner),
1394 flock->fl_start, length, flock,
1395 posix_lock_type, wait_flag);
1396 return rc;
1397 }
1398
1399 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400 if (!rc)
1401 return rc;
1402
1403 /* BB we could chain these into one lock request BB */
1404 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1405 1, 0, false);
1406 if (rc == 0) {
1407 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1408 type, 0, 1, false);
1409 flock->fl_type = F_UNLCK;
1410 if (rc != 0)
1411 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1412 rc);
1413 return 0;
1414 }
1415
1416 if (type & server->vals->shared_lock_type) {
1417 flock->fl_type = F_WRLCK;
1418 return 0;
1419 }
1420
1421 type &= ~server->vals->exclusive_lock_type;
1422
1423 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1424 type | server->vals->shared_lock_type,
1425 1, 0, false);
1426 if (rc == 0) {
1427 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1428 type | server->vals->shared_lock_type, 0, 1, false);
1429 flock->fl_type = F_RDLCK;
1430 if (rc != 0)
1431 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1432 rc);
1433 } else
1434 flock->fl_type = F_WRLCK;
1435
1436 return 0;
1437 }
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442 struct list_head *li, *tmp;
1443 list_for_each_safe(li, tmp, source)
1444 list_move(li, dest);
1445 }
1446
1447 void
1448 cifs_free_llist(struct list_head *llist)
1449 {
1450 struct cifsLockInfo *li, *tmp;
1451 list_for_each_entry_safe(li, tmp, llist, llist) {
1452 cifs_del_lock_waiters(li);
1453 list_del(&li->llist);
1454 kfree(li);
1455 }
1456 }
1457
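/*
 * Unlock a byte range: drop matching cached locks, or send batched
 * LOCKING_ANDX unlock requests to the server when locks are not cached.
 */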
1458 int
1459 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1460 unsigned int xid)
1461 {
1462 int rc = 0, stored_rc;
1463 static const int types[] = {
1464 LOCKING_ANDX_LARGE_FILES,
1465 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1466 };
1467 unsigned int i;
1468 unsigned int max_num, num, max_buf;
1469 LOCKING_ANDX_RANGE *buf, *cur;
1470 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472 struct cifsLockInfo *li, *tmp;
1473 __u64 length = 1 + flock->fl_end - flock->fl_start;
1474 struct list_head tmp_llist;
1475
1476 INIT_LIST_HEAD(&tmp_llist);
1477
1478 /*
1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480 * and check it before using.
1481 */
1482 max_buf = tcon->ses->server->maxBuf;
1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484 return -EINVAL;
1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1491 sizeof(LOCKING_ANDX_RANGE);
1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493 if (!buf)
1494 return -ENOMEM;
1495
1496 down_write(&cinode->lock_sem);
1497 for (i = 0; i < 2; i++) {
1498 cur = buf;
1499 num = 0;
1500 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501 if (flock->fl_start > li->offset ||
1502 (flock->fl_start + length) <
1503 (li->offset + li->length))
1504 continue;
1505 if (current->tgid != li->pid)
1506 continue;
1507 if (types[i] != li->type)
1508 continue;
1509 if (cinode->can_cache_brlcks) {
1510 /*
1511 * We can cache brlock requests - simply remove
1512 * a lock from the file's list.
1513 */
1514 list_del(&li->llist);
1515 cifs_del_lock_waiters(li);
1516 kfree(li);
1517 continue;
1518 }
1519 cur->Pid = cpu_to_le16(li->pid);
1520 cur->LengthLow = cpu_to_le32((u32)li->length);
1521 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1522 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1523 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1524 /*
1525 * We need to save a lock here to let us add it again to
1526 * the file's list if the unlock range request fails on
1527 * the server.
1528 */
1529 list_move(&li->llist, &tmp_llist);
1530 if (++num == max_num) {
1531 stored_rc = cifs_lockv(xid, tcon,
1532 cfile->fid.netfid,
1533 li->type, num, 0, buf);
1534 if (stored_rc) {
1535 /*
1536 * We failed on the unlock range
1537 * request - add all locks from the tmp
1538 * list to the head of the file's list.
1539 */
1540 cifs_move_llist(&tmp_llist,
1541 &cfile->llist->locks);
1542 rc = stored_rc;
1543 } else
1544 /*
1545 * The unlock range request succeeded -
1546 * free the tmp list.
1547 */
1548 cifs_free_llist(&tmp_llist);
1549 cur = buf;
1550 num = 0;
1551 } else
1552 cur++;
1553 }
1554 if (num) {
1555 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556 types[i], num, 0, buf);
1557 if (stored_rc) {
1558 cifs_move_llist(&tmp_llist,
1559 &cfile->llist->locks);
1560 rc = stored_rc;
1561 } else
1562 cifs_free_llist(&tmp_llist);
1563 }
1564 }
1565
1566 up_write(&cinode->lock_sem);
1567 kfree(buf);
1568 return rc;
1569 }
1570
1571 static int
1572 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573 bool wait_flag, bool posix_lck, int lock, int unlock,
1574 unsigned int xid)
1575 {
1576 int rc = 0;
1577 __u64 length = 1 + flock->fl_end - flock->fl_start;
1578 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1579 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580 struct TCP_Server_Info *server = tcon->ses->server;
1581 struct inode *inode = d_inode(cfile->dentry);
1582
1583 if (posix_lck) {
1584 int posix_lock_type;
1585
1586 rc = cifs_posix_lock_set(file, flock);
1587 if (!rc || rc < 0)
1588 return rc;
1589
1590 if (type & server->vals->shared_lock_type)
1591 posix_lock_type = CIFS_RDLCK;
1592 else
1593 posix_lock_type = CIFS_WRLCK;
1594
1595 if (unlock == 1)
1596 posix_lock_type = CIFS_UNLCK;
1597
1598 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599 hash_lockowner(flock->fl_owner),
1600 flock->fl_start, length,
1601 NULL, posix_lock_type, wait_flag);
1602 goto out;
1603 }
1604
1605 if (lock) {
1606 struct cifsLockInfo *lock;
1607
1608 lock = cifs_lock_init(flock->fl_start, length, type,
1609 flock->fl_flags);
1610 if (!lock)
1611 return -ENOMEM;
1612
1613 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614 if (rc < 0) {
1615 kfree(lock);
1616 return rc;
1617 }
1618 if (!rc)
1619 goto out;
1620
1621 /*
1622 * A Windows 7 server can delay breaking a lease from read to None
1623 * if we set a byte-range lock on a file - break it explicitly
1624 * before sending the lock to the server to be sure the next
1625 * read won't conflict with non-overlapping locks due to
1626 * page reading.
1627 */
1628 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1629 CIFS_CACHE_READ(CIFS_I(inode))) {
1630 cifs_zap_mapping(inode);
1631 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1632 inode);
1633 CIFS_I(inode)->oplock = 0;
1634 }
1635
1636 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637 type, 1, 0, wait_flag);
1638 if (rc) {
1639 kfree(lock);
1640 return rc;
1641 }
1642
1643 cifs_lock_add(cfile, lock);
1644 } else if (unlock)
1645 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646
1647 out:
1648 if (flock->fl_flags & FL_POSIX && !rc)
1649 rc = locks_lock_file_wait(file, flock);
1650 return rc;
1651 }
1652
1653 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1654 {
1655 int rc, xid;
1656 int lock = 0, unlock = 0;
1657 bool wait_flag = false;
1658 bool posix_lck = false;
1659 struct cifs_sb_info *cifs_sb;
1660 struct cifs_tcon *tcon;
1661 struct cifsInodeInfo *cinode;
1662 struct cifsFileInfo *cfile;
1663 __u16 netfid;
1664 __u32 type;
1665
1666 rc = -EACCES;
1667 xid = get_xid();
1668
1669 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1670 cmd, flock->fl_flags, flock->fl_type,
1671 flock->fl_start, flock->fl_end);
1672
1673 cfile = (struct cifsFileInfo *)file->private_data;
1674 tcon = tlink_tcon(cfile->tlink);
1675
1676 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1677 tcon->ses->server);
1678 cifs_sb = CIFS_FILE_SB(file);
1679 netfid = cfile->fid.netfid;
1680 cinode = CIFS_I(file_inode(file));
1681
1682 if (cap_unix(tcon->ses) &&
1683 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1684 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1685 posix_lck = true;
1686 /*
1687 * BB add code here to normalize offset and length to account for
1688 * negative length which we can not accept over the wire.
1689 */
1690 if (IS_GETLK(cmd)) {
1691 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1692 free_xid(xid);
1693 return rc;
1694 }
1695
1696 if (!lock && !unlock) {
1697 /*
1698 * if no lock or unlock then nothing to do since we do not
1699 * know what it is
1700 */
1701 free_xid(xid);
1702 return -EOPNOTSUPP;
1703 }
1704
1705 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1706 xid);
1707 free_xid(xid);
1708 return rc;
1709 }
1710
1711 /*
1712 * update the file size (if needed) after a write. Should be called with
1713 * the inode->i_lock held
1714 */
1715 void
1716 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1717 unsigned int bytes_written)
1718 {
1719 loff_t end_of_write = offset + bytes_written;
1720
1721 if (end_of_write > cifsi->server_eof)
1722 cifsi->server_eof = end_of_write;
1723 }
1724
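/*
 * Write data synchronously to the server at *offset, retrying on -EAGAIN
 * and reopening an invalidated handle if needed; updates the cached
 * end-of-file and the inode size on success.
 */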
1725 static ssize_t
1726 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1727 size_t write_size, loff_t *offset)
1728 {
1729 int rc = 0;
1730 unsigned int bytes_written = 0;
1731 unsigned int total_written;
1732 struct cifs_sb_info *cifs_sb;
1733 struct cifs_tcon *tcon;
1734 struct TCP_Server_Info *server;
1735 unsigned int xid;
1736 struct dentry *dentry = open_file->dentry;
1737 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1738 struct cifs_io_parms io_parms;
1739
1740 cifs_sb = CIFS_SB(dentry->d_sb);
1741
1742 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1743 write_size, *offset, dentry);
1744
1745 tcon = tlink_tcon(open_file->tlink);
1746 server = tcon->ses->server;
1747
1748 if (!server->ops->sync_write)
1749 return -ENOSYS;
1750
1751 xid = get_xid();
1752
1753 for (total_written = 0; write_size > total_written;
1754 total_written += bytes_written) {
1755 rc = -EAGAIN;
1756 while (rc == -EAGAIN) {
1757 struct kvec iov[2];
1758 unsigned int len;
1759
1760 if (open_file->invalidHandle) {
1761 /* we could deadlock if we called
1762 filemap_fdatawait from here so tell
1763 reopen_file not to flush data to
1764 server now */
1765 rc = cifs_reopen_file(open_file, false);
1766 if (rc != 0)
1767 break;
1768 }
1769
1770 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1771 (unsigned int)write_size - total_written);
1772 /* iov[0] is reserved for smb header */
1773 iov[1].iov_base = (char *)write_data + total_written;
1774 iov[1].iov_len = len;
1775 io_parms.pid = pid;
1776 io_parms.tcon = tcon;
1777 io_parms.offset = *offset;
1778 io_parms.length = len;
1779 rc = server->ops->sync_write(xid, &open_file->fid,
1780 &io_parms, &bytes_written, iov, 1);
1781 }
1782 if (rc || (bytes_written == 0)) {
1783 if (total_written)
1784 break;
1785 else {
1786 free_xid(xid);
1787 return rc;
1788 }
1789 } else {
1790 spin_lock(&d_inode(dentry)->i_lock);
1791 cifs_update_eof(cifsi, *offset, bytes_written);
1792 spin_unlock(&d_inode(dentry)->i_lock);
1793 *offset += bytes_written;
1794 }
1795 }
1796
1797 cifs_stats_bytes_written(tcon, total_written);
1798
1799 if (total_written > 0) {
1800 spin_lock(&d_inode(dentry)->i_lock);
1801 if (*offset > d_inode(dentry)->i_size)
1802 i_size_write(d_inode(dentry), *offset);
1803 spin_unlock(&d_inode(dentry)->i_lock);
1804 }
1805 mark_inode_dirty_sync(d_inode(dentry));
1806 free_xid(xid);
1807 return total_written;
1808 }
1809
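/*
 * Find an open file instance of this inode usable for reading, optionally
 * restricted to handles owned by the current fsuid on multiuser mounts.
 */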
1810 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1811 bool fsuid_only)
1812 {
1813 struct cifsFileInfo *open_file = NULL;
1814 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1815 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1816
1817 /* only filter by fsuid on multiuser mounts */
1818 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1819 fsuid_only = false;
1820
1821 spin_lock(&tcon->open_file_lock);
1822 /* we could simply get the first_list_entry since write-only entries
1823 are always at the end of the list but since the first entry might
1824 have a close pending, we go through the whole list */
1825 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1826 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1827 continue;
1828 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1829 if (!open_file->invalidHandle) {
1830 /* found a good file */
1831 /* lock it so it will not be closed on us */
1832 cifsFileInfo_get(open_file);
1833 spin_unlock(&tcon->open_file_lock);
1834 return open_file;
1835 } /* else might as well continue, and look for
1836 another, or simply have the caller reopen it
1837 again rather than trying to fix this handle */
1838 } else /* write only file */
1839 break; /* write only files are last so must be done */
1840 }
1841 spin_unlock(&tcon->open_file_lock);
1842 return NULL;
1843 }
1844
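/*
 * Find an open file instance of this inode usable for writing, reopening
 * an invalidated handle if that is the only candidate.
 */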
1845 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1846 bool fsuid_only)
1847 {
1848 struct cifsFileInfo *open_file, *inv_file = NULL;
1849 struct cifs_sb_info *cifs_sb;
1850 struct cifs_tcon *tcon;
1851 bool any_available = false;
1852 int rc;
1853 unsigned int refind = 0;
1854
1855 /* Having a null inode here (because mapping->host was set to zero by
1856 the VFS or MM) should not happen but we had reports of an oops (due to
1857 it being zero) during stress test cases so we need to check for it */
1858
1859 if (cifs_inode == NULL) {
1860 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1861 dump_stack();
1862 return NULL;
1863 }
1864
1865 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1866 tcon = cifs_sb_master_tcon(cifs_sb);
1867
1868 /* only filter by fsuid on multiuser mounts */
1869 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1870 fsuid_only = false;
1871
1872 spin_lock(&tcon->open_file_lock);
1873 refind_writable:
1874 if (refind > MAX_REOPEN_ATT) {
1875 spin_unlock(&tcon->open_file_lock);
1876 return NULL;
1877 }
1878 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1879 if (!any_available && open_file->pid != current->tgid)
1880 continue;
1881 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1882 continue;
1883 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1884 if (!open_file->invalidHandle) {
1885 /* found a good writable file */
1886 cifsFileInfo_get(open_file);
1887 spin_unlock(&tcon->open_file_lock);
1888 return open_file;
1889 } else {
1890 if (!inv_file)
1891 inv_file = open_file;
1892 }
1893 }
1894 }
1895 /* couldn't find a usable FH with the same pid, try any available */
1896 if (!any_available) {
1897 any_available = true;
1898 goto refind_writable;
1899 }
1900
1901 if (inv_file) {
1902 any_available = false;
1903 cifsFileInfo_get(inv_file);
1904 }
1905
1906 spin_unlock(&tcon->open_file_lock);
1907
1908 if (inv_file) {
1909 rc = cifs_reopen_file(inv_file, false);
1910 if (!rc)
1911 return inv_file;
1912 else {
1913 spin_lock(&tcon->open_file_lock);
1914 list_move_tail(&inv_file->flist,
1915 &cifs_inode->openFileList);
1916 spin_unlock(&tcon->open_file_lock);
1917 cifsFileInfo_put(inv_file);
1918 ++refind;
1919 inv_file = NULL;
1920 spin_lock(&tcon->open_file_lock);
1921 goto refind_writable;
1922 }
1923 }
1924
1925 return NULL;
1926 }
1927
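/*
 * Write the byte range [from, to) of a single cached page back to the
 * server with a synchronous cifs_write() through any writable handle
 * for the inode. Used by cifs_writepage_locked() for the
 * one-page-at-a-time writeback path.
 */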
1928 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1929 {
1930 struct address_space *mapping = page->mapping;
1931 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1932 char *write_data;
1933 int rc = -EFAULT;
1934 int bytes_written = 0;
1935 struct inode *inode;
1936 struct cifsFileInfo *open_file;
1937
1938 if (!mapping || !mapping->host)
1939 return -EFAULT;
1940
1941 inode = page->mapping->host;
1942
1943 offset += (loff_t)from;
1944 write_data = kmap(page);
1945 write_data += from;
1946
1947 if ((to > PAGE_SIZE) || (from > to)) {
1948 kunmap(page);
1949 return -EIO;
1950 }
1951
1952 /* racing with truncate? */
1953 if (offset > mapping->host->i_size) {
1954 kunmap(page);
1955 return 0; /* don't care */
1956 }
1957
1958 /* check to make sure that we are not extending the file */
1959 if (mapping->host->i_size - offset < (loff_t)to)
1960 to = (unsigned)(mapping->host->i_size - offset);
1961
1962 open_file = find_writable_file(CIFS_I(mapping->host), false);
1963 if (open_file) {
1964 bytes_written = cifs_write(open_file, open_file->pid,
1965 write_data, to - from, &offset);
1966 cifsFileInfo_put(open_file);
1967 /* Does mm or vfs already set times? */
1968 inode->i_atime = inode->i_mtime = current_time(inode);
1969 if ((bytes_written > 0) && (offset))
1970 rc = 0;
1971 else if (bytes_written < 0)
1972 rc = bytes_written;
1973 } else {
1974 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1975 rc = -EIO;
1976 }
1977
1978 kunmap(page);
1979 return rc;
1980 }
1981
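/*
 * Allocate a cifs_writedata for up to @tofind pages and fill its page
 * array with pages tagged dirty in the page cache, starting at *index
 * and not going past @end.
 */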
1982 static struct cifs_writedata *
1983 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1984 pgoff_t end, pgoff_t *index,
1985 unsigned int *found_pages)
1986 {
1987 struct cifs_writedata *wdata;
1988
1989 wdata = cifs_writedata_alloc((unsigned int)tofind,
1990 cifs_writev_complete);
1991 if (!wdata)
1992 return NULL;
1993
1994 *found_pages = find_get_pages_range_tag(mapping, index, end,
1995 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1996 return wdata;
1997 }
1998
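/*
 * Lock and prepare the pages gathered by wdata_alloc_and_fillpages()
 * for writeback, stopping when a page cannot be locked, no longer
 * belongs to the mapping, is not contiguous with the previous one, is
 * no longer dirty, or lies at or past EOF. Pages that will not be sent
 * are released; the number of pages kept is returned.
 */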
1999 static unsigned int
2000 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2001 struct address_space *mapping,
2002 struct writeback_control *wbc,
2003 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2004 {
2005 unsigned int nr_pages = 0, i;
2006 struct page *page;
2007
2008 for (i = 0; i < found_pages; i++) {
2009 page = wdata->pages[i];
2010 /*
2011 * At this point we hold neither the i_pages lock nor the
2012 * page lock: the page may be truncated or invalidated
2013 * (changing page->mapping to NULL), or even swizzled
2014 * back from swapper_space to tmpfs file mapping
2015 */
2016
2017 if (nr_pages == 0)
2018 lock_page(page);
2019 else if (!trylock_page(page))
2020 break;
2021
2022 if (unlikely(page->mapping != mapping)) {
2023 unlock_page(page);
2024 break;
2025 }
2026
2027 if (!wbc->range_cyclic && page->index > end) {
2028 *done = true;
2029 unlock_page(page);
2030 break;
2031 }
2032
2033 if (*next && (page->index != *next)) {
2034 /* Not next consecutive page */
2035 unlock_page(page);
2036 break;
2037 }
2038
2039 if (wbc->sync_mode != WB_SYNC_NONE)
2040 wait_on_page_writeback(page);
2041
2042 if (PageWriteback(page) ||
2043 !clear_page_dirty_for_io(page)) {
2044 unlock_page(page);
2045 break;
2046 }
2047
2048 /*
2049 * This actually clears the dirty bit in the radix tree.
2050 * See cifs_writepage() for more commentary.
2051 */
2052 set_page_writeback(page);
2053 if (page_offset(page) >= i_size_read(mapping->host)) {
2054 *done = true;
2055 unlock_page(page);
2056 end_page_writeback(page);
2057 break;
2058 }
2059
2060 wdata->pages[i] = page;
2061 *next = page->index + 1;
2062 ++nr_pages;
2063 }
2064
2065 /* reset index to refind any pages skipped */
2066 if (nr_pages == 0)
2067 *index = wdata->pages[0]->index + 1;
2068
2069 /* put any pages we aren't going to use */
2070 for (i = nr_pages; i < found_pages; i++) {
2071 put_page(wdata->pages[i]);
2072 wdata->pages[i] = NULL;
2073 }
2074
2075 return nr_pages;
2076 }
2077
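/*
 * Fill in the remaining wdata fields (offset, sizes, file handle) and
 * hand the request to the transport via ->async_writev(). All pages
 * are unlocked before returning, whether or not the send succeeded.
 */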
2078 static int
2079 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2080 struct address_space *mapping, struct writeback_control *wbc)
2081 {
2082 int rc = 0;
2083 struct TCP_Server_Info *server;
2084 unsigned int i;
2085
2086 wdata->sync_mode = wbc->sync_mode;
2087 wdata->nr_pages = nr_pages;
2088 wdata->offset = page_offset(wdata->pages[0]);
2089 wdata->pagesz = PAGE_SIZE;
2090 wdata->tailsz = min(i_size_read(mapping->host) -
2091 page_offset(wdata->pages[nr_pages - 1]),
2092 (loff_t)PAGE_SIZE);
2093 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2094
2095 if (wdata->cfile != NULL)
2096 cifsFileInfo_put(wdata->cfile);
2097 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2098 if (!wdata->cfile) {
2099 cifs_dbg(VFS, "No writable handles for inode\n");
2100 rc = -EBADF;
2101 } else {
2102 wdata->pid = wdata->cfile->pid;
2103 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2104 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2105 }
2106
2107 for (i = 0; i < nr_pages; ++i)
2108 unlock_page(wdata->pages[i]);
2109
2110 return rc;
2111 }
2112
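/*
 * ->writepages for cifs: gather runs of contiguous dirty pages, bounded
 * by the negotiated wsize and the available credits, and send them to
 * the server as asynchronous writes, redirtying pages on retryable
 * errors.
 */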
2113 static int cifs_writepages(struct address_space *mapping,
2114 struct writeback_control *wbc)
2115 {
2116 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2117 struct TCP_Server_Info *server;
2118 bool done = false, scanned = false, range_whole = false;
2119 pgoff_t end, index;
2120 struct cifs_writedata *wdata;
2121 int rc = 0;
2122 int saved_rc = 0;
2123 unsigned int xid;
2124
2125 /*
2126 * If wsize is smaller than the page cache size, default to writing
2127 * one page at a time via cifs_writepage
2128 */
2129 if (cifs_sb->wsize < PAGE_SIZE)
2130 return generic_writepages(mapping, wbc);
2131
2132 xid = get_xid();
2133 if (wbc->range_cyclic) {
2134 index = mapping->writeback_index; /* Start from prev offset */
2135 end = -1;
2136 } else {
2137 index = wbc->range_start >> PAGE_SHIFT;
2138 end = wbc->range_end >> PAGE_SHIFT;
2139 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2140 range_whole = true;
2141 scanned = true;
2142 }
2143 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2144 retry:
2145 while (!done && index <= end) {
2146 unsigned int i, nr_pages, found_pages, wsize, credits;
2147 pgoff_t next = 0, tofind, saved_index = index;
2148
2149 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2150 &wsize, &credits);
2151 if (rc != 0) {
2152 done = true;
2153 break;
2154 }
2155
2156 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2157
2158 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2159 &found_pages);
2160 if (!wdata) {
2161 rc = -ENOMEM;
2162 done = true;
2163 add_credits_and_wake_if(server, credits, 0);
2164 break;
2165 }
2166
2167 if (found_pages == 0) {
2168 kref_put(&wdata->refcount, cifs_writedata_release);
2169 add_credits_and_wake_if(server, credits, 0);
2170 break;
2171 }
2172
2173 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2174 end, &index, &next, &done);
2175
2176 /* nothing to write? */
2177 if (nr_pages == 0) {
2178 kref_put(&wdata->refcount, cifs_writedata_release);
2179 add_credits_and_wake_if(server, credits, 0);
2180 continue;
2181 }
2182
2183 wdata->credits = credits;
2184
2185 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2186
2187 /* send failure -- clean up the mess */
2188 if (rc != 0) {
2189 add_credits_and_wake_if(server, wdata->credits, 0);
2190 for (i = 0; i < nr_pages; ++i) {
2191 if (is_retryable_error(rc))
2192 redirty_page_for_writepage(wbc,
2193 wdata->pages[i]);
2194 else
2195 SetPageError(wdata->pages[i]);
2196 end_page_writeback(wdata->pages[i]);
2197 put_page(wdata->pages[i]);
2198 }
2199 if (!is_retryable_error(rc))
2200 mapping_set_error(mapping, rc);
2201 }
2202 kref_put(&wdata->refcount, cifs_writedata_release);
2203
2204 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2205 index = saved_index;
2206 continue;
2207 }
2208
2209 /* Return immediately if we received a signal during writing */
2210 if (is_interrupt_error(rc)) {
2211 done = true;
2212 break;
2213 }
2214
2215 if (rc != 0 && saved_rc == 0)
2216 saved_rc = rc;
2217
2218 wbc->nr_to_write -= nr_pages;
2219 if (wbc->nr_to_write <= 0)
2220 done = true;
2221
2222 index = next;
2223 }
2224
2225 if (!scanned && !done) {
2226 /*
2227 * We hit the last page and there is more work to be done: wrap
2228 * back to the start of the file
2229 */
2230 scanned = true;
2231 index = 0;
2232 goto retry;
2233 }
2234
2235 if (saved_rc != 0)
2236 rc = saved_rc;
2237
2238 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2239 mapping->writeback_index = index;
2240
2241 free_xid(xid);
2242 return rc;
2243 }
2244
2245 static int
2246 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2247 {
2248 int rc;
2249 unsigned int xid;
2250
2251 xid = get_xid();
2252 /* BB add check for wbc flags */
2253 get_page(page);
2254 if (!PageUptodate(page))
2255 cifs_dbg(FYI, "ppw - page not up to date\n");
2256
2257 /*
2258 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2259 *
2260 * A writepage() implementation always needs to do either this,
2261 * or re-dirty the page with "redirty_page_for_writepage()" in
2262 * the case of a failure.
2263 *
2264 * Just unlocking the page will cause the radix tree tag-bits
2265 * to fail to update with the state of the page correctly.
2266 */
2267 set_page_writeback(page);
2268 retry_write:
2269 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2270 if (is_retryable_error(rc)) {
2271 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2272 goto retry_write;
2273 redirty_page_for_writepage(wbc, page);
2274 } else if (rc != 0) {
2275 SetPageError(page);
2276 mapping_set_error(page->mapping, rc);
2277 } else {
2278 SetPageUptodate(page);
2279 }
2280 end_page_writeback(page);
2281 put_page(page);
2282 free_xid(xid);
2283 return rc;
2284 }
2285
2286 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2287 {
2288 int rc = cifs_writepage_locked(page, wbc);
2289 unlock_page(page);
2290 return rc;
2291 }
2292
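/*
 * ->write_end for cifs: if the page is not up to date, push the copied
 * bytes to the server synchronously with cifs_write(); otherwise just
 * mark the page dirty and let writeback send it later. Extends i_size
 * when the write goes past the current end of file.
 */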
2293 static int cifs_write_end(struct file *file, struct address_space *mapping,
2294 loff_t pos, unsigned len, unsigned copied,
2295 struct page *page, void *fsdata)
2296 {
2297 int rc;
2298 struct inode *inode = mapping->host;
2299 struct cifsFileInfo *cfile = file->private_data;
2300 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2301 __u32 pid;
2302
2303 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2304 pid = cfile->pid;
2305 else
2306 pid = current->tgid;
2307
2308 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2309 page, pos, copied);
2310
2311 if (PageChecked(page)) {
2312 if (copied == len)
2313 SetPageUptodate(page);
2314 ClearPageChecked(page);
2315 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2316 SetPageUptodate(page);
2317
2318 if (!PageUptodate(page)) {
2319 char *page_data;
2320 unsigned offset = pos & (PAGE_SIZE - 1);
2321 unsigned int xid;
2322
2323 xid = get_xid();
2324 		/* this is probably better than calling cifs_partialpagewrite
2325 		   directly, since in this function the file handle is known and
2326 		   we might as well leverage it */
2327 		/* BB check if anything else is missing from ppw,
2328 		   such as updating the last write time */
2329 page_data = kmap(page);
2330 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2331 /* if (rc < 0) should we set writebehind rc? */
2332 kunmap(page);
2333
2334 free_xid(xid);
2335 } else {
2336 rc = copied;
2337 pos += copied;
2338 set_page_dirty(page);
2339 }
2340
2341 if (rc > 0) {
2342 spin_lock(&inode->i_lock);
2343 if (pos > inode->i_size)
2344 i_size_write(inode, pos);
2345 spin_unlock(&inode->i_lock);
2346 }
2347
2348 unlock_page(page);
2349 put_page(page);
2350
2351 return rc;
2352 }
2353
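/*
 * fsync for strict cache mode: flush dirty pages, invalidate the page
 * cache if we no longer hold a read caching oplock/lease, and then ask
 * the server to flush its copy of the file unless CIFS_MOUNT_NOSSYNC
 * is set.
 */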
2354 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2355 int datasync)
2356 {
2357 unsigned int xid;
2358 int rc = 0;
2359 struct cifs_tcon *tcon;
2360 struct TCP_Server_Info *server;
2361 struct cifsFileInfo *smbfile = file->private_data;
2362 struct inode *inode = file_inode(file);
2363 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2364
2365 rc = file_write_and_wait_range(file, start, end);
2366 if (rc)
2367 return rc;
2368 inode_lock(inode);
2369
2370 xid = get_xid();
2371
2372 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2373 file, datasync);
2374
2375 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2376 rc = cifs_zap_mapping(inode);
2377 if (rc) {
2378 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2379 rc = 0; /* don't care about it in fsync */
2380 }
2381 }
2382
2383 tcon = tlink_tcon(smbfile->tlink);
2384 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2385 server = tcon->ses->server;
2386 if (server->ops->flush)
2387 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2388 else
2389 rc = -ENOSYS;
2390 }
2391
2392 free_xid(xid);
2393 inode_unlock(inode);
2394 return rc;
2395 }
2396
2397 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2398 {
2399 unsigned int xid;
2400 int rc = 0;
2401 struct cifs_tcon *tcon;
2402 struct TCP_Server_Info *server;
2403 struct cifsFileInfo *smbfile = file->private_data;
2404 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2405 struct inode *inode = file->f_mapping->host;
2406
2407 rc = file_write_and_wait_range(file, start, end);
2408 if (rc)
2409 return rc;
2410 inode_lock(inode);
2411
2412 xid = get_xid();
2413
2414 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2415 file, datasync);
2416
2417 tcon = tlink_tcon(smbfile->tlink);
2418 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2419 server = tcon->ses->server;
2420 if (server->ops->flush)
2421 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2422 else
2423 rc = -ENOSYS;
2424 }
2425
2426 free_xid(xid);
2427 inode_unlock(inode);
2428 return rc;
2429 }
2430
2431 /*
2432  * As the file closes, flush all cached write data for this inode,
2433  * checking for write-behind errors.
2434 */
2435 int cifs_flush(struct file *file, fl_owner_t id)
2436 {
2437 struct inode *inode = file_inode(file);
2438 int rc = 0;
2439
2440 if (file->f_mode & FMODE_WRITE)
2441 rc = filemap_write_and_wait(inode->i_mapping);
2442
2443 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2444
2445 return rc;
2446 }
2447
2448 static int
2449 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2450 {
2451 int rc = 0;
2452 unsigned long i;
2453
2454 for (i = 0; i < num_pages; i++) {
2455 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2456 if (!pages[i]) {
2457 /*
2458 * save number of pages we have already allocated and
2459 * return with ENOMEM error
2460 */
2461 num_pages = i;
2462 rc = -ENOMEM;
2463 break;
2464 }
2465 }
2466
2467 if (rc) {
2468 for (i = 0; i < num_pages; i++)
2469 put_page(pages[i]);
2470 }
2471 return rc;
2472 }
2473
2474 static inline
2475 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2476 {
2477 size_t num_pages;
2478 size_t clen;
2479
2480 clen = min_t(const size_t, len, wsize);
2481 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2482
2483 if (cur_len)
2484 *cur_len = clen;
2485
2486 return num_pages;
2487 }
2488
2489 static void
2490 cifs_uncached_writedata_release(struct kref *refcount)
2491 {
2492 int i;
2493 struct cifs_writedata *wdata = container_of(refcount,
2494 struct cifs_writedata, refcount);
2495
2496 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2497 for (i = 0; i < wdata->nr_pages; i++)
2498 put_page(wdata->pages[i]);
2499 cifs_writedata_release(refcount);
2500 }
2501
2502 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2503
2504 static void
2505 cifs_uncached_writev_complete(struct work_struct *work)
2506 {
2507 struct cifs_writedata *wdata = container_of(work,
2508 struct cifs_writedata, work);
2509 struct inode *inode = d_inode(wdata->cfile->dentry);
2510 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2511
2512 spin_lock(&inode->i_lock);
2513 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2514 if (cifsi->server_eof > inode->i_size)
2515 i_size_write(inode, cifsi->server_eof);
2516 spin_unlock(&inode->i_lock);
2517
2518 complete(&wdata->done);
2519 collect_uncached_write_data(wdata->ctx);
2520 /* the below call can possibly free the last ref to aio ctx */
2521 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2522 }
2523
2524 static int
2525 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2526 size_t *len, unsigned long *num_pages)
2527 {
2528 size_t save_len, copied, bytes, cur_len = *len;
2529 unsigned long i, nr_pages = *num_pages;
2530
2531 save_len = cur_len;
2532 for (i = 0; i < nr_pages; i++) {
2533 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2534 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2535 cur_len -= copied;
2536 /*
2537 * If we didn't copy as much as we expected, then that
2538 * may mean we trod into an unmapped area. Stop copying
2539 * at that point. On the next pass through the big
2540 * loop, we'll likely end up getting a zero-length
2541 * write and bailing out of it.
2542 */
2543 if (copied < bytes)
2544 break;
2545 }
2546 cur_len = save_len - cur_len;
2547 *len = cur_len;
2548
2549 /*
2550 * If we have no data to send, then that probably means that
2551 * the copy above failed altogether. That's most likely because
2552 * the address in the iovec was bogus. Return -EFAULT and let
2553 * the caller free anything we allocated and bail out.
2554 */
2555 if (!cur_len)
2556 return -EFAULT;
2557
2558 /*
2559 * i + 1 now represents the number of pages we actually used in
2560 * the copy phase above.
2561 */
2562 *num_pages = i + 1;
2563 return 0;
2564 }
2565
2566 static int
2567 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2568 struct cifs_aio_ctx *ctx)
2569 {
2570 unsigned int wsize, credits;
2571 int rc;
2572 struct TCP_Server_Info *server =
2573 tlink_tcon(wdata->cfile->tlink)->ses->server;
2574
2575 /*
2576 * Wait for credits to resend this wdata.
2577 * Note: we are attempting to resend the whole wdata not in segments
2578 */
2579 do {
2580 rc = server->ops->wait_mtu_credits(
2581 server, wdata->bytes, &wsize, &credits);
2582
2583 if (rc)
2584 goto out;
2585
2586 if (wsize < wdata->bytes) {
2587 add_credits_and_wake_if(server, credits, 0);
2588 msleep(1000);
2589 }
2590 } while (wsize < wdata->bytes);
2591
2592 rc = -EAGAIN;
2593 while (rc == -EAGAIN) {
2594 rc = 0;
2595 if (wdata->cfile->invalidHandle)
2596 rc = cifs_reopen_file(wdata->cfile, false);
2597 if (!rc)
2598 rc = server->ops->async_writev(wdata,
2599 cifs_uncached_writedata_release);
2600 }
2601
2602 if (!rc) {
2603 list_add_tail(&wdata->list, wdata_list);
2604 return 0;
2605 }
2606
2607 add_credits_and_wake_if(server, wdata->credits, 0);
2608 out:
2609 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2610
2611 return rc;
2612 }
2613
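/*
 * Split an uncached or direct write into wsize-bounded chunks, build a
 * cifs_writedata for each chunk (pinning user pages for direct I/O,
 * copying from the iterator otherwise) and submit it with
 * ->async_writev(). Successfully submitted chunks are queued on
 * @wdata_list for later collection.
 */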
2614 static int
2615 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2616 struct cifsFileInfo *open_file,
2617 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2618 struct cifs_aio_ctx *ctx)
2619 {
2620 int rc = 0;
2621 size_t cur_len;
2622 unsigned long nr_pages, num_pages, i;
2623 struct cifs_writedata *wdata;
2624 struct iov_iter saved_from = *from;
2625 loff_t saved_offset = offset;
2626 pid_t pid;
2627 struct TCP_Server_Info *server;
2628 struct page **pagevec;
2629 size_t start;
2630
2631 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2632 pid = open_file->pid;
2633 else
2634 pid = current->tgid;
2635
2636 server = tlink_tcon(open_file->tlink)->ses->server;
2637
2638 do {
2639 unsigned int wsize, credits;
2640
2641 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2642 &wsize, &credits);
2643 if (rc)
2644 break;
2645
2646 cur_len = min_t(const size_t, len, wsize);
2647
2648 if (ctx->direct_io) {
2649 ssize_t result;
2650
2651 result = iov_iter_get_pages_alloc(
2652 from, &pagevec, cur_len, &start);
2653 if (result < 0) {
2654 cifs_dbg(VFS,
2655 "direct_writev couldn't get user pages "
2656 "(rc=%zd) iter type %d iov_offset %zd "
2657 "count %zd\n",
2658 result, from->type,
2659 from->iov_offset, from->count);
2660 dump_stack();
2661
2662 rc = result;
2663 add_credits_and_wake_if(server, credits, 0);
2664 break;
2665 }
2666 cur_len = (size_t)result;
2667 iov_iter_advance(from, cur_len);
2668
2669 nr_pages =
2670 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2671
2672 wdata = cifs_writedata_direct_alloc(pagevec,
2673 cifs_uncached_writev_complete);
2674 if (!wdata) {
2675 rc = -ENOMEM;
2676 add_credits_and_wake_if(server, credits, 0);
2677 break;
2678 }
2679
2680
2681 wdata->page_offset = start;
2682 wdata->tailsz =
2683 nr_pages > 1 ?
2684 cur_len - (PAGE_SIZE - start) -
2685 (nr_pages - 2) * PAGE_SIZE :
2686 cur_len;
2687 } else {
2688 nr_pages = get_numpages(wsize, len, &cur_len);
2689 wdata = cifs_writedata_alloc(nr_pages,
2690 cifs_uncached_writev_complete);
2691 if (!wdata) {
2692 rc = -ENOMEM;
2693 add_credits_and_wake_if(server, credits, 0);
2694 break;
2695 }
2696
2697 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2698 if (rc) {
2699 kfree(wdata);
2700 add_credits_and_wake_if(server, credits, 0);
2701 break;
2702 }
2703
2704 num_pages = nr_pages;
2705 rc = wdata_fill_from_iovec(
2706 wdata, from, &cur_len, &num_pages);
2707 if (rc) {
2708 for (i = 0; i < nr_pages; i++)
2709 put_page(wdata->pages[i]);
2710 kfree(wdata);
2711 add_credits_and_wake_if(server, credits, 0);
2712 break;
2713 }
2714
2715 /*
2716 * Bring nr_pages down to the number of pages we
2717 * actually used, and free any pages that we didn't use.
2718 */
2719 for ( ; nr_pages > num_pages; nr_pages--)
2720 put_page(wdata->pages[nr_pages - 1]);
2721
2722 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2723 }
2724
2725 wdata->sync_mode = WB_SYNC_ALL;
2726 wdata->nr_pages = nr_pages;
2727 wdata->offset = (__u64)offset;
2728 wdata->cfile = cifsFileInfo_get(open_file);
2729 wdata->pid = pid;
2730 wdata->bytes = cur_len;
2731 wdata->pagesz = PAGE_SIZE;
2732 wdata->credits = credits;
2733 wdata->ctx = ctx;
2734 kref_get(&ctx->refcount);
2735
2736 if (!wdata->cfile->invalidHandle ||
2737 !(rc = cifs_reopen_file(wdata->cfile, false)))
2738 rc = server->ops->async_writev(wdata,
2739 cifs_uncached_writedata_release);
2740 if (rc) {
2741 add_credits_and_wake_if(server, wdata->credits, 0);
2742 kref_put(&wdata->refcount,
2743 cifs_uncached_writedata_release);
2744 if (rc == -EAGAIN) {
2745 *from = saved_from;
2746 iov_iter_advance(from, offset - saved_offset);
2747 continue;
2748 }
2749 break;
2750 }
2751
2752 list_add_tail(&wdata->list, wdata_list);
2753 offset += cur_len;
2754 len -= cur_len;
2755 } while (len > 0);
2756
2757 return rc;
2758 }
2759
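/*
 * Completion path for uncached writes: wait for each queued wdata in
 * order of increasing offset, accumulate the number of bytes written,
 * resend chunks that failed with -EAGAIN, and finally report the
 * result through the aio ctx (ki_complete for async callers, ctx->done
 * for synchronous ones).
 */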
2760 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2761 {
2762 struct cifs_writedata *wdata, *tmp;
2763 struct cifs_tcon *tcon;
2764 struct cifs_sb_info *cifs_sb;
2765 struct dentry *dentry = ctx->cfile->dentry;
2766 unsigned int i;
2767 int rc;
2768
2769 tcon = tlink_tcon(ctx->cfile->tlink);
2770 cifs_sb = CIFS_SB(dentry->d_sb);
2771
2772 mutex_lock(&ctx->aio_mutex);
2773
2774 if (list_empty(&ctx->list)) {
2775 mutex_unlock(&ctx->aio_mutex);
2776 return;
2777 }
2778
2779 rc = ctx->rc;
2780 /*
2781 * Wait for and collect replies for any successful sends in order of
2782 * increasing offset. Once an error is hit, then return without waiting
2783 * for any more replies.
2784 */
2785 restart_loop:
2786 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2787 if (!rc) {
2788 if (!try_wait_for_completion(&wdata->done)) {
2789 mutex_unlock(&ctx->aio_mutex);
2790 return;
2791 }
2792
2793 if (wdata->result)
2794 rc = wdata->result;
2795 else
2796 ctx->total_len += wdata->bytes;
2797
2798 /* resend call if it's a retryable error */
2799 if (rc == -EAGAIN) {
2800 struct list_head tmp_list;
2801 struct iov_iter tmp_from = ctx->iter;
2802
2803 INIT_LIST_HEAD(&tmp_list);
2804 list_del_init(&wdata->list);
2805
2806 if (ctx->direct_io)
2807 rc = cifs_resend_wdata(
2808 wdata, &tmp_list, ctx);
2809 else {
2810 iov_iter_advance(&tmp_from,
2811 wdata->offset - ctx->pos);
2812
2813 rc = cifs_write_from_iter(wdata->offset,
2814 wdata->bytes, &tmp_from,
2815 ctx->cfile, cifs_sb, &tmp_list,
2816 ctx);
2817 }
2818
2819 list_splice(&tmp_list, &ctx->list);
2820
2821 kref_put(&wdata->refcount,
2822 cifs_uncached_writedata_release);
2823 goto restart_loop;
2824 }
2825 }
2826 list_del_init(&wdata->list);
2827 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2828 }
2829
2830 if (!ctx->direct_io)
2831 for (i = 0; i < ctx->npages; i++)
2832 put_page(ctx->bv[i].bv_page);
2833
2834 cifs_stats_bytes_written(tcon, ctx->total_len);
2835 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2836
2837 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2838
2839 mutex_unlock(&ctx->aio_mutex);
2840
2841 if (ctx->iocb && ctx->iocb->ki_complete)
2842 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2843 else
2844 complete(&ctx->done);
2845 }
2846
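/*
 * Common body of cifs_user_writev() and cifs_direct_writev(): set up a
 * cifs_aio_ctx describing the write, kick off the chunked sends with
 * cifs_write_from_iter(), then either return -EIOCBQUEUED for async
 * iocbs or wait for completion and return the number of bytes written.
 */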
2847 static ssize_t __cifs_writev(
2848 struct kiocb *iocb, struct iov_iter *from, bool direct)
2849 {
2850 struct file *file = iocb->ki_filp;
2851 ssize_t total_written = 0;
2852 struct cifsFileInfo *cfile;
2853 struct cifs_tcon *tcon;
2854 struct cifs_sb_info *cifs_sb;
2855 struct cifs_aio_ctx *ctx;
2856 struct iov_iter saved_from = *from;
2857 size_t len = iov_iter_count(from);
2858 int rc;
2859
2860 /*
2861 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2862 	 * In this case, fall back to the non-direct write function.
2863 	 * This could be improved by getting pages directly in ITER_KVEC.
2864 */
2865 if (direct && from->type & ITER_KVEC) {
2866 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2867 direct = false;
2868 }
2869
2870 rc = generic_write_checks(iocb, from);
2871 if (rc <= 0)
2872 return rc;
2873
2874 cifs_sb = CIFS_FILE_SB(file);
2875 cfile = file->private_data;
2876 tcon = tlink_tcon(cfile->tlink);
2877
2878 if (!tcon->ses->server->ops->async_writev)
2879 return -ENOSYS;
2880
2881 ctx = cifs_aio_ctx_alloc();
2882 if (!ctx)
2883 return -ENOMEM;
2884
2885 ctx->cfile = cifsFileInfo_get(cfile);
2886
2887 if (!is_sync_kiocb(iocb))
2888 ctx->iocb = iocb;
2889
2890 ctx->pos = iocb->ki_pos;
2891
2892 if (direct) {
2893 ctx->direct_io = true;
2894 ctx->iter = *from;
2895 ctx->len = len;
2896 } else {
2897 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2898 if (rc) {
2899 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2900 return rc;
2901 }
2902 }
2903
2904 	/* grab a lock here because the write response handlers can access ctx */
2905 mutex_lock(&ctx->aio_mutex);
2906
2907 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2908 cfile, cifs_sb, &ctx->list, ctx);
2909
2910 /*
2911 * If at least one write was successfully sent, then discard any rc
2912 	 * value from the later writes. If the other writes succeed, then
2913 	 * we'll end up returning whatever was written. If they fail, then
2914 * we'll get a new rc value from that.
2915 */
2916 if (!list_empty(&ctx->list))
2917 rc = 0;
2918
2919 mutex_unlock(&ctx->aio_mutex);
2920
2921 if (rc) {
2922 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2923 return rc;
2924 }
2925
2926 if (!is_sync_kiocb(iocb)) {
2927 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2928 return -EIOCBQUEUED;
2929 }
2930
2931 rc = wait_for_completion_killable(&ctx->done);
2932 if (rc) {
2933 mutex_lock(&ctx->aio_mutex);
2934 ctx->rc = rc = -EINTR;
2935 total_written = ctx->total_len;
2936 mutex_unlock(&ctx->aio_mutex);
2937 } else {
2938 rc = ctx->rc;
2939 total_written = ctx->total_len;
2940 }
2941
2942 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2943
2944 if (unlikely(!total_written))
2945 return rc;
2946
2947 iocb->ki_pos += total_written;
2948 return total_written;
2949 }
2950
2951 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2952 {
2953 return __cifs_writev(iocb, from, true);
2954 }
2955
2956 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2957 {
2958 return __cifs_writev(iocb, from, false);
2959 }
2960
2961 static ssize_t
2962 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2963 {
2964 struct file *file = iocb->ki_filp;
2965 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2966 struct inode *inode = file->f_mapping->host;
2967 struct cifsInodeInfo *cinode = CIFS_I(inode);
2968 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2969 ssize_t rc;
2970
2971 inode_lock(inode);
2972 /*
2973 	 * We need to hold the sem to be sure nobody modifies the lock list
2974 * with a brlock that prevents writing.
2975 */
2976 down_read(&cinode->lock_sem);
2977
2978 rc = generic_write_checks(iocb, from);
2979 if (rc <= 0)
2980 goto out;
2981
2982 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2983 server->vals->exclusive_lock_type, 0,
2984 NULL, CIFS_WRITE_OP))
2985 rc = __generic_file_write_iter(iocb, from);
2986 else
2987 rc = -EACCES;
2988 out:
2989 up_read(&cinode->lock_sem);
2990 inode_unlock(inode);
2991
2992 if (rc > 0)
2993 rc = generic_write_sync(iocb, rc);
2994 return rc;
2995 }
2996
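/*
 * Write entry point for strict cache mounts: when we hold a write
 * oplock, write through the page cache (checking mandatory brlocks
 * unless POSIX locking is in use); otherwise send the data uncached
 * and invalidate any cached read data afterwards.
 */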
2997 ssize_t
2998 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2999 {
3000 struct inode *inode = file_inode(iocb->ki_filp);
3001 struct cifsInodeInfo *cinode = CIFS_I(inode);
3002 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3003 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3004 iocb->ki_filp->private_data;
3005 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3006 ssize_t written;
3007
3008 written = cifs_get_writer(cinode);
3009 if (written)
3010 return written;
3011
3012 if (CIFS_CACHE_WRITE(cinode)) {
3013 if (cap_unix(tcon->ses) &&
3014 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3015 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3016 written = generic_file_write_iter(iocb, from);
3017 goto out;
3018 }
3019 written = cifs_writev(iocb, from);
3020 goto out;
3021 }
3022 /*
3023 * For non-oplocked files in strict cache mode we need to write the data
3024 	 * to the server exactly from pos to pos+len-1 rather than flush all
3025 	 * affected pages because it may cause an error with mandatory locks on
3026 	 * these pages but not on the region from pos to pos+len-1.
3027 */
3028 written = cifs_user_writev(iocb, from);
3029 if (written > 0 && CIFS_CACHE_READ(cinode)) {
3030 /*
3031 * Windows 7 server can delay breaking level2 oplock if a write
3032 * request comes - break it on the client to prevent reading
3033 		 * old data.
3034 */
3035 cifs_zap_mapping(inode);
3036 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
3037 inode);
3038 cinode->oplock = 0;
3039 }
3040 out:
3041 cifs_put_writer(cinode);
3042 return written;
3043 }
3044
3045 static struct cifs_readdata *
3046 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3047 {
3048 struct cifs_readdata *rdata;
3049
3050 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3051 if (rdata != NULL) {
3052 rdata->pages = pages;
3053 kref_init(&rdata->refcount);
3054 INIT_LIST_HEAD(&rdata->list);
3055 init_completion(&rdata->done);
3056 INIT_WORK(&rdata->work, complete);
3057 }
3058
3059 return rdata;
3060 }
3061
3062 static struct cifs_readdata *
3063 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3064 {
3065 struct page **pages =
3066 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3067 struct cifs_readdata *ret = NULL;
3068
3069 if (pages) {
3070 ret = cifs_readdata_direct_alloc(pages, complete);
3071 if (!ret)
3072 kfree(pages);
3073 }
3074
3075 return ret;
3076 }
3077
3078 void
3079 cifs_readdata_release(struct kref *refcount)
3080 {
3081 struct cifs_readdata *rdata = container_of(refcount,
3082 struct cifs_readdata, refcount);
3083 #ifdef CONFIG_CIFS_SMB_DIRECT
3084 if (rdata->mr) {
3085 smbd_deregister_mr(rdata->mr);
3086 rdata->mr = NULL;
3087 }
3088 #endif
3089 if (rdata->cfile)
3090 cifsFileInfo_put(rdata->cfile);
3091
3092 kvfree(rdata->pages);
3093 kfree(rdata);
3094 }
3095
3096 static int
3097 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3098 {
3099 int rc = 0;
3100 struct page *page;
3101 unsigned int i;
3102
3103 for (i = 0; i < nr_pages; i++) {
3104 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3105 if (!page) {
3106 rc = -ENOMEM;
3107 break;
3108 }
3109 rdata->pages[i] = page;
3110 }
3111
3112 if (rc) {
3113 for (i = 0; i < nr_pages; i++) {
3114 put_page(rdata->pages[i]);
3115 rdata->pages[i] = NULL;
3116 }
3117 }
3118 return rc;
3119 }
3120
3121 static void
3122 cifs_uncached_readdata_release(struct kref *refcount)
3123 {
3124 struct cifs_readdata *rdata = container_of(refcount,
3125 struct cifs_readdata, refcount);
3126 unsigned int i;
3127
3128 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3129 for (i = 0; i < rdata->nr_pages; i++) {
3130 put_page(rdata->pages[i]);
3131 }
3132 cifs_readdata_release(refcount);
3133 }
3134
3135 /**
3136 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3137 * @rdata: the readdata response with list of pages holding data
3138 * @iter: destination for our data
3139 *
3140 * This function copies data from a list of pages in a readdata response into
3141 * an array of iovecs. It will first calculate where the data should go
3142 * based on the info in the readdata and then copy the data into that spot.
3143 */
3144 static int
3145 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3146 {
3147 size_t remaining = rdata->got_bytes;
3148 unsigned int i;
3149
3150 for (i = 0; i < rdata->nr_pages; i++) {
3151 struct page *page = rdata->pages[i];
3152 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3153 size_t written;
3154
3155 if (unlikely(iov_iter_is_pipe(iter))) {
3156 void *addr = kmap_atomic(page);
3157
3158 written = copy_to_iter(addr, copy, iter);
3159 kunmap_atomic(addr);
3160 } else
3161 written = copy_page_to_iter(page, 0, copy, iter);
3162 remaining -= written;
3163 if (written < copy && iov_iter_count(iter) > 0)
3164 break;
3165 }
3166 return remaining ? -EFAULT : 0;
3167 }
3168
3169 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3170
3171 static void
3172 cifs_uncached_readv_complete(struct work_struct *work)
3173 {
3174 struct cifs_readdata *rdata = container_of(work,
3175 struct cifs_readdata, work);
3176
3177 complete(&rdata->done);
3178 collect_uncached_read_data(rdata->ctx);
3179 /* the below call can possibly free the last ref to aio ctx */
3180 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3181 }
3182
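/*
 * Copy up to @len bytes of read response data into the pages of an
 * uncached rdata, either from @iter when one is supplied or straight
 * from the socket. Pages that receive no data are released.
 */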
3183 static int
3184 uncached_fill_pages(struct TCP_Server_Info *server,
3185 struct cifs_readdata *rdata, struct iov_iter *iter,
3186 unsigned int len)
3187 {
3188 int result = 0;
3189 unsigned int i;
3190 unsigned int nr_pages = rdata->nr_pages;
3191 unsigned int page_offset = rdata->page_offset;
3192
3193 rdata->got_bytes = 0;
3194 rdata->tailsz = PAGE_SIZE;
3195 for (i = 0; i < nr_pages; i++) {
3196 struct page *page = rdata->pages[i];
3197 size_t n;
3198 unsigned int segment_size = rdata->pagesz;
3199
3200 if (i == 0)
3201 segment_size -= page_offset;
3202 else
3203 page_offset = 0;
3204
3205
3206 if (len <= 0) {
3207 /* no need to hold page hostage */
3208 rdata->pages[i] = NULL;
3209 rdata->nr_pages--;
3210 put_page(page);
3211 continue;
3212 }
3213
3214 n = len;
3215 if (len >= segment_size)
3216 /* enough data to fill the page */
3217 n = segment_size;
3218 else
3219 rdata->tailsz = len;
3220 len -= n;
3221
3222 if (iter)
3223 result = copy_page_from_iter(
3224 page, page_offset, n, iter);
3225 #ifdef CONFIG_CIFS_SMB_DIRECT
3226 else if (rdata->mr)
3227 result = n;
3228 #endif
3229 else
3230 result = cifs_read_page_from_socket(
3231 server, page, page_offset, n);
3232 if (result < 0)
3233 break;
3234
3235 rdata->got_bytes += result;
3236 }
3237
3238 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3239 rdata->got_bytes : result;
3240 }
3241
3242 static int
3243 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3244 struct cifs_readdata *rdata, unsigned int len)
3245 {
3246 return uncached_fill_pages(server, rdata, NULL, len);
3247 }
3248
3249 static int
3250 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3251 struct cifs_readdata *rdata,
3252 struct iov_iter *iter)
3253 {
3254 return uncached_fill_pages(server, rdata, iter, iter->count);
3255 }
3256
3257 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3258 struct list_head *rdata_list,
3259 struct cifs_aio_ctx *ctx)
3260 {
3261 unsigned int rsize, credits;
3262 int rc;
3263 struct TCP_Server_Info *server =
3264 tlink_tcon(rdata->cfile->tlink)->ses->server;
3265
3266 /*
3267 * Wait for credits to resend this rdata.
3268 * Note: we are attempting to resend the whole rdata not in segments
3269 */
3270 do {
3271 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3272 &rsize, &credits);
3273
3274 if (rc)
3275 goto out;
3276
3277 if (rsize < rdata->bytes) {
3278 add_credits_and_wake_if(server, credits, 0);
3279 msleep(1000);
3280 }
3281 } while (rsize < rdata->bytes);
3282
3283 rc = -EAGAIN;
3284 while (rc == -EAGAIN) {
3285 rc = 0;
3286 if (rdata->cfile->invalidHandle)
3287 rc = cifs_reopen_file(rdata->cfile, true);
3288 if (!rc)
3289 rc = server->ops->async_readv(rdata);
3290 }
3291
3292 if (!rc) {
3293 /* Add to aio pending list */
3294 list_add_tail(&rdata->list, rdata_list);
3295 return 0;
3296 }
3297
3298 add_credits_and_wake_if(server, rdata->credits, 0);
3299 out:
3300 kref_put(&rdata->refcount,
3301 cifs_uncached_readdata_release);
3302
3303 return rc;
3304 }
3305
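/*
 * Split an uncached or direct read into rsize-bounded chunks, allocate
 * an rdata for each chunk (pinning user pages for direct I/O), submit
 * it with ->async_readv() and queue the submitted rdatas on
 * @rdata_list.
 */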
3306 static int
3307 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3308 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3309 struct cifs_aio_ctx *ctx)
3310 {
3311 struct cifs_readdata *rdata;
3312 unsigned int npages, rsize, credits;
3313 size_t cur_len;
3314 int rc;
3315 pid_t pid;
3316 struct TCP_Server_Info *server;
3317 struct page **pagevec;
3318 size_t start;
3319 struct iov_iter direct_iov = ctx->iter;
3320
3321 server = tlink_tcon(open_file->tlink)->ses->server;
3322
3323 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3324 pid = open_file->pid;
3325 else
3326 pid = current->tgid;
3327
3328 if (ctx->direct_io)
3329 iov_iter_advance(&direct_iov, offset - ctx->pos);
3330
3331 do {
3332 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3333 &rsize, &credits);
3334 if (rc)
3335 break;
3336
3337 cur_len = min_t(const size_t, len, rsize);
3338
3339 if (ctx->direct_io) {
3340 ssize_t result;
3341
3342 result = iov_iter_get_pages_alloc(
3343 &direct_iov, &pagevec,
3344 cur_len, &start);
3345 if (result < 0) {
3346 cifs_dbg(VFS,
3347 "couldn't get user pages (rc=%zd)"
3348 " iter type %d"
3349 " iov_offset %zd count %zd\n",
3350 result, direct_iov.type,
3351 direct_iov.iov_offset,
3352 direct_iov.count);
3353 dump_stack();
3354
3355 rc = result;
3356 add_credits_and_wake_if(server, credits, 0);
3357 break;
3358 }
3359 cur_len = (size_t)result;
3360 iov_iter_advance(&direct_iov, cur_len);
3361
3362 rdata = cifs_readdata_direct_alloc(
3363 pagevec, cifs_uncached_readv_complete);
3364 if (!rdata) {
3365 add_credits_and_wake_if(server, credits, 0);
3366 rc = -ENOMEM;
3367 break;
3368 }
3369
3370 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3371 rdata->page_offset = start;
3372 rdata->tailsz = npages > 1 ?
3373 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3374 cur_len;
3375
3376 } else {
3377
3378 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3379 /* allocate a readdata struct */
3380 rdata = cifs_readdata_alloc(npages,
3381 cifs_uncached_readv_complete);
3382 if (!rdata) {
3383 add_credits_and_wake_if(server, credits, 0);
3384 rc = -ENOMEM;
3385 break;
3386 }
3387
3388 rc = cifs_read_allocate_pages(rdata, npages);
3389 if (rc)
3390 goto error;
3391
3392 rdata->tailsz = PAGE_SIZE;
3393 }
3394
3395 rdata->cfile = cifsFileInfo_get(open_file);
3396 rdata->nr_pages = npages;
3397 rdata->offset = offset;
3398 rdata->bytes = cur_len;
3399 rdata->pid = pid;
3400 rdata->pagesz = PAGE_SIZE;
3401 rdata->read_into_pages = cifs_uncached_read_into_pages;
3402 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3403 rdata->credits = credits;
3404 rdata->ctx = ctx;
3405 kref_get(&ctx->refcount);
3406
3407 if (!rdata->cfile->invalidHandle ||
3408 !(rc = cifs_reopen_file(rdata->cfile, true)))
3409 rc = server->ops->async_readv(rdata);
3410 error:
3411 if (rc) {
3412 add_credits_and_wake_if(server, rdata->credits, 0);
3413 kref_put(&rdata->refcount,
3414 cifs_uncached_readdata_release);
3415 if (rc == -EAGAIN) {
3416 iov_iter_revert(&direct_iov, cur_len);
3417 continue;
3418 }
3419 break;
3420 }
3421
3422 list_add_tail(&rdata->list, rdata_list);
3423 offset += cur_len;
3424 len -= cur_len;
3425 } while (len > 0);
3426
3427 return rc;
3428 }
3429
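/*
 * Completion path for uncached reads: wait for each queued rdata in
 * order of increasing offset, copy the received bytes to the
 * destination iterator (unless this is direct I/O), resend anything
 * that hit -EAGAIN, and report the total through the aio ctx.
 */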
3430 static void
3431 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3432 {
3433 struct cifs_readdata *rdata, *tmp;
3434 struct iov_iter *to = &ctx->iter;
3435 struct cifs_sb_info *cifs_sb;
3436 struct cifs_tcon *tcon;
3437 unsigned int i;
3438 int rc;
3439
3440 tcon = tlink_tcon(ctx->cfile->tlink);
3441 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3442
3443 mutex_lock(&ctx->aio_mutex);
3444
3445 if (list_empty(&ctx->list)) {
3446 mutex_unlock(&ctx->aio_mutex);
3447 return;
3448 }
3449
3450 rc = ctx->rc;
3451 /* the loop below should proceed in the order of increasing offsets */
3452 again:
3453 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3454 if (!rc) {
3455 if (!try_wait_for_completion(&rdata->done)) {
3456 mutex_unlock(&ctx->aio_mutex);
3457 return;
3458 }
3459
3460 if (rdata->result == -EAGAIN) {
3461 /* resend call if it's a retryable error */
3462 struct list_head tmp_list;
3463 unsigned int got_bytes = rdata->got_bytes;
3464
3465 list_del_init(&rdata->list);
3466 INIT_LIST_HEAD(&tmp_list);
3467
3468 /*
3469 			 * Got part of the data and then a reconnect
3470 			 * happened -- fill the buffer and continue
3471 * reading.
3472 */
3473 if (got_bytes && got_bytes < rdata->bytes) {
3474 rc = 0;
3475 if (!ctx->direct_io)
3476 rc = cifs_readdata_to_iov(rdata, to);
3477 if (rc) {
3478 kref_put(&rdata->refcount,
3479 cifs_uncached_readdata_release);
3480 continue;
3481 }
3482 }
3483
3484 if (ctx->direct_io) {
3485 /*
3486 * Re-use rdata as this is a
3487 * direct I/O
3488 */
3489 rc = cifs_resend_rdata(
3490 rdata,
3491 &tmp_list, ctx);
3492 } else {
3493 rc = cifs_send_async_read(
3494 rdata->offset + got_bytes,
3495 rdata->bytes - got_bytes,
3496 rdata->cfile, cifs_sb,
3497 &tmp_list, ctx);
3498
3499 kref_put(&rdata->refcount,
3500 cifs_uncached_readdata_release);
3501 }
3502
3503 list_splice(&tmp_list, &ctx->list);
3504
3505 goto again;
3506 } else if (rdata->result)
3507 rc = rdata->result;
3508 else if (!ctx->direct_io)
3509 rc = cifs_readdata_to_iov(rdata, to);
3510
3511 /* if there was a short read -- discard anything left */
3512 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3513 rc = -ENODATA;
3514
3515 ctx->total_len += rdata->got_bytes;
3516 }
3517 list_del_init(&rdata->list);
3518 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3519 }
3520
3521 if (!ctx->direct_io) {
3522 for (i = 0; i < ctx->npages; i++) {
3523 if (ctx->should_dirty)
3524 set_page_dirty(ctx->bv[i].bv_page);
3525 put_page(ctx->bv[i].bv_page);
3526 }
3527
3528 ctx->total_len = ctx->len - iov_iter_count(to);
3529 }
3530
3531 cifs_stats_bytes_read(tcon, ctx->total_len);
3532
3533 /* mask nodata case */
3534 if (rc == -ENODATA)
3535 rc = 0;
3536
3537 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3538
3539 mutex_unlock(&ctx->aio_mutex);
3540
3541 if (ctx->iocb && ctx->iocb->ki_complete)
3542 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3543 else
3544 complete(&ctx->done);
3545 }
3546
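/*
 * Common body of cifs_user_readv() and cifs_direct_readv(): build a
 * cifs_aio_ctx for the request, issue the chunked reads with
 * cifs_send_async_read(), then either return -EIOCBQUEUED for async
 * iocbs or wait for completion and return the number of bytes read.
 */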
3547 static ssize_t __cifs_readv(
3548 struct kiocb *iocb, struct iov_iter *to, bool direct)
3549 {
3550 size_t len;
3551 struct file *file = iocb->ki_filp;
3552 struct cifs_sb_info *cifs_sb;
3553 struct cifsFileInfo *cfile;
3554 struct cifs_tcon *tcon;
3555 ssize_t rc, total_read = 0;
3556 loff_t offset = iocb->ki_pos;
3557 struct cifs_aio_ctx *ctx;
3558
3559 /*
3560 	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3561 	 * so fall back to the data copy read path.
3562 	 * This could be improved by getting pages directly in ITER_KVEC.
3563 */
3564 if (direct && to->type & ITER_KVEC) {
3565 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3566 direct = false;
3567 }
3568
3569 len = iov_iter_count(to);
3570 if (!len)
3571 return 0;
3572
3573 cifs_sb = CIFS_FILE_SB(file);
3574 cfile = file->private_data;
3575 tcon = tlink_tcon(cfile->tlink);
3576
3577 if (!tcon->ses->server->ops->async_readv)
3578 return -ENOSYS;
3579
3580 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3581 cifs_dbg(FYI, "attempting read on write only file instance\n");
3582
3583 ctx = cifs_aio_ctx_alloc();
3584 if (!ctx)
3585 return -ENOMEM;
3586
3587 ctx->cfile = cifsFileInfo_get(cfile);
3588
3589 if (!is_sync_kiocb(iocb))
3590 ctx->iocb = iocb;
3591
3592 if (iter_is_iovec(to))
3593 ctx->should_dirty = true;
3594
3595 if (direct) {
3596 ctx->pos = offset;
3597 ctx->direct_io = true;
3598 ctx->iter = *to;
3599 ctx->len = len;
3600 } else {
3601 rc = setup_aio_ctx_iter(ctx, to, READ);
3602 if (rc) {
3603 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3604 return rc;
3605 }
3606 len = ctx->len;
3607 }
3608
3609 	/* grab a lock here because the read response handlers can access ctx */
3610 mutex_lock(&ctx->aio_mutex);
3611
3612 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3613
3614 	/* if at least one read request was sent successfully, then reset rc */
3615 if (!list_empty(&ctx->list))
3616 rc = 0;
3617
3618 mutex_unlock(&ctx->aio_mutex);
3619
3620 if (rc) {
3621 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3622 return rc;
3623 }
3624
3625 if (!is_sync_kiocb(iocb)) {
3626 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3627 return -EIOCBQUEUED;
3628 }
3629
3630 rc = wait_for_completion_killable(&ctx->done);
3631 if (rc) {
3632 mutex_lock(&ctx->aio_mutex);
3633 ctx->rc = rc = -EINTR;
3634 total_read = ctx->total_len;
3635 mutex_unlock(&ctx->aio_mutex);
3636 } else {
3637 rc = ctx->rc;
3638 total_read = ctx->total_len;
3639 }
3640
3641 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3642
3643 if (total_read) {
3644 iocb->ki_pos += total_read;
3645 return total_read;
3646 }
3647 return rc;
3648 }
3649
3650 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3651 {
3652 return __cifs_readv(iocb, to, true);
3653 }
3654
3655 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3656 {
3657 return __cifs_readv(iocb, to, false);
3658 }
3659
3660 ssize_t
3661 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3662 {
3663 struct inode *inode = file_inode(iocb->ki_filp);
3664 struct cifsInodeInfo *cinode = CIFS_I(inode);
3665 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3666 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3667 iocb->ki_filp->private_data;
3668 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3669 int rc = -EACCES;
3670
3671 /*
3672 * In strict cache mode we need to read from the server all the time
3673 * if we don't have level II oplock because the server can delay mtime
3674 	 * change - so we can't make a decision about invalidating the inode.
3675 	 * And we can also fail with page reading if there are mandatory locks
3676 * on pages affected by this read but not on the region from pos to
3677 * pos+len-1.
3678 */
3679 if (!CIFS_CACHE_READ(cinode))
3680 return cifs_user_readv(iocb, to);
3681
3682 if (cap_unix(tcon->ses) &&
3683 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3684 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3685 return generic_file_read_iter(iocb, to);
3686
3687 /*
3688 * We need to hold the sem to be sure nobody modifies lock list
3689 * with a brlock that prevents reading.
3690 */
3691 down_read(&cinode->lock_sem);
3692 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3693 tcon->ses->server->vals->shared_lock_type,
3694 0, NULL, CIFS_READ_OP))
3695 rc = generic_file_read_iter(iocb, to);
3696 up_read(&cinode->lock_sem);
3697 return rc;
3698 }
3699
3700 static ssize_t
3701 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3702 {
3703 int rc = -EACCES;
3704 unsigned int bytes_read = 0;
3705 unsigned int total_read;
3706 unsigned int current_read_size;
3707 unsigned int rsize;
3708 struct cifs_sb_info *cifs_sb;
3709 struct cifs_tcon *tcon;
3710 struct TCP_Server_Info *server;
3711 unsigned int xid;
3712 char *cur_offset;
3713 struct cifsFileInfo *open_file;
3714 struct cifs_io_parms io_parms;
3715 int buf_type = CIFS_NO_BUFFER;
3716 __u32 pid;
3717
3718 xid = get_xid();
3719 cifs_sb = CIFS_FILE_SB(file);
3720
3721 /* FIXME: set up handlers for larger reads and/or convert to async */
3722 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3723
3724 if (file->private_data == NULL) {
3725 rc = -EBADF;
3726 free_xid(xid);
3727 return rc;
3728 }
3729 open_file = file->private_data;
3730 tcon = tlink_tcon(open_file->tlink);
3731 server = tcon->ses->server;
3732
3733 if (!server->ops->sync_read) {
3734 free_xid(xid);
3735 return -ENOSYS;
3736 }
3737
3738 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3739 pid = open_file->pid;
3740 else
3741 pid = current->tgid;
3742
3743 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3744 cifs_dbg(FYI, "attempting read on write only file instance\n");
3745
3746 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3747 total_read += bytes_read, cur_offset += bytes_read) {
3748 do {
3749 current_read_size = min_t(uint, read_size - total_read,
3750 rsize);
3751 /*
3752 			 * For Windows ME and 9x we do not want to request more
3753 			 * than was negotiated, since the server will refuse the
3754 			 * read otherwise.
3755 */
3756 if ((tcon->ses) && !(tcon->ses->capabilities &
3757 tcon->ses->server->vals->cap_large_files)) {
3758 current_read_size = min_t(uint,
3759 current_read_size, CIFSMaxBufSize);
3760 }
3761 if (open_file->invalidHandle) {
3762 rc = cifs_reopen_file(open_file, true);
3763 if (rc != 0)
3764 break;
3765 }
3766 io_parms.pid = pid;
3767 io_parms.tcon = tcon;
3768 io_parms.offset = *offset;
3769 io_parms.length = current_read_size;
3770 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3771 &bytes_read, &cur_offset,
3772 &buf_type);
3773 } while (rc == -EAGAIN);
3774
3775 if (rc || (bytes_read == 0)) {
3776 if (total_read) {
3777 break;
3778 } else {
3779 free_xid(xid);
3780 return rc;
3781 }
3782 } else {
3783 cifs_stats_bytes_read(tcon, total_read);
3784 *offset += bytes_read;
3785 }
3786 }
3787 free_xid(xid);
3788 return total_read;
3789 }
3790
3791 /*
3792 * If the page is mmap'ed into a process' page tables, then we need to make
3793 * sure that it doesn't change while being written back.
3794 */
3795 static vm_fault_t
3796 cifs_page_mkwrite(struct vm_fault *vmf)
3797 {
3798 struct page *page = vmf->page;
3799
3800 lock_page(page);
3801 return VM_FAULT_LOCKED;
3802 }
3803
3804 static const struct vm_operations_struct cifs_file_vm_ops = {
3805 .fault = filemap_fault,
3806 .map_pages = filemap_map_pages,
3807 .page_mkwrite = cifs_page_mkwrite,
3808 };
3809
3810 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3811 {
3812 int xid, rc = 0;
3813 struct inode *inode = file_inode(file);
3814
3815 xid = get_xid();
3816
3817 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3818 rc = cifs_zap_mapping(inode);
3819 if (!rc)
3820 rc = generic_file_mmap(file, vma);
3821 if (!rc)
3822 vma->vm_ops = &cifs_file_vm_ops;
3823
3824 free_xid(xid);
3825 return rc;
3826 }
3827
3828 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3829 {
3830 int rc, xid;
3831
3832 xid = get_xid();
3833
3834 rc = cifs_revalidate_file(file);
3835 if (rc)
3836 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3837 rc);
3838 if (!rc)
3839 rc = generic_file_mmap(file, vma);
3840 if (!rc)
3841 vma->vm_ops = &cifs_file_vm_ops;
3842
3843 free_xid(xid);
3844 return rc;
3845 }
3846
3847 static void
3848 cifs_readv_complete(struct work_struct *work)
3849 {
3850 unsigned int i, got_bytes;
3851 struct cifs_readdata *rdata = container_of(work,
3852 struct cifs_readdata, work);
3853
3854 got_bytes = rdata->got_bytes;
3855 for (i = 0; i < rdata->nr_pages; i++) {
3856 struct page *page = rdata->pages[i];
3857
3858 lru_cache_add_file(page);
3859
3860 if (rdata->result == 0 ||
3861 (rdata->result == -EAGAIN && got_bytes)) {
3862 flush_dcache_page(page);
3863 SetPageUptodate(page);
3864 }
3865
3866 unlock_page(page);
3867
3868 if (rdata->result == 0 ||
3869 (rdata->result == -EAGAIN && got_bytes))
3870 cifs_readpage_to_fscache(rdata->mapping->host, page);
3871
3872 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3873
3874 put_page(page);
3875 rdata->pages[i] = NULL;
3876 }
3877 kref_put(&rdata->refcount, cifs_readdata_release);
3878 }
3879
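/*
 * Readahead counterpart of uncached_fill_pages(): fill the pages of an
 * rdata from the read response, zero-filling the tail of a partially
 * filled page, zero-filling pages that lie beyond the server's EOF,
 * and releasing pages for which no data arrived.
 */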
3880 static int
3881 readpages_fill_pages(struct TCP_Server_Info *server,
3882 struct cifs_readdata *rdata, struct iov_iter *iter,
3883 unsigned int len)
3884 {
3885 int result = 0;
3886 unsigned int i;
3887 u64 eof;
3888 pgoff_t eof_index;
3889 unsigned int nr_pages = rdata->nr_pages;
3890 unsigned int page_offset = rdata->page_offset;
3891
3892 /* determine the eof that the server (probably) has */
3893 eof = CIFS_I(rdata->mapping->host)->server_eof;
3894 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3895 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3896
3897 rdata->got_bytes = 0;
3898 rdata->tailsz = PAGE_SIZE;
3899 for (i = 0; i < nr_pages; i++) {
3900 struct page *page = rdata->pages[i];
3901 unsigned int to_read = rdata->pagesz;
3902 size_t n;
3903
3904 if (i == 0)
3905 to_read -= page_offset;
3906 else
3907 page_offset = 0;
3908
3909 n = to_read;
3910
3911 if (len >= to_read) {
3912 len -= to_read;
3913 } else if (len > 0) {
3914 /* enough for partial page, fill and zero the rest */
3915 zero_user(page, len + page_offset, to_read - len);
3916 n = rdata->tailsz = len;
3917 len = 0;
3918 } else if (page->index > eof_index) {
3919 /*
3920 * The VFS will not try to do readahead past the
3921 * i_size, but it's possible that we have outstanding
3922 * writes with gaps in the middle and the i_size hasn't
3923 * caught up yet. Populate those with zeroed out pages
3924 * to prevent the VFS from repeatedly attempting to
3925 * fill them until the writes are flushed.
3926 */
3927 zero_user(page, 0, PAGE_SIZE);
3928 lru_cache_add_file(page);
3929 flush_dcache_page(page);
3930 SetPageUptodate(page);
3931 unlock_page(page);
3932 put_page(page);
3933 rdata->pages[i] = NULL;
3934 rdata->nr_pages--;
3935 continue;
3936 } else {
3937 /* no need to hold page hostage */
3938 lru_cache_add_file(page);
3939 unlock_page(page);
3940 put_page(page);
3941 rdata->pages[i] = NULL;
3942 rdata->nr_pages--;
3943 continue;
3944 }
3945
3946 if (iter)
3947 result = copy_page_from_iter(
3948 page, page_offset, n, iter);
3949 #ifdef CONFIG_CIFS_SMB_DIRECT
3950 else if (rdata->mr)
3951 result = n;
3952 #endif
3953 else
3954 result = cifs_read_page_from_socket(
3955 server, page, page_offset, n);
3956 if (result < 0)
3957 break;
3958
3959 rdata->got_bytes += result;
3960 }
3961
3962 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3963 rdata->got_bytes : result;
3964 }
3965
3966 static int
3967 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3968 struct cifs_readdata *rdata, unsigned int len)
3969 {
3970 return readpages_fill_pages(server, rdata, NULL, len);
3971 }
3972
3973 static int
3974 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3975 struct cifs_readdata *rdata,
3976 struct iov_iter *iter)
3977 {
3978 return readpages_fill_pages(server, rdata, iter, iter->count);
3979 }
3980
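/*
 * Pull a run of consecutive pages off @page_list and add them to the
 * page cache, stopping at an index discontinuity or once the run would
 * exceed @rsize. The pages are moved to @tmplist and the resulting
 * offset, byte count and page count are returned to the caller.
 */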
3981 static int
3982 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3983 unsigned int rsize, struct list_head *tmplist,
3984 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3985 {
3986 struct page *page, *tpage;
3987 unsigned int expected_index;
3988 int rc;
3989 gfp_t gfp = readahead_gfp_mask(mapping);
3990
3991 INIT_LIST_HEAD(tmplist);
3992
3993 page = lru_to_page(page_list);
3994
3995 /*
3996 * Lock the page and put it in the cache. Since no one else
3997 * should have access to this page, we're safe to simply set
3998 * PG_locked without checking it first.
3999 */
4000 __SetPageLocked(page);
4001 rc = add_to_page_cache_locked(page, mapping,
4002 page->index, gfp);
4003
4004 /* give up if we can't stick it in the cache */
4005 if (rc) {
4006 __ClearPageLocked(page);
4007 return rc;
4008 }
4009
4010 /* move first page to the tmplist */
4011 *offset = (loff_t)page->index << PAGE_SHIFT;
4012 *bytes = PAGE_SIZE;
4013 *nr_pages = 1;
4014 list_move_tail(&page->lru, tmplist);
4015
4016 /* now try and add more pages onto the request */
4017 expected_index = page->index + 1;
4018 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4019 /* discontinuity ? */
4020 if (page->index != expected_index)
4021 break;
4022
4023 /* would this page push the read over the rsize? */
4024 if (*bytes + PAGE_SIZE > rsize)
4025 break;
4026
4027 __SetPageLocked(page);
4028 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4029 __ClearPageLocked(page);
4030 break;
4031 }
4032 list_move_tail(&page->lru, tmplist);
4033 (*bytes) += PAGE_SIZE;
4034 expected_index++;
4035 (*nr_pages)++;
4036 }
4037 return rc;
4038 }
4039
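/*
 * ->readpages() for cifs: try to satisfy the request from fscache first, then
 * batch contiguous pages into rsize-sized chunks with readpages_get_pages()
 * and issue them as asynchronous reads via the server's async_readv op;
 * cifs_readv_complete() handles completion of each chunk.
 */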
4040 static int cifs_readpages(struct file *file, struct address_space *mapping,
4041 struct list_head *page_list, unsigned num_pages)
4042 {
4043 int rc;
4044 struct list_head tmplist;
4045 struct cifsFileInfo *open_file = file->private_data;
4046 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4047 struct TCP_Server_Info *server;
4048 pid_t pid;
4049 unsigned int xid;
4050
4051 xid = get_xid();
4052 /*
4053 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4054 * immediately if the cookie is negative
4055 *
4056 * After this point, every page in the list might have PG_fscache set,
4057 * so we will need to clean that up off of every page we don't use.
4058 */
4059 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4060 &num_pages);
4061 if (rc == 0) {
4062 free_xid(xid);
4063 return rc;
4064 }
4065
4066 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067 pid = open_file->pid;
4068 else
4069 pid = current->tgid;
4070
4071 rc = 0;
4072 server = tlink_tcon(open_file->tlink)->ses->server;
4073
4074 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4075 __func__, file, mapping, num_pages);
4076
4077 /*
4078 * Start with the page at the end of the list and move it to the
4079 * private list. Do the same with any following pages until we hit
4080 * the rsize limit, hit an index discontinuity, or run out of
4081 * pages. Issue the async read and then start the loop again
4082 * until the list is empty.
4083 *
4084 * Note that list order is important: page_list holds pages in
4085 * order of declining index, but we want them in rdata->pages in
4086 * increasing order.
4087 */
4088 while (!list_empty(page_list)) {
4089 unsigned int i, nr_pages, bytes, rsize;
4090 loff_t offset;
4091 struct page *page, *tpage;
4092 struct cifs_readdata *rdata;
4093 unsigned credits;
4094
4095 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4096 &rsize, &credits);
4097 if (rc)
4098 break;
4099
4100 /*
4101 * Give up immediately if rsize is too small to read an entire
4102 * page. The VFS will fall back to readpage. We should never
4103 * reach this point however since we set ra_pages to 0 when the
4104 * rsize is smaller than a cache page.
4105 */
4106 if (unlikely(rsize < PAGE_SIZE)) {
4107 add_credits_and_wake_if(server, credits, 0);
4108 free_xid(xid);
4109 return 0;
4110 }
4111
4112 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4113 &nr_pages, &offset, &bytes);
4114 if (rc) {
4115 add_credits_and_wake_if(server, credits, 0);
4116 break;
4117 }
4118
4119 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4120 if (!rdata) {
4121 /* best to give up if we're out of mem */
4122 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4123 list_del(&page->lru);
4124 lru_cache_add_file(page);
4125 unlock_page(page);
4126 put_page(page);
4127 }
4128 rc = -ENOMEM;
4129 add_credits_and_wake_if(server, credits, 0);
4130 break;
4131 }
4132
4133 rdata->cfile = cifsFileInfo_get(open_file);
4134 rdata->mapping = mapping;
4135 rdata->offset = offset;
4136 rdata->bytes = bytes;
4137 rdata->pid = pid;
4138 rdata->pagesz = PAGE_SIZE;
4139 rdata->tailsz = PAGE_SIZE;
4140 rdata->read_into_pages = cifs_readpages_read_into_pages;
4141 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4142 rdata->credits = credits;
4143
4144 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4145 list_del(&page->lru);
4146 rdata->pages[rdata->nr_pages++] = page;
4147 }
4148
4149 if (!rdata->cfile->invalidHandle ||
4150 !(rc = cifs_reopen_file(rdata->cfile, true)))
4151 rc = server->ops->async_readv(rdata);
4152 if (rc) {
4153 add_credits_and_wake_if(server, rdata->credits, 0);
4154 for (i = 0; i < rdata->nr_pages; i++) {
4155 page = rdata->pages[i];
4156 lru_cache_add_file(page);
4157 unlock_page(page);
4158 put_page(page);
4159 }
4160 /* Fallback to the readpage in error/reconnect cases */
4161 kref_put(&rdata->refcount, cifs_readdata_release);
4162 break;
4163 }
4164
4165 kref_put(&rdata->refcount, cifs_readdata_release);
4166 }
4167
4168 /* Any pages that have been shown to fscache but didn't get added to
4169 * the pagecache must be uncached before they get returned to the
4170 * allocator.
4171 */
4172 cifs_fscache_readpages_cancel(mapping->host, page_list);
4173 free_xid(xid);
4174 return rc;
4175 }
4176
4177 /*
4178 * cifs_readpage_worker must be called with the page pinned
4179 */
4180 static int cifs_readpage_worker(struct file *file, struct page *page,
4181 loff_t *poffset)
4182 {
4183 char *read_data;
4184 int rc;
4185
4186 /* Is the page cached? */
4187 rc = cifs_readpage_from_fscache(file_inode(file), page);
4188 if (rc == 0)
4189 goto read_complete;
4190
4191 read_data = kmap(page);
4192 /* for reads over a certain size could initiate async read ahead */
4193
4194 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4195
4196 if (rc < 0)
4197 goto io_error;
4198 else
4199 cifs_dbg(FYI, "Bytes read %d\n", rc);
4200
4201 /* we do not want atime to be less than mtime, it broke some apps */
4202 file_inode(file)->i_atime = current_time(file_inode(file));
4203 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)) < 0)
4204 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4205 else
4206 file_inode(file)->i_atime = current_time(file_inode(file));
4207
4208 if (PAGE_SIZE > rc)
4209 memset(read_data + rc, 0, PAGE_SIZE - rc);
4210
4211 flush_dcache_page(page);
4212 SetPageUptodate(page);
4213
4214 /* send this page to the cache */
4215 cifs_readpage_to_fscache(file_inode(file), page);
4216
4217 rc = 0;
4218
4219 io_error:
4220 kunmap(page);
4221 unlock_page(page);
4222
4223 read_complete:
4224 return rc;
4225 }
4226
4227 static int cifs_readpage(struct file *file, struct page *page)
4228 {
4229 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4230 int rc = -EACCES;
4231 unsigned int xid;
4232
4233 xid = get_xid();
4234
4235 if (file->private_data == NULL) {
4236 rc = -EBADF;
4237 free_xid(xid);
4238 return rc;
4239 }
4240
4241 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4242 page, (int)offset, (int)offset);
4243
4244 rc = cifs_readpage_worker(file, page, &offset);
4245
4246 free_xid(xid);
4247 return rc;
4248 }
4249
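/* Return 1 if any file handle currently open on this inode has write access. */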
4250 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4251 {
4252 struct cifsFileInfo *open_file;
4253 struct cifs_tcon *tcon =
4254 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4255
4256 spin_lock(&tcon->open_file_lock);
4257 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4258 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4259 spin_unlock(&tcon->open_file_lock);
4260 return 1;
4261 }
4262 }
4263 spin_unlock(&tcon->open_file_lock);
4264 return 0;
4265 }
4266
4267 /* We do not want to update the file size from the server for inodes
4268 * open for write, to avoid races with writepage extending the file.
4269 * In the future we could consider refreshing the inode only on
4270 * increases in the file size, but this is tricky to do without racing
4271 * with writebehind page caching in the current Linux kernel design.
4272 */
4273 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4274 {
4275 if (!cifsInode)
4276 return true;
4277
4278 if (is_inode_writable(cifsInode)) {
4279 /* This inode is open for write at least once */
4280 struct cifs_sb_info *cifs_sb;
4281
4282 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4283 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4284 /* since no page cache to corrupt on directio
4285 we can change size safely */
4286 return true;
4287 }
4288
4289 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4290 return true;
4291
4292 return false;
4293 } else
4294 return true;
4295 }
4296
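/*
 * Prepare a page for a buffered write: lock the page and, when the write only
 * partially covers a page that is not already uptodate, either zero the parts
 * not being written (when a read oplock tells us the page lies beyond EOF or
 * the write covers all of its existing data) or read the existing data in
 * first.
 */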
4297 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4298 loff_t pos, unsigned len, unsigned flags,
4299 struct page **pagep, void **fsdata)
4300 {
4301 int oncethru = 0;
4302 pgoff_t index = pos >> PAGE_SHIFT;
4303 loff_t offset = pos & (PAGE_SIZE - 1);
4304 loff_t page_start = pos & PAGE_MASK;
4305 loff_t i_size;
4306 struct page *page;
4307 int rc = 0;
4308
4309 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4310
4311 start:
4312 page = grab_cache_page_write_begin(mapping, index, flags);
4313 if (!page) {
4314 rc = -ENOMEM;
4315 goto out;
4316 }
4317
4318 if (PageUptodate(page))
4319 goto out;
4320
4321 /*
4322 * If we write a full page it will be up to date, no need to read from
4323 * the server. If the write is short, we'll end up doing a sync write
4324 * instead.
4325 */
4326 if (len == PAGE_SIZE)
4327 goto out;
4328
4329 /*
4330 * optimize away the read when we have an oplock, and we're not
4331 * expecting to use any of the data we'd be reading in. That
4332 * is, when the page lies beyond the EOF, or straddles the EOF
4333 * and the write will cover all of the existing data.
4334 */
4335 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4336 i_size = i_size_read(mapping->host);
4337 if (page_start >= i_size ||
4338 (offset == 0 && (pos + len) >= i_size)) {
4339 zero_user_segments(page, 0, offset,
4340 offset + len,
4341 PAGE_SIZE);
4342 /*
4343 * PageChecked means that the parts of the page
4344 * to which we're not writing are considered up
4345 * to date. Once the data is copied to the
4346 * page, it can be set uptodate.
4347 */
4348 SetPageChecked(page);
4349 goto out;
4350 }
4351 }
4352
4353 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4354 /*
4355 * might as well read a page, it is fast enough. If we get
4356 * an error, we don't need to return it. cifs_write_end will
4357 * do a sync write instead since PG_uptodate isn't set.
4358 */
4359 cifs_readpage_worker(file, page, &page_start);
4360 put_page(page);
4361 oncethru = 1;
4362 goto start;
4363 } else {
4364 /* we could try using another file handle if there is one -
4365 but how would we lock it to prevent close of that handle
4366 racing with this read? In any case
4367 this will be written out by write_end so is fine */
4368 }
4369 out:
4370 *pagep = page;
4371 return rc;
4372 }
4373
4374 static int cifs_release_page(struct page *page, gfp_t gfp)
4375 {
4376 if (PagePrivate(page))
4377 return 0;
4378
4379 return cifs_fscache_release_page(page, gfp);
4380 }
4381
4382 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4383 unsigned int length)
4384 {
4385 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4386
4387 if (offset == 0 && length == PAGE_SIZE)
4388 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4389 }
4390
4391 static int cifs_launder_page(struct page *page)
4392 {
4393 int rc = 0;
4394 loff_t range_start = page_offset(page);
4395 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4396 struct writeback_control wbc = {
4397 .sync_mode = WB_SYNC_ALL,
4398 .nr_to_write = 0,
4399 .range_start = range_start,
4400 .range_end = range_end,
4401 };
4402
4403 cifs_dbg(FYI, "Launder page: %p\n", page);
4404
4405 if (clear_page_dirty_for_io(page))
4406 rc = cifs_writepage_locked(page, &wbc);
4407
4408 cifs_fscache_invalidate_page(page, page->mapping->host);
4409 return rc;
4410 }
4411
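/*
 * Worker run when the server breaks an oplock/lease: wait for in-flight
 * writers, downgrade the cached oplock state, write back dirty pages (and
 * invalidate the page cache if read caching was lost), push any cached
 * byte-range locks to the server, and acknowledge the break unless it was
 * cancelled (e.g. after a reconnect invalidated the handle).
 */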
4412 void cifs_oplock_break(struct work_struct *work)
4413 {
4414 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4415 oplock_break);
4416 struct inode *inode = d_inode(cfile->dentry);
4417 struct cifsInodeInfo *cinode = CIFS_I(inode);
4418 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4419 struct TCP_Server_Info *server = tcon->ses->server;
4420 int rc = 0;
4421
4422 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4423 TASK_UNINTERRUPTIBLE);
4424
4425 server->ops->downgrade_oplock(server, cinode,
4426 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4427
4428 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4429 cifs_has_mand_locks(cinode)) {
4430 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4431 inode);
4432 cinode->oplock = 0;
4433 }
4434
4435 if (inode && S_ISREG(inode->i_mode)) {
4436 if (CIFS_CACHE_READ(cinode))
4437 break_lease(inode, O_RDONLY);
4438 else
4439 break_lease(inode, O_WRONLY);
4440 rc = filemap_fdatawrite(inode->i_mapping);
4441 if (!CIFS_CACHE_READ(cinode)) {
4442 rc = filemap_fdatawait(inode->i_mapping);
4443 mapping_set_error(inode->i_mapping, rc);
4444 cifs_zap_mapping(inode);
4445 }
4446 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4447 }
4448
4449 rc = cifs_push_locks(cfile);
4450 if (rc)
4451 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4452
4453 /*
4454 * Releasing a stale oplock after a recent reconnect of the SMB session,
4455 * using a now-incorrect file handle, is not a data integrity issue, but
4456 * don't bother sending an oplock release if the session to the server is
4457 * still disconnected, since the server has already released the oplock.
4458 */
4459 if (!cfile->oplock_break_cancelled) {
4460 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4461 cinode);
4462 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4463 }
4464 cifs_done_oplock_break(cinode);
4465 }
4466
4467 /*
4468 * The presence of cifs_direct_io() in the address space ops vector
4469 * allows open() O_DIRECT flags which would have failed otherwise.
4470 *
4471 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4472 * so this method should never be called.
4473 *
4474 * Direct IO is not yet supported in the cached mode.
4475 */
4476 static ssize_t
4477 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4478 {
4479 /*
4480 * FIXME
4481 * Eventually need to support direct IO for non forcedirectio mounts
4482 */
4483 return -EINVAL;
4484 }
4485
4486
4487 const struct address_space_operations cifs_addr_ops = {
4488 .readpage = cifs_readpage,
4489 .readpages = cifs_readpages,
4490 .writepage = cifs_writepage,
4491 .writepages = cifs_writepages,
4492 .write_begin = cifs_write_begin,
4493 .write_end = cifs_write_end,
4494 .set_page_dirty = __set_page_dirty_nobuffers,
4495 .releasepage = cifs_release_page,
4496 .direct_IO = cifs_direct_io,
4497 .invalidatepage = cifs_invalidate_page,
4498 .launder_page = cifs_launder_page,
4499 };
4500
4501 /*
4502 * cifs_readpages requires the server to support a buffer large enough to
4503 * contain the header plus one complete page of data. Otherwise, we need
4504 * to leave cifs_readpages out of the address space operations.
4505 */
4506 const struct address_space_operations cifs_addr_ops_smallbuf = {
4507 .readpage = cifs_readpage,
4508 .writepage = cifs_writepage,
4509 .writepages = cifs_writepages,
4510 .write_begin = cifs_write_begin,
4511 .write_end = cifs_write_end,
4512 .set_page_dirty = __set_page_dirty_nobuffers,
4513 .releasepage = cifs_release_page,
4514 .invalidatepage = cifs_invalidate_page,
4515 .launder_page = cifs_launder_page,
4516 };