1 /*
2 * fs/cifs/file.c
3 *
4 * vfs operations that deal with files
5 *
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50 if ((flags & O_ACCMODE) == O_RDONLY)
51 return GENERIC_READ;
52 else if ((flags & O_ACCMODE) == O_WRONLY)
53 return GENERIC_WRITE;
54 else if ((flags & O_ACCMODE) == O_RDWR) {
55 /* GENERIC_ALL is too much permission to request; it can
56 cause an unnecessary access-denied error on create */
57 /* return GENERIC_ALL; */
58 return (GENERIC_READ | GENERIC_WRITE);
59 }
60
61 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68 u32 posix_flags = 0;
69
70 if ((flags & O_ACCMODE) == O_RDONLY)
71 posix_flags = SMB_O_RDONLY;
72 else if ((flags & O_ACCMODE) == O_WRONLY)
73 posix_flags = SMB_O_WRONLY;
74 else if ((flags & O_ACCMODE) == O_RDWR)
75 posix_flags = SMB_O_RDWR;
76
77 if (flags & O_CREAT) {
78 posix_flags |= SMB_O_CREAT;
79 if (flags & O_EXCL)
80 posix_flags |= SMB_O_EXCL;
81 } else if (flags & O_EXCL)
82 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83 current->comm, current->tgid);
84
85 if (flags & O_TRUNC)
86 posix_flags |= SMB_O_TRUNC;
87 /* be safe and imply O_SYNC for O_DSYNC */
88 if (flags & O_DSYNC)
89 posix_flags |= SMB_O_SYNC;
90 if (flags & O_DIRECTORY)
91 posix_flags |= SMB_O_DIRECTORY;
92 if (flags & O_NOFOLLOW)
93 posix_flags |= SMB_O_NOFOLLOW;
94 if (flags & O_DIRECT)
95 posix_flags |= SMB_O_DIRECT;
96
97 return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 return FILE_CREATE;
104 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105 return FILE_OVERWRITE_IF;
106 else if ((flags & O_CREAT) == O_CREAT)
107 return FILE_OPEN_IF;
108 else if ((flags & O_TRUNC) == O_TRUNC)
109 return FILE_OVERWRITE;
110 else
111 return FILE_OPEN;
112 }
113
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115 struct super_block *sb, int mode, unsigned int f_flags,
116 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118 int rc;
119 FILE_UNIX_BASIC_INFO *presp_data;
120 __u32 posix_flags = 0;
121 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122 struct cifs_fattr fattr;
123 struct tcon_link *tlink;
124 struct cifs_tcon *tcon;
125
126 cifs_dbg(FYI, "posix open %s\n", full_path);
127
128 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129 if (presp_data == NULL)
130 return -ENOMEM;
131
132 tlink = cifs_sb_tlink(cifs_sb);
133 if (IS_ERR(tlink)) {
134 rc = PTR_ERR(tlink);
135 goto posix_open_ret;
136 }
137
138 tcon = tlink_tcon(tlink);
139 mode &= ~current_umask();
140
141 posix_flags = cifs_posix_convert_flags(f_flags);
142 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143 poplock, full_path, cifs_sb->local_nls,
144 cifs_remap(cifs_sb));
145 cifs_put_tlink(tlink);
146
147 if (rc)
148 goto posix_open_ret;
149
150 if (presp_data->Type == cpu_to_le32(-1))
151 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153 if (!pinode)
154 goto posix_open_ret; /* caller does not need info */
155
156 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158 /* get new inode and set it up */
159 if (*pinode == NULL) {
160 cifs_fill_uniqueid(sb, &fattr);
161 *pinode = cifs_iget(sb, &fattr);
162 if (!*pinode) {
163 rc = -ENOMEM;
164 goto posix_open_ret;
165 }
166 } else {
167 cifs_fattr_to_inode(*pinode, &fattr);
168 }
169
170 posix_open_ret:
171 kfree(presp_data);
172 return rc;
173 }
174
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
179 {
180 int rc;
181 int desired_access;
182 int disposition;
183 int create_options = CREATE_NOT_DIR;
184 FILE_ALL_INFO *buf;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
187
188 if (!server->ops->open)
189 return -ENOSYS;
190
191 desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194 * open flag mapping table:
195 *
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
203 *
204 * Note that there is no direct match for the FILE_SUPERSEDE
205 * disposition (ie create whether or not the file exists); the
206 * closest is O_CREAT | O_TRUNC, which truncates an existing
207 * file rather than recreating it as FILE_SUPERSEDE does (using
208 * the attributes / metadata passed in on the open call).
209 *
210 * O_SYNC is a reasonable match to the CIFS writethrough flag
211 * and the read/write flags match reasonably. O_LARGEFILE
212 * is irrelevant because largefile support is always used
213 * by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
215 *********************************************************************/
216
217 disposition = cifs_get_disposition(f_flags);
218
219 /* BB pass O_SYNC flag through on file attributes .. BB */
220
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222 if (!buf)
223 return -ENOMEM;
224
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
231
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
234
235 oparms.tcon = tcon;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
241 oparms.fid = fid;
242 oparms.reconnect = false;
243
244 rc = server->ops->open(xid, &oparms, oplock, buf);
245
246 if (rc)
247 goto out;
248
249 if (tcon->unix_ext)
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251 xid);
252 else
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254 xid, fid);
255
256 if (rc) {
257 server->ops->close(xid, tcon, fid);
258 if (rc == -ESTALE)
259 rc = -EOPENSTALE;
260 }
261
262 out:
263 kfree(buf);
264 return rc;
265 }
266
267 static bool
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 {
270 struct cifs_fid_locks *cur;
271 bool has_locks = false;
272
273 down_read(&cinode->lock_sem);
274 list_for_each_entry(cur, &cinode->llist, llist) {
275 if (!list_empty(&cur->locks)) {
276 has_locks = true;
277 break;
278 }
279 }
280 up_read(&cinode->lock_sem);
281 return has_locks;
282 }
283
284 struct cifsFileInfo *
285 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
286 struct tcon_link *tlink, __u32 oplock)
287 {
288 struct dentry *dentry = file_dentry(file);
289 struct inode *inode = d_inode(dentry);
290 struct cifsInodeInfo *cinode = CIFS_I(inode);
291 struct cifsFileInfo *cfile;
292 struct cifs_fid_locks *fdlocks;
293 struct cifs_tcon *tcon = tlink_tcon(tlink);
294 struct TCP_Server_Info *server = tcon->ses->server;
295
296 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
297 if (cfile == NULL)
298 return cfile;
299
300 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
301 if (!fdlocks) {
302 kfree(cfile);
303 return NULL;
304 }
305
306 INIT_LIST_HEAD(&fdlocks->locks);
307 fdlocks->cfile = cfile;
308 cfile->llist = fdlocks;
309 down_write(&cinode->lock_sem);
310 list_add(&fdlocks->llist, &cinode->llist);
311 up_write(&cinode->lock_sem);
312
313 cfile->count = 1;
314 cfile->pid = current->tgid;
315 cfile->uid = current_fsuid();
316 cfile->dentry = dget(dentry);
317 cfile->f_flags = file->f_flags;
318 cfile->invalidHandle = false;
319 cfile->tlink = cifs_get_tlink(tlink);
320 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
321 mutex_init(&cfile->fh_mutex);
322 spin_lock_init(&cfile->file_info_lock);
323
324 cifs_sb_active(inode->i_sb);
325
326 /*
327 * If the server returned a read oplock and we have mandatory brlocks,
328 * set oplock level to None.
329 */
330 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
331 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
332 oplock = 0;
333 }
334
335 spin_lock(&tcon->open_file_lock);
336 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
337 oplock = fid->pending_open->oplock;
338 list_del(&fid->pending_open->olist);
339
340 fid->purge_cache = false;
341 server->ops->set_fid(cfile, fid, oplock);
342
343 list_add(&cfile->tlist, &tcon->openFileList);
344 atomic_inc(&tcon->num_local_opens);
345
346 /* if this is a readable file instance, put it first in the list */
347 spin_lock(&cinode->open_file_lock);
348 if (file->f_mode & FMODE_READ)
349 list_add(&cfile->flist, &cinode->openFileList);
350 else
351 list_add_tail(&cfile->flist, &cinode->openFileList);
352 spin_unlock(&cinode->open_file_lock);
353 spin_unlock(&tcon->open_file_lock);
354
355 if (fid->purge_cache)
356 cifs_zap_mapping(inode);
357
358 file->private_data = cfile;
359 return cfile;
360 }
361
362 struct cifsFileInfo *
363 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
364 {
365 spin_lock(&cifs_file->file_info_lock);
366 cifsFileInfo_get_locked(cifs_file);
367 spin_unlock(&cifs_file->file_info_lock);
368 return cifs_file;
369 }
370
371 /**
372 * cifsFileInfo_put - release a reference to file priv data
373 *
374 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
375 */
376 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
377 {
378 _cifsFileInfo_put(cifs_file, true);
379 }
380
381 /**
382 * _cifsFileInfo_put - release a reference to file priv data
383 *
384 * This may involve closing the filehandle @cifs_file out on the
385 * server. Must be called without holding tcon->open_file_lock and
386 * cifs_file->file_info_lock.
387 *
388 * If @wait_oplock_handler is true and we are releasing the last
389 * reference, wait for any running oplock break handler of the file
390 * and cancel any pending one. If calling this function from the
391 * oplock break handler, you need to pass false.
392 *
393 */
394 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
395 {
396 struct inode *inode = d_inode(cifs_file->dentry);
397 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
398 struct TCP_Server_Info *server = tcon->ses->server;
399 struct cifsInodeInfo *cifsi = CIFS_I(inode);
400 struct super_block *sb = inode->i_sb;
401 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
402 struct cifsLockInfo *li, *tmp;
403 struct cifs_fid fid;
404 struct cifs_pending_open open;
405 bool oplock_break_cancelled;
406
407 spin_lock(&tcon->open_file_lock);
408
409 spin_lock(&cifs_file->file_info_lock);
410 if (--cifs_file->count > 0) {
411 spin_unlock(&cifs_file->file_info_lock);
412 spin_unlock(&tcon->open_file_lock);
413 return;
414 }
415 spin_unlock(&cifs_file->file_info_lock);
416
417 if (server->ops->get_lease_key)
418 server->ops->get_lease_key(inode, &fid);
419
420 /* store open in pending opens to make sure we don't miss lease break */
421 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
422
423 /* remove it from the lists */
424 spin_lock(&cifsi->open_file_lock);
425 list_del(&cifs_file->flist);
426 spin_unlock(&cifsi->open_file_lock);
427 list_del(&cifs_file->tlist);
428 atomic_dec(&tcon->num_local_opens);
429
430 if (list_empty(&cifsi->openFileList)) {
431 cifs_dbg(FYI, "closing last open instance for inode %p\n",
432 d_inode(cifs_file->dentry));
433 /*
434 * In strict cache mode we need to invalidate the mapping on the
435 * last close because it may cause an error when we open this
436 * file again and get at least a level II oplock.
437 */
438 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
439 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
440 cifs_set_oplock_level(cifsi, 0);
441 }
442
443 spin_unlock(&tcon->open_file_lock);
444
445 oplock_break_cancelled = wait_oplock_handler ?
446 cancel_work_sync(&cifs_file->oplock_break) : false;
447
448 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
449 struct TCP_Server_Info *server = tcon->ses->server;
450 unsigned int xid;
451
452 xid = get_xid();
453 if (server->ops->close)
454 server->ops->close(xid, tcon, &cifs_file->fid);
455 _free_xid(xid);
456 }
457
458 if (oplock_break_cancelled)
459 cifs_done_oplock_break(cifsi);
460
461 cifs_del_pending_open(&open);
462
463 /*
464 * Delete any outstanding lock records. We'll lose them when the file
465 * is closed anyway.
466 */
467 down_write(&cifsi->lock_sem);
468 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
469 list_del(&li->llist);
470 cifs_del_lock_waiters(li);
471 kfree(li);
472 }
473 list_del(&cifs_file->llist->llist);
474 kfree(cifs_file->llist);
475 up_write(&cifsi->lock_sem);
476
477 cifs_put_tlink(cifs_file->tlink);
478 dput(cifs_file->dentry);
479 cifs_sb_deactive(sb);
480 kfree(cifs_file);
481 }
482
483 int cifs_open(struct inode *inode, struct file *file)
484
485 {
486 int rc = -EACCES;
487 unsigned int xid;
488 __u32 oplock;
489 struct cifs_sb_info *cifs_sb;
490 struct TCP_Server_Info *server;
491 struct cifs_tcon *tcon;
492 struct tcon_link *tlink;
493 struct cifsFileInfo *cfile = NULL;
494 char *full_path = NULL;
495 bool posix_open_ok = false;
496 struct cifs_fid fid;
497 struct cifs_pending_open open;
498
499 xid = get_xid();
500
501 cifs_sb = CIFS_SB(inode->i_sb);
502 tlink = cifs_sb_tlink(cifs_sb);
503 if (IS_ERR(tlink)) {
504 free_xid(xid);
505 return PTR_ERR(tlink);
506 }
507 tcon = tlink_tcon(tlink);
508 server = tcon->ses->server;
509
510 full_path = build_path_from_dentry(file_dentry(file));
511 if (full_path == NULL) {
512 rc = -ENOMEM;
513 goto out;
514 }
515
516 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
517 inode, file->f_flags, full_path);
518
519 if (file->f_flags & O_DIRECT &&
520 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
521 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
522 file->f_op = &cifs_file_direct_nobrl_ops;
523 else
524 file->f_op = &cifs_file_direct_ops;
525 }
526
527 if (server->oplocks)
528 oplock = REQ_OPLOCK;
529 else
530 oplock = 0;
531
532 if (!tcon->broken_posix_open && tcon->unix_ext &&
533 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
534 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
535 /* can not refresh inode info since size could be stale */
536 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
537 cifs_sb->mnt_file_mode /* ignored */,
538 file->f_flags, &oplock, &fid.netfid, xid);
539 if (rc == 0) {
540 cifs_dbg(FYI, "posix open succeeded\n");
541 posix_open_ok = true;
542 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
543 if (tcon->ses->serverNOS)
544 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
545 tcon->ses->serverName,
546 tcon->ses->serverNOS);
547 tcon->broken_posix_open = true;
548 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
549 (rc != -EOPNOTSUPP)) /* path not found or net err */
550 goto out;
551 /*
552 * Else fall through to retry the open the old way on
553 * network i/o or DFS errors.
554 */
555 }
556
557 if (server->ops->get_lease_key)
558 server->ops->get_lease_key(inode, &fid);
559
560 cifs_add_pending_open(&fid, tlink, &open);
561
562 if (!posix_open_ok) {
563 if (server->ops->get_lease_key)
564 server->ops->get_lease_key(inode, &fid);
565
566 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
567 file->f_flags, &oplock, &fid, xid);
568 if (rc) {
569 cifs_del_pending_open(&open);
570 goto out;
571 }
572 }
573
574 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
575 if (cfile == NULL) {
576 if (server->ops->close)
577 server->ops->close(xid, tcon, &fid);
578 cifs_del_pending_open(&open);
579 rc = -ENOMEM;
580 goto out;
581 }
582
583 cifs_fscache_set_inode_cookie(inode, file);
584
585 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
586 /*
587 * Time to set the mode, which we could not set earlier due
588 * to problems creating new read-only files.
589 */
590 struct cifs_unix_set_info_args args = {
591 .mode = inode->i_mode,
592 .uid = INVALID_UID, /* no change */
593 .gid = INVALID_GID, /* no change */
594 .ctime = NO_CHANGE_64,
595 .atime = NO_CHANGE_64,
596 .mtime = NO_CHANGE_64,
597 .device = 0,
598 };
599 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
600 cfile->pid);
601 }
602
603 out:
604 kfree(full_path);
605 free_xid(xid);
606 cifs_put_tlink(tlink);
607 return rc;
608 }
609
610 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
611
612 /*
613 * Try to reacquire byte range locks that were released when session
614 * to server was lost.
615 */
616 static int
617 cifs_relock_file(struct cifsFileInfo *cfile)
618 {
619 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
620 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
621 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
622 int rc = 0;
623
624 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
625 if (cinode->can_cache_brlcks) {
626 /* can cache locks - no need to relock */
627 up_read(&cinode->lock_sem);
628 return rc;
629 }
630
631 if (cap_unix(tcon->ses) &&
632 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
633 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
634 rc = cifs_push_posix_locks(cfile);
635 else
636 rc = tcon->ses->server->ops->push_mand_locks(cfile);
637
638 up_read(&cinode->lock_sem);
639 return rc;
640 }
641
642 static int
643 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
644 {
645 int rc = -EACCES;
646 unsigned int xid;
647 __u32 oplock;
648 struct cifs_sb_info *cifs_sb;
649 struct cifs_tcon *tcon;
650 struct TCP_Server_Info *server;
651 struct cifsInodeInfo *cinode;
652 struct inode *inode;
653 char *full_path = NULL;
654 int desired_access;
655 int disposition = FILE_OPEN;
656 int create_options = CREATE_NOT_DIR;
657 struct cifs_open_parms oparms;
658
659 xid = get_xid();
660 mutex_lock(&cfile->fh_mutex);
661 if (!cfile->invalidHandle) {
662 mutex_unlock(&cfile->fh_mutex);
663 rc = 0;
664 free_xid(xid);
665 return rc;
666 }
667
668 inode = d_inode(cfile->dentry);
669 cifs_sb = CIFS_SB(inode->i_sb);
670 tcon = tlink_tcon(cfile->tlink);
671 server = tcon->ses->server;
672
673 /*
674 * Cannot grab the rename sem here because various ops, including
675 * those that already hold the rename sem, can end up causing
676 * writepage to get called; if the server was down we end up here,
677 * and we can never tell if the caller already holds the rename_sem.
678 */
679 full_path = build_path_from_dentry(cfile->dentry);
680 if (full_path == NULL) {
681 rc = -ENOMEM;
682 mutex_unlock(&cfile->fh_mutex);
683 free_xid(xid);
684 return rc;
685 }
686
687 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
688 inode, cfile->f_flags, full_path);
689
690 if (tcon->ses->server->oplocks)
691 oplock = REQ_OPLOCK;
692 else
693 oplock = 0;
694
695 if (tcon->unix_ext && cap_unix(tcon->ses) &&
696 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
697 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
698 /*
699 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
700 * original open. Must mask them off for a reopen.
701 */
702 unsigned int oflags = cfile->f_flags &
703 ~(O_CREAT | O_EXCL | O_TRUNC);
704
705 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
706 cifs_sb->mnt_file_mode /* ignored */,
707 oflags, &oplock, &cfile->fid.netfid, xid);
708 if (rc == 0) {
709 cifs_dbg(FYI, "posix reopen succeeded\n");
710 oparms.reconnect = true;
711 goto reopen_success;
712 }
713 /*
714 * fallthrough to retry open the old way on errors, especially
715 * in the reconnect path it is important to retry hard
716 */
717 }
718
719 desired_access = cifs_convert_flags(cfile->f_flags);
720
721 if (backup_cred(cifs_sb))
722 create_options |= CREATE_OPEN_BACKUP_INTENT;
723
724 if (server->ops->get_lease_key)
725 server->ops->get_lease_key(inode, &cfile->fid);
726
727 oparms.tcon = tcon;
728 oparms.cifs_sb = cifs_sb;
729 oparms.desired_access = desired_access;
730 oparms.create_options = create_options;
731 oparms.disposition = disposition;
732 oparms.path = full_path;
733 oparms.fid = &cfile->fid;
734 oparms.reconnect = true;
735
736 /*
737 * Cannot refresh the inode by passing in a file_info buf to be returned
738 * by ops->open and then calling get_inode_info with the returned buf,
739 * since the file might have write-behind data that needs to be flushed
740 * and the server's version of the file size can be stale. If we knew for
741 * sure that the inode was not dirty locally we could do this.
742 */
743 rc = server->ops->open(xid, &oparms, &oplock, NULL);
744 if (rc == -ENOENT && oparms.reconnect == false) {
745 /* durable handle timeout is expired - open the file again */
746 rc = server->ops->open(xid, &oparms, &oplock, NULL);
747 /* indicate that we need to relock the file */
748 oparms.reconnect = true;
749 }
750
751 if (rc) {
752 mutex_unlock(&cfile->fh_mutex);
753 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
754 cifs_dbg(FYI, "oplock: %d\n", oplock);
755 goto reopen_error_exit;
756 }
757
758 reopen_success:
759 cfile->invalidHandle = false;
760 mutex_unlock(&cfile->fh_mutex);
761 cinode = CIFS_I(inode);
762
763 if (can_flush) {
764 rc = filemap_write_and_wait(inode->i_mapping);
765 if (!is_interrupt_error(rc))
766 mapping_set_error(inode->i_mapping, rc);
767
768 if (tcon->unix_ext)
769 rc = cifs_get_inode_info_unix(&inode, full_path,
770 inode->i_sb, xid);
771 else
772 rc = cifs_get_inode_info(&inode, full_path, NULL,
773 inode->i_sb, xid, NULL);
774 }
775 /*
776 * Else we are already writing out data to the server and could deadlock
777 * if we tried to flush it; and since we do not know whether we have data
778 * that would invalidate the current end of file on the server, we cannot
779 * go to the server to get the new inode info.
780 */
781
782 /*
783 * If the server returned a read oplock and we have mandatory brlocks,
784 * set oplock level to None.
785 */
786 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
787 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
788 oplock = 0;
789 }
790
791 server->ops->set_fid(cfile, &cfile->fid, oplock);
792 if (oparms.reconnect)
793 cifs_relock_file(cfile);
794
795 reopen_error_exit:
796 kfree(full_path);
797 free_xid(xid);
798 return rc;
799 }
800
801 int cifs_close(struct inode *inode, struct file *file)
802 {
803 if (file->private_data != NULL) {
804 cifsFileInfo_put(file->private_data);
805 file->private_data = NULL;
806 }
807
808 /* return code from the ->release op is always ignored */
809 return 0;
810 }
811
812 void
813 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
814 {
815 struct cifsFileInfo *open_file;
816 struct list_head *tmp;
817 struct list_head *tmp1;
818 struct list_head tmp_list;
819
820 if (!tcon->use_persistent || !tcon->need_reopen_files)
821 return;
822
823 tcon->need_reopen_files = false;
824
825 cifs_dbg(FYI, "Reopen persistent handles\n");
826 INIT_LIST_HEAD(&tmp_list);
827
828 /* list all files open on tree connection, reopen resilient handles */
829 spin_lock(&tcon->open_file_lock);
830 list_for_each(tmp, &tcon->openFileList) {
831 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
832 if (!open_file->invalidHandle)
833 continue;
834 cifsFileInfo_get(open_file);
835 list_add_tail(&open_file->rlist, &tmp_list);
836 }
837 spin_unlock(&tcon->open_file_lock);
838
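/* reopen the collected handles outside the spinlock; cifs_reopen_file may block on network I/O */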
839 list_for_each_safe(tmp, tmp1, &tmp_list) {
840 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
841 if (cifs_reopen_file(open_file, false /* do not flush */))
842 tcon->need_reopen_files = true;
843 list_del_init(&open_file->rlist);
844 cifsFileInfo_put(open_file);
845 }
846 }
847
848 int cifs_closedir(struct inode *inode, struct file *file)
849 {
850 int rc = 0;
851 unsigned int xid;
852 struct cifsFileInfo *cfile = file->private_data;
853 struct cifs_tcon *tcon;
854 struct TCP_Server_Info *server;
855 char *buf;
856
857 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
858
859 if (cfile == NULL)
860 return rc;
861
862 xid = get_xid();
863 tcon = tlink_tcon(cfile->tlink);
864 server = tcon->ses->server;
865
866 cifs_dbg(FYI, "Freeing private data in close dir\n");
867 spin_lock(&cfile->file_info_lock);
868 if (server->ops->dir_needs_close(cfile)) {
869 cfile->invalidHandle = true;
870 spin_unlock(&cfile->file_info_lock);
871 if (server->ops->close_dir)
872 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
873 else
874 rc = -ENOSYS;
875 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
876 /* not much we can do if it fails anyway, ignore rc */
877 rc = 0;
878 } else
879 spin_unlock(&cfile->file_info_lock);
880
881 buf = cfile->srch_inf.ntwrk_buf_start;
882 if (buf) {
883 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
884 cfile->srch_inf.ntwrk_buf_start = NULL;
885 if (cfile->srch_inf.smallBuf)
886 cifs_small_buf_release(buf);
887 else
888 cifs_buf_release(buf);
889 }
890
891 cifs_put_tlink(cfile->tlink);
892 kfree(file->private_data);
893 file->private_data = NULL;
894 /* BB can we lock the filestruct while this is going on? */
895 free_xid(xid);
896 return rc;
897 }
898
899 static struct cifsLockInfo *
900 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
901 {
902 struct cifsLockInfo *lock =
903 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
904 if (!lock)
905 return lock;
906 lock->offset = offset;
907 lock->length = length;
908 lock->type = type;
909 lock->pid = current->tgid;
910 lock->flags = flags;
911 INIT_LIST_HEAD(&lock->blist);
912 init_waitqueue_head(&lock->block_q);
913 return lock;
914 }
915
916 void
917 cifs_del_lock_waiters(struct cifsLockInfo *lock)
918 {
919 struct cifsLockInfo *li, *tmp;
920 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
921 list_del_init(&li->blist);
922 wake_up(&li->block_q);
923 }
924 }
925
926 #define CIFS_LOCK_OP 0
927 #define CIFS_READ_OP 1
928 #define CIFS_WRITE_OP 2
929
930 /* @rw_check : 0 - no op, 1 - read, 2 - write */
931 static bool
932 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
933 __u64 length, __u8 type, __u16 flags,
934 struct cifsFileInfo *cfile,
935 struct cifsLockInfo **conf_lock, int rw_check)
936 {
937 struct cifsLockInfo *li;
938 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
939 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
940
941 list_for_each_entry(li, &fdlocks->locks, llist) {
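/* skip stored locks that do not overlap the requested range */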
942 if (offset + length <= li->offset ||
943 offset >= li->offset + li->length)
944 continue;
945 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
946 server->ops->compare_fids(cfile, cur_cfile)) {
947 /* shared lock prevents write op through the same fid */
948 if (!(li->type & server->vals->shared_lock_type) ||
949 rw_check != CIFS_WRITE_OP)
950 continue;
951 }
952 if ((type & server->vals->shared_lock_type) &&
953 ((server->ops->compare_fids(cfile, cur_cfile) &&
954 current->tgid == li->pid) || type == li->type))
955 continue;
956 if (rw_check == CIFS_LOCK_OP &&
957 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
958 server->ops->compare_fids(cfile, cur_cfile))
959 continue;
960 if (conf_lock)
961 *conf_lock = li;
962 return true;
963 }
964 return false;
965 }
966
967 bool
968 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
969 __u8 type, __u16 flags,
970 struct cifsLockInfo **conf_lock, int rw_check)
971 {
972 bool rc = false;
973 struct cifs_fid_locks *cur;
974 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
975
976 list_for_each_entry(cur, &cinode->llist, llist) {
977 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
978 flags, cfile, conf_lock,
979 rw_check);
980 if (rc)
981 break;
982 }
983
984 return rc;
985 }
986
987 /*
988 * Check if there is another lock that prevents us from setting the lock
989 * (mandatory style). If such a lock exists, update the flock structure
990 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
991 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
992 * need to send a request to the server, or 1 otherwise.
993 */
994 static int
995 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
996 __u8 type, struct file_lock *flock)
997 {
998 int rc = 0;
999 struct cifsLockInfo *conf_lock;
1000 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1002 bool exist;
1003
1004 down_read(&cinode->lock_sem);
1005
1006 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1007 flock->fl_flags, &conf_lock,
1008 CIFS_LOCK_OP);
1009 if (exist) {
1010 flock->fl_start = conf_lock->offset;
1011 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1012 flock->fl_pid = conf_lock->pid;
1013 if (conf_lock->type & server->vals->shared_lock_type)
1014 flock->fl_type = F_RDLCK;
1015 else
1016 flock->fl_type = F_WRLCK;
1017 } else if (!cinode->can_cache_brlcks)
1018 rc = 1;
1019 else
1020 flock->fl_type = F_UNLCK;
1021
1022 up_read(&cinode->lock_sem);
1023 return rc;
1024 }
1025
1026 static void
1027 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1028 {
1029 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1030 down_write(&cinode->lock_sem);
1031 list_add_tail(&lock->llist, &cfile->llist->locks);
1032 up_write(&cinode->lock_sem);
1033 }
1034
1035 /*
1036 * Set the byte-range lock (mandatory style). Returns:
1037 * 1) 0, if we set the lock and don't need to send a request to the server;
1038 * 2) 1, if no locks prevent us but we need to send a request to the server;
1039 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1040 */
1041 static int
1042 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1043 bool wait)
1044 {
1045 struct cifsLockInfo *conf_lock;
1046 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1047 bool exist;
1048 int rc = 0;
1049
1050 try_again:
1051 exist = false;
1052 down_write(&cinode->lock_sem);
1053
1054 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1055 lock->type, lock->flags, &conf_lock,
1056 CIFS_LOCK_OP);
1057 if (!exist && cinode->can_cache_brlcks) {
1058 list_add_tail(&lock->llist, &cfile->llist->locks);
1059 up_write(&cinode->lock_sem);
1060 return rc;
1061 }
1062
1063 if (!exist)
1064 rc = 1;
1065 else if (!wait)
1066 rc = -EACCES;
1067 else {
1068 list_add_tail(&lock->blist, &conf_lock->blist);
1069 up_write(&cinode->lock_sem);
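/*
 * Wait until cifs_del_lock_waiters() removes us from the conflicting
 * lock's blocked list (lock->blist becomes empty again), i.e. the
 * conflicting lock has been released.
 */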
1070 rc = wait_event_interruptible(lock->block_q,
1071 (lock->blist.prev == &lock->blist) &&
1072 (lock->blist.next == &lock->blist));
1073 if (!rc)
1074 goto try_again;
1075 down_write(&cinode->lock_sem);
1076 list_del_init(&lock->blist);
1077 }
1078
1079 up_write(&cinode->lock_sem);
1080 return rc;
1081 }
1082
1083 /*
1084 * Check if there is another lock that prevents us from setting the lock
1085 * (posix style). If such a lock exists, update the flock structure with
1086 * its properties. Otherwise, set the flock type to F_UNLCK if we can
1087 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
1088 * need to send a request to the server, or 1 otherwise.
1089 */
1090 static int
1091 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1092 {
1093 int rc = 0;
1094 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1095 unsigned char saved_type = flock->fl_type;
1096
1097 if ((flock->fl_flags & FL_POSIX) == 0)
1098 return 1;
1099
1100 down_read(&cinode->lock_sem);
1101 posix_test_lock(file, flock);
1102
1103 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1104 flock->fl_type = saved_type;
1105 rc = 1;
1106 }
1107
1108 up_read(&cinode->lock_sem);
1109 return rc;
1110 }
1111
1112 /*
1113 * Set the byte-range lock (posix style). Returns:
1114 * 1) 0, if we set the lock and don't need to send a request to the server;
1115 * 2) 1, if we need to send a request to the server;
1116 * 3) <0, if an error occurs while setting the lock.
1117 */
1118 static int
1119 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1120 {
1121 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1122 int rc = 1;
1123
1124 if ((flock->fl_flags & FL_POSIX) == 0)
1125 return rc;
1126
1127 try_again:
1128 down_write(&cinode->lock_sem);
1129 if (!cinode->can_cache_brlcks) {
1130 up_write(&cinode->lock_sem);
1131 return rc;
1132 }
1133
1134 rc = posix_lock_file(file, flock, NULL);
1135 up_write(&cinode->lock_sem);
1136 if (rc == FILE_LOCK_DEFERRED) {
1137 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1138 if (!rc)
1139 goto try_again;
1140 locks_delete_block(flock);
1141 }
1142 return rc;
1143 }
1144
1145 int
1146 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1147 {
1148 unsigned int xid;
1149 int rc = 0, stored_rc;
1150 struct cifsLockInfo *li, *tmp;
1151 struct cifs_tcon *tcon;
1152 unsigned int num, max_num, max_buf;
1153 LOCKING_ANDX_RANGE *buf, *cur;
1154 static const int types[] = {
1155 LOCKING_ANDX_LARGE_FILES,
1156 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1157 };
1158 int i;
1159
1160 xid = get_xid();
1161 tcon = tlink_tcon(cfile->tlink);
1162
1163 /*
1164 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1165 * and check it before using.
1166 */
1167 max_buf = tcon->ses->server->maxBuf;
1168 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1169 free_xid(xid);
1170 return -EINVAL;
1171 }
1172
1173 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1174 PAGE_SIZE);
1175 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1176 PAGE_SIZE);
1177 max_num = (max_buf - sizeof(struct smb_hdr)) /
1178 sizeof(LOCKING_ANDX_RANGE);
1179 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1180 if (!buf) {
1181 free_xid(xid);
1182 return -ENOMEM;
1183 }
1184
1185 for (i = 0; i < 2; i++) {
1186 cur = buf;
1187 num = 0;
1188 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1189 if (li->type != types[i])
1190 continue;
1191 cur->Pid = cpu_to_le16(li->pid);
1192 cur->LengthLow = cpu_to_le32((u32)li->length);
1193 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1194 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1195 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1196 if (++num == max_num) {
1197 stored_rc = cifs_lockv(xid, tcon,
1198 cfile->fid.netfid,
1199 (__u8)li->type, 0, num,
1200 buf);
1201 if (stored_rc)
1202 rc = stored_rc;
1203 cur = buf;
1204 num = 0;
1205 } else
1206 cur++;
1207 }
1208
1209 if (num) {
1210 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1211 (__u8)types[i], 0, num, buf);
1212 if (stored_rc)
1213 rc = stored_rc;
1214 }
1215 }
1216
1217 kfree(buf);
1218 free_xid(xid);
1219 return rc;
1220 }
1221
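/*
 * Map the local lock owner to the 32-bit id put on the wire; mixing in
 * cifs_lock_secret avoids exposing kernel pointer values to the server.
 */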
1222 static __u32
1223 hash_lockowner(fl_owner_t owner)
1224 {
1225 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1226 }
1227
1228 struct lock_to_push {
1229 struct list_head llist;
1230 __u64 offset;
1231 __u64 length;
1232 __u32 pid;
1233 __u16 netfid;
1234 __u8 type;
1235 };
1236
1237 static int
1238 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1239 {
1240 struct inode *inode = d_inode(cfile->dentry);
1241 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1242 struct file_lock *flock;
1243 struct file_lock_context *flctx = inode->i_flctx;
1244 unsigned int count = 0, i;
1245 int rc = 0, xid, type;
1246 struct list_head locks_to_send, *el;
1247 struct lock_to_push *lck, *tmp;
1248 __u64 length;
1249
1250 xid = get_xid();
1251
1252 if (!flctx)
1253 goto out;
1254
1255 spin_lock(&flctx->flc_lock);
1256 list_for_each(el, &flctx->flc_posix) {
1257 count++;
1258 }
1259 spin_unlock(&flctx->flc_lock);
1260
1261 INIT_LIST_HEAD(&locks_to_send);
1262
1263 /*
1264 * Allocating count locks is enough because no FL_POSIX locks can be
1265 * added to the list while we are holding cinode->lock_sem that
1266 * protects locking operations of this inode.
1267 */
1268 for (i = 0; i < count; i++) {
1269 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1270 if (!lck) {
1271 rc = -ENOMEM;
1272 goto err_out;
1273 }
1274 list_add_tail(&lck->llist, &locks_to_send);
1275 }
1276
1277 el = locks_to_send.next;
1278 spin_lock(&flctx->flc_lock);
1279 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1280 if (el == &locks_to_send) {
1281 /*
1282 * The list ended. We don't have enough allocated
1283 * structures - something is really wrong.
1284 */
1285 cifs_dbg(VFS, "Can't push all brlocks!\n");
1286 break;
1287 }
1288 length = 1 + flock->fl_end - flock->fl_start;
1289 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1290 type = CIFS_RDLCK;
1291 else
1292 type = CIFS_WRLCK;
1293 lck = list_entry(el, struct lock_to_push, llist);
1294 lck->pid = hash_lockowner(flock->fl_owner);
1295 lck->netfid = cfile->fid.netfid;
1296 lck->length = length;
1297 lck->type = type;
1298 lck->offset = flock->fl_start;
1299 }
1300 spin_unlock(&flctx->flc_lock);
1301
1302 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1303 int stored_rc;
1304
1305 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1306 lck->offset, lck->length, NULL,
1307 lck->type, 0);
1308 if (stored_rc)
1309 rc = stored_rc;
1310 list_del(&lck->llist);
1311 kfree(lck);
1312 }
1313
1314 out:
1315 free_xid(xid);
1316 return rc;
1317 err_out:
1318 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1319 list_del(&lck->llist);
1320 kfree(lck);
1321 }
1322 goto out;
1323 }
1324
1325 static int
1326 cifs_push_locks(struct cifsFileInfo *cfile)
1327 {
1328 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1329 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1330 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1331 int rc = 0;
1332
1333 /* we are going to update can_cache_brlcks here - need a write access */
1334 down_write(&cinode->lock_sem);
1335 if (!cinode->can_cache_brlcks) {
1336 up_write(&cinode->lock_sem);
1337 return rc;
1338 }
1339
1340 if (cap_unix(tcon->ses) &&
1341 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1342 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1343 rc = cifs_push_posix_locks(cfile);
1344 else
1345 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1346
1347 cinode->can_cache_brlcks = false;
1348 up_write(&cinode->lock_sem);
1349 return rc;
1350 }
1351
1352 static void
1353 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1354 bool *wait_flag, struct TCP_Server_Info *server)
1355 {
1356 if (flock->fl_flags & FL_POSIX)
1357 cifs_dbg(FYI, "Posix\n");
1358 if (flock->fl_flags & FL_FLOCK)
1359 cifs_dbg(FYI, "Flock\n");
1360 if (flock->fl_flags & FL_SLEEP) {
1361 cifs_dbg(FYI, "Blocking lock\n");
1362 *wait_flag = true;
1363 }
1364 if (flock->fl_flags & FL_ACCESS)
1365 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1366 if (flock->fl_flags & FL_LEASE)
1367 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1368 if (flock->fl_flags &
1369 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1370 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1371 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1372
1373 *type = server->vals->large_lock_type;
1374 if (flock->fl_type == F_WRLCK) {
1375 cifs_dbg(FYI, "F_WRLCK\n");
1376 *type |= server->vals->exclusive_lock_type;
1377 *lock = 1;
1378 } else if (flock->fl_type == F_UNLCK) {
1379 cifs_dbg(FYI, "F_UNLCK\n");
1380 *type |= server->vals->unlock_lock_type;
1381 *unlock = 1;
1382 /* Check if unlock includes more than one lock range */
1383 } else if (flock->fl_type == F_RDLCK) {
1384 cifs_dbg(FYI, "F_RDLCK\n");
1385 *type |= server->vals->shared_lock_type;
1386 *lock = 1;
1387 } else if (flock->fl_type == F_EXLCK) {
1388 cifs_dbg(FYI, "F_EXLCK\n");
1389 *type |= server->vals->exclusive_lock_type;
1390 *lock = 1;
1391 } else if (flock->fl_type == F_SHLCK) {
1392 cifs_dbg(FYI, "F_SHLCK\n");
1393 *type |= server->vals->shared_lock_type;
1394 *lock = 1;
1395 } else
1396 cifs_dbg(FYI, "Unknown type of lock\n");
1397 }
1398
1399 static int
1400 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1401 bool wait_flag, bool posix_lck, unsigned int xid)
1402 {
1403 int rc = 0;
1404 __u64 length = 1 + flock->fl_end - flock->fl_start;
1405 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1406 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1407 struct TCP_Server_Info *server = tcon->ses->server;
1408 __u16 netfid = cfile->fid.netfid;
1409
1410 if (posix_lck) {
1411 int posix_lock_type;
1412
1413 rc = cifs_posix_lock_test(file, flock);
1414 if (!rc)
1415 return rc;
1416
1417 if (type & server->vals->shared_lock_type)
1418 posix_lock_type = CIFS_RDLCK;
1419 else
1420 posix_lock_type = CIFS_WRLCK;
1421 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1422 hash_lockowner(flock->fl_owner),
1423 flock->fl_start, length, flock,
1424 posix_lock_type, wait_flag);
1425 return rc;
1426 }
1427
1428 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1429 if (!rc)
1430 return rc;
1431
1432 /* BB we could chain these into one lock request BB */
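/*
 * Probe by trying to set the lock: if it succeeds there is no
 * conflicting lock on the server, so undo it and report F_UNLCK.
 */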
1433 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1434 1, 0, false);
1435 if (rc == 0) {
1436 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1437 type, 0, 1, false);
1438 flock->fl_type = F_UNLCK;
1439 if (rc != 0)
1440 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1441 rc);
1442 return 0;
1443 }
1444
1445 if (type & server->vals->shared_lock_type) {
1446 flock->fl_type = F_WRLCK;
1447 return 0;
1448 }
1449
1450 type &= ~server->vals->exclusive_lock_type;
1451
1452 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1453 type | server->vals->shared_lock_type,
1454 1, 0, false);
1455 if (rc == 0) {
1456 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1457 type | server->vals->shared_lock_type, 0, 1, false);
1458 flock->fl_type = F_RDLCK;
1459 if (rc != 0)
1460 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1461 rc);
1462 } else
1463 flock->fl_type = F_WRLCK;
1464
1465 return 0;
1466 }
1467
1468 void
1469 cifs_move_llist(struct list_head *source, struct list_head *dest)
1470 {
1471 struct list_head *li, *tmp;
1472 list_for_each_safe(li, tmp, source)
1473 list_move(li, dest);
1474 }
1475
1476 void
1477 cifs_free_llist(struct list_head *llist)
1478 {
1479 struct cifsLockInfo *li, *tmp;
1480 list_for_each_entry_safe(li, tmp, llist, llist) {
1481 cifs_del_lock_waiters(li);
1482 list_del(&li->llist);
1483 kfree(li);
1484 }
1485 }
1486
1487 int
1488 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1489 unsigned int xid)
1490 {
1491 int rc = 0, stored_rc;
1492 static const int types[] = {
1493 LOCKING_ANDX_LARGE_FILES,
1494 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1495 };
1496 unsigned int i;
1497 unsigned int max_num, num, max_buf;
1498 LOCKING_ANDX_RANGE *buf, *cur;
1499 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1500 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1501 struct cifsLockInfo *li, *tmp;
1502 __u64 length = 1 + flock->fl_end - flock->fl_start;
1503 struct list_head tmp_llist;
1504
1505 INIT_LIST_HEAD(&tmp_llist);
1506
1507 /*
1508 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1509 * and check it before using.
1510 */
1511 max_buf = tcon->ses->server->maxBuf;
1512 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1513 return -EINVAL;
1514
1515 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1516 PAGE_SIZE);
1517 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1518 PAGE_SIZE);
1519 max_num = (max_buf - sizeof(struct smb_hdr)) /
1520 sizeof(LOCKING_ANDX_RANGE);
1521 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1522 if (!buf)
1523 return -ENOMEM;
1524
1525 down_write(&cinode->lock_sem);
1526 for (i = 0; i < 2; i++) {
1527 cur = buf;
1528 num = 0;
1529 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
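/* only remove stored locks that lie entirely within the unlock range */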
1530 if (flock->fl_start > li->offset ||
1531 (flock->fl_start + length) <
1532 (li->offset + li->length))
1533 continue;
1534 if (current->tgid != li->pid)
1535 continue;
1536 if (types[i] != li->type)
1537 continue;
1538 if (cinode->can_cache_brlcks) {
1539 /*
1540 * We can cache brlock requests - simply remove
1541 * a lock from the file's list.
1542 */
1543 list_del(&li->llist);
1544 cifs_del_lock_waiters(li);
1545 kfree(li);
1546 continue;
1547 }
1548 cur->Pid = cpu_to_le16(li->pid);
1549 cur->LengthLow = cpu_to_le32((u32)li->length);
1550 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1551 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1552 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1553 /*
1554 * We need to save a lock here to let us add it again to
1555 * the file's list if the unlock range request fails on
1556 * the server.
1557 */
1558 list_move(&li->llist, &tmp_llist);
1559 if (++num == max_num) {
1560 stored_rc = cifs_lockv(xid, tcon,
1561 cfile->fid.netfid,
1562 li->type, num, 0, buf);
1563 if (stored_rc) {
1564 /*
1565 * We failed on the unlock range
1566 * request - add all locks from the tmp
1567 * list to the head of the file's list.
1568 */
1569 cifs_move_llist(&tmp_llist,
1570 &cfile->llist->locks);
1571 rc = stored_rc;
1572 } else
1573 /*
1574 * The unlock range request succeeded -
1575 * free the tmp list.
1576 */
1577 cifs_free_llist(&tmp_llist);
1578 cur = buf;
1579 num = 0;
1580 } else
1581 cur++;
1582 }
1583 if (num) {
1584 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1585 types[i], num, 0, buf);
1586 if (stored_rc) {
1587 cifs_move_llist(&tmp_llist,
1588 &cfile->llist->locks);
1589 rc = stored_rc;
1590 } else
1591 cifs_free_llist(&tmp_llist);
1592 }
1593 }
1594
1595 up_write(&cinode->lock_sem);
1596 kfree(buf);
1597 return rc;
1598 }
1599
1600 static int
1601 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1602 bool wait_flag, bool posix_lck, int lock, int unlock,
1603 unsigned int xid)
1604 {
1605 int rc = 0;
1606 __u64 length = 1 + flock->fl_end - flock->fl_start;
1607 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1608 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1609 struct TCP_Server_Info *server = tcon->ses->server;
1610 struct inode *inode = d_inode(cfile->dentry);
1611
1612 if (posix_lck) {
1613 int posix_lock_type;
1614
1615 rc = cifs_posix_lock_set(file, flock);
1616 if (!rc || rc < 0)
1617 return rc;
1618
1619 if (type & server->vals->shared_lock_type)
1620 posix_lock_type = CIFS_RDLCK;
1621 else
1622 posix_lock_type = CIFS_WRLCK;
1623
1624 if (unlock == 1)
1625 posix_lock_type = CIFS_UNLCK;
1626
1627 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1628 hash_lockowner(flock->fl_owner),
1629 flock->fl_start, length,
1630 NULL, posix_lock_type, wait_flag);
1631 goto out;
1632 }
1633
1634 if (lock) {
1635 struct cifsLockInfo *lock;
1636
1637 lock = cifs_lock_init(flock->fl_start, length, type,
1638 flock->fl_flags);
1639 if (!lock)
1640 return -ENOMEM;
1641
1642 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1643 if (rc < 0) {
1644 kfree(lock);
1645 return rc;
1646 }
1647 if (!rc)
1648 goto out;
1649
1650 /*
1651 * A Windows 7 server can delay breaking a lease from read to None
1652 * if we set a byte-range lock on a file - break it explicitly
1653 * before sending the lock to the server to be sure the next
1654 * read won't conflict with non-overlapping locks due to
1655 * page reading.
1656 */
1657 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1658 CIFS_CACHE_READ(CIFS_I(inode))) {
1659 cifs_zap_mapping(inode);
1660 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1661 inode);
1662 CIFS_I(inode)->oplock = 0;
1663 }
1664
1665 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1666 type, 1, 0, wait_flag);
1667 if (rc) {
1668 kfree(lock);
1669 return rc;
1670 }
1671
1672 cifs_lock_add(cfile, lock);
1673 } else if (unlock)
1674 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1675
1676 out:
1677 if (flock->fl_flags & FL_POSIX) {
1678 /*
1679 * If this is a request to remove all locks because we
1680 * are closing the file, it doesn't matter if the
1681 * unlocking failed as both cifs.ko and the SMB server
1682 * remove the lock on file close
1683 */
1684 if (rc) {
1685 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1686 if (!(flock->fl_flags & FL_CLOSE))
1687 return rc;
1688 }
1689 rc = locks_lock_file_wait(file, flock);
1690 }
1691 return rc;
1692 }
1693
1694 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1695 {
1696 int rc, xid;
1697 int lock = 0, unlock = 0;
1698 bool wait_flag = false;
1699 bool posix_lck = false;
1700 struct cifs_sb_info *cifs_sb;
1701 struct cifs_tcon *tcon;
1702 struct cifsFileInfo *cfile;
1703 __u32 type;
1704
1705 rc = -EACCES;
1706 xid = get_xid();
1707
1708 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1709 cmd, flock->fl_flags, flock->fl_type,
1710 flock->fl_start, flock->fl_end);
1711
1712 cfile = (struct cifsFileInfo *)file->private_data;
1713 tcon = tlink_tcon(cfile->tlink);
1714
1715 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1716 tcon->ses->server);
1717 cifs_sb = CIFS_FILE_SB(file);
1718
1719 if (cap_unix(tcon->ses) &&
1720 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1721 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1722 posix_lck = true;
1723 /*
1724 * BB add code here to normalize offset and length to account for
1725 * negative length which we can not accept over the wire.
1726 */
1727 if (IS_GETLK(cmd)) {
1728 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1729 free_xid(xid);
1730 return rc;
1731 }
1732
1733 if (!lock && !unlock) {
1734 /*
1735 * if neither lock nor unlock is requested then there is
1736 * nothing to do since we do not know what it is
1737 */
1738 free_xid(xid);
1739 return -EOPNOTSUPP;
1740 }
1741
1742 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1743 xid);
1744 free_xid(xid);
1745 return rc;
1746 }
1747
1748 /*
1749 * update the file size (if needed) after a write. Should be called with
1750 * the inode->i_lock held
1751 */
1752 void
1753 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1754 unsigned int bytes_written)
1755 {
1756 loff_t end_of_write = offset + bytes_written;
1757
1758 if (end_of_write > cifsi->server_eof)
1759 cifsi->server_eof = end_of_write;
1760 }
1761
1762 static ssize_t
1763 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1764 size_t write_size, loff_t *offset)
1765 {
1766 int rc = 0;
1767 unsigned int bytes_written = 0;
1768 unsigned int total_written;
1769 struct cifs_tcon *tcon;
1770 struct TCP_Server_Info *server;
1771 unsigned int xid;
1772 struct dentry *dentry = open_file->dentry;
1773 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1774 struct cifs_io_parms io_parms;
1775
1776 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1777 write_size, *offset, dentry);
1778
1779 tcon = tlink_tcon(open_file->tlink);
1780 server = tcon->ses->server;
1781
1782 if (!server->ops->sync_write)
1783 return -ENOSYS;
1784
1785 xid = get_xid();
1786
1787 for (total_written = 0; write_size > total_written;
1788 total_written += bytes_written) {
1789 rc = -EAGAIN;
1790 while (rc == -EAGAIN) {
1791 struct kvec iov[2];
1792 unsigned int len;
1793
1794 if (open_file->invalidHandle) {
1795 /* we could deadlock if we called
1796 filemap_fdatawait from here so tell
1797 reopen_file not to flush data to
1798 server now */
1799 rc = cifs_reopen_file(open_file, false);
1800 if (rc != 0)
1801 break;
1802 }
1803
1804 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1805 (unsigned int)write_size - total_written);
1806 /* iov[0] is reserved for smb header */
1807 iov[1].iov_base = (char *)write_data + total_written;
1808 iov[1].iov_len = len;
1809 io_parms.pid = pid;
1810 io_parms.tcon = tcon;
1811 io_parms.offset = *offset;
1812 io_parms.length = len;
1813 rc = server->ops->sync_write(xid, &open_file->fid,
1814 &io_parms, &bytes_written, iov, 1);
1815 }
1816 if (rc || (bytes_written == 0)) {
1817 if (total_written)
1818 break;
1819 else {
1820 free_xid(xid);
1821 return rc;
1822 }
1823 } else {
1824 spin_lock(&d_inode(dentry)->i_lock);
1825 cifs_update_eof(cifsi, *offset, bytes_written);
1826 spin_unlock(&d_inode(dentry)->i_lock);
1827 *offset += bytes_written;
1828 }
1829 }
1830
1831 cifs_stats_bytes_written(tcon, total_written);
1832
1833 if (total_written > 0) {
1834 spin_lock(&d_inode(dentry)->i_lock);
1835 if (*offset > d_inode(dentry)->i_size)
1836 i_size_write(d_inode(dentry), *offset);
1837 spin_unlock(&d_inode(dentry)->i_lock);
1838 }
1839 mark_inode_dirty_sync(d_inode(dentry));
1840 free_xid(xid);
1841 return total_written;
1842 }
1843
1844 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1845 bool fsuid_only)
1846 {
1847 struct cifsFileInfo *open_file = NULL;
1848 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1849
1850 /* only filter by fsuid on multiuser mounts */
1851 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1852 fsuid_only = false;
1853
1854 spin_lock(&cifs_inode->open_file_lock);
1855 /* we could simply get the first_list_entry since write-only entries
1856 are always at the end of the list but since the first entry might
1857 have a close pending, we go through the whole list */
1858 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1859 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1860 continue;
1861 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1862 if (!open_file->invalidHandle) {
1863 /* found a good file */
1864 /* lock it so it will not be closed on us */
1865 cifsFileInfo_get(open_file);
1866 spin_unlock(&cifs_inode->open_file_lock);
1867 return open_file;
1868 } /* else might as well continue, and look for
1869 another, or simply have the caller reopen it
1870 again rather than trying to fix this handle */
1871 } else /* write only file */
1872 break; /* write only files are last so must be done */
1873 }
1874 spin_unlock(&cifs_inode->open_file_lock);
1875 return NULL;
1876 }
1877
1878 /* Return -EBADF if no handle is found and general rc otherwise */
1879 int
1880 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1881 struct cifsFileInfo **ret_file)
1882 {
1883 struct cifsFileInfo *open_file, *inv_file = NULL;
1884 struct cifs_sb_info *cifs_sb;
1885 bool any_available = false;
1886 int rc = -EBADF;
1887 unsigned int refind = 0;
1888
1889 *ret_file = NULL;
1890
1891 /*
1892 * Having a null inode here (because mapping->host was set to zero by
1893 * the VFS or MM) should not happen but we had reports of an oops (due
1894 * to it being zero) during stress testcases, so we need to check for it
1895 */
1896
1897 if (cifs_inode == NULL) {
1898 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1899 dump_stack();
1900 return rc;
1901 }
1902
1903 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1904
1905 /* only filter by fsuid on multiuser mounts */
1906 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1907 fsuid_only = false;
1908
1909 spin_lock(&cifs_inode->open_file_lock);
1910 refind_writable:
1911 if (refind > MAX_REOPEN_ATT) {
1912 spin_unlock(&cifs_inode->open_file_lock);
1913 return rc;
1914 }
1915 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1916 if (!any_available && open_file->pid != current->tgid)
1917 continue;
1918 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1919 continue;
1920 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1921 if (!open_file->invalidHandle) {
1922 /* found a good writable file */
1923 cifsFileInfo_get(open_file);
1924 spin_unlock(&cifs_inode->open_file_lock);
1925 *ret_file = open_file;
1926 return 0;
1927 } else {
1928 if (!inv_file)
1929 inv_file = open_file;
1930 }
1931 }
1932 }
1933 /* couldn't find usable FH with same pid, try any available */
1934 if (!any_available) {
1935 any_available = true;
1936 goto refind_writable;
1937 }
1938
1939 if (inv_file) {
1940 any_available = false;
1941 cifsFileInfo_get(inv_file);
1942 }
1943
1944 spin_unlock(&cifs_inode->open_file_lock);
1945
1946 if (inv_file) {
1947 rc = cifs_reopen_file(inv_file, false);
1948 if (!rc) {
1949 *ret_file = inv_file;
1950 return 0;
1951 }
1952
1953 spin_lock(&cifs_inode->open_file_lock);
1954 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1955 spin_unlock(&cifs_inode->open_file_lock);
1956 cifsFileInfo_put(inv_file);
1957 ++refind;
1958 inv_file = NULL;
1959 spin_lock(&cifs_inode->open_file_lock);
1960 goto refind_writable;
1961 }
1962
1963 return rc;
1964 }
1965
1966 struct cifsFileInfo *
1967 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1968 {
1969 struct cifsFileInfo *cfile;
1970 int rc;
1971
1972 rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1973 if (rc)
1974 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1975
1976 return cfile;
1977 }
1978
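/*
 * Find an already open file on this tcon whose path matches @name and
 * return a writable handle for it via cifs_get_writable_file().
 * Returns -ENOENT if no matching open file is found and -ENOMEM if the
 * path for a candidate entry cannot be built.
 */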
1979 int
1980 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
1981 struct cifsFileInfo **ret_file)
1982 {
1983 struct list_head *tmp;
1984 struct cifsFileInfo *cfile;
1985 struct cifsInodeInfo *cinode;
1986 char *full_path;
1987
1988 *ret_file = NULL;
1989
1990 spin_lock(&tcon->open_file_lock);
1991 list_for_each(tmp, &tcon->openFileList) {
1992 cfile = list_entry(tmp, struct cifsFileInfo,
1993 tlist);
1994 full_path = build_path_from_dentry(cfile->dentry);
1995 if (full_path == NULL) {
1996 spin_unlock(&tcon->open_file_lock);
1997 return -ENOMEM;
1998 }
1999 if (strcmp(full_path, name)) {
2000 kfree(full_path);
2001 continue;
2002 }
2003
2004 kfree(full_path);
2005 cinode = CIFS_I(d_inode(cfile->dentry));
2006 spin_unlock(&tcon->open_file_lock);
2007 return cifs_get_writable_file(cinode, false, ret_file);
2008 }
2009
2010 spin_unlock(&tcon->open_file_lock);
2011 return -ENOENT;
2012 }
2013
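/*
 * Same as cifs_get_writable_path() but for a readable handle: look up an
 * already open file on this tcon whose path matches @name and return a
 * readable handle for its inode, or -ENOENT if none is found.
 */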
2014 int
2015 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2016 struct cifsFileInfo **ret_file)
2017 {
2018 struct list_head *tmp;
2019 struct cifsFileInfo *cfile;
2020 struct cifsInodeInfo *cinode;
2021 char *full_path;
2022
2023 *ret_file = NULL;
2024
2025 spin_lock(&tcon->open_file_lock);
2026 list_for_each(tmp, &tcon->openFileList) {
2027 cfile = list_entry(tmp, struct cifsFileInfo,
2028 tlist);
2029 full_path = build_path_from_dentry(cfile->dentry);
2030 if (full_path == NULL) {
2031 spin_unlock(&tcon->open_file_lock);
2032 return -ENOMEM;
2033 }
2034 if (strcmp(full_path, name)) {
2035 kfree(full_path);
2036 continue;
2037 }
2038
2039 kfree(full_path);
2040 cinode = CIFS_I(d_inode(cfile->dentry));
2041 spin_unlock(&tcon->open_file_lock);
2042 *ret_file = find_readable_file(cinode, false);
2043 return *ret_file ? 0 : -ENOENT;
2044 }
2045
2046 spin_unlock(&tcon->open_file_lock);
2047 return -ENOENT;
2048 }
2049
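/*
 * Write the byte range [from, to) of @page back to the server using a
 * writable handle for the owning inode.  The range is clamped so that the
 * write never extends the file, and a write that races with truncate is
 * silently dropped.  Returns 0 on success or a negative error code.
 */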
2050 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2051 {
2052 struct address_space *mapping = page->mapping;
2053 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2054 char *write_data;
2055 int rc = -EFAULT;
2056 int bytes_written = 0;
2057 struct inode *inode;
2058 struct cifsFileInfo *open_file;
2059
2060 if (!mapping || !mapping->host)
2061 return -EFAULT;
2062
2063 inode = page->mapping->host;
2064
2065 offset += (loff_t)from;
2066 write_data = kmap(page);
2067 write_data += from;
2068
2069 if ((to > PAGE_SIZE) || (from > to)) {
2070 kunmap(page);
2071 return -EIO;
2072 }
2073
2074 /* racing with truncate? */
2075 if (offset > mapping->host->i_size) {
2076 kunmap(page);
2077 return 0; /* don't care */
2078 }
2079
2080 /* check to make sure that we are not extending the file */
2081 if (mapping->host->i_size - offset < (loff_t)to)
2082 to = (unsigned)(mapping->host->i_size - offset);
2083
2084 rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2085 if (!rc) {
2086 bytes_written = cifs_write(open_file, open_file->pid,
2087 write_data, to - from, &offset);
2088 cifsFileInfo_put(open_file);
2089 /* Does mm or vfs already set times? */
2090 inode->i_atime = inode->i_mtime = current_time(inode);
2091 if ((bytes_written > 0) && (offset))
2092 rc = 0;
2093 else if (bytes_written < 0)
2094 rc = bytes_written;
2095 else
2096 rc = -EFAULT;
2097 } else {
2098 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2099 if (!is_retryable_error(rc))
2100 rc = -EIO;
2101 }
2102
2103 kunmap(page);
2104 return rc;
2105 }
2106
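/*
 * Allocate a cifs_writedata for up to @tofind pages and fill its page
 * array with dirty pages tagged in the page cache starting at *index.
 */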
2107 static struct cifs_writedata *
2108 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2109 pgoff_t end, pgoff_t *index,
2110 unsigned int *found_pages)
2111 {
2112 struct cifs_writedata *wdata;
2113
2114 wdata = cifs_writedata_alloc((unsigned int)tofind,
2115 cifs_writev_complete);
2116 if (!wdata)
2117 return NULL;
2118
2119 *found_pages = find_get_pages_range_tag(mapping, index, end,
2120 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2121 return wdata;
2122 }
2123
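/*
 * Lock and prepare a contiguous run of the pages found by
 * wdata_alloc_and_fillpages() for writeback.  Scanning stops at the first
 * page that is not consecutive, lies past @end or past i_size, is already
 * under writeback, or is no longer dirty.  Pages that will not be written
 * are released; the number of prepared pages is returned.
 */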
2124 static unsigned int
2125 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2126 struct address_space *mapping,
2127 struct writeback_control *wbc,
2128 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2129 {
2130 unsigned int nr_pages = 0, i;
2131 struct page *page;
2132
2133 for (i = 0; i < found_pages; i++) {
2134 page = wdata->pages[i];
2135 /*
2136 * At this point we hold neither the i_pages lock nor the
2137 * page lock: the page may be truncated or invalidated
2138 * (changing page->mapping to NULL), or even swizzled
2139 * back from swapper_space to tmpfs file mapping
2140 */
2141
2142 if (nr_pages == 0)
2143 lock_page(page);
2144 else if (!trylock_page(page))
2145 break;
2146
2147 if (unlikely(page->mapping != mapping)) {
2148 unlock_page(page);
2149 break;
2150 }
2151
2152 if (!wbc->range_cyclic && page->index > end) {
2153 *done = true;
2154 unlock_page(page);
2155 break;
2156 }
2157
2158 if (*next && (page->index != *next)) {
2159 /* Not next consecutive page */
2160 unlock_page(page);
2161 break;
2162 }
2163
2164 if (wbc->sync_mode != WB_SYNC_NONE)
2165 wait_on_page_writeback(page);
2166
2167 if (PageWriteback(page) ||
2168 !clear_page_dirty_for_io(page)) {
2169 unlock_page(page);
2170 break;
2171 }
2172
2173 /*
2174 * This actually clears the dirty bit in the radix tree.
2175 * See cifs_writepage() for more commentary.
2176 */
2177 set_page_writeback(page);
2178 if (page_offset(page) >= i_size_read(mapping->host)) {
2179 *done = true;
2180 unlock_page(page);
2181 end_page_writeback(page);
2182 break;
2183 }
2184
2185 wdata->pages[i] = page;
2186 *next = page->index + 1;
2187 ++nr_pages;
2188 }
2189
2190 /* reset index to refind any pages skipped */
2191 if (nr_pages == 0)
2192 *index = wdata->pages[0]->index + 1;
2193
2194 /* put any pages we aren't going to use */
2195 for (i = nr_pages; i < found_pages; i++) {
2196 put_page(wdata->pages[i]);
2197 wdata->pages[i] = NULL;
2198 }
2199
2200 return nr_pages;
2201 }
2202
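/*
 * Fill in the remaining write request fields (offset, length, tail size,
 * pid) for @nr_pages prepared pages and issue the asynchronous write.
 * Returns -EAGAIN if the file handle has been invalidated and needs to be
 * reopened before the pages can be resent.
 */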
2203 static int
2204 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2205 struct address_space *mapping, struct writeback_control *wbc)
2206 {
2207 int rc;
2208 struct TCP_Server_Info *server =
2209 tlink_tcon(wdata->cfile->tlink)->ses->server;
2210
2211 wdata->sync_mode = wbc->sync_mode;
2212 wdata->nr_pages = nr_pages;
2213 wdata->offset = page_offset(wdata->pages[0]);
2214 wdata->pagesz = PAGE_SIZE;
2215 wdata->tailsz = min(i_size_read(mapping->host) -
2216 page_offset(wdata->pages[nr_pages - 1]),
2217 (loff_t)PAGE_SIZE);
2218 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2219 wdata->pid = wdata->cfile->pid;
2220
2221 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2222 if (rc)
2223 return rc;
2224
2225 if (wdata->cfile->invalidHandle)
2226 rc = -EAGAIN;
2227 else
2228 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2229
2230 return rc;
2231 }
2232
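/*
 * ->writepages() handler: gather runs of contiguous dirty pages and write
 * them to the server with asynchronous SMB writes, falling back to
 * generic_writepages() when wsize is smaller than the page size.
 */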
2233 static int cifs_writepages(struct address_space *mapping,
2234 struct writeback_control *wbc)
2235 {
2236 struct inode *inode = mapping->host;
2237 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2238 struct TCP_Server_Info *server;
2239 bool done = false, scanned = false, range_whole = false;
2240 pgoff_t end, index;
2241 struct cifs_writedata *wdata;
2242 struct cifsFileInfo *cfile = NULL;
2243 int rc = 0;
2244 int saved_rc = 0;
2245 unsigned int xid;
2246
2247 /*
2248 * If wsize is smaller than the page cache size, default to writing
2249 * one page at a time via cifs_writepage
2250 */
2251 if (cifs_sb->wsize < PAGE_SIZE)
2252 return generic_writepages(mapping, wbc);
2253
2254 xid = get_xid();
2255 if (wbc->range_cyclic) {
2256 index = mapping->writeback_index; /* Start from prev offset */
2257 end = -1;
2258 } else {
2259 index = wbc->range_start >> PAGE_SHIFT;
2260 end = wbc->range_end >> PAGE_SHIFT;
2261 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2262 range_whole = true;
2263 scanned = true;
2264 }
2265 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2266 retry:
2267 while (!done && index <= end) {
2268 unsigned int i, nr_pages, found_pages, wsize;
2269 pgoff_t next = 0, tofind, saved_index = index;
2270 struct cifs_credits credits_on_stack;
2271 struct cifs_credits *credits = &credits_on_stack;
2272 int get_file_rc = 0;
2273
2274 if (cfile)
2275 cifsFileInfo_put(cfile);
2276
2277 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2278
2279 /* in case of an error store it to return later */
2280 if (rc)
2281 get_file_rc = rc;
2282
2283 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2284 &wsize, credits);
2285 if (rc != 0) {
2286 done = true;
2287 break;
2288 }
2289
2290 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2291
2292 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2293 &found_pages);
2294 if (!wdata) {
2295 rc = -ENOMEM;
2296 done = true;
2297 add_credits_and_wake_if(server, credits, 0);
2298 break;
2299 }
2300
2301 if (found_pages == 0) {
2302 kref_put(&wdata->refcount, cifs_writedata_release);
2303 add_credits_and_wake_if(server, credits, 0);
2304 break;
2305 }
2306
2307 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2308 end, &index, &next, &done);
2309
2310 /* nothing to write? */
2311 if (nr_pages == 0) {
2312 kref_put(&wdata->refcount, cifs_writedata_release);
2313 add_credits_and_wake_if(server, credits, 0);
2314 continue;
2315 }
2316
2317 wdata->credits = credits_on_stack;
2318 wdata->cfile = cfile;
2319 cfile = NULL;
2320
2321 if (!wdata->cfile) {
2322 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2323 get_file_rc);
2324 if (is_retryable_error(get_file_rc))
2325 rc = get_file_rc;
2326 else
2327 rc = -EBADF;
2328 } else
2329 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2330
2331 for (i = 0; i < nr_pages; ++i)
2332 unlock_page(wdata->pages[i]);
2333
2334 /* send failure -- clean up the mess */
2335 if (rc != 0) {
2336 add_credits_and_wake_if(server, &wdata->credits, 0);
2337 for (i = 0; i < nr_pages; ++i) {
2338 if (is_retryable_error(rc))
2339 redirty_page_for_writepage(wbc,
2340 wdata->pages[i]);
2341 else
2342 SetPageError(wdata->pages[i]);
2343 end_page_writeback(wdata->pages[i]);
2344 put_page(wdata->pages[i]);
2345 }
2346 if (!is_retryable_error(rc))
2347 mapping_set_error(mapping, rc);
2348 }
2349 kref_put(&wdata->refcount, cifs_writedata_release);
2350
2351 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2352 index = saved_index;
2353 continue;
2354 }
2355
2356 /* Return immediately if we received a signal during writing */
2357 if (is_interrupt_error(rc)) {
2358 done = true;
2359 break;
2360 }
2361
2362 if (rc != 0 && saved_rc == 0)
2363 saved_rc = rc;
2364
2365 wbc->nr_to_write -= nr_pages;
2366 if (wbc->nr_to_write <= 0)
2367 done = true;
2368
2369 index = next;
2370 }
2371
2372 if (!scanned && !done) {
2373 /*
2374 * We hit the last page and there is more work to be done: wrap
2375 * back to the start of the file
2376 */
2377 scanned = true;
2378 index = 0;
2379 goto retry;
2380 }
2381
2382 if (saved_rc != 0)
2383 rc = saved_rc;
2384
2385 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2386 mapping->writeback_index = index;
2387
2388 if (cfile)
2389 cifsFileInfo_put(cfile);
2390 free_xid(xid);
2391 return rc;
2392 }
2393
2394 static int
2395 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2396 {
2397 int rc;
2398 unsigned int xid;
2399
2400 xid = get_xid();
2401 /* BB add check for wbc flags */
2402 get_page(page);
2403 if (!PageUptodate(page))
2404 cifs_dbg(FYI, "ppw - page not up to date\n");
2405
2406 /*
2407 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2408 *
2409 * A writepage() implementation always needs to do either this,
2410 * or re-dirty the page with "redirty_page_for_writepage()" in
2411 * the case of a failure.
2412 *
2413 * Just unlocking the page will cause the radix tree tag-bits
2414 * to fail to update with the state of the page correctly.
2415 */
2416 set_page_writeback(page);
2417 retry_write:
2418 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2419 if (is_retryable_error(rc)) {
2420 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2421 goto retry_write;
2422 redirty_page_for_writepage(wbc, page);
2423 } else if (rc != 0) {
2424 SetPageError(page);
2425 mapping_set_error(page->mapping, rc);
2426 } else {
2427 SetPageUptodate(page);
2428 }
2429 end_page_writeback(page);
2430 put_page(page);
2431 free_xid(xid);
2432 return rc;
2433 }
2434
2435 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2436 {
2437 int rc = cifs_writepage_locked(page, wbc);
2438 unlock_page(page);
2439 return rc;
2440 }
2441
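/*
 * ->write_end() handler: mark the page up to date and dirty when a full
 * (or fully checked) page was copied; for a partially up-to-date page the
 * copied bytes are written to the server synchronously instead.  The inode
 * size is extended if the write went past the current EOF.
 */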
2442 static int cifs_write_end(struct file *file, struct address_space *mapping,
2443 loff_t pos, unsigned len, unsigned copied,
2444 struct page *page, void *fsdata)
2445 {
2446 int rc;
2447 struct inode *inode = mapping->host;
2448 struct cifsFileInfo *cfile = file->private_data;
2449 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2450 __u32 pid;
2451
2452 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2453 pid = cfile->pid;
2454 else
2455 pid = current->tgid;
2456
2457 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2458 page, pos, copied);
2459
2460 if (PageChecked(page)) {
2461 if (copied == len)
2462 SetPageUptodate(page);
2463 ClearPageChecked(page);
2464 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2465 SetPageUptodate(page);
2466
2467 if (!PageUptodate(page)) {
2468 char *page_data;
2469 unsigned offset = pos & (PAGE_SIZE - 1);
2470 unsigned int xid;
2471
2472 xid = get_xid();
2473 /* this is probably better than directly calling
2474 cifs_partialpagewrite() since here the file handle is
2475 already known and we might as well use it */
2476 /* BB check if anything else is missing from ppw
2477 such as updating the last write time */
2478 page_data = kmap(page);
2479 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2480 /* if (rc < 0) should we set writebehind rc? */
2481 kunmap(page);
2482
2483 free_xid(xid);
2484 } else {
2485 rc = copied;
2486 pos += copied;
2487 set_page_dirty(page);
2488 }
2489
2490 if (rc > 0) {
2491 spin_lock(&inode->i_lock);
2492 if (pos > inode->i_size)
2493 i_size_write(inode, pos);
2494 spin_unlock(&inode->i_lock);
2495 }
2496
2497 unlock_page(page);
2498 put_page(page);
2499
2500 return rc;
2501 }
2502
2503 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2504 int datasync)
2505 {
2506 unsigned int xid;
2507 int rc = 0;
2508 struct cifs_tcon *tcon;
2509 struct TCP_Server_Info *server;
2510 struct cifsFileInfo *smbfile = file->private_data;
2511 struct inode *inode = file_inode(file);
2512 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2513
2514 rc = file_write_and_wait_range(file, start, end);
2515 if (rc)
2516 return rc;
2517
2518 xid = get_xid();
2519
2520 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2521 file, datasync);
2522
2523 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2524 rc = cifs_zap_mapping(inode);
2525 if (rc) {
2526 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2527 rc = 0; /* don't care about it in fsync */
2528 }
2529 }
2530
2531 tcon = tlink_tcon(smbfile->tlink);
2532 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2533 server = tcon->ses->server;
2534 if (server->ops->flush)
2535 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2536 else
2537 rc = -ENOSYS;
2538 }
2539
2540 free_xid(xid);
2541 return rc;
2542 }
2543
2544 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2545 {
2546 unsigned int xid;
2547 int rc = 0;
2548 struct cifs_tcon *tcon;
2549 struct TCP_Server_Info *server;
2550 struct cifsFileInfo *smbfile = file->private_data;
2551 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2552
2553 rc = file_write_and_wait_range(file, start, end);
2554 if (rc)
2555 return rc;
2556
2557 xid = get_xid();
2558
2559 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2560 file, datasync);
2561
2562 tcon = tlink_tcon(smbfile->tlink);
2563 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2564 server = tcon->ses->server;
2565 if (server->ops->flush)
2566 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2567 else
2568 rc = -ENOSYS;
2569 }
2570
2571 free_xid(xid);
2572 return rc;
2573 }
2574
2575 /*
2576 * As the file closes, flush all cached write data for this inode,
2577 * checking for write-behind errors.
2578 */
2579 int cifs_flush(struct file *file, fl_owner_t id)
2580 {
2581 struct inode *inode = file_inode(file);
2582 int rc = 0;
2583
2584 if (file->f_mode & FMODE_WRITE)
2585 rc = filemap_write_and_wait(inode->i_mapping);
2586
2587 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2588
2589 return rc;
2590 }
2591
2592 static int
2593 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2594 {
2595 int rc = 0;
2596 unsigned long i;
2597
2598 for (i = 0; i < num_pages; i++) {
2599 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2600 if (!pages[i]) {
2601 /*
2602 * save number of pages we have already allocated and
2603 * return with ENOMEM error
2604 */
2605 num_pages = i;
2606 rc = -ENOMEM;
2607 break;
2608 }
2609 }
2610
2611 if (rc) {
2612 for (i = 0; i < num_pages; i++)
2613 put_page(pages[i]);
2614 }
2615 return rc;
2616 }
2617
2618 static inline
2619 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2620 {
2621 size_t num_pages;
2622 size_t clen;
2623
2624 clen = min_t(const size_t, len, wsize);
2625 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2626
2627 if (cur_len)
2628 *cur_len = clen;
2629
2630 return num_pages;
2631 }
2632
2633 static void
2634 cifs_uncached_writedata_release(struct kref *refcount)
2635 {
2636 int i;
2637 struct cifs_writedata *wdata = container_of(refcount,
2638 struct cifs_writedata, refcount);
2639
2640 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2641 for (i = 0; i < wdata->nr_pages; i++)
2642 put_page(wdata->pages[i]);
2643 cifs_writedata_release(refcount);
2644 }
2645
2646 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2647
2648 static void
2649 cifs_uncached_writev_complete(struct work_struct *work)
2650 {
2651 struct cifs_writedata *wdata = container_of(work,
2652 struct cifs_writedata, work);
2653 struct inode *inode = d_inode(wdata->cfile->dentry);
2654 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2655
2656 spin_lock(&inode->i_lock);
2657 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2658 if (cifsi->server_eof > inode->i_size)
2659 i_size_write(inode, cifsi->server_eof);
2660 spin_unlock(&inode->i_lock);
2661
2662 complete(&wdata->done);
2663 collect_uncached_write_data(wdata->ctx);
2664 /* the below call can possibly free the last ref to aio ctx */
2665 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2666 }
2667
2668 static int
2669 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2670 size_t *len, unsigned long *num_pages)
2671 {
2672 size_t save_len, copied, bytes, cur_len = *len;
2673 unsigned long i, nr_pages = *num_pages;
2674
2675 save_len = cur_len;
2676 for (i = 0; i < nr_pages; i++) {
2677 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2678 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2679 cur_len -= copied;
2680 /*
2681 * If we didn't copy as much as we expected, then that
2682 * may mean we trod into an unmapped area. Stop copying
2683 * at that point. On the next pass through the big
2684 * loop, we'll likely end up getting a zero-length
2685 * write and bailing out of it.
2686 */
2687 if (copied < bytes)
2688 break;
2689 }
2690 cur_len = save_len - cur_len;
2691 *len = cur_len;
2692
2693 /*
2694 * If we have no data to send, then that probably means that
2695 * the copy above failed altogether. That's most likely because
2696 * the address in the iovec was bogus. Return -EFAULT and let
2697 * the caller free anything we allocated and bail out.
2698 */
2699 if (!cur_len)
2700 return -EFAULT;
2701
2702 /*
2703 * i + 1 now represents the number of pages we actually used in
2704 * the copy phase above.
2705 */
2706 *num_pages = i + 1;
2707 return 0;
2708 }
2709
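/*
 * Reissue a write that previously failed with -EAGAIN: reopen the handle
 * if needed, wait until enough credits are available to resend the whole
 * wdata in one request, and send it again.  On success the wdata is added
 * back to @wdata_list; on failure the reference is dropped.
 */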
2710 static int
2711 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2712 struct cifs_aio_ctx *ctx)
2713 {
2714 unsigned int wsize;
2715 struct cifs_credits credits;
2716 int rc;
2717 struct TCP_Server_Info *server =
2718 tlink_tcon(wdata->cfile->tlink)->ses->server;
2719
2720 do {
2721 if (wdata->cfile->invalidHandle) {
2722 rc = cifs_reopen_file(wdata->cfile, false);
2723 if (rc == -EAGAIN)
2724 continue;
2725 else if (rc)
2726 break;
2727 }
2728
2729
2730 /*
2731 * Wait for credits to resend this wdata.
2732 * Note: we are attempting to resend the whole wdata, not in
2733 * segments
2734 */
2735 do {
2736 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2737 &wsize, &credits);
2738 if (rc)
2739 goto fail;
2740
2741 if (wsize < wdata->bytes) {
2742 add_credits_and_wake_if(server, &credits, 0);
2743 msleep(1000);
2744 }
2745 } while (wsize < wdata->bytes);
2746 wdata->credits = credits;
2747
2748 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2749
2750 if (!rc) {
2751 if (wdata->cfile->invalidHandle)
2752 rc = -EAGAIN;
2753 else
2754 rc = server->ops->async_writev(wdata,
2755 cifs_uncached_writedata_release);
2756 }
2757
2758 /* If the write was successfully sent, we are done */
2759 if (!rc) {
2760 list_add_tail(&wdata->list, wdata_list);
2761 return 0;
2762 }
2763
2764 /* Roll back credits and retry if needed */
2765 add_credits_and_wake_if(server, &wdata->credits, 0);
2766 } while (rc == -EAGAIN);
2767
2768 fail:
2769 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2770 return rc;
2771 }
2772
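/*
 * Split the data described by @from into wsize-sized chunks and issue an
 * asynchronous write for each.  For direct I/O the user pages are pinned
 * with iov_iter_get_pages_alloc(); otherwise pages are allocated and the
 * data is copied into them.  Every successfully issued wdata is appended
 * to @wdata_list for later collection.
 */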
2773 static int
2774 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2775 struct cifsFileInfo *open_file,
2776 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2777 struct cifs_aio_ctx *ctx)
2778 {
2779 int rc = 0;
2780 size_t cur_len;
2781 unsigned long nr_pages, num_pages, i;
2782 struct cifs_writedata *wdata;
2783 struct iov_iter saved_from = *from;
2784 loff_t saved_offset = offset;
2785 pid_t pid;
2786 struct TCP_Server_Info *server;
2787 struct page **pagevec;
2788 size_t start;
2789 unsigned int xid;
2790
2791 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2792 pid = open_file->pid;
2793 else
2794 pid = current->tgid;
2795
2796 server = tlink_tcon(open_file->tlink)->ses->server;
2797 xid = get_xid();
2798
2799 do {
2800 unsigned int wsize;
2801 struct cifs_credits credits_on_stack;
2802 struct cifs_credits *credits = &credits_on_stack;
2803
2804 if (open_file->invalidHandle) {
2805 rc = cifs_reopen_file(open_file, false);
2806 if (rc == -EAGAIN)
2807 continue;
2808 else if (rc)
2809 break;
2810 }
2811
2812 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2813 &wsize, credits);
2814 if (rc)
2815 break;
2816
2817 cur_len = min_t(const size_t, len, wsize);
2818
2819 if (ctx->direct_io) {
2820 ssize_t result;
2821
2822 result = iov_iter_get_pages_alloc(
2823 from, &pagevec, cur_len, &start);
2824 if (result < 0) {
2825 cifs_dbg(VFS,
2826 "direct_writev couldn't get user pages "
2827 "(rc=%zd) iter type %d iov_offset %zd "
2828 "count %zd\n",
2829 result, from->type,
2830 from->iov_offset, from->count);
2831 dump_stack();
2832
2833 rc = result;
2834 add_credits_and_wake_if(server, credits, 0);
2835 break;
2836 }
2837 cur_len = (size_t)result;
2838 iov_iter_advance(from, cur_len);
2839
2840 nr_pages =
2841 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2842
2843 wdata = cifs_writedata_direct_alloc(pagevec,
2844 cifs_uncached_writev_complete);
2845 if (!wdata) {
2846 rc = -ENOMEM;
2847 add_credits_and_wake_if(server, credits, 0);
2848 break;
2849 }
2850
2851
2852 wdata->page_offset = start;
2853 wdata->tailsz =
2854 nr_pages > 1 ?
2855 cur_len - (PAGE_SIZE - start) -
2856 (nr_pages - 2) * PAGE_SIZE :
2857 cur_len;
2858 } else {
2859 nr_pages = get_numpages(wsize, len, &cur_len);
2860 wdata = cifs_writedata_alloc(nr_pages,
2861 cifs_uncached_writev_complete);
2862 if (!wdata) {
2863 rc = -ENOMEM;
2864 add_credits_and_wake_if(server, credits, 0);
2865 break;
2866 }
2867
2868 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2869 if (rc) {
2870 kvfree(wdata->pages);
2871 kfree(wdata);
2872 add_credits_and_wake_if(server, credits, 0);
2873 break;
2874 }
2875
2876 num_pages = nr_pages;
2877 rc = wdata_fill_from_iovec(
2878 wdata, from, &cur_len, &num_pages);
2879 if (rc) {
2880 for (i = 0; i < nr_pages; i++)
2881 put_page(wdata->pages[i]);
2882 kvfree(wdata->pages);
2883 kfree(wdata);
2884 add_credits_and_wake_if(server, credits, 0);
2885 break;
2886 }
2887
2888 /*
2889 * Bring nr_pages down to the number of pages we
2890 * actually used, and free any pages that we didn't use.
2891 */
2892 for ( ; nr_pages > num_pages; nr_pages--)
2893 put_page(wdata->pages[nr_pages - 1]);
2894
2895 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2896 }
2897
2898 wdata->sync_mode = WB_SYNC_ALL;
2899 wdata->nr_pages = nr_pages;
2900 wdata->offset = (__u64)offset;
2901 wdata->cfile = cifsFileInfo_get(open_file);
2902 wdata->pid = pid;
2903 wdata->bytes = cur_len;
2904 wdata->pagesz = PAGE_SIZE;
2905 wdata->credits = credits_on_stack;
2906 wdata->ctx = ctx;
2907 kref_get(&ctx->refcount);
2908
2909 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2910
2911 if (!rc) {
2912 if (wdata->cfile->invalidHandle)
2913 rc = -EAGAIN;
2914 else
2915 rc = server->ops->async_writev(wdata,
2916 cifs_uncached_writedata_release);
2917 }
2918
2919 if (rc) {
2920 add_credits_and_wake_if(server, &wdata->credits, 0);
2921 kref_put(&wdata->refcount,
2922 cifs_uncached_writedata_release);
2923 if (rc == -EAGAIN) {
2924 *from = saved_from;
2925 iov_iter_advance(from, offset - saved_offset);
2926 continue;
2927 }
2928 break;
2929 }
2930
2931 list_add_tail(&wdata->list, wdata_list);
2932 offset += cur_len;
2933 len -= cur_len;
2934 } while (len > 0);
2935
2936 free_xid(xid);
2937 return rc;
2938 }
2939
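/*
 * Collect the results of the asynchronous writes queued on @ctx->list in
 * order of increasing offset, resending any request that failed with
 * -EAGAIN, then report the total number of bytes written (or the first
 * error) through the aio context.
 */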
2940 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2941 {
2942 struct cifs_writedata *wdata, *tmp;
2943 struct cifs_tcon *tcon;
2944 struct cifs_sb_info *cifs_sb;
2945 struct dentry *dentry = ctx->cfile->dentry;
2946 int rc;
2947
2948 tcon = tlink_tcon(ctx->cfile->tlink);
2949 cifs_sb = CIFS_SB(dentry->d_sb);
2950
2951 mutex_lock(&ctx->aio_mutex);
2952
2953 if (list_empty(&ctx->list)) {
2954 mutex_unlock(&ctx->aio_mutex);
2955 return;
2956 }
2957
2958 rc = ctx->rc;
2959 /*
2960 * Wait for and collect replies for any successful sends in order of
2961 * increasing offset. Once an error is hit, then return without waiting
2962 * for any more replies.
2963 */
2964 restart_loop:
2965 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2966 if (!rc) {
2967 if (!try_wait_for_completion(&wdata->done)) {
2968 mutex_unlock(&ctx->aio_mutex);
2969 return;
2970 }
2971
2972 if (wdata->result)
2973 rc = wdata->result;
2974 else
2975 ctx->total_len += wdata->bytes;
2976
2977 /* resend call if it's a retryable error */
2978 if (rc == -EAGAIN) {
2979 struct list_head tmp_list;
2980 struct iov_iter tmp_from = ctx->iter;
2981
2982 INIT_LIST_HEAD(&tmp_list);
2983 list_del_init(&wdata->list);
2984
2985 if (ctx->direct_io)
2986 rc = cifs_resend_wdata(
2987 wdata, &tmp_list, ctx);
2988 else {
2989 iov_iter_advance(&tmp_from,
2990 wdata->offset - ctx->pos);
2991
2992 rc = cifs_write_from_iter(wdata->offset,
2993 wdata->bytes, &tmp_from,
2994 ctx->cfile, cifs_sb, &tmp_list,
2995 ctx);
2996
2997 kref_put(&wdata->refcount,
2998 cifs_uncached_writedata_release);
2999 }
3000
3001 list_splice(&tmp_list, &ctx->list);
3002 goto restart_loop;
3003 }
3004 }
3005 list_del_init(&wdata->list);
3006 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3007 }
3008
3009 cifs_stats_bytes_written(tcon, ctx->total_len);
3010 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(d_inode(dentry))->flags);
3011
3012 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3013
3014 mutex_unlock(&ctx->aio_mutex);
3015
3016 if (ctx->iocb && ctx->iocb->ki_complete)
3017 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3018 else
3019 complete(&ctx->done);
3020 }
3021
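/*
 * Common implementation behind cifs_user_writev() and cifs_direct_writev():
 * set up an aio context, issue the writes via cifs_write_from_iter() and
 * either return -EIOCBQUEUED for an asynchronous iocb or wait for all
 * writes to complete.
 */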
3022 static ssize_t __cifs_writev(
3023 struct kiocb *iocb, struct iov_iter *from, bool direct)
3024 {
3025 struct file *file = iocb->ki_filp;
3026 ssize_t total_written = 0;
3027 struct cifsFileInfo *cfile;
3028 struct cifs_tcon *tcon;
3029 struct cifs_sb_info *cifs_sb;
3030 struct cifs_aio_ctx *ctx;
3031 struct iov_iter saved_from = *from;
3032 size_t len = iov_iter_count(from);
3033 int rc;
3034
3035 /*
3036 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3037 * In this case, fall back to the non-direct write path.
3038 * This could be improved by getting pages directly in ITER_KVEC.
3039 */
3040 if (direct && from->type & ITER_KVEC) {
3041 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3042 direct = false;
3043 }
3044
3045 rc = generic_write_checks(iocb, from);
3046 if (rc <= 0)
3047 return rc;
3048
3049 cifs_sb = CIFS_FILE_SB(file);
3050 cfile = file->private_data;
3051 tcon = tlink_tcon(cfile->tlink);
3052
3053 if (!tcon->ses->server->ops->async_writev)
3054 return -ENOSYS;
3055
3056 ctx = cifs_aio_ctx_alloc();
3057 if (!ctx)
3058 return -ENOMEM;
3059
3060 ctx->cfile = cifsFileInfo_get(cfile);
3061
3062 if (!is_sync_kiocb(iocb))
3063 ctx->iocb = iocb;
3064
3065 ctx->pos = iocb->ki_pos;
3066
3067 if (direct) {
3068 ctx->direct_io = true;
3069 ctx->iter = *from;
3070 ctx->len = len;
3071 } else {
3072 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3073 if (rc) {
3074 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3075 return rc;
3076 }
3077 }
3078
3079 /* grab a lock here because the write response handlers can access ctx */
3080 mutex_lock(&ctx->aio_mutex);
3081
3082 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3083 cfile, cifs_sb, &ctx->list, ctx);
3084
3085 /*
3086 * If at least one write was successfully sent, then discard any rc
3087 * value from the later writes. If the other write succeeds, then
3088 * we'll end up returning whatever was written. If it fails, then
3089 * we'll get a new rc value from that.
3090 */
3091 if (!list_empty(&ctx->list))
3092 rc = 0;
3093
3094 mutex_unlock(&ctx->aio_mutex);
3095
3096 if (rc) {
3097 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3098 return rc;
3099 }
3100
3101 if (!is_sync_kiocb(iocb)) {
3102 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3103 return -EIOCBQUEUED;
3104 }
3105
3106 rc = wait_for_completion_killable(&ctx->done);
3107 if (rc) {
3108 mutex_lock(&ctx->aio_mutex);
3109 ctx->rc = rc = -EINTR;
3110 total_written = ctx->total_len;
3111 mutex_unlock(&ctx->aio_mutex);
3112 } else {
3113 rc = ctx->rc;
3114 total_written = ctx->total_len;
3115 }
3116
3117 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3118
3119 if (unlikely(!total_written))
3120 return rc;
3121
3122 iocb->ki_pos += total_written;
3123 return total_written;
3124 }
3125
3126 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3127 {
3128 return __cifs_writev(iocb, from, true);
3129 }
3130
3131 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3132 {
3133 return __cifs_writev(iocb, from, false);
3134 }
3135
3136 static ssize_t
3137 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3138 {
3139 struct file *file = iocb->ki_filp;
3140 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3141 struct inode *inode = file->f_mapping->host;
3142 struct cifsInodeInfo *cinode = CIFS_I(inode);
3143 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3144 ssize_t rc;
3145
3146 inode_lock(inode);
3147 /*
3148 * We need to hold the sem to be sure nobody modifies the lock list
3149 * with a brlock that prevents writing.
3150 */
3151 down_read(&cinode->lock_sem);
3152
3153 rc = generic_write_checks(iocb, from);
3154 if (rc <= 0)
3155 goto out;
3156
3157 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3158 server->vals->exclusive_lock_type, 0,
3159 NULL, CIFS_WRITE_OP))
3160 rc = __generic_file_write_iter(iocb, from);
3161 else
3162 rc = -EACCES;
3163 out:
3164 up_read(&cinode->lock_sem);
3165 inode_unlock(inode);
3166
3167 if (rc > 0)
3168 rc = generic_write_sync(iocb, rc);
3169 return rc;
3170 }
3171
3172 ssize_t
3173 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3174 {
3175 struct inode *inode = file_inode(iocb->ki_filp);
3176 struct cifsInodeInfo *cinode = CIFS_I(inode);
3177 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3178 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3179 iocb->ki_filp->private_data;
3180 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3181 ssize_t written;
3182
3183 written = cifs_get_writer(cinode);
3184 if (written)
3185 return written;
3186
3187 if (CIFS_CACHE_WRITE(cinode)) {
3188 if (cap_unix(tcon->ses) &&
3189 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3190 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3191 written = generic_file_write_iter(iocb, from);
3192 goto out;
3193 }
3194 written = cifs_writev(iocb, from);
3195 goto out;
3196 }
3197 /*
3198 * For non-oplocked files in strict cache mode we need to write the data
3199 * to the server exactly from pos to pos+len-1 rather than flush all
3200 * affected pages because it may cause an error with mandatory locks on
3201 * these pages but not on the region from pos to pos+len-1.
3202 */
3203 written = cifs_user_writev(iocb, from);
3204 if (CIFS_CACHE_READ(cinode)) {
3205 /*
3206 * We have read level caching and we have just sent a write
3207 * request to the server thus making data in the cache stale.
3208 * Zap the cache and set oplock/lease level to NONE to avoid
3209 * reading stale data from the cache. All subsequent read
3210 * operations will read new data from the server.
3211 */
3212 cifs_zap_mapping(inode);
3213 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3214 inode);
3215 cinode->oplock = 0;
3216 }
3217 out:
3218 cifs_put_writer(cinode);
3219 return written;
3220 }
3221
3222 static struct cifs_readdata *
3223 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3224 {
3225 struct cifs_readdata *rdata;
3226
3227 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3228 if (rdata != NULL) {
3229 rdata->pages = pages;
3230 kref_init(&rdata->refcount);
3231 INIT_LIST_HEAD(&rdata->list);
3232 init_completion(&rdata->done);
3233 INIT_WORK(&rdata->work, complete);
3234 }
3235
3236 return rdata;
3237 }
3238
3239 static struct cifs_readdata *
3240 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3241 {
3242 struct page **pages =
3243 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3244 struct cifs_readdata *ret = NULL;
3245
3246 if (pages) {
3247 ret = cifs_readdata_direct_alloc(pages, complete);
3248 if (!ret)
3249 kfree(pages);
3250 }
3251
3252 return ret;
3253 }
3254
3255 void
3256 cifs_readdata_release(struct kref *refcount)
3257 {
3258 struct cifs_readdata *rdata = container_of(refcount,
3259 struct cifs_readdata, refcount);
3260 #ifdef CONFIG_CIFS_SMB_DIRECT
3261 if (rdata->mr) {
3262 smbd_deregister_mr(rdata->mr);
3263 rdata->mr = NULL;
3264 }
3265 #endif
3266 if (rdata->cfile)
3267 cifsFileInfo_put(rdata->cfile);
3268
3269 kvfree(rdata->pages);
3270 kfree(rdata);
3271 }
3272
3273 static int
3274 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3275 {
3276 int rc = 0;
3277 struct page *page;
3278 unsigned int i;
3279
3280 for (i = 0; i < nr_pages; i++) {
3281 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3282 if (!page) {
3283 rc = -ENOMEM;
3284 break;
3285 }
3286 rdata->pages[i] = page;
3287 }
3288
3289 if (rc) {
3290 unsigned int nr_page_failed = i;
3291
3292 for (i = 0; i < nr_page_failed; i++) {
3293 put_page(rdata->pages[i]);
3294 rdata->pages[i] = NULL;
3295 }
3296 }
3297 return rc;
3298 }
3299
3300 static void
3301 cifs_uncached_readdata_release(struct kref *refcount)
3302 {
3303 struct cifs_readdata *rdata = container_of(refcount,
3304 struct cifs_readdata, refcount);
3305 unsigned int i;
3306
3307 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3308 for (i = 0; i < rdata->nr_pages; i++) {
3309 put_page(rdata->pages[i]);
3310 }
3311 cifs_readdata_release(refcount);
3312 }
3313
3314 /**
3315 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3316 * @rdata: the readdata response with list of pages holding data
3317 * @iter: destination for our data
3318 *
3319 * This function copies data from a list of pages in a readdata response into
3320 * an array of iovecs. It will first calculate where the data should go
3321 * based on the info in the readdata and then copy the data into that spot.
3322 */
3323 static int
3324 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3325 {
3326 size_t remaining = rdata->got_bytes;
3327 unsigned int i;
3328
3329 for (i = 0; i < rdata->nr_pages; i++) {
3330 struct page *page = rdata->pages[i];
3331 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3332 size_t written;
3333
3334 if (unlikely(iov_iter_is_pipe(iter))) {
3335 void *addr = kmap_atomic(page);
3336
3337 written = copy_to_iter(addr, copy, iter);
3338 kunmap_atomic(addr);
3339 } else
3340 written = copy_page_to_iter(page, 0, copy, iter);
3341 remaining -= written;
3342 if (written < copy && iov_iter_count(iter) > 0)
3343 break;
3344 }
3345 return remaining ? -EFAULT : 0;
3346 }
3347
3348 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3349
3350 static void
3351 cifs_uncached_readv_complete(struct work_struct *work)
3352 {
3353 struct cifs_readdata *rdata = container_of(work,
3354 struct cifs_readdata, work);
3355
3356 complete(&rdata->done);
3357 collect_uncached_read_data(rdata->ctx);
3358 /* the below call can possibly free the last ref to aio ctx */
3359 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3360 }
3361
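/*
 * Fill @rdata->pages with up to @len bytes of read response data, copying
 * either from @iter or straight from the socket (for SMB Direct the data
 * has already been placed by RDMA).  Pages beyond the received data are
 * released and tailsz is set for the final, possibly partial, page.
 */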
3362 static int
3363 uncached_fill_pages(struct TCP_Server_Info *server,
3364 struct cifs_readdata *rdata, struct iov_iter *iter,
3365 unsigned int len)
3366 {
3367 int result = 0;
3368 unsigned int i;
3369 unsigned int nr_pages = rdata->nr_pages;
3370 unsigned int page_offset = rdata->page_offset;
3371
3372 rdata->got_bytes = 0;
3373 rdata->tailsz = PAGE_SIZE;
3374 for (i = 0; i < nr_pages; i++) {
3375 struct page *page = rdata->pages[i];
3376 size_t n;
3377 unsigned int segment_size = rdata->pagesz;
3378
3379 if (i == 0)
3380 segment_size -= page_offset;
3381 else
3382 page_offset = 0;
3383
3384
3385 if (len <= 0) {
3386 /* no need to hold page hostage */
3387 rdata->pages[i] = NULL;
3388 rdata->nr_pages--;
3389 put_page(page);
3390 continue;
3391 }
3392
3393 n = len;
3394 if (len >= segment_size)
3395 /* enough data to fill the page */
3396 n = segment_size;
3397 else
3398 rdata->tailsz = len;
3399 len -= n;
3400
3401 if (iter)
3402 result = copy_page_from_iter(
3403 page, page_offset, n, iter);
3404 #ifdef CONFIG_CIFS_SMB_DIRECT
3405 else if (rdata->mr)
3406 result = n;
3407 #endif
3408 else
3409 result = cifs_read_page_from_socket(
3410 server, page, page_offset, n);
3411 if (result < 0)
3412 break;
3413
3414 rdata->got_bytes += result;
3415 }
3416
3417 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3418 rdata->got_bytes : result;
3419 }
3420
3421 static int
3422 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3423 struct cifs_readdata *rdata, unsigned int len)
3424 {
3425 return uncached_fill_pages(server, rdata, NULL, len);
3426 }
3427
3428 static int
3429 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3430 struct cifs_readdata *rdata,
3431 struct iov_iter *iter)
3432 {
3433 return uncached_fill_pages(server, rdata, iter, iter->count);
3434 }
3435
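/*
 * Read-side counterpart of cifs_resend_wdata(): reopen the handle if
 * needed, wait for enough credits to resend the whole rdata in a single
 * request, and reissue the asynchronous read.
 */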
3436 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3437 struct list_head *rdata_list,
3438 struct cifs_aio_ctx *ctx)
3439 {
3440 unsigned int rsize;
3441 struct cifs_credits credits;
3442 int rc;
3443 struct TCP_Server_Info *server =
3444 tlink_tcon(rdata->cfile->tlink)->ses->server;
3445
3446 do {
3447 if (rdata->cfile->invalidHandle) {
3448 rc = cifs_reopen_file(rdata->cfile, true);
3449 if (rc == -EAGAIN)
3450 continue;
3451 else if (rc)
3452 break;
3453 }
3454
3455 /*
3456 * Wait for credits to resend this rdata.
3457 * Note: we are attempting to resend the whole rdata, not in
3458 * segments
3459 */
3460 do {
3461 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3462 &rsize, &credits);
3463
3464 if (rc)
3465 goto fail;
3466
3467 if (rsize < rdata->bytes) {
3468 add_credits_and_wake_if(server, &credits, 0);
3469 msleep(1000);
3470 }
3471 } while (rsize < rdata->bytes);
3472 rdata->credits = credits;
3473
3474 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3475 if (!rc) {
3476 if (rdata->cfile->invalidHandle)
3477 rc = -EAGAIN;
3478 else
3479 rc = server->ops->async_readv(rdata);
3480 }
3481
3482 /* If the read was successfully sent, we are done */
3483 if (!rc) {
3484 /* Add to aio pending list */
3485 list_add_tail(&rdata->list, rdata_list);
3486 return 0;
3487 }
3488
3489 /* Roll back credits and retry if needed */
3490 add_credits_and_wake_if(server, &rdata->credits, 0);
3491 } while (rc == -EAGAIN);
3492
3493 fail:
3494 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3495 return rc;
3496 }
3497
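/*
 * Split the read range [offset, offset + len) into rsize-sized chunks and
 * issue an asynchronous read for each.  For direct I/O the destination
 * pages are pinned from @ctx->iter; otherwise pages are allocated for the
 * response data.  Each issued rdata is appended to @rdata_list.
 */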
3498 static int
3499 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3500 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3501 struct cifs_aio_ctx *ctx)
3502 {
3503 struct cifs_readdata *rdata;
3504 unsigned int npages, rsize;
3505 struct cifs_credits credits_on_stack;
3506 struct cifs_credits *credits = &credits_on_stack;
3507 size_t cur_len;
3508 int rc;
3509 pid_t pid;
3510 struct TCP_Server_Info *server;
3511 struct page **pagevec;
3512 size_t start;
3513 struct iov_iter direct_iov = ctx->iter;
3514
3515 server = tlink_tcon(open_file->tlink)->ses->server;
3516
3517 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3518 pid = open_file->pid;
3519 else
3520 pid = current->tgid;
3521
3522 if (ctx->direct_io)
3523 iov_iter_advance(&direct_iov, offset - ctx->pos);
3524
3525 do {
3526 if (open_file->invalidHandle) {
3527 rc = cifs_reopen_file(open_file, true);
3528 if (rc == -EAGAIN)
3529 continue;
3530 else if (rc)
3531 break;
3532 }
3533
3534 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3535 &rsize, credits);
3536 if (rc)
3537 break;
3538
3539 cur_len = min_t(const size_t, len, rsize);
3540
3541 if (ctx->direct_io) {
3542 ssize_t result;
3543
3544 result = iov_iter_get_pages_alloc(
3545 &direct_iov, &pagevec,
3546 cur_len, &start);
3547 if (result < 0) {
3548 cifs_dbg(VFS,
3549 "couldn't get user pages (rc=%zd)"
3550 " iter type %d"
3551 " iov_offset %zd count %zd\n",
3552 result, direct_iov.type,
3553 direct_iov.iov_offset,
3554 direct_iov.count);
3555 dump_stack();
3556
3557 rc = result;
3558 add_credits_and_wake_if(server, credits, 0);
3559 break;
3560 }
3561 cur_len = (size_t)result;
3562 iov_iter_advance(&direct_iov, cur_len);
3563
3564 rdata = cifs_readdata_direct_alloc(
3565 pagevec, cifs_uncached_readv_complete);
3566 if (!rdata) {
3567 add_credits_and_wake_if(server, credits, 0);
3568 rc = -ENOMEM;
3569 break;
3570 }
3571
3572 npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3573 rdata->page_offset = start;
3574 rdata->tailsz = npages > 1 ?
3575 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3576 cur_len;
3577
3578 } else {
3579
3580 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3581 /* allocate a readdata struct */
3582 rdata = cifs_readdata_alloc(npages,
3583 cifs_uncached_readv_complete);
3584 if (!rdata) {
3585 add_credits_and_wake_if(server, credits, 0);
3586 rc = -ENOMEM;
3587 break;
3588 }
3589
3590 rc = cifs_read_allocate_pages(rdata, npages);
3591 if (rc) {
3592 kvfree(rdata->pages);
3593 kfree(rdata);
3594 add_credits_and_wake_if(server, credits, 0);
3595 break;
3596 }
3597
3598 rdata->tailsz = PAGE_SIZE;
3599 }
3600
3601 rdata->cfile = cifsFileInfo_get(open_file);
3602 rdata->nr_pages = npages;
3603 rdata->offset = offset;
3604 rdata->bytes = cur_len;
3605 rdata->pid = pid;
3606 rdata->pagesz = PAGE_SIZE;
3607 rdata->read_into_pages = cifs_uncached_read_into_pages;
3608 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3609 rdata->credits = credits_on_stack;
3610 rdata->ctx = ctx;
3611 kref_get(&ctx->refcount);
3612
3613 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3614
3615 if (!rc) {
3616 if (rdata->cfile->invalidHandle)
3617 rc = -EAGAIN;
3618 else
3619 rc = server->ops->async_readv(rdata);
3620 }
3621
3622 if (rc) {
3623 add_credits_and_wake_if(server, &rdata->credits, 0);
3624 kref_put(&rdata->refcount,
3625 cifs_uncached_readdata_release);
3626 if (rc == -EAGAIN) {
3627 iov_iter_revert(&direct_iov, cur_len);
3628 continue;
3629 }
3630 break;
3631 }
3632
3633 list_add_tail(&rdata->list, rdata_list);
3634 offset += cur_len;
3635 len -= cur_len;
3636 } while (len > 0);
3637
3638 return rc;
3639 }
3640
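/*
 * Collect the results of the asynchronous reads queued on @ctx->list in
 * order of increasing offset, copying the data to the caller's iterator
 * (unless this is direct I/O) and resending any request that failed with
 * -EAGAIN, then complete the aio context.
 */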
3641 static void
3642 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3643 {
3644 struct cifs_readdata *rdata, *tmp;
3645 struct iov_iter *to = &ctx->iter;
3646 struct cifs_sb_info *cifs_sb;
3647 int rc;
3648
3649 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3650
3651 mutex_lock(&ctx->aio_mutex);
3652
3653 if (list_empty(&ctx->list)) {
3654 mutex_unlock(&ctx->aio_mutex);
3655 return;
3656 }
3657
3658 rc = ctx->rc;
3659 /* the loop below should proceed in the order of increasing offsets */
3660 again:
3661 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3662 if (!rc) {
3663 if (!try_wait_for_completion(&rdata->done)) {
3664 mutex_unlock(&ctx->aio_mutex);
3665 return;
3666 }
3667
3668 if (rdata->result == -EAGAIN) {
3669 /* resend call if it's a retryable error */
3670 struct list_head tmp_list;
3671 unsigned int got_bytes = rdata->got_bytes;
3672
3673 list_del_init(&rdata->list);
3674 INIT_LIST_HEAD(&tmp_list);
3675
3676 /*
3677 * Got part of the data and then a reconnect
3678 * happened -- fill the buffer and continue
3679 * reading.
3680 */
3681 if (got_bytes && got_bytes < rdata->bytes) {
3682 rc = 0;
3683 if (!ctx->direct_io)
3684 rc = cifs_readdata_to_iov(rdata, to);
3685 if (rc) {
3686 kref_put(&rdata->refcount,
3687 cifs_uncached_readdata_release);
3688 continue;
3689 }
3690 }
3691
3692 if (ctx->direct_io) {
3693 /*
3694 * Re-use rdata as this is a
3695 * direct I/O
3696 */
3697 rc = cifs_resend_rdata(
3698 rdata,
3699 &tmp_list, ctx);
3700 } else {
3701 rc = cifs_send_async_read(
3702 rdata->offset + got_bytes,
3703 rdata->bytes - got_bytes,
3704 rdata->cfile, cifs_sb,
3705 &tmp_list, ctx);
3706
3707 kref_put(&rdata->refcount,
3708 cifs_uncached_readdata_release);
3709 }
3710
3711 list_splice(&tmp_list, &ctx->list);
3712
3713 goto again;
3714 } else if (rdata->result)
3715 rc = rdata->result;
3716 else if (!ctx->direct_io)
3717 rc = cifs_readdata_to_iov(rdata, to);
3718
3719 /* if there was a short read -- discard anything left */
3720 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3721 rc = -ENODATA;
3722
3723 ctx->total_len += rdata->got_bytes;
3724 }
3725 list_del_init(&rdata->list);
3726 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3727 }
3728
3729 if (!ctx->direct_io)
3730 ctx->total_len = ctx->len - iov_iter_count(to);
3731
3732 /* mask nodata case */
3733 if (rc == -ENODATA)
3734 rc = 0;
3735
3736 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3737
3738 mutex_unlock(&ctx->aio_mutex);
3739
3740 if (ctx->iocb && ctx->iocb->ki_complete)
3741 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3742 else
3743 complete(&ctx->done);
3744 }
3745
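/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv():
 * set up an aio context, issue the reads via cifs_send_async_read() and
 * either return -EIOCBQUEUED for an asynchronous iocb or wait for the
 * reads to complete.
 */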
3746 static ssize_t __cifs_readv(
3747 struct kiocb *iocb, struct iov_iter *to, bool direct)
3748 {
3749 size_t len;
3750 struct file *file = iocb->ki_filp;
3751 struct cifs_sb_info *cifs_sb;
3752 struct cifsFileInfo *cfile;
3753 struct cifs_tcon *tcon;
3754 ssize_t rc, total_read = 0;
3755 loff_t offset = iocb->ki_pos;
3756 struct cifs_aio_ctx *ctx;
3757
3758 /*
3759 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3760 * so fall back to the data copy read path.
3761 * This could be improved by getting pages directly in ITER_KVEC.
3762 */
3763 if (direct && to->type & ITER_KVEC) {
3764 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3765 direct = false;
3766 }
3767
3768 len = iov_iter_count(to);
3769 if (!len)
3770 return 0;
3771
3772 cifs_sb = CIFS_FILE_SB(file);
3773 cfile = file->private_data;
3774 tcon = tlink_tcon(cfile->tlink);
3775
3776 if (!tcon->ses->server->ops->async_readv)
3777 return -ENOSYS;
3778
3779 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3780 cifs_dbg(FYI, "attempting read on write only file instance\n");
3781
3782 ctx = cifs_aio_ctx_alloc();
3783 if (!ctx)
3784 return -ENOMEM;
3785
3786 ctx->cfile = cifsFileInfo_get(cfile);
3787
3788 if (!is_sync_kiocb(iocb))
3789 ctx->iocb = iocb;
3790
3791 if (iter_is_iovec(to))
3792 ctx->should_dirty = true;
3793
3794 if (direct) {
3795 ctx->pos = offset;
3796 ctx->direct_io = true;
3797 ctx->iter = *to;
3798 ctx->len = len;
3799 } else {
3800 rc = setup_aio_ctx_iter(ctx, to, READ);
3801 if (rc) {
3802 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3803 return rc;
3804 }
3805 len = ctx->len;
3806 }
3807
3808 /* grab a lock here because the read response handlers can access ctx */
3809 mutex_lock(&ctx->aio_mutex);
3810
3811 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3812
3813 /* if at least one read request was sent successfully, then reset rc */
3814 if (!list_empty(&ctx->list))
3815 rc = 0;
3816
3817 mutex_unlock(&ctx->aio_mutex);
3818
3819 if (rc) {
3820 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3821 return rc;
3822 }
3823
3824 if (!is_sync_kiocb(iocb)) {
3825 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3826 return -EIOCBQUEUED;
3827 }
3828
3829 rc = wait_for_completion_killable(&ctx->done);
3830 if (rc) {
3831 mutex_lock(&ctx->aio_mutex);
3832 ctx->rc = rc = -EINTR;
3833 total_read = ctx->total_len;
3834 mutex_unlock(&ctx->aio_mutex);
3835 } else {
3836 rc = ctx->rc;
3837 total_read = ctx->total_len;
3838 }
3839
3840 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3841
3842 if (total_read) {
3843 iocb->ki_pos += total_read;
3844 return total_read;
3845 }
3846 return rc;
3847 }
3848
3849 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3850 {
3851 return __cifs_readv(iocb, to, true);
3852 }
3853
3854 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3855 {
3856 return __cifs_readv(iocb, to, false);
3857 }
3858
3859 ssize_t
3860 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3861 {
3862 struct inode *inode = file_inode(iocb->ki_filp);
3863 struct cifsInodeInfo *cinode = CIFS_I(inode);
3864 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3865 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3866 iocb->ki_filp->private_data;
3867 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3868 int rc = -EACCES;
3869
3870 /*
3871 * In strict cache mode we need to read from the server all the time
3872 * if we don't have level II oplock because the server can delay mtime
3873 * change - so we can't make a decision about invalidating the inode.
3874 * And we can also fail with page reading if there are mandatory locks
3875 * on pages affected by this read but not on the region from pos to
3876 * pos+len-1.
3877 */
3878 if (!CIFS_CACHE_READ(cinode))
3879 return cifs_user_readv(iocb, to);
3880
3881 if (cap_unix(tcon->ses) &&
3882 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3883 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3884 return generic_file_read_iter(iocb, to);
3885
3886 /*
3887 * We need to hold the sem to be sure nobody modifies the lock list
3888 * with a brlock that prevents reading.
3889 */
3890 down_read(&cinode->lock_sem);
3891 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3892 tcon->ses->server->vals->shared_lock_type,
3893 0, NULL, CIFS_READ_OP))
3894 rc = generic_file_read_iter(iocb, to);
3895 up_read(&cinode->lock_sem);
3896 return rc;
3897 }
3898
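/*
 * Legacy synchronous read path: read @read_size bytes starting at *offset
 * into @read_data using sync_read calls of at most rsize bytes, reopening
 * the file handle and retrying on -EAGAIN.  Returns the number of bytes
 * read, or a negative error if nothing could be read.
 */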
3899 static ssize_t
3900 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3901 {
3902 int rc = -EACCES;
3903 unsigned int bytes_read = 0;
3904 unsigned int total_read;
3905 unsigned int current_read_size;
3906 unsigned int rsize;
3907 struct cifs_sb_info *cifs_sb;
3908 struct cifs_tcon *tcon;
3909 struct TCP_Server_Info *server;
3910 unsigned int xid;
3911 char *cur_offset;
3912 struct cifsFileInfo *open_file;
3913 struct cifs_io_parms io_parms;
3914 int buf_type = CIFS_NO_BUFFER;
3915 __u32 pid;
3916
3917 xid = get_xid();
3918 cifs_sb = CIFS_FILE_SB(file);
3919
3920 /* FIXME: set up handlers for larger reads and/or convert to async */
3921 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3922
3923 if (file->private_data == NULL) {
3924 rc = -EBADF;
3925 free_xid(xid);
3926 return rc;
3927 }
3928 open_file = file->private_data;
3929 tcon = tlink_tcon(open_file->tlink);
3930 server = tcon->ses->server;
3931
3932 if (!server->ops->sync_read) {
3933 free_xid(xid);
3934 return -ENOSYS;
3935 }
3936
3937 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3938 pid = open_file->pid;
3939 else
3940 pid = current->tgid;
3941
3942 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3943 cifs_dbg(FYI, "attempting read on write only file instance\n");
3944
3945 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3946 total_read += bytes_read, cur_offset += bytes_read) {
3947 do {
3948 current_read_size = min_t(uint, read_size - total_read,
3949 rsize);
3950 /*
3951 * For Windows ME and 9x we do not want to request more
3952 * than it negotiated since it will refuse the read
3953 * then.
3954 */
3955 if ((tcon->ses) && !(tcon->ses->capabilities &
3956 tcon->ses->server->vals->cap_large_files)) {
3957 current_read_size = min_t(uint,
3958 current_read_size, CIFSMaxBufSize);
3959 }
3960 if (open_file->invalidHandle) {
3961 rc = cifs_reopen_file(open_file, true);
3962 if (rc != 0)
3963 break;
3964 }
3965 io_parms.pid = pid;
3966 io_parms.tcon = tcon;
3967 io_parms.offset = *offset;
3968 io_parms.length = current_read_size;
3969 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3970 &bytes_read, &cur_offset,
3971 &buf_type);
3972 } while (rc == -EAGAIN);
3973
3974 if (rc || (bytes_read == 0)) {
3975 if (total_read) {
3976 break;
3977 } else {
3978 free_xid(xid);
3979 return rc;
3980 }
3981 } else {
3982 cifs_stats_bytes_read(tcon, total_read);
3983 *offset += bytes_read;
3984 }
3985 }
3986 free_xid(xid);
3987 return total_read;
3988 }
3989
3990 /*
3991 * If the page is mmap'ed into a process' page tables, then we need to make
3992 * sure that it doesn't change while being written back.
3993 */
3994 static vm_fault_t
3995 cifs_page_mkwrite(struct vm_fault *vmf)
3996 {
3997 struct page *page = vmf->page;
3998
3999 lock_page(page);
4000 return VM_FAULT_LOCKED;
4001 }
4002
4003 static const struct vm_operations_struct cifs_file_vm_ops = {
4004 .fault = filemap_fault,
4005 .map_pages = filemap_map_pages,
4006 .page_mkwrite = cifs_page_mkwrite,
4007 };
4008
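/*
 * Strict cache mode mmap: if we do not hold a read oplock/lease, zap the
 * cached pages first so the mapping starts from fresh server data.
 */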
4009 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4010 {
4011 int xid, rc = 0;
4012 struct inode *inode = file_inode(file);
4013
4014 xid = get_xid();
4015
4016 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4017 rc = cifs_zap_mapping(inode);
4018 if (!rc)
4019 rc = generic_file_mmap(file, vma);
4020 if (!rc)
4021 vma->vm_ops = &cifs_file_vm_ops;
4022
4023 free_xid(xid);
4024 return rc;
4025 }
4026
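/*
 * Revalidate the file's cached data against the server before handing
 * off to generic_file_mmap().
 */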
4027 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4028 {
4029 int rc, xid;
4030
4031 xid = get_xid();
4032
4033 rc = cifs_revalidate_file(file);
4034 if (rc)
4035 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4036 rc);
4037 if (!rc)
4038 rc = generic_file_mmap(file, vma);
4039 if (!rc)
4040 vma->vm_ops = &cifs_file_vm_ops;
4041
4042 free_xid(xid);
4043 return rc;
4044 }
4045
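/*
 * Completion work for an async readpages request: mark the pages that
 * received data uptodate, hand them to fscache and the LRU, then drop
 * the page and rdata references.
 */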
4046 static void
4047 cifs_readv_complete(struct work_struct *work)
4048 {
4049 unsigned int i, got_bytes;
4050 struct cifs_readdata *rdata = container_of(work,
4051 struct cifs_readdata, work);
4052
4053 got_bytes = rdata->got_bytes;
4054 for (i = 0; i < rdata->nr_pages; i++) {
4055 struct page *page = rdata->pages[i];
4056
4057 lru_cache_add_file(page);
4058
4059 if (rdata->result == 0 ||
4060 (rdata->result == -EAGAIN && got_bytes)) {
4061 flush_dcache_page(page);
4062 SetPageUptodate(page);
4063 }
4064
4065 unlock_page(page);
4066
4067 if (rdata->result == 0 ||
4068 (rdata->result == -EAGAIN && got_bytes))
4069 cifs_readpage_to_fscache(rdata->mapping->host, page);
4070
4071 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4072
4073 put_page(page);
4074 rdata->pages[i] = NULL;
4075 }
4076 kref_put(&rdata->refcount, cifs_readdata_release);
4077 }
4078
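/*
 * Fill rdata->pages with up to len bytes of read response data, either
 * copied from iter when the caller already has the data in memory or
 * read directly from the server socket.  A partially filled final page
 * is zero-padded; pages beyond the data received (or past the server
 * EOF) are released unread.
 */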
4079 static int
4080 readpages_fill_pages(struct TCP_Server_Info *server,
4081 struct cifs_readdata *rdata, struct iov_iter *iter,
4082 unsigned int len)
4083 {
4084 int result = 0;
4085 unsigned int i;
4086 u64 eof;
4087 pgoff_t eof_index;
4088 unsigned int nr_pages = rdata->nr_pages;
4089 unsigned int page_offset = rdata->page_offset;
4090
4091 /* determine the eof that the server (probably) has */
4092 eof = CIFS_I(rdata->mapping->host)->server_eof;
4093 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4094 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4095
4096 rdata->got_bytes = 0;
4097 rdata->tailsz = PAGE_SIZE;
4098 for (i = 0; i < nr_pages; i++) {
4099 struct page *page = rdata->pages[i];
4100 unsigned int to_read = rdata->pagesz;
4101 size_t n;
4102
4103 if (i == 0)
4104 to_read -= page_offset;
4105 else
4106 page_offset = 0;
4107
4108 n = to_read;
4109
4110 if (len >= to_read) {
4111 len -= to_read;
4112 } else if (len > 0) {
4113 /* enough for partial page, fill and zero the rest */
4114 zero_user(page, len + page_offset, to_read - len);
4115 n = rdata->tailsz = len;
4116 len = 0;
4117 } else if (page->index > eof_index) {
4118 /*
4119 * The VFS will not try to do readahead past the
4120 * i_size, but it's possible that we have outstanding
4121 * writes with gaps in the middle and the i_size hasn't
4122 * caught up yet. Populate those with zeroed out pages
4123 * to prevent the VFS from repeatedly attempting to
4124 * fill them until the writes are flushed.
4125 */
4126 zero_user(page, 0, PAGE_SIZE);
4127 lru_cache_add_file(page);
4128 flush_dcache_page(page);
4129 SetPageUptodate(page);
4130 unlock_page(page);
4131 put_page(page);
4132 rdata->pages[i] = NULL;
4133 rdata->nr_pages--;
4134 continue;
4135 } else {
4136 /* no need to hold page hostage */
4137 lru_cache_add_file(page);
4138 unlock_page(page);
4139 put_page(page);
4140 rdata->pages[i] = NULL;
4141 rdata->nr_pages--;
4142 continue;
4143 }
4144
4145 if (iter)
4146 result = copy_page_from_iter(
4147 page, page_offset, n, iter);
4148 #ifdef CONFIG_CIFS_SMB_DIRECT
4149 else if (rdata->mr)
4150 result = n;
4151 #endif
4152 else
4153 result = cifs_read_page_from_socket(
4154 server, page, page_offset, n);
4155 if (result < 0)
4156 break;
4157
4158 rdata->got_bytes += result;
4159 }
4160
4161 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4162 rdata->got_bytes : result;
4163 }
4164
4165 static int
4166 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4167 struct cifs_readdata *rdata, unsigned int len)
4168 {
4169 return readpages_fill_pages(server, rdata, NULL, len);
4170 }
4171
4172 static int
4173 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4174 struct cifs_readdata *rdata,
4175 struct iov_iter *iter)
4176 {
4177 return readpages_fill_pages(server, rdata, iter, iter->count);
4178 }
4179
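/*
 * Take the first page off page_list, add it to the page cache, then
 * batch as many contiguous pages as fit within rsize onto tmplist.
 * The starting offset, byte count and page count of the batch are
 * returned through the out parameters.
 */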
4180 static int
4181 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4182 unsigned int rsize, struct list_head *tmplist,
4183 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4184 {
4185 struct page *page, *tpage;
4186 unsigned int expected_index;
4187 int rc;
4188 gfp_t gfp = readahead_gfp_mask(mapping);
4189
4190 INIT_LIST_HEAD(tmplist);
4191
4192 page = lru_to_page(page_list);
4193
4194 /*
4195 * Lock the page and put it in the cache. Since no one else
4196 * should have access to this page, we're safe to simply set
4197 * PG_locked without checking it first.
4198 */
4199 __SetPageLocked(page);
4200 rc = add_to_page_cache_locked(page, mapping,
4201 page->index, gfp);
4202
4203 /* give up if we can't stick it in the cache */
4204 if (rc) {
4205 __ClearPageLocked(page);
4206 return rc;
4207 }
4208
4209 /* move first page to the tmplist */
4210 *offset = (loff_t)page->index << PAGE_SHIFT;
4211 *bytes = PAGE_SIZE;
4212 *nr_pages = 1;
4213 list_move_tail(&page->lru, tmplist);
4214
4215 /* now try and add more pages onto the request */
4216 expected_index = page->index + 1;
4217 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4218 /* discontinuity ? */
4219 if (page->index != expected_index)
4220 break;
4221
4222 /* would this page push the read over the rsize? */
4223 if (*bytes + PAGE_SIZE > rsize)
4224 break;
4225
4226 __SetPageLocked(page);
4227 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4228 __ClearPageLocked(page);
4229 break;
4230 }
4231 list_move_tail(&page->lru, tmplist);
4232 (*bytes) += PAGE_SIZE;
4233 expected_index++;
4234 (*nr_pages)++;
4235 }
4236 return rc;
4237 }
4238
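/*
 * ->readpages(): try fscache first, then split the VFS readahead list
 * into rsize-sized batches and issue an async read for each batch.
 */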
4239 static int cifs_readpages(struct file *file, struct address_space *mapping,
4240 struct list_head *page_list, unsigned num_pages)
4241 {
4242 int rc;
4243 struct list_head tmplist;
4244 struct cifsFileInfo *open_file = file->private_data;
4245 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4246 struct TCP_Server_Info *server;
4247 pid_t pid;
4248 unsigned int xid;
4249
4250 xid = get_xid();
4251 /*
4252 * Reads as many pages as possible from fscache. Returns -ENOBUFS
4253 * immediately if the cookie is negative.
4254 *
4255 * After this point, every page in the list might have PG_fscache set,
4256 * so we will need to clear that off of every page we don't use.
4257 */
4258 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4259 &num_pages);
4260 if (rc == 0) {
4261 free_xid(xid);
4262 return rc;
4263 }
4264
4265 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4266 pid = open_file->pid;
4267 else
4268 pid = current->tgid;
4269
4270 rc = 0;
4271 server = tlink_tcon(open_file->tlink)->ses->server;
4272
4273 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4274 __func__, file, mapping, num_pages);
4275
4276 /*
4277 * Start with the page at end of list and move it to private
4278 * list. Do the same with any following pages until we hit
4279 * the rsize limit, hit an index discontinuity, or run out of
4280 * pages. Issue the async read and then start the loop again
4281 * until the list is empty.
4282 *
4283 * Note that list order is important. The page_list is in
4284 * the order of declining indexes. When we put the pages in
4285 * the rdata->pages, then we want them in increasing order.
4286 */
4287 while (!list_empty(page_list)) {
4288 unsigned int i, nr_pages, bytes, rsize;
4289 loff_t offset;
4290 struct page *page, *tpage;
4291 struct cifs_readdata *rdata;
4292 struct cifs_credits credits_on_stack;
4293 struct cifs_credits *credits = &credits_on_stack;
4294
4295 if (open_file->invalidHandle) {
4296 rc = cifs_reopen_file(open_file, true);
4297 if (rc == -EAGAIN)
4298 continue;
4299 else if (rc)
4300 break;
4301 }
4302
4303 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4304 &rsize, credits);
4305 if (rc)
4306 break;
4307
4308 /*
4309 * Give up immediately if rsize is too small to read an entire
4310 * page. The VFS will fall back to readpage. We should never
4311 * reach this point however since we set ra_pages to 0 when the
4312 * rsize is smaller than a cache page.
4313 */
4314 if (unlikely(rsize < PAGE_SIZE)) {
4315 add_credits_and_wake_if(server, credits, 0);
4316 free_xid(xid);
4317 return 0;
4318 }
4319
4320 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4321 &nr_pages, &offset, &bytes);
4322 if (rc) {
4323 add_credits_and_wake_if(server, credits, 0);
4324 break;
4325 }
4326
4327 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4328 if (!rdata) {
4329 /* best to give up if we're out of mem */
4330 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4331 list_del(&page->lru);
4332 lru_cache_add_file(page);
4333 unlock_page(page);
4334 put_page(page);
4335 }
4336 rc = -ENOMEM;
4337 add_credits_and_wake_if(server, credits, 0);
4338 break;
4339 }
4340
4341 rdata->cfile = cifsFileInfo_get(open_file);
4342 rdata->mapping = mapping;
4343 rdata->offset = offset;
4344 rdata->bytes = bytes;
4345 rdata->pid = pid;
4346 rdata->pagesz = PAGE_SIZE;
4347 rdata->tailsz = PAGE_SIZE;
4348 rdata->read_into_pages = cifs_readpages_read_into_pages;
4349 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4350 rdata->credits = credits_on_stack;
4351
4352 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4353 list_del(&page->lru);
4354 rdata->pages[rdata->nr_pages++] = page;
4355 }
4356
4357 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4358
4359 if (!rc) {
4360 if (rdata->cfile->invalidHandle)
4361 rc = -EAGAIN;
4362 else
4363 rc = server->ops->async_readv(rdata);
4364 }
4365
4366 if (rc) {
4367 add_credits_and_wake_if(server, &rdata->credits, 0);
4368 for (i = 0; i < rdata->nr_pages; i++) {
4369 page = rdata->pages[i];
4370 lru_cache_add_file(page);
4371 unlock_page(page);
4372 put_page(page);
4373 }
4374 /* Fallback to the readpage in error/reconnect cases */
4375 kref_put(&rdata->refcount, cifs_readdata_release);
4376 break;
4377 }
4378
4379 kref_put(&rdata->refcount, cifs_readdata_release);
4380 }
4381
4382 /* Any pages that have been shown to fscache but didn't get added to
4383 * the pagecache must be uncached before they get returned to the
4384 * allocator.
4385 */
4386 cifs_fscache_readpages_cancel(mapping->host, page_list);
4387 free_xid(xid);
4388 return rc;
4389 }
4390
4391 /*
4392 * cifs_readpage_worker must be called with the page pinned
4393 */
4394 static int cifs_readpage_worker(struct file *file, struct page *page,
4395 loff_t *poffset)
4396 {
4397 char *read_data;
4398 int rc;
4399
4400 /* Is the page cached? */
4401 rc = cifs_readpage_from_fscache(file_inode(file), page);
4402 if (rc == 0)
4403 goto read_complete;
4404
4405 read_data = kmap(page);
4406 /* for reads over a certain size we could initiate async read ahead */
4407
4408 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4409
4410 if (rc < 0)
4411 goto io_error;
4412 else
4413 cifs_dbg(FYI, "Bytes read %d\n", rc);
4414
4415 /* we do not want atime to be less than mtime, it broke some apps */
4416 file_inode(file)->i_atime = current_time(file_inode(file));
4417 if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4418 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4419 else
4420 file_inode(file)->i_atime = current_time(file_inode(file));
4421
4422 if (PAGE_SIZE > rc)
4423 memset(read_data + rc, 0, PAGE_SIZE - rc);
4424
4425 flush_dcache_page(page);
4426 SetPageUptodate(page);
4427
4428 /* send this page to the cache */
4429 cifs_readpage_to_fscache(file_inode(file), page);
4430
4431 rc = 0;
4432
4433 io_error:
4434 kunmap(page);
4435 unlock_page(page);
4436
4437 read_complete:
4438 return rc;
4439 }
4440
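/*
 * ->readpage(): synchronously fill a single page, from fscache when
 * possible, otherwise via cifs_read().
 */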
4441 static int cifs_readpage(struct file *file, struct page *page)
4442 {
4443 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4444 int rc = -EACCES;
4445 unsigned int xid;
4446
4447 xid = get_xid();
4448
4449 if (file->private_data == NULL) {
4450 rc = -EBADF;
4451 free_xid(xid);
4452 return rc;
4453 }
4454
4455 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4456 page, (int)offset, (int)offset);
4457
4458 rc = cifs_readpage_worker(file, page, &offset);
4459
4460 free_xid(xid);
4461 return rc;
4462 }
4463
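/* Return 1 if any open handle on this inode was opened with write access. */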
4464 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4465 {
4466 struct cifsFileInfo *open_file;
4467
4468 spin_lock(&cifs_inode->open_file_lock);
4469 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4470 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4471 spin_unlock(&cifs_inode->open_file_lock);
4472 return 1;
4473 }
4474 }
4475 spin_unlock(&cifs_inode->open_file_lock);
4476 return 0;
4477 }
4478
4479 /* We do not want to update the file size from the server for inodes
4480 open for write, to avoid races with writepage extending the file.
4481 In the future we could consider allowing a refresh of the inode
4482 only on increases in the file size, but this is tricky to do
4483 without racing with writebehind page caching in the current
4484 Linux kernel design. */
4485 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4486 {
4487 if (!cifsInode)
4488 return true;
4489
4490 if (is_inode_writable(cifsInode)) {
4491 /* This inode is open for write at least once */
4492 struct cifs_sb_info *cifs_sb;
4493
4494 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4495 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4496 /* since there is no page cache to corrupt on
4497 direct I/O, we can change the size safely */
4498 return true;
4499 }
4500
4501 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4502 return true;
4503
4504 return false;
4505 } else
4506 return true;
4507 }
4508
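/*
 * ->write_begin(): lock the target page and, unless the write covers the
 * whole page, the page lies beyond EOF while we hold a read oplock, or
 * the handle is write-only, read the existing contents in so that
 * ->write_end() can do a cached update.
 */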
4509 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4510 loff_t pos, unsigned len, unsigned flags,
4511 struct page **pagep, void **fsdata)
4512 {
4513 int oncethru = 0;
4514 pgoff_t index = pos >> PAGE_SHIFT;
4515 loff_t offset = pos & (PAGE_SIZE - 1);
4516 loff_t page_start = pos & PAGE_MASK;
4517 loff_t i_size;
4518 struct page *page;
4519 int rc = 0;
4520
4521 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4522
4523 start:
4524 page = grab_cache_page_write_begin(mapping, index, flags);
4525 if (!page) {
4526 rc = -ENOMEM;
4527 goto out;
4528 }
4529
4530 if (PageUptodate(page))
4531 goto out;
4532
4533 /*
4534 * If we write a full page it will be up to date, no need to read from
4535 * the server. If the write is short, we'll end up doing a sync write
4536 * instead.
4537 */
4538 if (len == PAGE_SIZE)
4539 goto out;
4540
4541 /*
4542 * optimize away the read when we have an oplock, and we're not
4543 * expecting to use any of the data we'd be reading in. That
4544 * is, when the page lies beyond the EOF, or straddles the EOF
4545 * and the write will cover all of the existing data.
4546 */
4547 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4548 i_size = i_size_read(mapping->host);
4549 if (page_start >= i_size ||
4550 (offset == 0 && (pos + len) >= i_size)) {
4551 zero_user_segments(page, 0, offset,
4552 offset + len,
4553 PAGE_SIZE);
4554 /*
4555 * PageChecked means that the parts of the page
4556 * to which we're not writing are considered up
4557 * to date. Once the data is copied to the
4558 * page, it can be set uptodate.
4559 */
4560 SetPageChecked(page);
4561 goto out;
4562 }
4563 }
4564
4565 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4566 /*
4567 * might as well read a page, it is fast enough. If we get
4568 * an error, we don't need to return it. cifs_write_end will
4569 * do a sync write instead since PG_uptodate isn't set.
4570 */
4571 cifs_readpage_worker(file, page, &page_start);
4572 put_page(page);
4573 oncethru = 1;
4574 goto start;
4575 } else {
4576 /* We could try using another file handle if there is one, but
4577 how would we lock it to prevent a close of that handle from
4578 racing with this read? In any case the data will be written
4579 out by write_end, so this is fine. */
4580 }
4581 out:
4582 *pagep = page;
4583 return rc;
4584 }
4585
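/*
 * ->releasepage(): refuse to release pages that still carry private
 * data; otherwise let fscache decide whether the page can be freed.
 */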
4586 static int cifs_release_page(struct page *page, gfp_t gfp)
4587 {
4588 if (PagePrivate(page))
4589 return 0;
4590
4591 return cifs_fscache_release_page(page, gfp);
4592 }
4593
4594 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4595 unsigned int length)
4596 {
4597 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4598
4599 if (offset == 0 && length == PAGE_SIZE)
4600 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4601 }
4602
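/*
 * ->launder_page(): synchronously write back a dirty page and drop it
 * from fscache before the page is invalidated.
 */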
4603 static int cifs_launder_page(struct page *page)
4604 {
4605 int rc = 0;
4606 loff_t range_start = page_offset(page);
4607 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4608 struct writeback_control wbc = {
4609 .sync_mode = WB_SYNC_ALL,
4610 .nr_to_write = 0,
4611 .range_start = range_start,
4612 .range_end = range_end,
4613 };
4614
4615 cifs_dbg(FYI, "Launder page: %p\n", page);
4616
4617 if (clear_page_dirty_for_io(page))
4618 rc = cifs_writepage_locked(page, &wbc);
4619
4620 cifs_fscache_invalidate_page(page, page->mapping->host);
4621 return rc;
4622 }
4623
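/*
 * Work item run when the server breaks an oplock/lease: downgrade the
 * cached state, flush (and if necessary invalidate) dirty pages, re-push
 * byte-range locks, and acknowledge the break to the server.
 */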
4624 void cifs_oplock_break(struct work_struct *work)
4625 {
4626 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4627 oplock_break);
4628 struct inode *inode = d_inode(cfile->dentry);
4629 struct cifsInodeInfo *cinode = CIFS_I(inode);
4630 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4631 struct TCP_Server_Info *server = tcon->ses->server;
4632 int rc = 0;
4633
4634 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4635 TASK_UNINTERRUPTIBLE);
4636
4637 server->ops->downgrade_oplock(server, cinode,
4638 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4639
4640 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4641 cifs_has_mand_locks(cinode)) {
4642 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4643 inode);
4644 cinode->oplock = 0;
4645 }
4646
4647 if (inode && S_ISREG(inode->i_mode)) {
4648 if (CIFS_CACHE_READ(cinode))
4649 break_lease(inode, O_RDONLY);
4650 else
4651 break_lease(inode, O_WRONLY);
4652 rc = filemap_fdatawrite(inode->i_mapping);
4653 if (!CIFS_CACHE_READ(cinode)) {
4654 rc = filemap_fdatawait(inode->i_mapping);
4655 mapping_set_error(inode->i_mapping, rc);
4656 cifs_zap_mapping(inode);
4657 }
4658 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4659 }
4660
4661 rc = cifs_push_locks(cfile);
4662 if (rc)
4663 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4664
4665 /*
4666 * Releasing a stale oplock after a recent reconnect of the SMB session,
4667 * using a now-incorrect file handle, is not a data integrity issue, but
4668 * do not bother sending an oplock release if the session is still
4669 * disconnected, since the server has already released the oplock.
4670 */
4671 if (!cfile->oplock_break_cancelled) {
4672 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4673 cinode);
4674 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4675 }
4676 _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
4677 cifs_done_oplock_break(cinode);
4678 }
4679
4680 /*
4681 * The presence of cifs_direct_io() in the address space ops vector
4682 * allows open() with O_DIRECT, which would have failed otherwise.
4683 *
4684 * In the non-cached mode (mount with cache=none), direct read and write
4685 * requests are shunted off before reaching here, so this method should never be called.
4686 *
4687 * Direct IO is not yet supported in the cached mode.
4688 */
4689 static ssize_t
4690 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4691 {
4692 /*
4693 * FIXME
4694 * Eventually need to support direct IO for non forcedirectio mounts
4695 */
4696 return -EINVAL;
4697 }
4698
4699
4700 const struct address_space_operations cifs_addr_ops = {
4701 .readpage = cifs_readpage,
4702 .readpages = cifs_readpages,
4703 .writepage = cifs_writepage,
4704 .writepages = cifs_writepages,
4705 .write_begin = cifs_write_begin,
4706 .write_end = cifs_write_end,
4707 .set_page_dirty = __set_page_dirty_nobuffers,
4708 .releasepage = cifs_release_page,
4709 .direct_IO = cifs_direct_io,
4710 .invalidatepage = cifs_invalidate_page,
4711 .launder_page = cifs_launder_page,
4712 };
4713
4714 /*
4715 * cifs_readpages requires the server to support a buffer large enough to
4716 * contain the header plus one complete page of data. Otherwise, we need
4717 * to leave cifs_readpages out of the address space operations.
4718 */
4719 const struct address_space_operations cifs_addr_ops_smallbuf = {
4720 .readpage = cifs_readpage,
4721 .writepage = cifs_writepage,
4722 .writepages = cifs_writepages,
4723 .write_begin = cifs_write_begin,
4724 .write_end = cifs_write_end,
4725 .set_page_dirty = __set_page_dirty_nobuffers,
4726 .releasepage = cifs_release_page,
4727 .invalidatepage = cifs_invalidate_page,
4728 .launder_page = cifs_launder_page,
4729 };