]> git.proxmox.com Git - mirror_spl-debian.git/blame - module/spl/spl-vnode.c
New upstream version 0.7.2
[mirror_spl-debian.git] / module / spl / spl-vnode.c
CommitLineData
716154c5
BB
1/*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
715f6251 6 * UCRL-CODE-235197
7 *
716154c5 8 * This file is part of the SPL, Solaris Porting Layer.
3d6af2dd 9 * For details, see <http://zfsonlinux.org/>.
716154c5
BB
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
715f6251 15 *
716154c5 16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
715f6251 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
716154c5
BB
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Vnode Implementation.
25\*****************************************************************************/
715f6251 26
80093b6f 27#include <sys/cred.h>
4b171585 28#include <sys/vnode.h>
10946b02 29#include <sys/kmem_cache.h>
bbdc6ae4 30#include <linux/falloc.h>
10946b02 31#include <linux/file_compat.h>
937879f1 32
51a727e9 33vnode_t *rootdir = (vnode_t *)0xabcd1234;
4b171585 34EXPORT_SYMBOL(rootdir);
35
7afde631 36static spl_kmem_cache_t *vn_cache;
37static spl_kmem_cache_t *vn_file_cache;
e4f1d29f 38
83c623aa 39static DEFINE_SPINLOCK(vn_file_lock);
e4f1d29f 40static LIST_HEAD(vn_file_list);
af828292 41
4295b530
BB
42vtype_t
43vn_mode_to_vtype(mode_t mode)
4b171585 44{
45 if (S_ISREG(mode))
46 return VREG;
47
48 if (S_ISDIR(mode))
49 return VDIR;
50
51 if (S_ISCHR(mode))
52 return VCHR;
53
54 if (S_ISBLK(mode))
55 return VBLK;
56
57 if (S_ISFIFO(mode))
58 return VFIFO;
59
60 if (S_ISLNK(mode))
61 return VLNK;
62
63 if (S_ISSOCK(mode))
64 return VSOCK;
65
4b171585 66 return VNON;
4295b530
BB
67} /* vn_mode_to_vtype() */
68EXPORT_SYMBOL(vn_mode_to_vtype);
69
70mode_t
71vn_vtype_to_mode(vtype_t vtype)
72{
73 if (vtype == VREG)
74 return S_IFREG;
75
76 if (vtype == VDIR)
77 return S_IFDIR;
78
79 if (vtype == VCHR)
80 return S_IFCHR;
81
82 if (vtype == VBLK)
83 return S_IFBLK;
84
85 if (vtype == VFIFO)
86 return S_IFIFO;
87
88 if (vtype == VLNK)
89 return S_IFLNK;
90
91 if (vtype == VSOCK)
92 return S_IFSOCK;
93
94 return VNON;
95} /* vn_vtype_to_mode() */
96EXPORT_SYMBOL(vn_vtype_to_mode);
4b171585 97
af828292 98vnode_t *
99vn_alloc(int flag)
100{
101 vnode_t *vp;
102
103 vp = kmem_cache_alloc(vn_cache, flag);
af828292 104 if (vp != NULL) {
e4f1d29f 105 vp->v_file = NULL;
af828292 106 vp->v_type = 0;
107 }
108
10946b02 109 return (vp);
af828292 110} /* vn_alloc() */
111EXPORT_SYMBOL(vn_alloc);
112
113void
114vn_free(vnode_t *vp)
115{
116 kmem_cache_free(vn_cache, vp);
117} /* vn_free() */
118EXPORT_SYMBOL(vn_free);
119
/*
 * vn_open() - Open a file by path, Solaris style.
 *
 * Emulates the Solaris vn_open() interface on top of filp_open().
 * 'seg' must be UIO_SYSSPACE and at least one of FREAD/FWRITE must be
 * set in 'flags'; the x1/x2 arguments are unused placeholders for the
 * Solaris signature.  On success a newly allocated vnode is returned
 * through *vpp.  Returns 0 on success or a positive errno on failure.
 */
int
vn_open(const char *path, uio_seg_t seg, int flags, int mode,
	vnode_t **vpp, int x1, void *x2)
{
	struct file *fp;
	struct kstat stat;
	int rc, saved_umask = 0;
	gfp_t saved_gfp;
	vnode_t *vp;

	ASSERT(flags & (FWRITE | FREAD));
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(vpp);
	*vpp = NULL;

	if (!(flags & FCREAT) && (flags & FWRITE))
		flags |= FEXCL;

	/* Note for filp_open() the two low bits must be remapped to mean:
	 * 01 - read-only  -> 00 read-only
	 * 10 - write-only -> 01 write-only
	 * 11 - read-write -> 10 read-write
	 */
	flags--;

	/* Briefly clear the process umask so 'mode' is applied exactly
	 * when creating the file; restored right after filp_open(). */
	if (flags & FCREAT)
		saved_umask = xchg(&current->fs->umask, 0);

	fp = filp_open(path, flags, mode);

	if (flags & FCREAT)
		(void)xchg(&current->fs->umask, saved_umask);

	if (IS_ERR(fp))
		return (-PTR_ERR(fp));

	/* vfs_getattr() has taken different argument lists across kernel
	 * versions; the variant is detected at configure time. */
#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat);
#else
	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
#endif
	if (rc) {
		filp_close(fp, 0);
		return (-rc);
	}

	vp = vn_alloc(KM_SLEEP);
	if (!vp) {
		filp_close(fp, 0);
		return (ENOMEM);
	}

	/* Strip __GFP_IO|__GFP_FS from the mapping mask so page cache
	 * allocations for this file cannot recurse back into the FS;
	 * the saved mask is restored by vn_close(). */
	saved_gfp = mapping_gfp_mask(fp->f_mapping);
	mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_mode_to_vtype(stat.mode);
	vp->v_file = fp;
	vp->v_gfp_mask = saved_gfp;
	*vpp = vp;
	mutex_exit(&vp->v_lock);

	return (0);
} /* vn_open() */
EXPORT_SYMBOL(vn_open);
0b3cf046 187
0b3cf046 188int
af828292 189vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
4b171585 190 vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
0b3cf046 191{
4b171585 192 char *realpath;
12018327 193 int len, rc;
0b3cf046 194
937879f1 195 ASSERT(vp == rootdir);
0b3cf046 196
12018327 197 len = strlen(path) + 2;
10946b02 198 realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
4b171585 199 if (!realpath)
10946b02 200 return (ENOMEM);
0b3cf046 201
12018327 202 (void)snprintf(realpath, len, "/%s", path);
4b171585 203 rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
4b171585 204 kfree(realpath);
205
10946b02 206 return (rc);
4b171585 207} /* vn_openat() */
208EXPORT_SYMBOL(vn_openat);
0b3cf046 209
/*
 * vn_rdwr() - Read from or write to a vnode at a given offset.
 *
 * 'uio' selects UIO_READ or UIO_WRITE, 'seg' must be UIO_SYSSPACE and
 * the only supported ioflag is FAPPEND (transfer at the file's current
 * position instead of 'off').  On success returns 0; when 'residp' is
 * provided it receives the number of bytes NOT transferred, otherwise
 * a short transfer is reported as EIO.  Failures return positive errno.
 */
int
vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
	uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
{
	loff_t offset;
	mm_segment_t saved_fs;
	struct file *fp;
	int rc;

	ASSERT(uio == UIO_WRITE || uio == UIO_READ);
	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT((ioflag & ~FAPPEND) == 0);

	fp = vp->v_file;

	offset = off;
	if (ioflag & FAPPEND)
		offset = fp->f_pos;

	/* Writable user data segment must be briefly increased for this
	 * process so we can use the user space read call paths to write
	 * in to memory allocated by the kernel. */
	saved_fs = get_fs();
	set_fs(get_ds());

	/* NOTE(review): the bitwise test assumes UIO_WRITE is a nonzero
	 * bit distinct from UIO_READ; the ASSERT above restricts uio to
	 * those two values -- confirm against the uio_rw_t definition. */
	if (uio & UIO_WRITE)
		rc = vfs_write(fp, addr, len, &offset);
	else
		rc = vfs_read(fp, addr, len, &offset);

	set_fs(saved_fs);
	/* Advance the cached file position past the transferred bytes. */
	fp->f_pos = offset;

	if (rc < 0)
		return (-rc);

	if (residp) {
		*residp = len - rc;
	} else {
		if (rc != len)
			return (EIO);
	}

	return (0);
} /* vn_rdwr() */
EXPORT_SYMBOL(vn_rdwr);
258
/*
 * vn_close() - Close a vnode opened with vn_open()/vn_openat().
 *
 * Restores the mapping gfp mask saved at open time, closes the backing
 * struct file and frees the vnode.  Returns 0 on success or a positive
 * errno value on failure.  The flags/x* arguments are unused Solaris
 * signature placeholders.
 */
int
vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
{
	int rc;

	ASSERT(vp);
	ASSERT(vp->v_file);

	/* Undo the __GFP_IO|__GFP_FS restriction applied by vn_open(). */
	mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
	rc = filp_close(vp->v_file, 0);
	vn_free(vp);

	return (-rc);
} /* vn_close() */
EXPORT_SYMBOL(vn_close);
274
97735c39
BB
275/* vn_seek() does not actually seek it only performs bounds checking on the
276 * proposed seek. We perform minimal checking and allow vn_rdwr() to catch
277 * anything more serious. */
278int
47995fa6 279vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
97735c39
BB
280{
281 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
282}
283EXPORT_SYMBOL(vn_seek);
284
/*
 * spl_basename() takes a NULL-terminated string s as input containing a path.
 * It returns a char pointer to a string and a length that describe the
 * basename of the path. If the basename is not "." or "/", it will be an index
 * into the string. While the string should be NULL terminated, the section
 * referring to the basename is not. spl_basename is dual-licensed GPLv2+ and
 * CC0. Anyone wishing to reuse it in another codebase may pick either license.
 *
 * NOTE(review): a one-character relative path such as "a" makes i == 0
 * and is reported as "/".  Current callers appear to pass multi-component
 * paths only -- confirm before reusing this helper elsewhere.
 */
static void
spl_basename(const char *s, const char **str, int *len)
{
	size_t i, end;

	ASSERT(str);
	ASSERT(len);

	/* NULL or empty paths have basename "." by convention. */
	if (!s || !*s) {
		*str = ".";
		*len = 1;
		return;
	}

	i = strlen(s) - 1;

	/* Skip over any trailing '/' characters. */
	while (i && s[i--] == '/');

	/* Nothing but slashes (or a leading single character) remains. */
	if (i == 0) {
		*str = "/";
		*len = 1;
		return;
	}

	end = i;

	/* Scan backwards for the '/' that precedes the basename. */
	for (end = i; i; i--) {
		if (s[i] == '/') {
			*str = &s[i+1];
			*len = end - i + 1;
			return;
		}
	}

	/* No '/' found: the whole string is the basename. */
	*str = s;
	*len = end + 1;
}
330
/*
 * spl_kern_path_locked() - Resolve 'name' and return its dentry with
 * the parent directory's inode mutex held (I_MUTEX_PARENT class, as
 * vfs_unlink()/vfs_rename() require).  On success the parent path is
 * returned through 'path' with the lock held and a reference taken;
 * the caller must spl_inode_unlock() the parent inode, dput() the
 * dentry and path_put() the parent.  Returns the dentry or ERR_PTR().
 * "." and ".." basenames are rejected with -EACCES.
 */
static struct dentry *
spl_kern_path_locked(const char *name, struct path *path)
{
	struct path parent;
	struct dentry *dentry;
	const char *basename;
	int len;
	int rc;

	ASSERT(name);
	ASSERT(path);

	spl_basename(name, &basename, &len);

	/* We do not accept "." or ".." */
	if (len <= 2 && basename[0] == '.')
		if (len == 1 || basename[1] == '.')
			return (ERR_PTR(-EACCES));

	rc = kern_path(name, LOOKUP_PARENT, &parent);
	if (rc)
		return (ERR_PTR(rc));

	/* use I_MUTEX_PARENT because vfs_unlink needs it */
	spl_inode_lock_nested(parent.dentry->d_inode, I_MUTEX_PARENT);

	dentry = lookup_one_len(basename, parent.dentry, len);
	if (IS_ERR(dentry)) {
		/* Lookup failed: drop the lock and parent reference here
		 * since the caller has nothing to clean up. */
		spl_inode_unlock(parent.dentry->d_inode);
		path_put(&parent);
	} else {
		*path = parent;
	}

	return (dentry);
}
367
/* Based on do_unlinkat() from linux/fs/namei.c
 *
 * vn_remove() - Unlink the file at 'path'.  'seg' must be UIO_SYSSPACE
 * and 'flags' must be RMFILE.  Returns 0 on success or a positive
 * errno value on failure.  Trailing slashes are rejected with ENOTDIR
 * (or EISDIR/ENOENT as appropriate), matching do_unlinkat().
 */
int
vn_remove(const char *path, uio_seg_t seg, int flags)
{
	struct dentry *dentry;
	struct path parent;
	struct inode *inode = NULL;
	int rc = 0;

	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(flags == RMFILE);

	/* Returns with the parent inode locked on success. */
	dentry = spl_kern_path_locked(path, &parent);
	rc = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/* A NUL past d_name.len means the name had trailing
		 * characters (slashes) stripped by lookup. */
		if (parent.dentry->d_name.name[parent.dentry->d_name.len]) {
			rc = 0;
			goto slashes;
		}

		inode = dentry->d_inode;
		if (inode) {
			/* Pin the inode so the iput() below performs the
			 * final truncate after the unlink, as in
			 * do_unlinkat(). */
			atomic_inc(&inode->i_count);
		} else {
			rc = 0;
			goto slashes;
		}

#ifdef HAVE_2ARGS_VFS_UNLINK
		rc = vfs_unlink(parent.dentry->d_inode, dentry);
#else
		rc = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
#endif /* HAVE_2ARGS_VFS_UNLINK */
exit1:
		dput(dentry);
	} else {
		/* Lookup failed; nothing is locked or referenced. */
		return (-rc);
	}

	spl_inode_unlock(parent.dentry->d_inode);
	if (inode)
		iput(inode); /* truncate the inode here */

	path_put(&parent);
	return (-rc);

slashes:
	rc = !dentry->d_inode ? -ENOENT :
	    S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
	goto exit1;
} /* vn_remove() */
EXPORT_SYMBOL(vn_remove);
420
/* Based on do_rename() from linux/fs/namei.c
 *
 * vn_rename() - Rename 'oldname' to 'newname'.  Both paths must live
 * on the same mount (EXDEV otherwise).  Returns 0 on success or a
 * positive errno value on failure.  The x1 argument is an unused
 * Solaris signature placeholder.
 */
int
vn_rename(const char *oldname, const char *newname, int x1)
{
	struct dentry *old_dir, *new_dir;
	struct dentry *old_dentry, *new_dentry;
	struct dentry *trap;
	struct path old_parent, new_parent;
	int rc = 0;

	old_dentry = spl_kern_path_locked(oldname, &old_parent);
	if (IS_ERR(old_dentry)) {
		rc = PTR_ERR(old_dentry);
		goto exit;
	}

	/* Drop the per-parent lock; lock_rename() below takes both
	 * parents in the correct order. */
	spl_inode_unlock(old_parent.dentry->d_inode);

	new_dentry = spl_kern_path_locked(newname, &new_parent);
	if (IS_ERR(new_dentry)) {
		rc = PTR_ERR(new_dentry);
		goto exit2;
	}

	spl_inode_unlock(new_parent.dentry->d_inode);

	/* Renames may not cross mount points. */
	rc = -EXDEV;
	if (old_parent.mnt != new_parent.mnt)
		goto exit3;

	old_dir = old_parent.dentry;
	new_dir = new_parent.dentry;
	trap = lock_rename(new_dir, old_dir);

	/* source should not be ancestor of target */
	rc = -EINVAL;
	if (old_dentry == trap)
		goto exit4;

	/* target should not be an ancestor of source */
	rc = -ENOTEMPTY;
	if (new_dentry == trap)
		goto exit4;

	/* source must exist */
	rc = -ENOENT;
	if (!old_dentry->d_inode)
		goto exit4;

	/* unless the source is a directory trailing slashes give -ENOTDIR */
	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
		rc = -ENOTDIR;
		if (old_dentry->d_name.name[old_dentry->d_name.len])
			goto exit4;
		if (new_dentry->d_name.name[new_dentry->d_name.len])
			goto exit4;
	}

	/* vfs_rename() gained extra arguments over kernel versions;
	 * the variant is selected at configure time. */
#if defined(HAVE_4ARGS_VFS_RENAME)
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry);
#elif defined(HAVE_5ARGS_VFS_RENAME)
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry, NULL);
#else
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry, NULL, 0);
#endif
exit4:
	unlock_rename(new_dir, old_dir);
exit3:
	dput(new_dentry);
	path_put(&new_parent);
exit2:
	dput(old_dentry);
	path_put(&old_parent);
exit:
	return (-rc);
}
EXPORT_SYMBOL(vn_rename);
0b3cf046 501
/*
 * vn_getattr() - Fill a Solaris vattr_t from the vnode's backing file.
 *
 * Queries the kernel with vfs_getattr() and translates the resulting
 * kstat into the vattr layout.  Returns 0 on success or a positive
 * errno value on failure.  flags/x3/x4 are unused placeholders.
 */
int
vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
{
	struct file *fp;
	struct kstat stat;
	int rc;

	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(vap);

	fp = vp->v_file;

	/* vfs_getattr() argument list varies by kernel version. */
#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
	    AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat);
#else
	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
#endif
	if (rc)
		return (-rc);

	/* Translate the Linux kstat into the Solaris vattr layout;
	 * kuid/kgid are converted back to plain ids. */
	vap->va_type = vn_mode_to_vtype(stat.mode);
	vap->va_mode = stat.mode;
	vap->va_uid = KUID_TO_SUID(stat.uid);
	vap->va_gid = KGID_TO_SGID(stat.gid);
	vap->va_fsid = 0;
	vap->va_nodeid = stat.ino;
	vap->va_nlink = stat.nlink;
	vap->va_size = stat.size;
	vap->va_blksize = stat.blksize;
	vap->va_atime = stat.atime;
	vap->va_mtime = stat.mtime;
	vap->va_ctime = stat.ctime;
	vap->va_rdev = stat.rdev;
	vap->va_nblocks = stat.blocks;

	return (0);
}
EXPORT_SYMBOL(vn_getattr);
544
/*
 * vn_fsync() - Flush a vnode's dirty data to stable storage.
 *
 * FDSYNC in 'flags' requests a data-only sync (fdatasync semantics).
 * Returns 0 on success or a positive errno value on failure.
 * x3/x4 are unused Solaris signature placeholders.
 */
int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
{
	int datasync = 0;
	int error;
	int fstrans;

	ASSERT(vp);
	ASSERT(vp->v_file);

	if (flags & FDSYNC)
		datasync = 1;

	/*
	 * May enter XFS which generates a warning when PF_FSTRANS is set.
	 * To avoid this the flag is cleared over vfs_sync() and then reset.
	 */
	fstrans = __spl_pf_fstrans_check();
	if (fstrans)
		current->flags &= ~(__SPL_PF_FSTRANS);

	/* spl_filp_fsync() returns a negative errno; negate to the
	 * positive convention used by this file. */
	error = -spl_filp_fsync(vp->v_file, datasync);
	if (fstrans)
		current->flags |= __SPL_PF_FSTRANS;

	return (error);
} /* vn_fsync() */
EXPORT_SYMBOL(vn_fsync);
af828292 572
/*
 * vn_space() - Free (punch a hole in) a byte range of a file.
 *
 * Only cmd == F_FREESP with l_whence == 0 is supported.  The range is
 * [l_start, l_start + l_len).  Preferentially uses the file system's
 * fallocate() FALLOC_FL_PUNCH_HOLE support, falling back to the legacy
 * inode truncate_range() callback when the kernel provides it.
 * Returns 0 on success, EOPNOTSUPP when unsupported, or another
 * positive errno on failure.
 */
int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
	offset_t offset, void *x6, void *x7)
{
	int error = EOPNOTSUPP;
#ifdef FALLOC_FL_PUNCH_HOLE
	int fstrans;
#endif

	if (cmd != F_FREESP || bfp->l_whence != 0)
		return (EOPNOTSUPP);

	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);

#ifdef FALLOC_FL_PUNCH_HOLE
	/*
	 * May enter XFS which generates a warning when PF_FSTRANS is set.
	 * To avoid this the flag is cleared over vfs_sync() and then reset.
	 */
	fstrans = __spl_pf_fstrans_check();
	if (fstrans)
		current->flags &= ~(__SPL_PF_FSTRANS);

	/*
	 * When supported by the underlying file system preferentially
	 * use the fallocate() callback to preallocate the space.
	 */
	error = -spl_filp_fallocate(vp->v_file,
	    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
	    bfp->l_start, bfp->l_len);

	if (fstrans)
		current->flags |= __SPL_PF_FSTRANS;

	if (error == 0)
		return (0);
#endif

#ifdef HAVE_INODE_TRUNCATE_RANGE
	if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
	    vp->v_file->f_dentry->d_inode->i_op &&
	    vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
		off_t end = bfp->l_start + bfp->l_len;
		/*
		 * Judging from the code in shmem_truncate_range(),
		 * it seems the kernel expects the end offset to be
		 * inclusive and aligned to the end of a page.
		 */
		if (end % PAGE_SIZE != 0) {
			end &= ~(off_t)(PAGE_SIZE - 1);
			if (end <= bfp->l_start)
				return (0);
		}
		--end;

		vp->v_file->f_dentry->d_inode->i_op->truncate_range(
		    vp->v_file->f_dentry->d_inode,
		    bfp->l_start, end
		);
		return (0);
	}
#endif

	return (error);
}
EXPORT_SYMBOL(vn_space);
640
e4f1d29f 641/* Function must be called while holding the vn_file_lock */
642static file_t *
f6188ddd 643file_find(int fd, struct task_struct *task)
e4f1d29f 644{
645 file_t *fp;
646
937879f1 647 ASSERT(spin_is_locked(&vn_file_lock));
e4f1d29f 648
649 list_for_each_entry(fp, &vn_file_list, f_list) {
f6188ddd 650 if (fd == fp->f_fd && fp->f_task == task) {
937879f1 651 ASSERT(atomic_read(&fp->f_ref) != 0);
e4f1d29f 652 return fp;
653 }
654 }
655
656 return NULL;
657} /* file_find() */
658
/*
 * vn_getf() - Look up (or create) the tracked file_t for an fd.
 *
 * If the fd is already tracked for the current task an additional
 * reference is taken; otherwise a new file_t/vnode pair is allocated,
 * the fd's struct file is referenced via fget() and the entry is added
 * to the global tracking list.  Returns the file_t or NULL on failure.
 */
file_t *
vn_getf(int fd)
{
	struct kstat stat;
	struct file *lfp;
	file_t *fp;
	vnode_t *vp;
	int rc = 0;

	if (fd < 0)
		return (NULL);

	/* Already open just take an extra reference */
	spin_lock(&vn_file_lock);

	fp = file_find(fd, current);
	if (fp) {
		lfp = fget(fd);
		fput(fp->f_file);
		/*
		 * areleasef() can cause us to see a stale reference when
		 * userspace has reused a file descriptor before areleasef()
		 * has run. fput() the stale reference and replace it. We
		 * retain the original reference count such that the concurrent
		 * areleasef() will decrement its reference and terminate.
		 */
		if (lfp != fp->f_file) {
			fp->f_file = lfp;
			fp->f_vnode->v_file = lfp;
		}
		atomic_inc(&fp->f_ref);
		spin_unlock(&vn_file_lock);
		return (fp);
	}

	spin_unlock(&vn_file_lock);

	/* File was not yet opened create the object and setup */
	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
	if (fp == NULL)
		goto out;

	mutex_enter(&fp->f_lock);

	fp->f_fd = fd;
	fp->f_task = current;
	fp->f_offset = 0;
	atomic_inc(&fp->f_ref);

	lfp = fget(fd);
	if (lfp == NULL)
		goto out_mutex;

	vp = vn_alloc(KM_SLEEP);
	if (vp == NULL)
		goto out_fget;

	/* vfs_getattr() argument list varies by kernel version. */
#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&lfp->f_path, &stat);
#else
	rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
#endif
	if (rc)
		goto out_vnode;

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_mode_to_vtype(stat.mode);
	vp->v_file = lfp;
	mutex_exit(&vp->v_lock);

	fp->f_vnode = vp;
	fp->f_file = lfp;

	/* Put it on the tracking list */
	spin_lock(&vn_file_lock);
	list_add(&fp->f_list, &vn_file_list);
	spin_unlock(&vn_file_lock);

	mutex_exit(&fp->f_lock);
	return (fp);

out_vnode:
	vn_free(vp);
out_fget:
	fput(lfp);
out_mutex:
	mutex_exit(&fp->f_lock);
	kmem_cache_free(vn_file_cache, fp);
out:
	return (NULL);
} /* getf() */
EXPORT_SYMBOL(getf);
753
/* Drop the final reference on a tracking entry: release the struct
 * file and vnode, then return the entry to its cache.  The entry must
 * already be unlinked from vn_file_list by the caller. */
static void releasef_locked(file_t *fp)
{
	ASSERT(fp->f_file);
	ASSERT(fp->f_vnode);

	/* Unlinked from list, no refs, safe to free outside mutex */
	fput(fp->f_file);
	vn_free(fp->f_vnode);

	kmem_cache_free(vn_file_cache, fp);
}
765
/* Release a reference taken by vn_getf() for the current process;
 * thin wrapper around vn_areleasef(). */
void
vn_releasef(int fd)
{
	areleasef(fd, P_FINFO(current));
}
EXPORT_SYMBOL(releasef);
772
/*
 * vn_areleasef() - Drop a reference on a tracked file descriptor.
 *
 * When the last reference is dropped the entry is removed from the
 * tracking list and freed.  NOTE(review): the uf_info_t pointer is
 * cast straight to a task_struct -- P_FINFO() evidently yields the
 * task itself (see vn_releasef()); confirm against P_FINFO's
 * definition before changing.
 */
void
vn_areleasef(int fd, uf_info_t *fip)
{
	file_t *fp;
	struct task_struct *task = (struct task_struct *)fip;

	if (fd < 0)
		return;

	spin_lock(&vn_file_lock);
	fp = file_find(fd, task);
	if (fp) {
		atomic_dec(&fp->f_ref);
		/* Other holders remain; entry stays on the list. */
		if (atomic_read(&fp->f_ref) > 0) {
			spin_unlock(&vn_file_lock);
			return;
		}

		list_del(&fp->f_list);
		releasef_locked(fp);
	}
	spin_unlock(&vn_file_lock);

	return;
} /* releasef() */
EXPORT_SYMBOL(areleasef);
799
e4f1d29f 800
/* Replace the working directory of 'fs' with 'path', taking a new
 * reference on the path and dropping the reference on the previous
 * pwd.  The fs_struct lock changed from rwlock to spinlock across
 * kernel versions, hence the two configure-time variants. */
static void
#ifdef HAVE_SET_FS_PWD_WITH_CONST
vn_set_fs_pwd(struct fs_struct *fs, const struct path *path)
#else
vn_set_fs_pwd(struct fs_struct *fs, struct path *path)
#endif /* HAVE_SET_FS_PWD_WITH_CONST */
{
	struct path old_pwd;

#ifdef HAVE_FS_STRUCT_SPINLOCK
	spin_lock(&fs->lock);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get(path);
	spin_unlock(&fs->lock);
#else
	write_lock(&fs->lock);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get(path);
	write_unlock(&fs->lock);
#endif /* HAVE_FS_STRUCT_SPINLOCK */

	/* Drop the old pwd reference outside the lock. */
	if (old_pwd.dentry)
		path_put(&old_pwd);
}
51a727e9
BB
827
/*
 * vn_set_pwd() - Set the current working directory of the calling
 * process to 'filename' (a kernel-space path).  Returns 0 on success
 * or a positive errno value on failure.
 */
int
vn_set_pwd(const char *filename)
{
	struct path path;
	mm_segment_t saved_fs;
	int rc;

	/*
	 * user_path_dir() and __user_walk() both expect 'filename' to be
	 * a user space address so we must briefly increase the data segment
	 * size to ensure strncpy_from_user() does not fail with -EFAULT.
	 */
	saved_fs = get_fs();
	set_fs(get_ds());

	rc = user_path_dir(filename, &path);
	if (rc)
		goto out;

	/* Require search permission on the directory, as chdir(2) does. */
	rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
	if (rc)
		goto dput_and_out;

	vn_set_fs_pwd(current->fs, &path);

dput_and_out:
	path_put(&path);
out:
	set_fs(saved_fs);

	return (-rc);
} /* vn_set_pwd() */
EXPORT_SYMBOL(vn_set_pwd);
861
/* Cache constructor: initialize the per-vnode mutex once per object. */
static int
vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct vnode *vp = buf;

	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
} /* vn_cache_constructor() */
871
/* Cache destructor: tear down the per-vnode mutex. */
static void
vn_cache_destructor(void *buf, void *cdrarg)
{
	struct vnode *vp = buf;

	mutex_destroy(&vp->v_lock);
} /* vn_cache_destructor() */
879
/* Cache constructor: zero the refcount and initialize the per-file
 * mutex and tracking-list head. */
static int
vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
	file_t *fp = buf;

	atomic_set(&fp->f_ref, 0);
	mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
	INIT_LIST_HEAD(&fp->f_list);

	return (0);
} /* file_cache_constructor() */
891
/* Cache destructor: tear down the per-file mutex. */
static void
vn_file_cache_destructor(void *buf, void *cdrarg)
{
	file_t *fp = buf;

	mutex_destroy(&fp->f_lock);
} /* vn_file_cache_destructor() */
899
/* Module init: create the vnode and file-descriptor tracking caches.
 * NOTE(review): the kmem_cache_create() results are not checked here;
 * presumably later allocations fail gracefully -- confirm. */
int
spl_vn_init(void)
{
	vn_cache = kmem_cache_create("spl_vn_cache",
	    sizeof(struct vnode), 64,
	    vn_cache_constructor,
	    vn_cache_destructor,
	    NULL, NULL, NULL, 0);

	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
	    sizeof(file_t), 64,
	    vn_file_cache_constructor,
	    vn_file_cache_destructor,
	    NULL, NULL, NULL, 0);
	return (0);
} /* vn_init() */
916
/* Module fini: forcibly release any file references still on the
 * tracking list (warning about the leak), then destroy both caches. */
void
spl_vn_fini(void)
{
	file_t *fp, *next_fp;
	int leaked = 0;

	spin_lock(&vn_file_lock);

	list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
		list_del(&fp->f_list);
		releasef_locked(fp);
		leaked++;
	}

	spin_unlock(&vn_file_lock);

	if (leaked > 0)
		printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);

	kmem_cache_destroy(vn_file_cache);
	kmem_cache_destroy(vn_cache);

	return;
} /* vn_fini() */