2 * This file is part of the SPL: Solaris Porting Layer.
4 * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
5 * Produced at Lawrence Livermore National Laboratory
7 * Brian Behlendorf <behlendorf1@llnl.gov>,
8 * Herb Wartens <wartens2@llnl.gov>,
9 * Jim Garlick <garlick@llnl.gov>
12 * This is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * This is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 * You should have received a copy of the GNU General Public License along
23 * with this program; if not, write to the Free Software Foundation, Inc.,
24 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include <sys/sysmacros.h>
28 #include <sys/vnode.h>
31 #ifdef DEBUG_SUBSYSTEM
32 #undef DEBUG_SUBSYSTEM
35 #define DEBUG_SUBSYSTEM S_VNODE
37 vnode_t
*rootdir
= (vnode_t
*)0xabcd1234;
38 EXPORT_SYMBOL(rootdir
);
40 static spl_kmem_cache_t
*vn_cache
;
41 static spl_kmem_cache_t
*vn_file_cache
;
43 static spinlock_t vn_file_lock
= SPIN_LOCK_UNLOCKED
;
44 static LIST_HEAD(vn_file_list
);
47 vn_get_sol_type(umode_t mode
)
74 } /* vn_get_sol_type() */
82 vp
= kmem_cache_alloc(vn_cache
, flag
);
90 EXPORT_SYMBOL(vn_alloc
);
96 kmem_cache_free(vn_cache
, vp
);
99 EXPORT_SYMBOL(vn_free
);
102 vn_open(const char *path
, uio_seg_t seg
, int flags
, int mode
,
103 vnode_t
**vpp
, int x1
, void *x2
)
107 int rc
, saved_umask
= 0;
111 ASSERT(flags
& (FWRITE
| FREAD
));
112 ASSERT(seg
== UIO_SYSSPACE
);
116 if (!(flags
& FCREAT
) && (flags
& FWRITE
))
119 /* Note for filp_open(), the two low bits must be remapped to mean:
120 * 01 - read-only -> 00 read-only
121 * 10 - write-only -> 01 write-only
122 * 11 - read-write -> 10 read-write
127 saved_umask
= xchg(&current
->fs
->umask
, 0);
129 fp
= filp_open(path
, flags
, mode
);
132 (void)xchg(&current
->fs
->umask
, saved_umask
);
135 RETURN(-PTR_ERR(fp
));
137 rc
= vfs_getattr(fp
->f_vfsmnt
, fp
->f_dentry
, &stat
);
143 vp
= vn_alloc(KM_SLEEP
);
149 mutex_enter(&vp
->v_lock
);
150 vp
->v_type
= vn_get_sol_type(stat
.mode
);
153 mutex_exit(&vp
->v_lock
);
157 EXPORT_SYMBOL(vn_open
);
160 vn_openat(const char *path
, uio_seg_t seg
, int flags
, int mode
,
161 vnode_t
**vpp
, int x1
, void *x2
, vnode_t
*vp
, int fd
)
167 ASSERT(vp
== rootdir
);
169 len
= strlen(path
) + 2;
170 realpath
= kmalloc(len
, GFP_KERNEL
);
174 (void)snprintf(realpath
, len
, "/%s", path
);
175 rc
= vn_open(realpath
, seg
, flags
, mode
, vpp
, x1
, x2
);
180 EXPORT_SYMBOL(vn_openat
);
183 vn_rdwr(uio_rw_t uio
, vnode_t
*vp
, void *addr
, ssize_t len
, offset_t off
,
184 uio_seg_t seg
, int x1
, rlim64_t x2
, void *x3
, ssize_t
*residp
)
187 mm_segment_t saved_fs
;
192 ASSERT(uio
== UIO_WRITE
|| uio
== UIO_READ
);
195 ASSERT(seg
== UIO_SYSSPACE
);
197 ASSERT(x2
== RLIM64_INFINITY
);
202 /* Writable user data segment must be briefly increased for this
203 * process so we can use the user space read call paths to write
204 * into memory allocated by the kernel. */
209 rc
= vfs_write(fp
, addr
, len
, &offset
);
211 rc
= vfs_read(fp
, addr
, len
, &offset
);
227 EXPORT_SYMBOL(vn_rdwr
);
230 vn_close(vnode_t
*vp
, int flags
, int x1
, int x2
, void *x3
, void *x4
)
238 rc
= filp_close(vp
->v_file
, 0);
243 EXPORT_SYMBOL(vn_close
);
245 /* vn_seek() does not actually seek; it only performs bounds checking on the
246 * proposed seek. We perform minimal checking and allow vn_rdwr() to catch
247 * anything more serious. */
249 vn_seek(vnode_t
*vp
, offset_t ooff
, offset_t
*noffp
, caller_context_t
*ct
)
251 return ((*noffp
< 0 || *noffp
> MAXOFFSET_T
) ? EINVAL
: 0);
253 EXPORT_SYMBOL(vn_seek
);
255 static struct dentry
*
256 vn_lookup_hash(struct nameidata
*nd
)
258 return lookup_one_len(nd
->last
.name
, nd
->nd_dentry
, nd
->last
.len
);
259 } /* vn_lookup_hash() */
262 vn_path_release(struct nameidata
*nd
)
268 /* Modified do_unlinkat() from linux/fs/namei.c, only uses exported symbols */
270 vn_remove(const char *path
, uio_seg_t seg
, int flags
)
272 struct dentry
*dentry
;
274 struct inode
*inode
= NULL
;
278 ASSERT(seg
== UIO_SYSSPACE
);
279 ASSERT(flags
== RMFILE
);
281 rc
= path_lookup(path
, LOOKUP_PARENT
, &nd
);
286 if (nd
.last_type
!= LAST_NORM
)
289 #ifdef HAVE_INODE_I_MUTEX
290 mutex_lock_nested(&nd
.nd_dentry
->d_inode
->i_mutex
, I_MUTEX_PARENT
);
292 down(&nd
.nd_dentry
->d_inode
->i_sem
);
293 #endif /* HAVE_INODE_I_MUTEX */
294 dentry
= vn_lookup_hash(&nd
);
295 rc
= PTR_ERR(dentry
);
296 if (!IS_ERR(dentry
)) {
297 /* Why not before? Because we want correct rc value */
298 if (nd
.last
.name
[nd
.last
.len
])
301 inode
= dentry
->d_inode
;
303 atomic_inc(&inode
->i_count
);
304 #ifdef HAVE_2ARGS_VFS_UNLINK
305 rc
= vfs_unlink(nd
.nd_dentry
->d_inode
, dentry
);
307 rc
= vfs_unlink(nd
.nd_dentry
->d_inode
, dentry
, nd
.nd_mnt
);
308 #endif /* HAVE_2ARGS_VFS_UNLINK */
312 #ifdef HAVE_INODE_I_MUTEX
313 mutex_unlock(&nd
.nd_dentry
->d_inode
->i_mutex
);
315 up(&nd
.nd_dentry
->d_inode
->i_sem
);
316 #endif /* HAVE_INODE_I_MUTEX */
318 iput(inode
); /* truncate the inode here */
320 vn_path_release(&nd
);
325 rc
= !dentry
->d_inode
? -ENOENT
:
326 S_ISDIR(dentry
->d_inode
->i_mode
) ? -EISDIR
: -ENOTDIR
;
329 EXPORT_SYMBOL(vn_remove
);
331 /* Modified do_rename() from linux/fs/namei.c, only uses exported symbols */
333 vn_rename(const char *oldname
, const char *newname
, int x1
)
335 struct dentry
*old_dir
, *new_dir
;
336 struct dentry
*old_dentry
, *new_dentry
;
338 struct nameidata oldnd
, newnd
;
342 rc
= path_lookup(oldname
, LOOKUP_PARENT
, &oldnd
);
346 rc
= path_lookup(newname
, LOOKUP_PARENT
, &newnd
);
351 if (oldnd
.nd_mnt
!= newnd
.nd_mnt
)
354 old_dir
= oldnd
.nd_dentry
;
356 if (oldnd
.last_type
!= LAST_NORM
)
359 new_dir
= newnd
.nd_dentry
;
360 if (newnd
.last_type
!= LAST_NORM
)
363 trap
= lock_rename(new_dir
, old_dir
);
365 old_dentry
= vn_lookup_hash(&oldnd
);
367 rc
= PTR_ERR(old_dentry
);
368 if (IS_ERR(old_dentry
))
371 /* source must exist */
373 if (!old_dentry
->d_inode
)
376 /* unless the source is a directory, trailing slashes give -ENOTDIR */
377 if (!S_ISDIR(old_dentry
->d_inode
->i_mode
)) {
379 if (oldnd
.last
.name
[oldnd
.last
.len
])
381 if (newnd
.last
.name
[newnd
.last
.len
])
385 /* source should not be ancestor of target */
387 if (old_dentry
== trap
)
390 new_dentry
= vn_lookup_hash(&newnd
);
391 rc
= PTR_ERR(new_dentry
);
392 if (IS_ERR(new_dentry
))
395 /* target should not be an ancestor of source */
397 if (new_dentry
== trap
)
400 #ifdef HAVE_4ARGS_VFS_RENAME
401 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
,
402 new_dir
->d_inode
, new_dentry
);
404 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
, oldnd
.nd_mnt
,
405 new_dir
->d_inode
, new_dentry
, newnd
.nd_mnt
);
406 #endif /* HAVE_4ARGS_VFS_RENAME */
412 unlock_rename(new_dir
, old_dir
);
414 vn_path_release(&newnd
);
416 vn_path_release(&oldnd
);
420 EXPORT_SYMBOL(vn_rename
);
423 vn_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, void *x3
, void *x4
)
436 rc
= vfs_getattr(fp
->f_vfsmnt
, fp
->f_dentry
, &stat
);
440 vap
->va_type
= vn_get_sol_type(stat
.mode
);
441 vap
->va_mode
= stat
.mode
;
442 vap
->va_uid
= stat
.uid
;
443 vap
->va_gid
= stat
.gid
;
445 vap
->va_nodeid
= stat
.ino
;
446 vap
->va_nlink
= stat
.nlink
;
447 vap
->va_size
= stat
.size
;
448 vap
->va_blocksize
= stat
.blksize
;
449 vap
->va_atime
.tv_sec
= stat
.atime
.tv_sec
;
450 vap
->va_atime
.tv_usec
= stat
.atime
.tv_nsec
/ NSEC_PER_USEC
;
451 vap
->va_mtime
.tv_sec
= stat
.mtime
.tv_sec
;
452 vap
->va_mtime
.tv_usec
= stat
.mtime
.tv_nsec
/ NSEC_PER_USEC
;
453 vap
->va_ctime
.tv_sec
= stat
.ctime
.tv_sec
;
454 vap
->va_ctime
.tv_usec
= stat
.ctime
.tv_nsec
/ NSEC_PER_USEC
;
455 vap
->va_rdev
= stat
.rdev
;
456 vap
->va_blocks
= stat
.blocks
;
460 EXPORT_SYMBOL(vn_getattr
);
462 int vn_fsync(vnode_t
*vp
, int flags
, void *x3
, void *x4
)
473 RETURN(-file_fsync(vp
->v_file
, vp
->v_file
->f_dentry
, datasync
));
475 EXPORT_SYMBOL(vn_fsync
);
477 /* Function must be called while holding the vn_file_lock */
483 ASSERT(spin_is_locked(&vn_file_lock
));
485 list_for_each_entry(fp
, &vn_file_list
, f_list
) {
486 if (fd
== fp
->f_fd
) {
487 ASSERT(atomic_read(&fp
->f_ref
) != 0);
505 /* Already open, just take an extra reference */
506 spin_lock(&vn_file_lock
);
510 atomic_inc(&fp
->f_ref
);
511 spin_unlock(&vn_file_lock
);
515 spin_unlock(&vn_file_lock
);
517 /* File was not yet opened; create the object and set it up */
518 fp
= kmem_cache_alloc(vn_file_cache
, KM_SLEEP
);
522 mutex_enter(&fp
->f_lock
);
526 atomic_inc(&fp
->f_ref
);
532 vp
= vn_alloc(KM_SLEEP
);
536 if (vfs_getattr(lfp
->f_vfsmnt
, lfp
->f_dentry
, &stat
))
539 mutex_enter(&vp
->v_lock
);
540 vp
->v_type
= vn_get_sol_type(stat
.mode
);
542 mutex_exit(&vp
->v_lock
);
547 /* Put it on the tracking list */
548 spin_lock(&vn_file_lock
);
549 list_add(&fp
->f_list
, &vn_file_list
);
550 spin_unlock(&vn_file_lock
);
552 mutex_exit(&fp
->f_lock
);
560 mutex_exit(&fp
->f_lock
);
561 kmem_cache_free(vn_file_cache
, fp
);
567 static void releasef_locked(file_t
*fp
)
572 /* Unlinked from list, no refs, safe to free outside mutex */
574 vn_free(fp
->f_vnode
);
576 kmem_cache_free(vn_file_cache
, fp
);
585 spin_lock(&vn_file_lock
);
588 atomic_dec(&fp
->f_ref
);
589 if (atomic_read(&fp
->f_ref
) > 0) {
590 spin_unlock(&vn_file_lock
);
595 list_del(&fp
->f_list
);
598 spin_unlock(&vn_file_lock
);
603 EXPORT_SYMBOL(releasef
);
605 #ifndef HAVE_SET_FS_PWD
606 # ifdef HAVE_2ARGS_SET_FS_PWD
607 /* Used from 2.6.25 - 2.6.31+ */
609 set_fs_pwd(struct fs_struct
*fs
, struct path
*path
)
613 write_lock(&fs
->lock
);
617 write_unlock(&fs
->lock
);
623 /* Used from 2.6.11 - 2.6.24 */
625 set_fs_pwd(struct fs_struct
*fs
, struct vfsmount
*mnt
, struct dentry
*dentry
)
627 struct dentry
*old_pwd
;
628 struct vfsmount
*old_pwdmnt
;
630 write_lock(&fs
->lock
);
632 old_pwdmnt
= fs
->pwdmnt
;
633 fs
->pwdmnt
= mntget(mnt
);
634 fs
->pwd
= dget(dentry
);
635 write_unlock(&fs
->lock
);
642 # endif /* HAVE_2ARGS_SET_FS_PWD */
643 #endif /* HAVE_SET_FS_PWD */
646 vn_set_pwd(const char *filename
)
648 #ifdef HAVE_2ARGS_SET_FS_PWD
653 rc
= user_path_dir(filename
, &path
);
657 rc
= inode_permission(path
.dentry
->d_inode
, MAY_EXEC
| MAY_ACCESS
);
659 GOTO(dput_and_out
, rc
);
661 set_fs_pwd(current
->fs
, &path
);
670 rc
= __user_walk(filename
,
671 LOOKUP_FOLLOW
|LOOKUP_DIRECTORY
|LOOKUP_CHDIR
, &nd
);
675 rc
= vfs_permission(&nd
, MAY_EXEC
);
677 GOTO(dput_and_out
, rc
);
679 set_fs_pwd(current
->fs
, nd
.nd_mnt
, nd
.nd_dentry
);
682 vn_path_release(&nd
);
683 #endif /* HAVE_2ARGS_SET_FS_PWD */
687 EXPORT_SYMBOL(vn_set_pwd
);
690 vn_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
692 struct vnode
*vp
= buf
;
694 mutex_init(&vp
->v_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
697 } /* vn_cache_constructor() */
700 vn_cache_destructor(void *buf
, void *cdrarg
)
702 struct vnode
*vp
= buf
;
704 mutex_destroy(&vp
->v_lock
);
705 } /* vn_cache_destructor() */
708 vn_file_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
712 atomic_set(&fp
->f_ref
, 0);
713 mutex_init(&fp
->f_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
714 INIT_LIST_HEAD(&fp
->f_list
);
717 } /* vn_file_cache_constructor() */
720 vn_file_cache_destructor(void *buf
, void *cdrarg
)
724 mutex_destroy(&fp
->f_lock
);
725 } /* vn_file_cache_destructor() */
731 vn_cache
= kmem_cache_create("spl_vn_cache",
732 sizeof(struct vnode
), 64,
733 vn_cache_constructor
,
735 NULL
, NULL
, NULL
, 0);
737 vn_file_cache
= kmem_cache_create("spl_vn_file_cache",
739 vn_file_cache_constructor
,
740 vn_file_cache_destructor
,
741 NULL
, NULL
, NULL
, 0);
748 file_t
*fp
, *next_fp
;
752 spin_lock(&vn_file_lock
);
754 list_for_each_entry_safe(fp
, next_fp
, &vn_file_list
, f_list
) {
755 list_del(&fp
->f_list
);
760 kmem_cache_destroy(vn_file_cache
);
761 vn_file_cache
= NULL
;
762 spin_unlock(&vn_file_lock
);
765 CWARN("Warning %d files leaked\n", leaked
);
767 kmem_cache_destroy(vn_cache
);