1 /*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://github.com/behlendorf/spl/>.
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Vnode Implementation.
25 \*****************************************************************************/
27 #include <sys/vnode.h>
28 #include <spl-debug.h>
30 #ifdef SS_DEBUG_SUBSYS
31 #undef SS_DEBUG_SUBSYS
34 #define SS_DEBUG_SUBSYS SS_VNODE
36 vnode_t
*rootdir
= (vnode_t
*)0xabcd1234;
37 EXPORT_SYMBOL(rootdir
);
39 static spl_kmem_cache_t
*vn_cache
;
40 static spl_kmem_cache_t
*vn_file_cache
;
42 static DEFINE_SPINLOCK(vn_file_lock
);
43 static LIST_HEAD(vn_file_list
);
/*
 * Function pointer for kern_path_parent(), compiled in only when
 * HAVE_KERN_PATH_PARENT_HEADER is defined and HAVE_KERN_PATH_PARENT_SYMBOL
 * is not.  It starts out as SYMBOL_POISON and is assigned its real value by
 * spl_vn_init_kallsyms_lookup(), which looks the symbol up by name.
 */
#if defined(HAVE_KERN_PATH_PARENT_HEADER) && \
    !defined(HAVE_KERN_PATH_PARENT_SYMBOL)
kern_path_parent_t kern_path_parent_fn = SYMBOL_POISON;
EXPORT_SYMBOL(kern_path_parent_fn);
#endif /* HAVE_KERN_PATH_PARENT_HEADER && !HAVE_KERN_PATH_PARENT_SYMBOL */
53 vn_mode_to_vtype(mode_t mode
)
80 } /* vn_mode_to_vtype() */
81 EXPORT_SYMBOL(vn_mode_to_vtype
);
84 vn_vtype_to_mode(vtype_t vtype
)
108 } /* vn_vtype_to_mode() */
109 EXPORT_SYMBOL(vn_vtype_to_mode
);
117 vp
= kmem_cache_alloc(vn_cache
, flag
);
125 EXPORT_SYMBOL(vn_alloc
);
131 kmem_cache_free(vn_cache
, vp
);
134 EXPORT_SYMBOL(vn_free
);
137 vn_open(const char *path
, uio_seg_t seg
, int flags
, int mode
,
138 vnode_t
**vpp
, int x1
, void *x2
)
142 int rc
, saved_umask
= 0;
147 ASSERT(flags
& (FWRITE
| FREAD
));
148 ASSERT(seg
== UIO_SYSSPACE
);
152 if (!(flags
& FCREAT
) && (flags
& FWRITE
))
155 /* Note for filp_open() the two low bits must be remapped to mean:
156 * 01 - read-only -> 00 read-only
157 * 10 - write-only -> 01 write-only
158 * 11 - read-write -> 10 read-write */
163 saved_umask
= xchg(&current
->fs
->umask
, 0);
165 fp
= filp_open(path
, flags
, mode
);
168 (void)xchg(&current
->fs
->umask
, saved_umask
);
171 SRETURN(-PTR_ERR(fp
));
173 rc
= vfs_getattr(fp
->f_vfsmnt
, fp
->f_dentry
, &stat
);
179 vp
= vn_alloc(KM_SLEEP
);
185 saved_gfp
= mapping_gfp_mask(fp
->f_mapping
);
186 mapping_set_gfp_mask(fp
->f_mapping
, saved_gfp
& ~(__GFP_IO
|__GFP_FS
));
188 mutex_enter(&vp
->v_lock
);
189 vp
->v_type
= vn_mode_to_vtype(stat
.mode
);
191 vp
->v_gfp_mask
= saved_gfp
;
193 mutex_exit(&vp
->v_lock
);
197 EXPORT_SYMBOL(vn_open
);
200 vn_openat(const char *path
, uio_seg_t seg
, int flags
, int mode
,
201 vnode_t
**vpp
, int x1
, void *x2
, vnode_t
*vp
, int fd
)
207 ASSERT(vp
== rootdir
);
209 len
= strlen(path
) + 2;
210 realpath
= kmalloc(len
, GFP_KERNEL
);
214 (void)snprintf(realpath
, len
, "/%s", path
);
215 rc
= vn_open(realpath
, seg
, flags
, mode
, vpp
, x1
, x2
);
220 EXPORT_SYMBOL(vn_openat
);
223 vn_rdwr(uio_rw_t uio
, vnode_t
*vp
, void *addr
, ssize_t len
, offset_t off
,
224 uio_seg_t seg
, int ioflag
, rlim64_t x2
, void *x3
, ssize_t
*residp
)
227 mm_segment_t saved_fs
;
232 ASSERT(uio
== UIO_WRITE
|| uio
== UIO_READ
);
235 ASSERT(seg
== UIO_SYSSPACE
);
236 ASSERT((ioflag
& ~FAPPEND
) == 0);
237 ASSERT(x2
== RLIM64_INFINITY
);
242 if (ioflag
& FAPPEND
)
245 /* Writable user data segment must be briefly increased for this
246 * process so we can use the user space read call paths to write
247 * into memory allocated by the kernel. */
252 rc
= vfs_write(fp
, addr
, len
, &offset
);
254 rc
= vfs_read(fp
, addr
, len
, &offset
);
271 EXPORT_SYMBOL(vn_rdwr
);
274 vn_close(vnode_t
*vp
, int flags
, int x1
, int x2
, void *x3
, void *x4
)
282 mapping_set_gfp_mask(vp
->v_file
->f_mapping
, vp
->v_gfp_mask
);
283 rc
= filp_close(vp
->v_file
, 0);
288 EXPORT_SYMBOL(vn_close
);
290 /* vn_seek() does not actually seek; it only performs bounds checking on
291 * the proposed seek. We perform minimal checking and allow vn_rdwr() to
292 * catch anything more serious. */
294 vn_seek(vnode_t
*vp
, offset_t ooff
, offset_t
*noffp
, void *ct
)
296 return ((*noffp
< 0 || *noffp
> MAXOFFSET_T
) ? EINVAL
: 0);
298 EXPORT_SYMBOL(vn_seek
);
300 static struct dentry
*
301 vn_lookup_hash(struct nameidata
*nd
)
303 return lookup_one_len((const char *)nd
->last
.name
,
304 nd
->nd_dentry
, nd
->last
.len
);
305 } /* vn_lookup_hash() */
308 vn_path_release(struct nameidata
*nd
)
314 /* Modified do_unlinkat() from linux/fs/namei.c, only uses exported symbols */
316 vn_remove(const char *path
, uio_seg_t seg
, int flags
)
318 struct dentry
*dentry
;
320 struct inode
*inode
= NULL
;
324 ASSERT(seg
== UIO_SYSSPACE
);
325 ASSERT(flags
== RMFILE
);
327 rc
= spl_kern_path_parent(path
, &nd
);
332 if (nd
.last_type
!= LAST_NORM
)
335 spl_inode_lock_nested(nd
.nd_dentry
->d_inode
, I_MUTEX_PARENT
);
336 dentry
= vn_lookup_hash(&nd
);
337 rc
= PTR_ERR(dentry
);
338 if (!IS_ERR(dentry
)) {
339 /* Why not before? Because we want correct rc value */
340 if (nd
.last
.name
[nd
.last
.len
])
343 inode
= dentry
->d_inode
;
345 atomic_inc(&inode
->i_count
);
346 #ifdef HAVE_2ARGS_VFS_UNLINK
347 rc
= vfs_unlink(nd
.nd_dentry
->d_inode
, dentry
);
349 rc
= vfs_unlink(nd
.nd_dentry
->d_inode
, dentry
, nd
.nd_mnt
);
350 #endif /* HAVE_2ARGS_VFS_UNLINK */
355 spl_inode_unlock(nd
.nd_dentry
->d_inode
);
357 iput(inode
); /* truncate the inode here */
359 vn_path_release(&nd
);
364 rc
= !dentry
->d_inode
? -ENOENT
:
365 S_ISDIR(dentry
->d_inode
->i_mode
) ? -EISDIR
: -ENOTDIR
;
368 EXPORT_SYMBOL(vn_remove
);
370 /* Modified do_rename() from linux/fs/namei.c, only uses exported symbols */
372 vn_rename(const char *oldname
, const char *newname
, int x1
)
374 struct dentry
*old_dir
, *new_dir
;
375 struct dentry
*old_dentry
, *new_dentry
;
377 struct nameidata oldnd
, newnd
;
381 rc
= spl_kern_path_parent(oldname
, &oldnd
);
385 rc
= spl_kern_path_parent(newname
, &newnd
);
390 if (oldnd
.nd_mnt
!= newnd
.nd_mnt
)
393 old_dir
= oldnd
.nd_dentry
;
395 if (oldnd
.last_type
!= LAST_NORM
)
398 new_dir
= newnd
.nd_dentry
;
399 if (newnd
.last_type
!= LAST_NORM
)
402 trap
= lock_rename(new_dir
, old_dir
);
404 old_dentry
= vn_lookup_hash(&oldnd
);
406 rc
= PTR_ERR(old_dentry
);
407 if (IS_ERR(old_dentry
))
410 /* source must exist */
412 if (!old_dentry
->d_inode
)
415 /* unless the source is a directory trailing slashes give -ENOTDIR */
416 if (!S_ISDIR(old_dentry
->d_inode
->i_mode
)) {
418 if (oldnd
.last
.name
[oldnd
.last
.len
])
420 if (newnd
.last
.name
[newnd
.last
.len
])
424 /* source should not be ancestor of target */
426 if (old_dentry
== trap
)
429 new_dentry
= vn_lookup_hash(&newnd
);
430 rc
= PTR_ERR(new_dentry
);
431 if (IS_ERR(new_dentry
))
434 /* target should not be an ancestor of source */
436 if (new_dentry
== trap
)
439 #ifdef HAVE_4ARGS_VFS_RENAME
440 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
,
441 new_dir
->d_inode
, new_dentry
);
443 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
, oldnd
.nd_mnt
,
444 new_dir
->d_inode
, new_dentry
, newnd
.nd_mnt
);
445 #endif /* HAVE_4ARGS_VFS_RENAME */
451 unlock_rename(new_dir
, old_dir
);
453 vn_path_release(&newnd
);
455 vn_path_release(&oldnd
);
459 EXPORT_SYMBOL(vn_rename
);
462 vn_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, void *x3
, void *x4
)
475 rc
= vfs_getattr(fp
->f_vfsmnt
, fp
->f_dentry
, &stat
);
479 vap
->va_type
= vn_mode_to_vtype(stat
.mode
);
480 vap
->va_mode
= stat
.mode
;
481 vap
->va_uid
= stat
.uid
;
482 vap
->va_gid
= stat
.gid
;
484 vap
->va_nodeid
= stat
.ino
;
485 vap
->va_nlink
= stat
.nlink
;
486 vap
->va_size
= stat
.size
;
487 vap
->va_blksize
= stat
.blksize
;
488 vap
->va_atime
= stat
.atime
;
489 vap
->va_mtime
= stat
.mtime
;
490 vap
->va_ctime
= stat
.ctime
;
491 vap
->va_rdev
= stat
.rdev
;
492 vap
->va_nblocks
= stat
.blocks
;
496 EXPORT_SYMBOL(vn_getattr
);
498 int vn_fsync(vnode_t
*vp
, int flags
, void *x3
, void *x4
)
509 SRETURN(-spl_filp_fsync(vp
->v_file
, datasync
));
511 EXPORT_SYMBOL(vn_fsync
);
513 /* Function must be called while holding the vn_file_lock */
519 ASSERT(spin_is_locked(&vn_file_lock
));
521 list_for_each_entry(fp
, &vn_file_list
, f_list
) {
522 if (fd
== fp
->f_fd
&& fp
->f_task
== current
) {
523 ASSERT(atomic_read(&fp
->f_ref
) != 0);
541 /* Already open, just take an extra reference */
542 spin_lock(&vn_file_lock
);
546 atomic_inc(&fp
->f_ref
);
547 spin_unlock(&vn_file_lock
);
551 spin_unlock(&vn_file_lock
);
553 /* File was not yet opened; create the object and set it up */
554 fp
= kmem_cache_alloc(vn_file_cache
, KM_SLEEP
);
558 mutex_enter(&fp
->f_lock
);
561 fp
->f_task
= current
;
563 atomic_inc(&fp
->f_ref
);
567 SGOTO(out_mutex
, rc
);
569 vp
= vn_alloc(KM_SLEEP
);
573 if (vfs_getattr(lfp
->f_vfsmnt
, lfp
->f_dentry
, &stat
))
574 SGOTO(out_vnode
, rc
);
576 mutex_enter(&vp
->v_lock
);
577 vp
->v_type
= vn_mode_to_vtype(stat
.mode
);
579 mutex_exit(&vp
->v_lock
);
584 /* Put it on the tracking list */
585 spin_lock(&vn_file_lock
);
586 list_add(&fp
->f_list
, &vn_file_list
);
587 spin_unlock(&vn_file_lock
);
589 mutex_exit(&fp
->f_lock
);
597 mutex_exit(&fp
->f_lock
);
598 kmem_cache_free(vn_file_cache
, fp
);
604 static void releasef_locked(file_t
*fp
)
609 /* Unlinked from list, no refs, safe to free outside mutex */
611 vn_free(fp
->f_vnode
);
613 kmem_cache_free(vn_file_cache
, fp
);
622 spin_lock(&vn_file_lock
);
625 atomic_dec(&fp
->f_ref
);
626 if (atomic_read(&fp
->f_ref
) > 0) {
627 spin_unlock(&vn_file_lock
);
632 list_del(&fp
->f_list
);
635 spin_unlock(&vn_file_lock
);
640 EXPORT_SYMBOL(releasef
);
642 #ifndef HAVE_SET_FS_PWD
643 # ifdef HAVE_2ARGS_SET_FS_PWD
644 /* Used from 2.6.25 - 2.6.31+ */
646 set_fs_pwd(struct fs_struct
*fs
, struct path
*path
)
650 # ifdef HAVE_FS_STRUCT_SPINLOCK
651 spin_lock(&fs
->lock
);
655 spin_unlock(&fs
->lock
);
657 write_lock(&fs
->lock
);
661 write_unlock(&fs
->lock
);
662 # endif /* HAVE_FS_STRUCT_SPINLOCK */
668 /* Used from 2.6.11 - 2.6.24 */
670 set_fs_pwd(struct fs_struct
*fs
, struct vfsmount
*mnt
, struct dentry
*dentry
)
672 struct dentry
*old_pwd
;
673 struct vfsmount
*old_pwdmnt
;
675 write_lock(&fs
->lock
);
677 old_pwdmnt
= fs
->pwdmnt
;
678 fs
->pwdmnt
= mntget(mnt
);
679 fs
->pwd
= dget(dentry
);
680 write_unlock(&fs
->lock
);
687 # endif /* HAVE_2ARGS_SET_FS_PWD */
688 #endif /* HAVE_SET_FS_PWD */
691 vn_set_pwd(const char *filename
)
693 #if defined(HAVE_2ARGS_SET_FS_PWD) && defined(HAVE_USER_PATH_DIR)
697 #endif /* HAVE_2ARGS_SET_FS_PWD */
698 mm_segment_t saved_fs
;
703 /* user_path_dir() and __user_walk() both expect 'filename' to be
704 * a user space address so we must briefly increase the data segment
705 * size to ensure strncpy_from_user() does not fail with -EFAULT. */
710 #ifdef HAVE_2ARGS_SET_FS_PWD
711 # ifdef HAVE_USER_PATH_DIR
712 rc
= user_path_dir(filename
, &path
);
716 rc
= inode_permission(path
.dentry
->d_inode
, MAY_EXEC
| MAY_ACCESS
);
718 SGOTO(dput_and_out
, rc
);
720 set_fs_pwd(current
->fs
, &path
);
725 rc
= __user_walk(filename
,
726 LOOKUP_FOLLOW
|LOOKUP_DIRECTORY
|LOOKUP_CHDIR
, &nd
);
730 rc
= vfs_permission(&nd
, MAY_EXEC
);
732 SGOTO(dput_and_out
, rc
);
734 set_fs_pwd(current
->fs
, &nd
.path
);
738 # endif /* HAVE_USER_PATH_DIR */
740 rc
= __user_walk(filename
,
741 LOOKUP_FOLLOW
|LOOKUP_DIRECTORY
|LOOKUP_CHDIR
, &nd
);
745 rc
= vfs_permission(&nd
, MAY_EXEC
);
747 SGOTO(dput_and_out
, rc
);
749 set_fs_pwd(current
->fs
, nd
.nd_mnt
, nd
.nd_dentry
);
752 vn_path_release(&nd
);
753 #endif /* HAVE_2ARGS_SET_FS_PWD */
759 EXPORT_SYMBOL(vn_set_pwd
);
762 vn_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
764 struct vnode
*vp
= buf
;
766 mutex_init(&vp
->v_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
769 } /* vn_cache_constructor() */
772 vn_cache_destructor(void *buf
, void *cdrarg
)
774 struct vnode
*vp
= buf
;
776 mutex_destroy(&vp
->v_lock
);
777 } /* vn_cache_destructor() */
780 vn_file_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
784 atomic_set(&fp
->f_ref
, 0);
785 mutex_init(&fp
->f_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
786 INIT_LIST_HEAD(&fp
->f_list
);
789 } /* vn_file_cache_constructor() */
792 vn_file_cache_destructor(void *buf
, void *cdrarg
)
796 mutex_destroy(&fp
->f_lock
);
797 } /* vn_file_cache_destructor() */
799 int spl_vn_init_kallsyms_lookup(void)
801 #ifdef HAVE_KERN_PATH_PARENT_HEADER
802 #ifndef HAVE_KERN_PATH_PARENT_SYMBOL
803 kern_path_parent_fn
= (kern_path_parent_t
)
804 spl_kallsyms_lookup_name("kern_path_parent");
805 if (!kern_path_parent_fn
) {
806 printk(KERN_ERR
"Error: Unknown symbol kern_path_parent\n");
809 #endif /* HAVE_KERN_PATH_PARENT_SYMBOL */
810 #endif /* HAVE_KERN_PATH_PARENT_HEADER */
819 vn_cache
= kmem_cache_create("spl_vn_cache",
820 sizeof(struct vnode
), 64,
821 vn_cache_constructor
,
823 NULL
, NULL
, NULL
, KMC_KMEM
);
825 vn_file_cache
= kmem_cache_create("spl_vn_file_cache",
827 vn_file_cache_constructor
,
828 vn_file_cache_destructor
,
829 NULL
, NULL
, NULL
, KMC_KMEM
);
836 file_t
*fp
, *next_fp
;
840 spin_lock(&vn_file_lock
);
842 list_for_each_entry_safe(fp
, next_fp
, &vn_file_list
, f_list
) {
843 list_del(&fp
->f_list
);
848 kmem_cache_destroy(vn_file_cache
);
849 vn_file_cache
= NULL
;
850 spin_unlock(&vn_file_lock
);
853 SWARN("Warning %d files leaked\n", leaked
);
855 kmem_cache_destroy(vn_cache
);