1 /*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://zfsonlinux.org/>.
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Vnode Implementation.
25 \*****************************************************************************/
28 #include <sys/vnode.h>
29 #include <sys/kmem_cache.h>
30 #include <linux/falloc.h>
31 #include <linux/file_compat.h>
/*
 * Exported root-directory vnode handle.  It is initialised to the fixed
 * non-NULL address 0xabcd1234 and is compared by identity in vn_openat()
 * (ASSERT(vp == rootdir)).
 * NOTE(review): this looks like a sentinel that is never dereferenced --
 * confirm against callers.
 */
33 vnode_t
*rootdir
= (vnode_t
*)0xabcd1234;
34 EXPORT_SYMBOL(rootdir
);
/*
 * Object caches: vn_cache is created with sizeof(struct vnode) and backs the
 * vnodes allocated in this file; vn_file_cache backs the file_t tracking
 * objects.
 */
36 static spl_kmem_cache_t
*vn_cache
;
37 static spl_kmem_cache_t
*vn_file_cache
;
/*
 * vn_file_list holds the tracked file_t objects; vn_file_lock is taken
 * around every visible lookup, insertion and removal on that list.
 */
39 static DEFINE_SPINLOCK(vn_file_lock
);
40 static LIST_HEAD(vn_file_list
);
/*
 * Convert a file mode (mode_t) into a vnode type.  Within this file the
 * result is stored in vp->v_type and vap->va_type, always with stat.mode as
 * the argument.  The conversion logic itself is not visible in this excerpt.
 */
43 vn_mode_to_vtype(mode_t mode
)
70 } /* vn_mode_to_vtype() */
71 EXPORT_SYMBOL(vn_mode_to_vtype
);
/*
 * Takes a vtype_t.  NOTE(review): judging by the name this is the reverse
 * mapping of vn_mode_to_vtype(); neither the body nor the return type is
 * visible in this excerpt, so confirm against the full source.
 */
74 vn_vtype_to_mode(vtype_t vtype
)
98 } /* vn_vtype_to_mode() */
99 EXPORT_SYMBOL(vn_vtype_to_mode
);
106 vp
= kmem_cache_alloc(vn_cache
, flag
);
114 EXPORT_SYMBOL(vn_alloc
);
119 kmem_cache_free(vn_cache
, vp
);
121 EXPORT_SYMBOL(vn_free
);
/*
 * Open the file named by 'path' and attach it to a newly allocated vnode.
 *
 * Steps that are visible in this excerpt:
 *  - asserts that 'flags' contains FWRITE and/or FREAD and that 'seg' is
 *    UIO_SYSSPACE;
 *  - swaps the task's fs->umask to 0 with xchg(), opens the file with
 *    filp_open(path, flags, mode), then swaps the saved umask back;
 *  - returns -PTR_ERR(fp), i.e. a positive errno (presumably only when
 *    filp_open() failed -- the guarding test is not visible);
 *  - stats the file with vfs_getattr(), using whichever of the three
 *    argument lists the kernel provides;
 *  - allocates the vnode with vn_alloc(KM_SLEEP);
 *  - removes __GFP_IO and __GFP_FS from the file mapping's gfp mask and
 *    keeps the previous mask in vp->v_gfp_mask (vn_close() writes it back);
 *  - sets vp->v_type from stat.mode while holding vp->v_lock.
 *
 * NOTE(review): the two xchg() calls spell their first argument with a
 * currency sign followed by 't'; this looks like a mis-encoded "&current"
 * (other functions here use current->flags and current->fs) and must be
 * repaired before this text can compile.
 * NOTE(review): many lines are missing from this excerpt, so the flag
 * remapping, the condition around the umask swap, error unwinding and the
 * use of 'vpp', 'x1' and 'x2' could not be confirmed.
 */
124 vn_open(const char *path
, uio_seg_t seg
, int flags
, int mode
,
125 vnode_t
**vpp
, int x1
, void *x2
)
129 int rc
, saved_umask
= 0;
133 ASSERT(flags
& (FWRITE
| FREAD
));
134 ASSERT(seg
== UIO_SYSSPACE
);
138 if (!(flags
& FCREAT
) && (flags
& FWRITE
))
141 /* Note for filp_open() the two low bits must be remapped to mean:
142 * 01 - read-only -> 00 read-only
143 * 10 - write-only -> 01 write-only
144 * 11 - read-write -> 10 read-write
149 saved_umask
= xchg(¤t
->fs
->umask
, 0);
151 fp
= filp_open(path
, flags
, mode
);
154 (void)xchg(¤t
->fs
->umask
, saved_umask
);
157 return (-PTR_ERR(fp
));
159 #if defined(HAVE_4ARGS_VFS_GETATTR)
160 rc
= vfs_getattr(&fp
->f_path
, &stat
, STATX_TYPE
, AT_STATX_SYNC_AS_STAT
);
161 #elif defined(HAVE_2ARGS_VFS_GETATTR)
162 rc
= vfs_getattr(&fp
->f_path
, &stat
);
164 rc
= vfs_getattr(fp
->f_path
.mnt
, fp
->f_dentry
, &stat
);
171 vp
= vn_alloc(KM_SLEEP
);
177 saved_gfp
= mapping_gfp_mask(fp
->f_mapping
);
178 mapping_set_gfp_mask(fp
->f_mapping
, saved_gfp
& ~(__GFP_IO
|__GFP_FS
));
180 mutex_enter(&vp
->v_lock
);
181 vp
->v_type
= vn_mode_to_vtype(stat
.mode
);
183 vp
->v_gfp_mask
= saved_gfp
;
185 mutex_exit(&vp
->v_lock
);
189 EXPORT_SYMBOL(vn_open
);
/*
 * Open 'path' relative to the root directory vnode.
 *
 * Asserts that 'vp' is rootdir, builds the absolute name "/<path>" in a
 * kmalloc()ed buffer of strlen(path) + 2 bytes (room for the leading '/'
 * and the terminator), and passes it to vn_open() together with 'seg',
 * 'flags', 'mode', 'vpp', 'x1' and 'x2'.  'fd' is not referenced in the
 * visible lines.
 *
 * NOTE(review): the allocation-failure check and the release of the
 * temporary buffer are not visible in this excerpt -- confirm they exist.
 */
192 vn_openat(const char *path
, uio_seg_t seg
, int flags
, int mode
,
193 vnode_t
**vpp
, int x1
, void *x2
, vnode_t
*vp
, int fd
)
198 ASSERT(vp
== rootdir
);
200 len
= strlen(path
) + 2;
201 realpath
= kmalloc(len
, kmem_flags_convert(KM_SLEEP
));
205 (void)snprintf(realpath
, len
, "/%s", path
);
206 rc
= vn_open(realpath
, seg
, flags
, mode
, vpp
, x1
, x2
);
211 EXPORT_SYMBOL(vn_openat
);
/*
 * Read from or write to the file behind a vnode.
 *
 * Asserts: 'uio' is UIO_WRITE or UIO_READ, 'seg' is UIO_SYSSPACE, 'ioflag'
 * carries no bit other than FAPPEND, and 'x2' is RLIM64_INFINITY.  The
 * transfer itself is a vfs_write() or vfs_read() on 'addr'/'len', with a
 * local 'offset' passed by pointer.
 *
 * NOTE(review): the lines that choose between the two calls, act on
 * FAPPEND, switch the address limit (saved_fs) and report through 'residp'
 * are not visible in this excerpt.
 */
214 vn_rdwr(uio_rw_t uio
, vnode_t
*vp
, void *addr
, ssize_t len
, offset_t off
,
215 uio_seg_t seg
, int ioflag
, rlim64_t x2
, void *x3
, ssize_t
*residp
)
218 mm_segment_t saved_fs
;
222 ASSERT(uio
== UIO_WRITE
|| uio
== UIO_READ
);
225 ASSERT(seg
== UIO_SYSSPACE
);
226 ASSERT((ioflag
& ~FAPPEND
) == 0);
227 ASSERT(x2
== RLIM64_INFINITY
);
232 if (ioflag
& FAPPEND
)
235 /* Writable user data segment must be briefly increased for this
236 * process so we can use the user space read call paths to write
237 * into memory allocated by the kernel. */
242 rc
= vfs_write(fp
, addr
, len
, &offset
);
244 rc
= vfs_read(fp
, addr
, len
, &offset
);
261 EXPORT_SYMBOL(vn_rdwr
);
/*
 * Close the file behind 'vp'.
 *
 * Before filp_close() is called, the file mapping's gfp mask is set back to
 * vp->v_gfp_mask, the value vn_open() recorded before it masked out
 * __GFP_IO and __GFP_FS.  'flags' and x1..x4 are not referenced in the
 * visible lines.
 *
 * NOTE(review): the release of the vnode and the return statement are not
 * visible in this excerpt.
 */
264 vn_close(vnode_t
*vp
, int flags
, int x1
, int x2
, void *x3
, void *x4
)
271 mapping_set_gfp_mask(vp
->v_file
->f_mapping
, vp
->v_gfp_mask
);
272 rc
= filp_close(vp
->v_file
, 0);
277 EXPORT_SYMBOL(vn_close
);
279 /* vn_seek() does not actually seek; it only performs bounds checking on the
280 * proposed seek. We perform minimal checking and allow vn_rdwr() to catch
281 * anything more serious. */
/*
 * Returns EINVAL when *noffp is negative or greater than MAXOFFSET_T and 0
 * otherwise.  'vp', 'ooff' and 'ct' are not used by the visible statement.
 */
283 vn_seek(vnode_t
*vp
, offset_t ooff
, offset_t
*noffp
, void *ct
)
285 return ((*noffp
< 0 || *noffp
> MAXOFFSET_T
) ? EINVAL
: 0);
287 EXPORT_SYMBOL(vn_seek
);
290 * spl_basename() takes a NUL-terminated string s as input containing a path.
291 * It returns a char pointer to a string and a length that describe the
292 * basename of the path. If the basename is not "." or "/", it will be an index
293 * into the string. While the string should be NUL-terminated, the section
294 * referring to the basename is not. spl_basename is dual-licensed GPLv2+ and
295 * CC0. Anyone wishing to reuse it in another codebase may pick either license.
/*
 * Inputs: 's' is the path; judging by the pointer types, '*str' and '*len'
 * are the outputs (TODO confirm -- no assignment to them is visible here).
 *
 * Only two statements of the body are visible: a loop that steps the index
 * 'i' backwards while s[i] is '/', and a second backwards loop that starts
 * by recording end = i.
 */
298 spl_basename(const char *s
, const char **str
, int *len
)
313 while (i
&& s
[i
--] == '/');
323 for (end
= i
; i
; i
--) {
/*
 * Look up the last component of 'name' with the parent directory's inode
 * locked.  Callers in this file (vn_remove(), vn_rename()) receive a dentry
 * or an ERR_PTR and later unlock the parent inode themselves.
 *
 * Visible steps:
 *  - split off the basename and its length with spl_basename();
 *  - reject "." and ".." with ERR_PTR(-EACCES);
 *  - resolve the parent with kern_path(name, LOOKUP_PARENT, &parent); an
 *    ERR_PTR(rc) return follows that call;
 *  - lock the parent inode with spl_inode_lock_nested(I_MUTEX_PARENT);
 *  - look the basename up under the parent with lookup_one_len(); if that
 *    yields an error pointer the parent inode is unlocked again.
 *
 * NOTE(review): the lines that copy the resolved parent into '*path' and
 * the final return are not visible in this excerpt.
 */
335 static struct dentry
*
336 spl_kern_path_locked(const char *name
, struct path
*path
)
339 struct dentry
*dentry
;
340 const char *basename
;
347 spl_basename(name
, &basename
, &len
);
349 /* We do not accept "." or ".." */
350 if (len
<= 2 && basename
[0] == '.')
351 if (len
== 1 || basename
[1] == '.')
352 return (ERR_PTR(-EACCES
));
354 rc
= kern_path(name
, LOOKUP_PARENT
, &parent
);
356 return (ERR_PTR(rc
));
358 /* use I_MUTEX_PARENT because vfs_unlink needs it */
359 spl_inode_lock_nested(parent
.dentry
->d_inode
, I_MUTEX_PARENT
);
361 dentry
= lookup_one_len(basename
, parent
.dentry
, len
);
362 if (IS_ERR(dentry
)) {
363 spl_inode_unlock(parent
.dentry
->d_inode
);
/*
 * Remove the file named by 'path'.
 *
 * Asserts seg == UIO_SYSSPACE and flags == RMFILE.  The target dentry comes
 * from spl_kern_path_locked(), and 'rc' is preset to PTR_ERR(dentry).  When
 * the lookup succeeded, an extra reference is taken on the inode
 * (atomic_inc of i_count) before vfs_unlink() is called with the 2- or
 * 3-argument form.  The parent inode is then unlocked, and the following
 * iput() drops that extra reference (the inline comment marks it as the
 * point where the inode is truncated).
 *
 * The final conditional expression yields -ENOENT when the dentry has no
 * inode, -EISDIR when the inode is a directory, and -ENOTDIR otherwise.
 *
 * NOTE(review): the test on the byte following the parent dentry's name
 * presumably detects a trailing slash; the branch targets and the return
 * statement are not visible in this excerpt.
 */
372 /* Based on do_unlinkat() from linux/fs/namei.c */
374 vn_remove(const char *path
, uio_seg_t seg
, int flags
)
376 struct dentry
*dentry
;
378 struct inode
*inode
= NULL
;
381 ASSERT(seg
== UIO_SYSSPACE
);
382 ASSERT(flags
== RMFILE
);
384 dentry
= spl_kern_path_locked(path
, &parent
);
385 rc
= PTR_ERR(dentry
);
386 if (!IS_ERR(dentry
)) {
387 if (parent
.dentry
->d_name
.name
[parent
.dentry
->d_name
.len
]) {
392 inode
= dentry
->d_inode
;
394 atomic_inc(&inode
->i_count
);
400 #ifdef HAVE_2ARGS_VFS_UNLINK
401 rc
= vfs_unlink(parent
.dentry
->d_inode
, dentry
);
403 rc
= vfs_unlink(parent
.dentry
->d_inode
, dentry
, NULL
);
404 #endif /* HAVE_2ARGS_VFS_UNLINK */
411 spl_inode_unlock(parent
.dentry
->d_inode
);
413 iput(inode
); /* truncate the inode here */
419 rc
= !dentry
->d_inode
? -ENOENT
:
420 S_ISDIR(dentry
->d_inode
->i_mode
) ? -EISDIR
: -ENOTDIR
;
423 EXPORT_SYMBOL(vn_remove
);
/*
 * Rename 'oldname' to 'newname'.  'x1' is not referenced in the visible
 * lines.
 *
 * Visible sequence:
 *  - each name is resolved with spl_kern_path_locked(), and the parent inode
 *    is unlocked again straight after its lookup;
 *  - the mounts of the two parents are compared (old_parent.mnt versus
 *    new_parent.mnt);
 *  - lock_rename() is taken on the two parent directories and its result
 *    ('trap') is compared with both dentries, rejecting a rename where one
 *    is an ancestor of the other;
 *  - the source must have an inode, and for a non-directory source the byte
 *    following each dentry name is tested (trailing-slash check per the
 *    inline comment);
 *  - vfs_rename() is called with the 4-, 5- or 6-argument form the kernel
 *    provides;
 *  - unlock_rename() and path_put() on both parents undo the setup.
 *
 * NOTE(review): both dentries are looked up before lock_rename() is taken
 * and after the per-parent locks were dropped; check whether they can go
 * stale in that window.  The error codes, jump targets and the release of
 * the dentries are not visible in this excerpt.
 */
425 /* Based on do_rename() from linux/fs/namei.c */
427 vn_rename(const char *oldname
, const char *newname
, int x1
)
429 struct dentry
*old_dir
, *new_dir
;
430 struct dentry
*old_dentry
, *new_dentry
;
432 struct path old_parent
, new_parent
;
435 old_dentry
= spl_kern_path_locked(oldname
, &old_parent
);
436 if (IS_ERR(old_dentry
)) {
437 rc
= PTR_ERR(old_dentry
);
441 spl_inode_unlock(old_parent
.dentry
->d_inode
);
443 new_dentry
= spl_kern_path_locked(newname
, &new_parent
);
444 if (IS_ERR(new_dentry
)) {
445 rc
= PTR_ERR(new_dentry
);
449 spl_inode_unlock(new_parent
.dentry
->d_inode
);
452 if (old_parent
.mnt
!= new_parent
.mnt
)
455 old_dir
= old_parent
.dentry
;
456 new_dir
= new_parent
.dentry
;
457 trap
= lock_rename(new_dir
, old_dir
);
459 /* source should not be ancestor of target */
461 if (old_dentry
== trap
)
464 /* target should not be an ancestor of source */
466 if (new_dentry
== trap
)
469 /* source must exist */
471 if (!old_dentry
->d_inode
)
474 /* unless the source is a directory trailing slashes give -ENOTDIR */
475 if (!S_ISDIR(old_dentry
->d_inode
->i_mode
)) {
477 if (old_dentry
->d_name
.name
[old_dentry
->d_name
.len
])
479 if (new_dentry
->d_name
.name
[new_dentry
->d_name
.len
])
483 #if defined(HAVE_4ARGS_VFS_RENAME)
484 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
,
485 new_dir
->d_inode
, new_dentry
);
486 #elif defined(HAVE_5ARGS_VFS_RENAME)
487 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
,
488 new_dir
->d_inode
, new_dentry
, NULL
);
490 rc
= vfs_rename(old_dir
->d_inode
, old_dentry
,
491 new_dir
->d_inode
, new_dentry
, NULL
, 0);
494 unlock_rename(new_dir
, old_dir
);
497 path_put(&new_parent
);
500 path_put(&old_parent
);
504 EXPORT_SYMBOL(vn_rename
);
/*
 * Fill '*vap' with attributes obtained from vfs_getattr() on a struct file
 * 'fp'.
 *
 * With the 4-argument kernel API the call requests STATX_BASIC_STATS with
 * AT_STATX_SYNC_AS_STAT; the 2- and 3-argument forms are used otherwise.
 * Fields copied from the stat result: va_type (via vn_mode_to_vtype()),
 * va_mode, va_uid (KUID_TO_SUID), va_gid (KGID_TO_SGID), va_nodeid (ino),
 * va_nlink, va_size, va_blksize, va_atime, va_mtime, va_ctime, va_rdev and
 * va_nblocks (blocks).  'flags', 'x3' and 'x4' are not referenced in the
 * visible lines.
 *
 * NOTE(review): how 'fp' is derived from 'vp', the handling of a
 * vfs_getattr() failure and the return value are not visible in this
 * excerpt.
 */
507 vn_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, void *x3
, void *x4
)
519 #if defined(HAVE_4ARGS_VFS_GETATTR)
520 rc
= vfs_getattr(&fp
->f_path
, &stat
, STATX_BASIC_STATS
,
521 AT_STATX_SYNC_AS_STAT
);
522 #elif defined(HAVE_2ARGS_VFS_GETATTR)
523 rc
= vfs_getattr(&fp
->f_path
, &stat
);
525 rc
= vfs_getattr(fp
->f_path
.mnt
, fp
->f_dentry
, &stat
);
530 vap
->va_type
= vn_mode_to_vtype(stat
.mode
);
531 vap
->va_mode
= stat
.mode
;
532 vap
->va_uid
= KUID_TO_SUID(stat
.uid
);
533 vap
->va_gid
= KGID_TO_SGID(stat
.gid
);
535 vap
->va_nodeid
= stat
.ino
;
536 vap
->va_nlink
= stat
.nlink
;
537 vap
->va_size
= stat
.size
;
538 vap
->va_blksize
= stat
.blksize
;
539 vap
->va_atime
= stat
.atime
;
540 vap
->va_mtime
= stat
.mtime
;
541 vap
->va_ctime
= stat
.ctime
;
542 vap
->va_rdev
= stat
.rdev
;
543 vap
->va_nblocks
= stat
.blocks
;
547 EXPORT_SYMBOL(vn_getattr
);
/*
 * Flush the file behind 'vp' with spl_filp_fsync(vp->v_file, datasync); the
 * negated result of that call is stored in 'error'.
 *
 * __SPL_PF_FSTRANS is cleared from current->flags before the call and set
 * again afterwards; the value of __spl_pf_fstrans_check() is saved in
 * 'fstrans' first.  The remark in the body refers to "vfs_sync()", but the
 * call actually visible between the two flag updates is spl_filp_fsync().
 *
 * NOTE(review): the derivation of 'datasync' from 'flags', the tests on
 * 'fstrans' and the return statement are not visible in this excerpt.
 */
549 int vn_fsync(vnode_t
*vp
, int flags
, void *x3
, void *x4
)
562 * May enter XFS which generates a warning when PF_FSTRANS is set.
563 * To avoid this the flag is cleared over vfs_sync() and then reset.
565 fstrans
= __spl_pf_fstrans_check();
567 current
->flags
&= ~(__SPL_PF_FSTRANS
);
569 error
= -spl_filp_fsync(vp
->v_file
, datasync
);
571 current
->flags
|= __SPL_PF_FSTRANS
;
575 EXPORT_SYMBOL(vn_fsync
);
/*
 * Free (punch a hole in) the byte range described by 'bfp' in the file
 * behind 'vp'.  'error' starts out as EOPNOTSUPP.
 *
 * Visible behaviour:
 *  - a test on cmd != F_FREESP || bfp->l_whence != 0 comes first (its
 *    consequence is not visible), followed by an assertion that l_start is
 *    non-negative and l_len is positive;
 *  - when FALLOC_FL_PUNCH_HOLE is defined, spl_filp_fallocate() is called
 *    with FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE over
 *    [l_start, l_start + l_len), with __SPL_PF_FSTRANS cleared from
 *    current->flags before the call and set again afterwards;
 *  - when HAVE_INODE_TRUNCATE_RANGE is defined and the inode provides
 *    i_op->truncate_range, an end offset l_start + l_len is computed, rounded
 *    down to a page boundary if unaligned, compared with l_start, and
 *    truncate_range() is invoked on the inode.
 *
 * The remarks inside the body mention "vfs_sync()" and preallocating space;
 * the call actually visible is spl_filp_fallocate() with hole-punching
 * flags, i.e. it releases space rather than reserving it.
 *
 * NOTE(review): 'flag', 'offset', 'x6' and 'x7' are not referenced in the
 * visible lines; the remaining truncate_range() arguments and all return
 * statements are missing from this excerpt.
 */
577 int vn_space(vnode_t
*vp
, int cmd
, struct flock
*bfp
, int flag
,
578 offset_t offset
, void *x6
, void *x7
)
580 int error
= EOPNOTSUPP
;
581 #ifdef FALLOC_FL_PUNCH_HOLE
585 if (cmd
!= F_FREESP
|| bfp
->l_whence
!= 0)
590 ASSERT(bfp
->l_start
>= 0 && bfp
->l_len
> 0);
592 #ifdef FALLOC_FL_PUNCH_HOLE
594 * May enter XFS which generates a warning when PF_FSTRANS is set.
595 * To avoid this the flag is cleared over vfs_sync() and then reset.
597 fstrans
= __spl_pf_fstrans_check();
599 current
->flags
&= ~(__SPL_PF_FSTRANS
);
602 * When supported by the underlying file system preferentially
603 * use the fallocate() callback to preallocate the space.
605 error
= -spl_filp_fallocate(vp
->v_file
,
606 FALLOC_FL_KEEP_SIZE
| FALLOC_FL_PUNCH_HOLE
,
607 bfp
->l_start
, bfp
->l_len
);
610 current
->flags
|= __SPL_PF_FSTRANS
;
616 #ifdef HAVE_INODE_TRUNCATE_RANGE
617 if (vp
->v_file
->f_dentry
&& vp
->v_file
->f_dentry
->d_inode
&&
618 vp
->v_file
->f_dentry
->d_inode
->i_op
&&
619 vp
->v_file
->f_dentry
->d_inode
->i_op
->truncate_range
) {
620 off_t end
= bfp
->l_start
+ bfp
->l_len
;
622 * Judging from the code in shmem_truncate_range(),
623 * it seems the kernel expects the end offset to be
624 * inclusive and aligned to the end of a page.
626 if (end
% PAGE_SIZE
!= 0) {
627 end
&= ~(off_t
)(PAGE_SIZE
- 1);
628 if (end
<= bfp
->l_start
)
633 vp
->v_file
->f_dentry
->d_inode
->i_op
->truncate_range(
634 vp
->v_file
->f_dentry
->d_inode
,
643 EXPORT_SYMBOL(vn_space
);
/*
 * Walk vn_file_list looking for the entry whose f_fd equals 'fd' and whose
 * f_task equals 'task'.  Asserts that vn_file_lock is held and that a
 * matching entry has a non-zero f_ref.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers assign the result to a file pointer.
 */
645 /* Function must be called while holding the vn_file_lock */
647 file_find(int fd
, struct task_struct
*task
)
651 ASSERT(spin_is_locked(&vn_file_lock
));
653 list_for_each_entry(fp
, &vn_file_list
, f_list
) {
654 if (fd
== fp
->f_fd
&& fp
->f_task
== task
) {
655 ASSERT(atomic_read(&fp
->f_ref
) != 0);
675 /* Already open; just take an extra reference */
676 spin_lock(&vn_file_lock
);
678 fp
= file_find(fd
, current
);
680 atomic_inc(&fp
->f_ref
);
681 spin_unlock(&vn_file_lock
);
685 spin_unlock(&vn_file_lock
);
687 /* File was not yet opened; create the object and set it up */
688 fp
= kmem_cache_alloc(vn_file_cache
, KM_SLEEP
);
692 mutex_enter(&fp
->f_lock
);
695 fp
->f_task
= current
;
697 atomic_inc(&fp
->f_ref
);
703 vp
= vn_alloc(KM_SLEEP
);
707 #if defined(HAVE_4ARGS_VFS_GETATTR)
708 rc
= vfs_getattr(&lfp
->f_path
, &stat
, STATX_TYPE
, AT_STATX_SYNC_AS_STAT
);
709 #elif defined(HAVE_2ARGS_VFS_GETATTR)
710 rc
= vfs_getattr(&lfp
->f_path
, &stat
);
712 rc
= vfs_getattr(lfp
->f_path
.mnt
, lfp
->f_dentry
, &stat
);
717 mutex_enter(&vp
->v_lock
);
718 vp
->v_type
= vn_mode_to_vtype(stat
.mode
);
720 mutex_exit(&vp
->v_lock
);
725 /* Put it on the tracking list */
726 spin_lock(&vn_file_lock
);
727 list_add(&fp
->f_list
, &vn_file_list
);
728 spin_unlock(&vn_file_lock
);
730 mutex_exit(&fp
->f_lock
);
738 mutex_exit(&fp
->f_lock
);
739 kmem_cache_free(vn_file_cache
, fp
);
/*
 * Tear down a tracked file object: the visible lines free fp->f_vnode with
 * vn_free() and hand 'fp' back to vn_file_cache.
 *
 * NOTE(review): statements before the visible ones (for example dropping
 * the underlying struct file) are missing from this excerpt.
 */
745 static void releasef_locked(file_t
*fp
)
750 /* Unlinked from list, no refs, safe to free outside mutex */
752 vn_free(fp
->f_vnode
);
754 kmem_cache_free(vn_file_cache
, fp
);
760 areleasef(fd
, P_FINFO(current
));
762 EXPORT_SYMBOL(releasef
);
/*
 * Drop one reference on the tracked file for descriptor 'fd' in the task
 * given by 'fip' (which is cast directly to a struct task_struct pointer).
 *
 * Under vn_file_lock: the entry is located with file_find() and its f_ref
 * is decremented.  If the count is still positive the lock is released;
 * otherwise the entry is unlinked from vn_file_list before the lock is
 * released.
 *
 * NOTE(review): the symbol exported below is "areleasef", not
 * "vn_areleasef" -- presumably a header maps one name onto the other;
 * confirm.  The handling of a failed lookup and the final teardown call are
 * not visible in this excerpt.
 */
765 vn_areleasef(int fd
, uf_info_t
*fip
)
768 struct task_struct
*task
= (struct task_struct
*)fip
;
773 spin_lock(&vn_file_lock
);
774 fp
= file_find(fd
, task
);
776 atomic_dec(&fp
->f_ref
);
777 if (atomic_read(&fp
->f_ref
) > 0) {
778 spin_unlock(&vn_file_lock
);
782 list_del(&fp
->f_list
);
785 spin_unlock(&vn_file_lock
);
789 EXPORT_SYMBOL(areleasef
);
/*
 * Takes an fs_struct and a path.  The signature takes a const 'path' when
 * HAVE_SET_FS_PWD_WITH_CONST is defined and a non-const one otherwise.
 * fs->lock is taken with spin_lock()/spin_unlock() when
 * HAVE_FS_STRUCT_SPINLOCK is defined; the other visible variant uses
 * write_lock()/write_unlock().
 *
 * NOTE(review): the statements executed while the lock is held (presumably
 * the update of the working directory, going by the name) are not visible
 * in this excerpt.
 */
793 #ifdef HAVE_SET_FS_PWD_WITH_CONST
794 vn_set_fs_pwd(struct fs_struct
*fs
, const struct path
*path
)
796 vn_set_fs_pwd(struct fs_struct
*fs
, struct path
*path
)
797 #endif /* HAVE_SET_FS_PWD_WITH_CONST */
801 #ifdef HAVE_FS_STRUCT_SPINLOCK
802 spin_lock(&fs
->lock
);
806 spin_unlock(&fs
->lock
);
808 write_lock(&fs
->lock
);
812 write_unlock(&fs
->lock
);
813 #endif /* HAVE_FS_STRUCT_SPINLOCK */
/*
 * Takes a directory name 'filename'.
 *
 * Visible steps: resolve the name with user_path_dir(), check
 * inode_permission() for MAY_EXEC | MAY_ACCESS on the resulting inode, then
 * pass the path to vn_set_fs_pwd() for current->fs.
 *
 * NOTE(review): the address-limit switch around user_path_dir() (see
 * 'saved_fs' and the remark in the body), the error checks on 'rc' and the
 * release of 'path' are not visible in this excerpt.
 */
820 vn_set_pwd(const char *filename
)
823 mm_segment_t saved_fs
;
827 * user_path_dir() and __user_walk() both expect 'filename' to be
828 * a user space address so we must briefly increase the data segment
829 * size to ensure strncpy_from_user() does not fail with -EFAULT.
834 rc
= user_path_dir(filename
, &path
);
838 rc
= inode_permission(path
.dentry
->d_inode
, MAY_EXEC
| MAY_ACCESS
);
842 vn_set_fs_pwd(current
->fs
, &path
);
851 EXPORT_SYMBOL(vn_set_pwd
);
/*
 * Constructor registered for the "spl_vn_cache" object cache: treats 'buf'
 * as a struct vnode and initialises its v_lock mutex.  'cdrarg' and
 * 'kmflags' are not used in the visible lines.
 */
854 vn_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
856 struct vnode
*vp
= buf
;
858 mutex_init(&vp
->v_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
861 } /* vn_cache_constructor() */
/*
 * Counterpart of vn_cache_constructor(): treats 'buf' as a struct vnode and
 * destroys its v_lock mutex.  'cdrarg' is not used.
 */
864 vn_cache_destructor(void *buf
, void *cdrarg
)
866 struct vnode
*vp
= buf
;
868 mutex_destroy(&vp
->v_lock
);
869 } /* vn_cache_destructor() */
/*
 * Constructor registered for the "spl_vn_file_cache" object cache: resets
 * the reference count f_ref to 0, initialises the f_lock mutex and makes
 * f_list an empty list head.  'cdrarg' and 'kmflags' are not used in the
 * visible lines; the declaration of 'fp' is missing from this excerpt.
 */
872 vn_file_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
876 atomic_set(&fp
->f_ref
, 0);
877 mutex_init(&fp
->f_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
878 INIT_LIST_HEAD(&fp
->f_list
);
881 } /* vn_file_cache_constructor() */
/*
 * Destructor registered for the "spl_vn_file_cache" object cache: destroys
 * the f_lock mutex set up by vn_file_cache_constructor().  The declaration
 * of 'fp' is missing from this excerpt.
 */
884 vn_file_cache_destructor(void *buf
, void *cdrarg
)
888 mutex_destroy(&fp
->f_lock
);
889 } /* vn_file_cache_destructor() */
894 vn_cache
= kmem_cache_create("spl_vn_cache",
895 sizeof(struct vnode
), 64,
896 vn_cache_constructor
,
898 NULL
, NULL
, NULL
, 0);
900 vn_file_cache
= kmem_cache_create("spl_vn_file_cache",
902 vn_file_cache_constructor
,
903 vn_file_cache_destructor
,
904 NULL
, NULL
, NULL
, 0);
911 file_t
*fp
, *next_fp
;
914 spin_lock(&vn_file_lock
);
916 list_for_each_entry_safe(fp
, next_fp
, &vn_file_list
, f_list
) {
917 list_del(&fp
->f_list
);
922 spin_unlock(&vn_file_lock
);
925 printk(KERN_WARNING
"WARNING: %d vnode files leaked\n", leaked
);
927 kmem_cache_destroy(vn_file_cache
);
928 kmem_cache_destroy(vn_cache
);