]> git.proxmox.com Git - mirror_spl-debian.git/blob - module/spl/spl-vnode.c
New upstream version 0.7.2
[mirror_spl-debian.git] / module / spl / spl-vnode.c
1 /*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6 * UCRL-CODE-235197
7 *
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://zfsonlinux.org/>.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *****************************************************************************
24 * Solaris Porting Layer (SPL) Vnode Implementation.
25 \*****************************************************************************/
26
27 #include <sys/cred.h>
28 #include <sys/vnode.h>
29 #include <sys/kmem_cache.h>
30 #include <linux/falloc.h>
31 #include <linux/file_compat.h>
32
/* Solaris code expects a non-NULL rootdir vnode.  Within this file the
 * pointer is only ever compared against (see vn_openat()), never
 * dereferenced, so an arbitrary sentinel value is sufficient. */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
EXPORT_SYMBOL(rootdir);

/* Slab caches backing vn_alloc()/vn_free() and vn_getf() allocations */
static spl_kmem_cache_t *vn_cache;
static spl_kmem_cache_t *vn_file_cache;

/* vn_file_lock protects vn_file_list, the list of file_t handles
 * handed out by vn_getf() and reclaimed by vn_areleasef() */
static DEFINE_SPINLOCK(vn_file_lock);
static LIST_HEAD(vn_file_list);
41
42 vtype_t
43 vn_mode_to_vtype(mode_t mode)
44 {
45 if (S_ISREG(mode))
46 return VREG;
47
48 if (S_ISDIR(mode))
49 return VDIR;
50
51 if (S_ISCHR(mode))
52 return VCHR;
53
54 if (S_ISBLK(mode))
55 return VBLK;
56
57 if (S_ISFIFO(mode))
58 return VFIFO;
59
60 if (S_ISLNK(mode))
61 return VLNK;
62
63 if (S_ISSOCK(mode))
64 return VSOCK;
65
66 return VNON;
67 } /* vn_mode_to_vtype() */
68 EXPORT_SYMBOL(vn_mode_to_vtype);
69
70 mode_t
71 vn_vtype_to_mode(vtype_t vtype)
72 {
73 if (vtype == VREG)
74 return S_IFREG;
75
76 if (vtype == VDIR)
77 return S_IFDIR;
78
79 if (vtype == VCHR)
80 return S_IFCHR;
81
82 if (vtype == VBLK)
83 return S_IFBLK;
84
85 if (vtype == VFIFO)
86 return S_IFIFO;
87
88 if (vtype == VLNK)
89 return S_IFLNK;
90
91 if (vtype == VSOCK)
92 return S_IFSOCK;
93
94 return VNON;
95 } /* vn_vtype_to_mode() */
96 EXPORT_SYMBOL(vn_vtype_to_mode);
97
98 vnode_t *
99 vn_alloc(int flag)
100 {
101 vnode_t *vp;
102
103 vp = kmem_cache_alloc(vn_cache, flag);
104 if (vp != NULL) {
105 vp->v_file = NULL;
106 vp->v_type = 0;
107 }
108
109 return (vp);
110 } /* vn_alloc() */
111 EXPORT_SYMBOL(vn_alloc);
112
/* Return a vnode previously obtained from vn_alloc() to the cache. */
void
vn_free(vnode_t *vp)
{
	kmem_cache_free(vn_cache, vp);
} /* vn_free() */
EXPORT_SYMBOL(vn_free);
119
/*
 * vn_open() - Open a file by pathname, Solaris style.
 *
 * path  - NUL-terminated pathname in kernel address space
 * seg   - must be UIO_SYSSPACE
 * flags - Solaris-style FREAD/FWRITE/FCREAT/... open flags
 * mode  - creation mode, honored exactly when FCREAT is set
 * vpp   - on success receives a newly allocated vnode wrapping the file
 * x1,x2 - unused compatibility arguments
 *
 * Returns 0 on success or a positive errno on failure.
 */
int
vn_open(const char *path, uio_seg_t seg, int flags, int mode,
    vnode_t **vpp, int x1, void *x2)
{
	struct file *fp;
	struct kstat stat;
	int rc, saved_umask = 0;
	gfp_t saved_gfp;
	vnode_t *vp;

	ASSERT(flags & (FWRITE | FREAD));
	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(vpp);
	*vpp = NULL;

	if (!(flags & FCREAT) && (flags & FWRITE))
		flags |= FEXCL;

	/* Note for filp_open() the two low bits must be remapped to mean:
	 * 01 - read-only  -> 00 read-only
	 * 10 - write-only -> 01 write-only
	 * 11 - read-write -> 10 read-write
	 */
	flags--;

	/* Temporarily clear the umask so FCREAT applies 'mode' exactly;
	 * the saved value is restored immediately after the open */
	if (flags & FCREAT)
		saved_umask = xchg(&current->fs->umask, 0);

	fp = filp_open(path, flags, mode);

	if (flags & FCREAT)
		(void)xchg(&current->fs->umask, saved_umask);

	if (IS_ERR(fp))
		return (-PTR_ERR(fp));

	/* Stat the file to determine its vnode type; the vfs_getattr()
	 * signature varies across the supported kernel versions */
#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&fp->f_path, &stat);
#else
	rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
#endif
	if (rc) {
		filp_close(fp, 0);
		return (-rc);
	}

	vp = vn_alloc(KM_SLEEP);
	if (!vp) {
		filp_close(fp, 0);
		return (ENOMEM);
	}

	/* Mask IO/FS reclaim from this mapping's allocations; the original
	 * mask is stashed in v_gfp_mask and restored by vn_close() */
	saved_gfp = mapping_gfp_mask(fp->f_mapping);
	mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_mode_to_vtype(stat.mode);
	vp->v_file = fp;
	vp->v_gfp_mask = saved_gfp;
	*vpp = vp;
	mutex_exit(&vp->v_lock);

	return (0);
} /* vn_open() */
EXPORT_SYMBOL(vn_open);
187
188 int
189 vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
190 vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
191 {
192 char *realpath;
193 int len, rc;
194
195 ASSERT(vp == rootdir);
196
197 len = strlen(path) + 2;
198 realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
199 if (!realpath)
200 return (ENOMEM);
201
202 (void)snprintf(realpath, len, "/%s", path);
203 rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
204 kfree(realpath);
205
206 return (rc);
207 } /* vn_openat() */
208 EXPORT_SYMBOL(vn_openat);
209
210 int
211 vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
212 uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
213 {
214 loff_t offset;
215 mm_segment_t saved_fs;
216 struct file *fp;
217 int rc;
218
219 ASSERT(uio == UIO_WRITE || uio == UIO_READ);
220 ASSERT(vp);
221 ASSERT(vp->v_file);
222 ASSERT(seg == UIO_SYSSPACE);
223 ASSERT((ioflag & ~FAPPEND) == 0);
224
225 fp = vp->v_file;
226
227 offset = off;
228 if (ioflag & FAPPEND)
229 offset = fp->f_pos;
230
231 /* Writable user data segment must be briefly increased for this
232 * process so we can use the user space read call paths to write
233 * in to memory allocated by the kernel. */
234 saved_fs = get_fs();
235 set_fs(get_ds());
236
237 if (uio & UIO_WRITE)
238 rc = vfs_write(fp, addr, len, &offset);
239 else
240 rc = vfs_read(fp, addr, len, &offset);
241
242 set_fs(saved_fs);
243 fp->f_pos = offset;
244
245 if (rc < 0)
246 return (-rc);
247
248 if (residp) {
249 *residp = len - rc;
250 } else {
251 if (rc != len)
252 return (EIO);
253 }
254
255 return (0);
256 } /* vn_rdwr() */
257 EXPORT_SYMBOL(vn_rdwr);
258
259 int
260 vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
261 {
262 int rc;
263
264 ASSERT(vp);
265 ASSERT(vp->v_file);
266
267 mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
268 rc = filp_close(vp->v_file, 0);
269 vn_free(vp);
270
271 return (-rc);
272 } /* vn_close() */
273 EXPORT_SYMBOL(vn_close);
274
275 /* vn_seek() does not actually seek it only performs bounds checking on the
276 * proposed seek. We perform minimal checking and allow vn_rdwr() to catch
277 * anything more serious. */
278 int
279 vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
280 {
281 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
282 }
283 EXPORT_SYMBOL(vn_seek);
284
285 /*
286 * spl_basename() takes a NULL-terminated string s as input containing a path.
287 * It returns a char pointer to a string and a length that describe the
288 * basename of the path. If the basename is not "." or "/", it will be an index
289 * into the string. While the string should be NULL terminated, the section
290 * referring to the basename is not. spl_basename is dual-licensed GPLv2+ and
291 * CC0. Anyone wishing to reuse it in another codebase may pick either license.
292 */
293 static void
294 spl_basename(const char *s, const char **str, int *len)
295 {
296 size_t i, end;
297
298 ASSERT(str);
299 ASSERT(len);
300
301 if (!s || !*s) {
302 *str = ".";
303 *len = 1;
304 return;
305 }
306
307 i = strlen(s) - 1;
308
309 while (i && s[i--] == '/');
310
311 if (i == 0) {
312 *str = "/";
313 *len = 1;
314 return;
315 }
316
317 end = i;
318
319 for (end = i; i; i--) {
320 if (s[i] == '/') {
321 *str = &s[i+1];
322 *len = end - i + 1;
323 return;
324 }
325 }
326
327 *str = s;
328 *len = end + 1;
329 }
330
/*
 * Resolve 'name' to its parent directory plus the dentry of its final
 * component.  On success the parent's inode is left locked with class
 * I_MUTEX_PARENT and *path holds a reference to the parent; the caller
 * must unlock the parent inode and path_put() *path.  Returns the child
 * dentry (possibly negative) or an ERR_PTR() on failure, in which case
 * no lock or reference is retained.
 */
static struct dentry *
spl_kern_path_locked(const char *name, struct path *path)
{
	struct path parent;
	struct dentry *dentry;
	const char *basename;
	int len;
	int rc;

	ASSERT(name);
	ASSERT(path);

	spl_basename(name, &basename, &len);

	/* We do not accept "." or ".." */
	if (len <= 2 && basename[0] == '.')
		if (len == 1 || basename[1] == '.')
			return (ERR_PTR(-EACCES));

	rc = kern_path(name, LOOKUP_PARENT, &parent);
	if (rc)
		return (ERR_PTR(rc));

	/* use I_MUTEX_PARENT because vfs_unlink needs it */
	spl_inode_lock_nested(parent.dentry->d_inode, I_MUTEX_PARENT);

	dentry = lookup_one_len(basename, parent.dentry, len);
	if (IS_ERR(dentry)) {
		/* Lookup failed: release the lock and parent reference */
		spl_inode_unlock(parent.dentry->d_inode);
		path_put(&parent);
	} else {
		/* Success: ownership of the parent passes to the caller */
		*path = parent;
	}

	return (dentry);
}
367
/* Based on do_unlinkat() from linux/fs/namei.c */
/*
 * vn_remove() - Unlink a file by pathname.  Only RMFILE and
 * UIO_SYSSPACE are supported.  Returns 0 or a positive errno.
 */
int
vn_remove(const char *path, uio_seg_t seg, int flags)
{
	struct dentry *dentry;
	struct path parent;
	struct inode *inode = NULL;
	int rc = 0;

	ASSERT(seg == UIO_SYSSPACE);
	ASSERT(flags == RMFILE);

	/* On success the parent inode is returned locked */
	dentry = spl_kern_path_locked(path, &parent);
	rc = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/* A non-NUL byte just past the name indicates trailing
		 * slashes (cf. do_unlinkat).  NOTE(review): this inspects
		 * the parent dentry's name rather than the child's --
		 * confirm that is intentional. */
		if (parent.dentry->d_name.name[parent.dentry->d_name.len]) {
			rc = 0;
			goto slashes;
		}

		inode = dentry->d_inode;
		if (inode) {
			/* Pin the inode so the final iput() below, after
			 * the unlink, performs any pending truncation */
			atomic_inc(&inode->i_count);
		} else {
			rc = 0;
			goto slashes;
		}

#ifdef HAVE_2ARGS_VFS_UNLINK
		rc = vfs_unlink(parent.dentry->d_inode, dentry);
#else
		rc = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
#endif /* HAVE_2ARGS_VFS_UNLINK */
exit1:
		dput(dentry);
	} else {
		return (-rc);
	}

	spl_inode_unlock(parent.dentry->d_inode);
	if (inode)
		iput(inode);    /* truncate the inode here */

	path_put(&parent);
	return (-rc);

slashes:
	/* Classify why a trailing-slash / negative-dentry path failed */
	rc = !dentry->d_inode ? -ENOENT :
	    S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
	goto exit1;
} /* vn_remove() */
EXPORT_SYMBOL(vn_remove);
419 EXPORT_SYMBOL(vn_remove);
420
/* Based on do_rename() from linux/fs/namei.c */
/*
 * vn_rename() - Rename 'oldname' to 'newname' on the same mount.
 * Returns 0 or a positive errno ('x1' is an unused compatibility arg).
 */
int
vn_rename(const char *oldname, const char *newname, int x1)
{
	struct dentry *old_dir, *new_dir;
	struct dentry *old_dentry, *new_dentry;
	struct dentry *trap;
	struct path old_parent, new_parent;
	int rc = 0;

	/* Each lookup returns with the parent inode locked; the lock is
	 * dropped right away since lock_rename() below takes both parent
	 * locks in the proper order for the actual rename */
	old_dentry = spl_kern_path_locked(oldname, &old_parent);
	if (IS_ERR(old_dentry)) {
		rc = PTR_ERR(old_dentry);
		goto exit;
	}

	spl_inode_unlock(old_parent.dentry->d_inode);

	new_dentry = spl_kern_path_locked(newname, &new_parent);
	if (IS_ERR(new_dentry)) {
		rc = PTR_ERR(new_dentry);
		goto exit2;
	}

	spl_inode_unlock(new_parent.dentry->d_inode);

	/* Cross-mount renames are not supported */
	rc = -EXDEV;
	if (old_parent.mnt != new_parent.mnt)
		goto exit3;

	old_dir = old_parent.dentry;
	new_dir = new_parent.dentry;
	/* 'trap' is the common ancestor dentry, if one parent contains
	 * the other; matching either dentry means an invalid rename */
	trap = lock_rename(new_dir, old_dir);

	/* source should not be ancestor of target */
	rc = -EINVAL;
	if (old_dentry == trap)
		goto exit4;

	/* target should not be an ancestor of source */
	rc = -ENOTEMPTY;
	if (new_dentry == trap)
		goto exit4;

	/* source must exist */
	rc = -ENOENT;
	if (!old_dentry->d_inode)
		goto exit4;

	/* unless the source is a directory trailing slashes give -ENOTDIR */
	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
		rc = -ENOTDIR;
		if (old_dentry->d_name.name[old_dentry->d_name.len])
			goto exit4;
		if (new_dentry->d_name.name[new_dentry->d_name.len])
			goto exit4;
	}

	/* vfs_rename() arity differs across supported kernel versions */
#if defined(HAVE_4ARGS_VFS_RENAME)
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry);
#elif defined(HAVE_5ARGS_VFS_RENAME)
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry, NULL);
#else
	rc = vfs_rename(old_dir->d_inode, old_dentry,
	    new_dir->d_inode, new_dentry, NULL, 0);
#endif
exit4:
	unlock_rename(new_dir, old_dir);
exit3:
	dput(new_dentry);
	path_put(&new_parent);
exit2:
	dput(old_dentry);
	path_put(&old_parent);
exit:
	/* Negate the kernel-style negative errno for Solaris callers */
	return (-rc);
}
EXPORT_SYMBOL(vn_rename);
501
502 int
503 vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
504 {
505 struct file *fp;
506 struct kstat stat;
507 int rc;
508
509 ASSERT(vp);
510 ASSERT(vp->v_file);
511 ASSERT(vap);
512
513 fp = vp->v_file;
514
515 #if defined(HAVE_4ARGS_VFS_GETATTR)
516 rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
517 AT_STATX_SYNC_AS_STAT);
518 #elif defined(HAVE_2ARGS_VFS_GETATTR)
519 rc = vfs_getattr(&fp->f_path, &stat);
520 #else
521 rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
522 #endif
523 if (rc)
524 return (-rc);
525
526 vap->va_type = vn_mode_to_vtype(stat.mode);
527 vap->va_mode = stat.mode;
528 vap->va_uid = KUID_TO_SUID(stat.uid);
529 vap->va_gid = KGID_TO_SGID(stat.gid);
530 vap->va_fsid = 0;
531 vap->va_nodeid = stat.ino;
532 vap->va_nlink = stat.nlink;
533 vap->va_size = stat.size;
534 vap->va_blksize = stat.blksize;
535 vap->va_atime = stat.atime;
536 vap->va_mtime = stat.mtime;
537 vap->va_ctime = stat.ctime;
538 vap->va_rdev = stat.rdev;
539 vap->va_nblocks = stat.blocks;
540
541 return (0);
542 }
543 EXPORT_SYMBOL(vn_getattr);
544
545 int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
546 {
547 int datasync = 0;
548 int error;
549 int fstrans;
550
551 ASSERT(vp);
552 ASSERT(vp->v_file);
553
554 if (flags & FDSYNC)
555 datasync = 1;
556
557 /*
558 * May enter XFS which generates a warning when PF_FSTRANS is set.
559 * To avoid this the flag is cleared over vfs_sync() and then reset.
560 */
561 fstrans = __spl_pf_fstrans_check();
562 if (fstrans)
563 current->flags &= ~(__SPL_PF_FSTRANS);
564
565 error = -spl_filp_fsync(vp->v_file, datasync);
566 if (fstrans)
567 current->flags |= __SPL_PF_FSTRANS;
568
569 return (error);
570 } /* vn_fsync() */
571 EXPORT_SYMBOL(vn_fsync);
572
/*
 * vn_space() - Free (hole punch) a byte range of the file backing a
 * vnode.  Only the F_FREESP command with l_whence == 0 (SEEK_SET
 * semantics) is supported.  Returns 0 or a positive errno; EOPNOTSUPP
 * when no supported mechanism exists on this kernel/filesystem.
 */
int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
    offset_t offset, void *x6, void *x7)
{
	int error = EOPNOTSUPP;
#ifdef FALLOC_FL_PUNCH_HOLE
	int fstrans;
#endif

	if (cmd != F_FREESP || bfp->l_whence != 0)
		return (EOPNOTSUPP);

	ASSERT(vp);
	ASSERT(vp->v_file);
	ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);

#ifdef FALLOC_FL_PUNCH_HOLE
	/*
	 * May enter XFS which generates a warning when PF_FSTRANS is set.
	 * To avoid this the flag is cleared over the fallocate call and
	 * then reset.
	 */
	fstrans = __spl_pf_fstrans_check();
	if (fstrans)
		current->flags &= ~(__SPL_PF_FSTRANS);

	/*
	 * When supported by the underlying file system preferentially
	 * use the fallocate() callback to punch the hole.
	 */
	error = -spl_filp_fallocate(vp->v_file,
	    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
	    bfp->l_start, bfp->l_len);

	if (fstrans)
		current->flags |= __SPL_PF_FSTRANS;

	if (error == 0)
		return (0);
#endif

#ifdef HAVE_INODE_TRUNCATE_RANGE
	/* Fall back to the inode's truncate_range method when present */
	if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
	    vp->v_file->f_dentry->d_inode->i_op &&
	    vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
		off_t end = bfp->l_start + bfp->l_len;
		/*
		 * Judging from the code in shmem_truncate_range(),
		 * it seems the kernel expects the end offset to be
		 * inclusive and aligned to the end of a page.
		 */
		if (end % PAGE_SIZE != 0) {
			end &= ~(off_t)(PAGE_SIZE - 1);
			if (end <= bfp->l_start)
				return (0);
		}
		--end;

		vp->v_file->f_dentry->d_inode->i_op->truncate_range(
		    vp->v_file->f_dentry->d_inode,
		    bfp->l_start, end
		);
		return (0);
	}
#endif

	return (error);
}
EXPORT_SYMBOL(vn_space);
640
641 /* Function must be called while holding the vn_file_lock */
642 static file_t *
643 file_find(int fd, struct task_struct *task)
644 {
645 file_t *fp;
646
647 ASSERT(spin_is_locked(&vn_file_lock));
648
649 list_for_each_entry(fp, &vn_file_list, f_list) {
650 if (fd == fp->f_fd && fp->f_task == task) {
651 ASSERT(atomic_read(&fp->f_ref) != 0);
652 return fp;
653 }
654 }
655
656 return NULL;
657 } /* file_find() */
658
/*
 * vn_getf() - Solaris-style getf(): translate a file descriptor of the
 * current task into a reference-counted file_t handle.  Handles are
 * cached on vn_file_list, so repeated calls for the same (fd, task)
 * return the same file_t with an extra reference.  Returns NULL on
 * failure; release with vn_releasef()/vn_areleasef().
 */
file_t *
vn_getf(int fd)
{
	struct kstat stat;
	struct file *lfp;
	file_t *fp;
	vnode_t *vp;
	int rc = 0;

	if (fd < 0)
		return (NULL);

	/* Already open just take an extra reference */
	spin_lock(&vn_file_lock);

	fp = file_find(fd, current);
	if (fp) {
		lfp = fget(fd);
		fput(fp->f_file);
		/*
		 * areleasef() can cause us to see a stale reference when
		 * userspace has reused a file descriptor before areleasef()
		 * has run. fput() the stale reference and replace it. We
		 * retain the original reference count such that the concurrent
		 * areleasef() will decrement its reference and terminate.
		 */
		if (lfp != fp->f_file) {
			fp->f_file = lfp;
			fp->f_vnode->v_file = lfp;
		}
		atomic_inc(&fp->f_ref);
		spin_unlock(&vn_file_lock);
		return (fp);
	}

	spin_unlock(&vn_file_lock);

	/* File was not yet opened create the object and setup */
	fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
	if (fp == NULL)
		goto out;

	mutex_enter(&fp->f_lock);

	fp->f_fd = fd;
	fp->f_task = current;
	fp->f_offset = 0;
	atomic_inc(&fp->f_ref);

	lfp = fget(fd);
	if (lfp == NULL)
		goto out_mutex;

	vp = vn_alloc(KM_SLEEP);
	if (vp == NULL)
		goto out_fget;

	/* Stat the file so the wrapping vnode records its type */
#if defined(HAVE_4ARGS_VFS_GETATTR)
	rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
#elif defined(HAVE_2ARGS_VFS_GETATTR)
	rc = vfs_getattr(&lfp->f_path, &stat);
#else
	rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
#endif
	if (rc)
		goto out_vnode;

	mutex_enter(&vp->v_lock);
	vp->v_type = vn_mode_to_vtype(stat.mode);
	vp->v_file = lfp;
	mutex_exit(&vp->v_lock);

	fp->f_vnode = vp;
	fp->f_file = lfp;

	/* Put it on the tracking list */
	spin_lock(&vn_file_lock);
	list_add(&fp->f_list, &vn_file_list);
	spin_unlock(&vn_file_lock);

	mutex_exit(&fp->f_lock);
	return (fp);

out_vnode:
	/* Unwind in reverse order of acquisition */
	vn_free(vp);
out_fget:
	fput(lfp);
out_mutex:
	mutex_exit(&fp->f_lock);
	kmem_cache_free(vn_file_cache, fp);
out:
	return (NULL);
} /* getf() */
EXPORT_SYMBOL(getf);
753
/*
 * Free a file_t handle: drop the kernel struct file reference and
 * release the wrapping vnode and the file_t itself.  The caller must
 * have already removed fp from vn_file_list; both callers invoke this
 * with vn_file_lock held.
 */
static void releasef_locked(file_t *fp)
{
	ASSERT(fp->f_file);
	ASSERT(fp->f_vnode);

	/* Unlinked from list, no refs, safe to free outside mutex */
	fput(fp->f_file);
	vn_free(fp->f_vnode);

	kmem_cache_free(vn_file_cache, fp);
}
765
/*
 * vn_releasef() - Solaris-style releasef(): drop one reference on the
 * file_t previously obtained with vn_getf() for this task's 'fd'.
 */
void
vn_releasef(int fd)
{
	areleasef(fd, P_FINFO(current));
}
EXPORT_SYMBOL(releasef);
772
773 void
774 vn_areleasef(int fd, uf_info_t *fip)
775 {
776 file_t *fp;
777 struct task_struct *task = (struct task_struct *)fip;
778
779 if (fd < 0)
780 return;
781
782 spin_lock(&vn_file_lock);
783 fp = file_find(fd, task);
784 if (fp) {
785 atomic_dec(&fp->f_ref);
786 if (atomic_read(&fp->f_ref) > 0) {
787 spin_unlock(&vn_file_lock);
788 return;
789 }
790
791 list_del(&fp->f_list);
792 releasef_locked(fp);
793 }
794 spin_unlock(&vn_file_lock);
795
796 return;
797 } /* releasef() */
798 EXPORT_SYMBOL(areleasef);
799
800
/*
 * Set the working directory of 'fs' to 'path', taking a reference on
 * the new path and dropping the reference held on the previous one.
 * Two locking variants exist because the fs_struct lock changed from
 * an rwlock to a spinlock across kernel versions.
 */
static void
#ifdef HAVE_SET_FS_PWD_WITH_CONST
vn_set_fs_pwd(struct fs_struct *fs, const struct path *path)
#else
vn_set_fs_pwd(struct fs_struct *fs, struct path *path)
#endif /* HAVE_SET_FS_PWD_WITH_CONST */
{
	struct path old_pwd;

#ifdef HAVE_FS_STRUCT_SPINLOCK
	spin_lock(&fs->lock);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get(path);
	spin_unlock(&fs->lock);
#else
	write_lock(&fs->lock);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get(path);
	write_unlock(&fs->lock);
#endif /* HAVE_FS_STRUCT_SPINLOCK */

	/* Release the previous pwd outside the fs lock */
	if (old_pwd.dentry)
		path_put(&old_pwd);
}
827
/*
 * vn_set_pwd() - Change the current task's working directory to
 * 'filename' (a kernel-space path).  Returns 0 on success or a
 * positive errno on failure.
 */
int
vn_set_pwd(const char *filename)
{
	struct path path;
	mm_segment_t saved_fs;
	int rc;

	/*
	 * user_path_dir() and __user_walk() both expect 'filename' to be
	 * a user space address so we must briefly increase the data segment
	 * size to ensure strncpy_from_user() does not fail with -EFAULT.
	 */
	saved_fs = get_fs();
	set_fs(get_ds());

	rc = user_path_dir(filename, &path);
	if (rc)
		goto out;

	/* The target directory must be searchable by the caller */
	rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
	if (rc)
		goto dput_and_out;

	vn_set_fs_pwd(current->fs, &path);

dput_and_out:
	path_put(&path);
out:
	set_fs(saved_fs);

	return (-rc);
} /* vn_set_pwd() */
EXPORT_SYMBOL(vn_set_pwd);
861
862 static int
863 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
864 {
865 struct vnode *vp = buf;
866
867 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
868
869 return (0);
870 } /* vn_cache_constructor() */
871
872 static void
873 vn_cache_destructor(void *buf, void *cdrarg)
874 {
875 struct vnode *vp = buf;
876
877 mutex_destroy(&vp->v_lock);
878 } /* vn_cache_destructor() */
879
880 static int
881 vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
882 {
883 file_t *fp = buf;
884
885 atomic_set(&fp->f_ref, 0);
886 mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
887 INIT_LIST_HEAD(&fp->f_list);
888
889 return (0);
890 } /* file_cache_constructor() */
891
892 static void
893 vn_file_cache_destructor(void *buf, void *cdrarg)
894 {
895 file_t *fp = buf;
896
897 mutex_destroy(&fp->f_lock);
898 } /* vn_file_cache_destructor() */
899
/*
 * spl_vn_init() - Module init: create the vnode and file handle slab
 * caches.  Always returns 0.  NOTE(review): the kmem_cache_create()
 * results are not checked for NULL here -- confirm that creation
 * cannot fail with these parameters.
 */
int
spl_vn_init(void)
{
	vn_cache = kmem_cache_create("spl_vn_cache",
	    sizeof(struct vnode), 64,
	    vn_cache_constructor,
	    vn_cache_destructor,
	    NULL, NULL, NULL, 0);

	vn_file_cache = kmem_cache_create("spl_vn_file_cache",
	    sizeof(file_t), 64,
	    vn_file_cache_constructor,
	    vn_file_cache_destructor,
	    NULL, NULL, NULL, 0);
	return (0);
} /* vn_init() */
916
917 void
918 spl_vn_fini(void)
919 {
920 file_t *fp, *next_fp;
921 int leaked = 0;
922
923 spin_lock(&vn_file_lock);
924
925 list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
926 list_del(&fp->f_list);
927 releasef_locked(fp);
928 leaked++;
929 }
930
931 spin_unlock(&vn_file_lock);
932
933 if (leaked > 0)
934 printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
935
936 kmem_cache_destroy(vn_file_cache);
937 kmem_cache_destroy(vn_cache);
938
939 return;
940 } /* vn_fini() */