git.proxmox.com: mirror_spl-debian.git / module/spl/spl-vnode.c (upstream version 0.7.11)
1 /*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6 * UCRL-CODE-235197
7 *
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://zfsonlinux.org/>.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23 *
24 * Solaris Porting Layer (SPL) Vnode Implementation.
25 \*****************************************************************************/
26
27 #include <sys/cred.h>
28 #include <sys/vnode.h>
29 #include <sys/kmem_cache.h>
30 #include <linux/falloc.h>
31 #include <linux/file_compat.h>
32
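/*
 * The SPL's rootdir vnode is only a poison-pattern sentinel.  It must never
 * be dereferenced; callers such as vn_openat() below merely compare their
 * starting vnode against it.
 */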
33 vnode_t *rootdir = (vnode_t *)0xabcd1234;
34 EXPORT_SYMBOL(rootdir);
35
36 static spl_kmem_cache_t *vn_cache;
37 static spl_kmem_cache_t *vn_file_cache;
38
39 static DEFINE_SPINLOCK(vn_file_lock);
40 static LIST_HEAD(vn_file_list);
41
42 vtype_t
43 vn_mode_to_vtype(mode_t mode)
44 {
45 if (S_ISREG(mode))
46 return (VREG);
47
48 if (S_ISDIR(mode))
49 return (VDIR);
50
51 if (S_ISCHR(mode))
52 return (VCHR);
53
54 if (S_ISBLK(mode))
55 return (VBLK);
56
57 if (S_ISFIFO(mode))
58 return (VFIFO);
59
60 if (S_ISLNK(mode))
61 return (VLNK);
62
63 if (S_ISSOCK(mode))
64 return (VSOCK);
65
66 return (VNON);
67 } /* vn_mode_to_vtype() */
68 EXPORT_SYMBOL(vn_mode_to_vtype);
69
70 mode_t
71 vn_vtype_to_mode(vtype_t vtype)
72 {
73 if (vtype == VREG)
74 return (S_IFREG);
75
76 if (vtype == VDIR)
77 return (S_IFDIR);
78
79 if (vtype == VCHR)
80 return (S_IFCHR);
81
82 if (vtype == VBLK)
83 return (S_IFBLK);
84
85 if (vtype == VFIFO)
86 return (S_IFIFO);
87
88 if (vtype == VLNK)
89 return (S_IFLNK);
90
91 if (vtype == VSOCK)
92 return (S_IFSOCK);
93
94 return (VNON);
95 } /* vn_vtype_to_mode() */
96 EXPORT_SYMBOL(vn_vtype_to_mode);
97
98 vnode_t *
99 vn_alloc(int flag)
100 {
101 vnode_t *vp;
102
103 vp = kmem_cache_alloc(vn_cache, flag);
104 if (vp != NULL) {
105 vp->v_file = NULL;
106 vp->v_type = 0;
107 }
108
109 return (vp);
110 } /* vn_alloc() */
111 EXPORT_SYMBOL(vn_alloc);
112
113 void
114 vn_free(vnode_t *vp)
115 {
116 kmem_cache_free(vn_cache, vp);
117 } /* vn_free() */
118 EXPORT_SYMBOL(vn_free);
119
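/*
 * vn_open() opens 'path' with Solaris-style FREAD/FWRITE/FCREAT flags, wraps
 * the resulting struct file in a vnode_t, records the file type via
 * vfs_getattr(), and clears __GFP_IO/__GFP_FS on the file's mapping,
 * presumably so memory reclaim against that mapping cannot re-enter the
 * filesystem.  A positive errno is returned on failure.
 *
 * Illustrative (not verbatim) caller usage; the path, buffer and mode are
 * only examples:
 *
 *	vnode_t *vp;
 *	char buf[64];
 *	ssize_t resid;
 *	int rc;
 *
 *	rc = vn_open("/etc/hostid", UIO_SYSSPACE, FREAD, 0644, &vp, 0, NULL);
 *	if (rc == 0) {
 *		rc = vn_rdwr(UIO_READ, vp, buf, sizeof (buf), 0,
 *		    UIO_SYSSPACE, 0, RLIM64_INFINITY, NULL, &resid);
 *		(void) vn_close(vp, FREAD, 0, 0, NULL, NULL);
 *	}
 */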
120 int
121 vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp,
122 int x1, void *x2)
123 {
124 struct file *fp;
125 struct kstat stat;
126 int rc, saved_umask = 0;
127 gfp_t saved_gfp;
128 vnode_t *vp;
129
130 ASSERT(flags & (FWRITE | FREAD));
131 ASSERT(seg == UIO_SYSSPACE);
132 ASSERT(vpp);
133 *vpp = NULL;
134
135 if (!(flags & FCREAT) && (flags & FWRITE))
136 flags |= FEXCL;
137
138 /*
139 * Note for filp_open() the two low bits must be remapped to mean:
140 * 01 - read-only -> 00 read-only
141 * 10 - write-only -> 01 write-only
142 * 11 - read-write -> 10 read-write
143 */
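/*
 * Illustrative arithmetic (FREAD == 0x01 and FWRITE == 0x02 per the SPL
 * headers): FREAD|FWRITE == 3 becomes O_RDWR == 2, FWRITE == 2 becomes
 * O_WRONLY == 1, and FREAD == 1 becomes O_RDONLY == 0, so a simple
 * decrement performs the remapping.  Higher flag bits are unaffected
 * because at least one of the two low bits is always set.
 */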
144 flags--;
145
146 if (flags & FCREAT)
147 saved_umask = xchg(&current->fs->umask, 0);
148
149 fp = filp_open(path, flags, mode);
150
151 if (flags & FCREAT)
152 (void) xchg(&current->fs->umask, saved_umask);
153
154 if (IS_ERR(fp))
155 return (-PTR_ERR(fp));
156
157 #if defined(HAVE_4ARGS_VFS_GETATTR)
158 rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
159 #elif defined(HAVE_2ARGS_VFS_GETATTR)
160 rc = vfs_getattr(&fp->f_path, &stat);
161 #else
162 rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
163 #endif
164 if (rc) {
165 filp_close(fp, 0);
166 return (-rc);
167 }
168
169 vp = vn_alloc(KM_SLEEP);
170 if (!vp) {
171 filp_close(fp, 0);
172 return (ENOMEM);
173 }
174
175 saved_gfp = mapping_gfp_mask(fp->f_mapping);
176 mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS));
177
178 mutex_enter(&vp->v_lock);
179 vp->v_type = vn_mode_to_vtype(stat.mode);
180 vp->v_file = fp;
181 vp->v_gfp_mask = saved_gfp;
182 *vpp = vp;
183 mutex_exit(&vp->v_lock);
184
185 return (0);
186 } /* vn_open() */
187 EXPORT_SYMBOL(vn_open);
188
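/*
 * vn_openat() only supports lookups relative to the fake rootdir sentinel;
 * the relative path is simply rewritten as an absolute one and handed to
 * vn_open().
 */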
189 int
190 vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
191 vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd)
192 {
193 char *realpath;
194 int len, rc;
195
196 ASSERT(vp == rootdir);
197
198 len = strlen(path) + 2;
199 realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP));
200 if (!realpath)
201 return (ENOMEM);
202
203 (void) snprintf(realpath, len, "/%s", path);
204 rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2);
205 kfree(realpath);
206
207 return (rc);
208 } /* vn_openat() */
209 EXPORT_SYMBOL(vn_openat);
210
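/*
 * vn_rdwr() performs a synchronous read or write of 'len' bytes at offset
 * 'off' (or at the current file position when FAPPEND is set).  When
 * 'residp' is supplied the residual count (len minus bytes transferred) is
 * returned through it; otherwise a short transfer is reported as EIO.
 */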
211 int
212 vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
213 uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
214 {
215 struct file *fp = vp->v_file;
216 loff_t offset = off;
217 int rc;
218
219 ASSERT(uio == UIO_WRITE || uio == UIO_READ);
220 ASSERT(seg == UIO_SYSSPACE);
221 ASSERT((ioflag & ~FAPPEND) == 0);
222
223 if (ioflag & FAPPEND)
224 offset = fp->f_pos;
225
226 if (uio & UIO_WRITE)
227 rc = spl_kernel_write(fp, addr, len, &offset);
228 else
229 rc = spl_kernel_read(fp, addr, len, &offset);
230
231 fp->f_pos = offset;
232
233 if (rc < 0)
234 return (-rc);
235
236 if (residp) {
237 *residp = len - rc;
238 } else {
239 if (rc != len)
240 return (EIO);
241 }
242
243 return (0);
244 } /* vn_rdwr() */
245 EXPORT_SYMBOL(vn_rdwr);
246
247 int
248 vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4)
249 {
250 int rc;
251
252 ASSERT(vp);
253 ASSERT(vp->v_file);
254
255 mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask);
256 rc = filp_close(vp->v_file, 0);
257 vn_free(vp);
258
259 return (-rc);
260 } /* vn_close() */
261 EXPORT_SYMBOL(vn_close);
262
263 /*
264 * vn_seek() does not actually seek; it only performs bounds checking on the
265 * proposed seek.  We perform minimal checking and allow vn_rdwr() to catch
266 * anything more serious.
267 */
268 int
269 vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct)
270 {
271 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
272 }
273 EXPORT_SYMBOL(vn_seek);
274
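/*
 * vn_getattr() translates the Linux kstat for the open file into a
 * Solaris-style vattr_t; note that va_fsid is always reported as 0.
 */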
275 int
276 vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4)
277 {
278 struct file *fp;
279 struct kstat stat;
280 int rc;
281
282 ASSERT(vp);
283 ASSERT(vp->v_file);
284 ASSERT(vap);
285
286 fp = vp->v_file;
287
288 #if defined(HAVE_4ARGS_VFS_GETATTR)
289 rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS,
290 AT_STATX_SYNC_AS_STAT);
291 #elif defined(HAVE_2ARGS_VFS_GETATTR)
292 rc = vfs_getattr(&fp->f_path, &stat);
293 #else
294 rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat);
295 #endif
296 if (rc)
297 return (-rc);
298
299 vap->va_type = vn_mode_to_vtype(stat.mode);
300 vap->va_mode = stat.mode;
301 vap->va_uid = KUID_TO_SUID(stat.uid);
302 vap->va_gid = KGID_TO_SGID(stat.gid);
303 vap->va_fsid = 0;
304 vap->va_nodeid = stat.ino;
305 vap->va_nlink = stat.nlink;
306 vap->va_size = stat.size;
307 vap->va_blksize = stat.blksize;
308 vap->va_atime = stat.atime;
309 vap->va_mtime = stat.mtime;
310 vap->va_ctime = stat.ctime;
311 vap->va_rdev = stat.rdev;
312 vap->va_nblocks = stat.blocks;
313
314 return (0);
315 }
316 EXPORT_SYMBOL(vn_getattr);
317
318 int
319 vn_fsync(vnode_t *vp, int flags, void *x3, void *x4)
320 {
321 int datasync = 0;
322 int error;
323 int fstrans;
324
325 ASSERT(vp);
326 ASSERT(vp->v_file);
327
328 if (flags & FDSYNC)
329 datasync = 1;
330
331 /*
332 * May enter XFS, which generates a warning when PF_FSTRANS is set.  To avoid
333 * this the flag is cleared over the fsync call and then restored.
334 */
335 fstrans = __spl_pf_fstrans_check();
336 if (fstrans)
337 current->flags &= ~(__SPL_PF_FSTRANS);
338
339 error = -spl_filp_fsync(vp->v_file, datasync);
340 if (fstrans)
341 current->flags |= __SPL_PF_FSTRANS;
342
343 return (error);
344 } /* vn_fsync() */
345 EXPORT_SYMBOL(vn_fsync);
346
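/*
 * vn_space() implements F_FREESP: deallocate the byte range described by
 * 'bfp'.  A hole-punching fallocate() is preferred; on kernels without it
 * the inode's truncate_range() operation is used as a fallback when
 * available.
 */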
347 int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag,
348 offset_t offset, void *x6, void *x7)
349 {
350 int error = EOPNOTSUPP;
351 #ifdef FALLOC_FL_PUNCH_HOLE
352 int fstrans;
353 #endif
354
355 if (cmd != F_FREESP || bfp->l_whence != 0)
356 return (EOPNOTSUPP);
357
358 ASSERT(vp);
359 ASSERT(vp->v_file);
360 ASSERT(bfp->l_start >= 0 && bfp->l_len > 0);
361
362 #ifdef FALLOC_FL_PUNCH_HOLE
363 /*
364 * May enter XFS, which generates a warning when PF_FSTRANS is set.  To avoid
365 * this the flag is cleared over the fallocate() call and then restored.
366 */
367 fstrans = __spl_pf_fstrans_check();
368 if (fstrans)
369 current->flags &= ~(__SPL_PF_FSTRANS);
370
371 /*
372 * When supported by the underlying file system, preferentially use the
373 * fallocate() callback to punch a hole in (deallocate) the requested range.
374 */
375 error = -spl_filp_fallocate(vp->v_file,
376 FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
377 bfp->l_start, bfp->l_len);
378
379 if (fstrans)
380 current->flags |= __SPL_PF_FSTRANS;
381
382 if (error == 0)
383 return (0);
384 #endif
385
386 #ifdef HAVE_INODE_TRUNCATE_RANGE
387 if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode &&
388 vp->v_file->f_dentry->d_inode->i_op &&
389 vp->v_file->f_dentry->d_inode->i_op->truncate_range) {
390 off_t end = bfp->l_start + bfp->l_len;
391 /*
392 * Judging from the code in shmem_truncate_range(),
393 * it seems the kernel expects the end offset to be
394 * inclusive and aligned to the end of a page.
395 */
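/*
 * Worked example (illustrative, assuming 4 KiB pages): l_start = 0 and
 * l_len = 5000 give end = 5000, which is rounded down to 4096 and then
 * decremented to the inclusive end offset 4095.
 */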
396 if (end % PAGE_SIZE != 0) {
397 end &= ~(off_t)(PAGE_SIZE - 1);
398 if (end <= bfp->l_start)
399 return (0);
400 }
401 --end;
402
403 vp->v_file->f_dentry->d_inode->i_op->truncate_range(
404 vp->v_file->f_dentry->d_inode, bfp->l_start, end);
405
406 return (0);
407 }
408 #endif
409
410 return (error);
411 }
412 EXPORT_SYMBOL(vn_space);
413
414 /* Function must be called while holding the vn_file_lock */
415 static file_t *
416 file_find(int fd, struct task_struct *task)
417 {
418 file_t *fp;
419
420 list_for_each_entry(fp, &vn_file_list, f_list) {
421 if (fd == fp->f_fd && fp->f_task == task) {
422 ASSERT(atomic_read(&fp->f_ref) != 0);
423 return (fp);
424 }
425 }
426
427 return (NULL);
428 } /* file_find() */
429
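/*
 * vn_getf() pins the struct file behind a caller-supplied descriptor and
 * wraps it in a file_t.  Wrappers live on the global vn_file_list keyed by
 * (fd, task), so repeated calls from the same task share one entry and only
 * bump f_ref.  Illustrative usage (the descriptor must belong to the
 * calling process):
 *
 *	file_t *fp = vn_getf(fd);
 *	if (fp != NULL) {
 *		... use fp->f_vnode or fp->f_file ...
 *		vn_releasef(fd);
 *	}
 */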
430 file_t *
431 vn_getf(int fd)
432 {
433 struct kstat stat;
434 struct file *lfp;
435 file_t *fp;
436 vnode_t *vp;
437 int rc = 0;
438
439 if (fd < 0)
440 return (NULL);
441
442 /* Already open, just take an extra reference */
443 spin_lock(&vn_file_lock);
444
445 fp = file_find(fd, current);
446 if (fp) {
447 lfp = fget(fd);
448 fput(fp->f_file);
449 /*
450 * areleasef() can cause us to see a stale reference when
451 * userspace has reused a file descriptor before areleasef()
452 * has run. fput() the stale reference and replace it. We
453 * retain the original reference count such that the concurrent
454 * areleasef() will decrement its reference and terminate.
455 */
456 if (lfp != fp->f_file) {
457 fp->f_file = lfp;
458 fp->f_vnode->v_file = lfp;
459 }
460 atomic_inc(&fp->f_ref);
461 spin_unlock(&vn_file_lock);
462 return (fp);
463 }
464
465 spin_unlock(&vn_file_lock);
466
467 /* File was not yet opened, create the object and set it up */
468 fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
469 if (fp == NULL)
470 goto out;
471
472 mutex_enter(&fp->f_lock);
473
474 fp->f_fd = fd;
475 fp->f_task = current;
476 fp->f_offset = 0;
477 atomic_inc(&fp->f_ref);
478
479 lfp = fget(fd);
480 if (lfp == NULL)
481 goto out_mutex;
482
483 vp = vn_alloc(KM_SLEEP);
484 if (vp == NULL)
485 goto out_fget;
486
487 #if defined(HAVE_4ARGS_VFS_GETATTR)
488 rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE,
489 AT_STATX_SYNC_AS_STAT);
490 #elif defined(HAVE_2ARGS_VFS_GETATTR)
491 rc = vfs_getattr(&lfp->f_path, &stat);
492 #else
493 rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat);
494 #endif
495 if (rc)
496 goto out_vnode;
497
498 mutex_enter(&vp->v_lock);
499 vp->v_type = vn_mode_to_vtype(stat.mode);
500 vp->v_file = lfp;
501 mutex_exit(&vp->v_lock);
502
503 fp->f_vnode = vp;
504 fp->f_file = lfp;
505
506 /* Put it on the tracking list */
507 spin_lock(&vn_file_lock);
508 list_add(&fp->f_list, &vn_file_list);
509 spin_unlock(&vn_file_lock);
510
511 mutex_exit(&fp->f_lock);
512 return (fp);
513
514 out_vnode:
515 vn_free(vp);
516 out_fget:
517 fput(lfp);
518 out_mutex:
519 mutex_exit(&fp->f_lock);
520 kmem_cache_free(vn_file_cache, fp);
521 out:
522 return (NULL);
523 } /* getf() */
524 EXPORT_SYMBOL(getf);
525
526 static void releasef_locked(file_t *fp)
527 {
528 ASSERT(fp->f_file);
529 ASSERT(fp->f_vnode);
530
531 /* Unlinked from list, no refs, safe to free outside mutex */
532 fput(fp->f_file);
533 vn_free(fp->f_vnode);
534
535 kmem_cache_free(vn_file_cache, fp);
536 }
537
538 void
539 vn_releasef(int fd)
540 {
541 areleasef(fd, P_FINFO(current));
542 }
543 EXPORT_SYMBOL(releasef);
544
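/*
 * vn_areleasef() drops one vn_getf() reference for 'fd'; when the count
 * reaches zero the wrapper is unlinked and freed.  In the SPL the uf_info_t
 * argument is effectively the owning task_struct (see the P_FINFO() usage
 * above), which is why it is simply cast back to a task pointer here.
 */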
545 void
546 vn_areleasef(int fd, uf_info_t *fip)
547 {
548 file_t *fp;
549 struct task_struct *task = (struct task_struct *)fip;
550
551 if (fd < 0)
552 return;
553
554 spin_lock(&vn_file_lock);
555 fp = file_find(fd, task);
556 if (fp) {
557 atomic_dec(&fp->f_ref);
558 if (atomic_read(&fp->f_ref) > 0) {
559 spin_unlock(&vn_file_lock);
560 return;
561 }
562
563 list_del(&fp->f_list);
564 releasef_locked(fp);
565 }
566 spin_unlock(&vn_file_lock);
567 } /* releasef() */
568 EXPORT_SYMBOL(areleasef);
569
570
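/*
 * Private copy of the kernel's set_fs_pwd(): install the new working
 * directory under the fs_struct lock (a spinlock or rwlock depending on
 * the kernel version) and drop the reference held on the previous pwd.
 */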
571 static void
572 #ifdef HAVE_SET_FS_PWD_WITH_CONST
573 vn_set_fs_pwd(struct fs_struct *fs, const struct path *path)
574 #else
575 vn_set_fs_pwd(struct fs_struct *fs, struct path *path)
576 #endif /* HAVE_SET_FS_PWD_WITH_CONST */
577 {
578 struct path old_pwd;
579
580 #ifdef HAVE_FS_STRUCT_SPINLOCK
581 spin_lock(&fs->lock);
582 old_pwd = fs->pwd;
583 fs->pwd = *path;
584 path_get(path);
585 spin_unlock(&fs->lock);
586 #else
587 write_lock(&fs->lock);
588 old_pwd = fs->pwd;
589 fs->pwd = *path;
590 path_get(path);
591 write_unlock(&fs->lock);
592 #endif /* HAVE_FS_STRUCT_SPINLOCK */
593
594 if (old_pwd.dentry)
595 path_put(&old_pwd);
596 }
597
598 int
599 vn_set_pwd(const char *filename)
600 {
601 struct path path;
602 mm_segment_t saved_fs;
603 int rc;
604
605 /*
606 * user_path_dir() and __user_walk() both expect 'filename' to be a user
607 * space address, so the usercopy address limit is briefly raised with
608 * set_fs() to keep strncpy_from_user() from failing with -EFAULT.
609 */
610 saved_fs = get_fs();
611 set_fs(get_ds());
612
613 rc = user_path_dir(filename, &path);
614 if (rc)
615 goto out;
616
617 rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
618 if (rc)
619 goto dput_and_out;
620
621 vn_set_fs_pwd(current->fs, &path);
622
623 dput_and_out:
624 path_put(&path);
625 out:
626 set_fs(saved_fs);
627
628 return (-rc);
629 } /* vn_set_pwd() */
630 EXPORT_SYMBOL(vn_set_pwd);
631
632 static int
633 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
634 {
635 struct vnode *vp = buf;
636
637 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
638
639 return (0);
640 } /* vn_cache_constructor() */
641
642 static void
643 vn_cache_destructor(void *buf, void *cdrarg)
644 {
645 struct vnode *vp = buf;
646
647 mutex_destroy(&vp->v_lock);
648 } /* vn_cache_destructor() */
649
650 static int
651 vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags)
652 {
653 file_t *fp = buf;
654
655 atomic_set(&fp->f_ref, 0);
656 mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL);
657 INIT_LIST_HEAD(&fp->f_list);
658
659 return (0);
660 } /* vn_file_cache_constructor() */
661
662 static void
663 vn_file_cache_destructor(void *buf, void *cdrarg)
664 {
665 file_t *fp = buf;
666
667 mutex_destroy(&fp->f_lock);
668 } /* vn_file_cache_destructor() */
669
670 int
671 spl_vn_init(void)
672 {
673 spin_lock_init(&vn_file_lock);
674
675 vn_cache = kmem_cache_create("spl_vn_cache",
676 sizeof (struct vnode), 64, vn_cache_constructor,
677 vn_cache_destructor, NULL, NULL, NULL, 0);
678
679 vn_file_cache = kmem_cache_create("spl_vn_file_cache",
680 sizeof (file_t), 64, vn_file_cache_constructor,
681 vn_file_cache_destructor, NULL, NULL, NULL, 0);
682
683 return (0);
684 } /* spl_vn_init() */
685
686 void
687 spl_vn_fini(void)
688 {
689 file_t *fp, *next_fp;
690 int leaked = 0;
691
692 spin_lock(&vn_file_lock);
693
694 list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) {
695 list_del(&fp->f_list);
696 releasef_locked(fp);
697 leaked++;
698 }
699
700 spin_unlock(&vn_file_lock);
701
702 if (leaked > 0)
703 printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked);
704
705 kmem_cache_destroy(vn_file_cache);
706 kmem_cache_destroy(vn_cache);
707 } /* spl_vn_fini() */