/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif
#include <sys/file.h>
#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_project.h>

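/*
 * Most of the zpl_* entry points below follow a common pattern: take a
 * reference on the current credentials with crhold(), call
 * spl_fstrans_mark() so that direct memory reclaim cannot re-enter the
 * filesystem while the operation is in progress, invoke the
 * corresponding zfs_* operation, and negate its positive errno return
 * value for the VFS.
 */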
static int
zpl_open(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = generic_file_open(ip, filp);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_release(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	if (ITOZ(ip)->z_atime_dirty)
		zfs_mark_inode_dirty(ip);

	crhold(cr);
	error = -zfs_close(ip, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_iterate(struct file *filp, zpl_dir_context_t *ctx)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_readdir(file_inode(filp), ctx, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

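/*
 * Kernels which predate the fops->iterate() and fops->iterate_shared()
 * hooks use the older fops->readdir() interface. Wrap zpl_iterate() so
 * a single readdir implementation serves both.
 */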
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	zpl_dir_context_t ctx =
	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int error;

	error = zpl_iterate(filp, &ctx);
	filp->f_pos = ctx.pos;

	return (error);
}
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'struct file *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(dentry->d_inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	struct file *filp = kiocb->ki_filp;
	return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif

#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, datasync));
}
#endif

#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller. ZFS does not require the lock to
 * be held, so we don't acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#endif

#else
#error "Unsupported fops->fsync() implementation"
#endif

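/*
 * Common read path: describe the caller's buffers with a uio_t and hand
 * it to zfs_read(). On success the byte count actually transferred is
 * recovered from the residual count, the file position is advanced, and
 * the I/O is charged to the current task.
 */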
static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t read;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_read(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	read = count - uio.uio_resid;
	*ppos += read;
	task_io_account_read(read);

	return (read);
}

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t read;

	crhold(cr);
	read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	file_accessed(filp);
	return (read);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;
	if (to->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (to->type & ITER_BVEC)
		seg = UIO_BVEC;
	ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
	    iov_iter_count(to), seg, to->iov_offset);
	if (ret > 0)
		iov_iter_advance(to, ret);
	return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	ssize_t ret;
	size_t count;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
	if (ret)
		return (ret);

	return (zpl_iter_read_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t wrote;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	if (flags & O_APPEND)
		*ppos = i_size_read(ip);

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_write(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	wrote = count - uio.uio_resid;
	*ppos += wrote;
	task_io_account_write(wrote);

	return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	return (wrote);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
	size_t count;
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;

#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);

	count = iov_iter_count(from);
	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
	if (ret)
		return (ret);
#else
	/*
	 * XXX - ideally this check should be in the same lock region with
	 * write operations, so that there's no TOCTTOU race when doing
	 * append and someone else grows the file.
	 */
	ret = generic_write_checks(kiocb, from);
	if (ret <= 0)
		return (ret);
	count = ret;
#endif

	if (from->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (from->type & ITER_BVEC)
		seg = UIO_BVEC;

	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
	    count, seg, from->iov_offset);
	if (ret > 0)
		iov_iter_advance(from, ret);

	return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);
	size_t count;
	ssize_t ret;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
	if (ret)
		return (ret);

	ret = generic_write_checks(file, &pos, &count, isblk);
	if (ret)
		return (ret);

	return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

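/*
 * On kernels which define SEEK_HOLE and SEEK_DATA, hole and data queries
 * are serviced by zfs_holey(); every other whence value falls through to
 * generic_file_llseek(). Illustrative userspace usage (not part of this
 * file):
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);		// first data region
 *	off_t hole = lseek(fd, data, SEEK_HOLE);	// hole following it
 */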
static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
	fstrans_cookie_t cookie;

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		struct inode *ip = filp->f_mapping->host;
		loff_t maxbytes = ip->i_sb->s_maxbytes;
		loff_t error;

		spl_inode_lock_shared(ip);
		cookie = spl_fstrans_mark();
		error = -zfs_holey(ip, whence, &offset);
		spl_fstrans_unmark(cookie);
		if (error == 0)
			error = lseek_execute(filp, ip, offset, maxbytes);
		spl_inode_unlock_shared(ip);

		return (error);
	}
#endif /* SEEK_HOLE && SEEK_DATA */

	return (generic_file_llseek(filp, offset, whence));
}

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache. To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of an mmap'ed
 * file always checks the page cache first, correct data will still
 * be returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files. It
 * also adds additional complexity to the code keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly on to the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
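/*
 * Illustrative example (userspace, not part of this file) of the
 * coherence described above: a store through a shared mapping dirties
 * a page cache page, and since read(2) consults the page cache first
 * the new data is visible even before writeback:
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *	p[0] = 'x';		// dirties a page cache page
 *	pread(fd, &c, 1, 0);	// observes 'x'
 */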
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct inode *ip = filp->f_mapping->host;
	znode_t *zp = ITOZ(ip);
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
	spl_fstrans_unmark(cookie);
	if (error)
		return (error);

	error = generic_file_mmap(filp, vma);
	if (error)
		return (error);

	mutex_enter(&zp->z_lock);
	zp->z_is_mapped = B_TRUE;
	mutex_exit(&zp->z_lock);

	return (error);
}

/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page. This works but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	struct inode *ip;
	struct page *pl[1];
	int error = 0;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ip = pp->mapping->host;
	pl[0] = pp;

	cookie = spl_fstrans_mark();
	error = -zfs_getpage(ip, pl, 1);
	spl_fstrans_unmark(cookie);

	if (error) {
		SetPageError(pp);
		ClearPageUptodate(pp);
	} else {
		ClearPageError(pp);
		SetPageUptodate(pp);
		flush_dcache_page(pp);
	}

	unlock_page(pp);
	return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
	return (read_cache_pages(mapping, pages,
	    (filler_t *)zpl_readpage, filp));
}

int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));

	cookie = spl_fstrans_mark();
	(void) zfs_putpage(mapping->host, pp, wbc);
	spl_fstrans_unmark(cookie);

	return (0);
}

static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	znode_t *zp = ITOZ(mapping->host);
	zfsvfs_t *zfsvfs = ITOZSB(mapping->host);
	enum writeback_sync_modes sync_mode;
	int result;

	ZFS_ENTER(zfsvfs);
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;
	ZFS_EXIT(zfsvfs);
	sync_mode = wbc->sync_mode;

	/*
	 * We don't want to run write_cache_pages() in SYNC mode here, because
	 * that would make putpage() wait for a single page to be committed to
	 * disk every single time, resulting in atrocious performance. Instead
	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
	 * and then we commit it all in one go.
	 */
	wbc->sync_mode = WB_SYNC_NONE;
	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	if (sync_mode != wbc->sync_mode) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);

		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
		 * pages (see the implementation of write_cache_pages() for
		 * details). That being said, this is a no-op in most cases.
		 */
		wbc->sync_mode = sync_mode;
		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	}
	return (result);
}

/*
 * Write out dirty pages to the ARC. This function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;

	return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
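/*
 * Illustrative userspace usage (not part of this file); punching a hole
 * while preserving the file size is the one request these hooks can
 * satisfy:
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *	    offset, length);
 */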
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	cred_t *cr = CRED();
	flock64_t bf;
	loff_t olen;
	fstrans_cookie_t cookie;

	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return (error);

	if (offset < 0 || len <= 0)
		return (-EINVAL);

	spl_inode_lock(ip);
	olen = i_size_read(ip);

	if (offset > olen) {
		spl_inode_unlock(ip);
		return (0);
	}
	if (offset + len > olen)
		len = olen - offset;
	bf.l_type = F_WRLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	bf.l_pid = 0;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
	spl_fstrans_unmark(cookie);
	spl_inode_unlock(ip);

	crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

	ASSERT3S(error, <=, 0);
	return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
	return (zpl_fallocate_common(file_inode(filp),
	    mode, offset, len));
}
#endif /* HAVE_FILE_FALLOCATE */

#define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
#define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)

static uint32_t
__zpl_ioctl_getflags(struct inode *ip)
{
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	uint32_t ioctl_flags = 0;

	if (zfs_flags & ZFS_IMMUTABLE)
		ioctl_flags |= FS_IMMUTABLE_FL;

	if (zfs_flags & ZFS_APPENDONLY)
		ioctl_flags |= FS_APPEND_FL;

	if (zfs_flags & ZFS_NODUMP)
		ioctl_flags |= FS_NODUMP_FL;

	if (zfs_flags & ZFS_PROJINHERIT)
		ioctl_flags |= ZFS_PROJINHERIT_FL;

	return (ioctl_flags & ZFS_FL_USER_VISIBLE);
}

/*
 * Map zfs file z_pflags (xvattr_t) to Linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
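/*
 * Illustrative userspace usage (not part of this file); these are the
 * same ioctls issued by lsattr(1) and chattr(1):
 *
 *	uint32_t flags;
 *	ioctl(fd, FS_IOC_GETFLAGS, &flags);
 *	flags |= FS_NODUMP_FL;
 *	ioctl(fd, FS_IOC_SETFLAGS, &flags);
 */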
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
	uint32_t flags;
	int err;

	flags = __zpl_ioctl_getflags(file_inode(filp));
	err = copy_to_user(arg, &flags, sizeof (flags));

	return (err);
}

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed. Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race. Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */

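/*
 * For example, fchange(ioctl_flags, zfs_flags, FS_APPEND_FL,
 * ZFS_APPENDONLY) is true only when exactly one of the two flag words
 * has its respective append-only bit set, i.e. when the caller is
 * actually asking us to toggle the flag.
 */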
#define	fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))

static int
__zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
{
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	xoptattr_t *xoap;

	if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL |
	    ZFS_PROJINHERIT_FL))
		return (-EOPNOTSUPP);

	if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE)
		return (-EACCES);

	if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return (-EACCES);

	if (!zpl_inode_owner_or_capable(ip))
		return (-EACCES);

	xva_init(xva);
	xoap = xva_getxoptattr(xva);

	XVA_SET_REQ(xva, XAT_IMMUTABLE);
	if (ioctl_flags & FS_IMMUTABLE_FL)
		xoap->xoa_immutable = B_TRUE;

	XVA_SET_REQ(xva, XAT_APPENDONLY);
	if (ioctl_flags & FS_APPEND_FL)
		xoap->xoa_appendonly = B_TRUE;

	XVA_SET_REQ(xva, XAT_NODUMP);
	if (ioctl_flags & FS_NODUMP_FL)
		xoap->xoa_nodump = B_TRUE;

	XVA_SET_REQ(xva, XAT_PROJINHERIT);
	if (ioctl_flags & ZFS_PROJINHERIT_FL)
		xoap->xoa_projinherit = B_TRUE;

	return (0);
}

static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	uint32_t flags;
	cred_t *cr = CRED();
	xvattr_t xva;
	int err;
	fstrans_cookie_t cookie;

	if (copy_from_user(&flags, arg, sizeof (flags)))
		return (-EFAULT);

	err = __zpl_ioctl_setflags(ip, flags, &xva);
	if (err)
		return (err);

	crhold(cr);
	cookie = spl_fstrans_mark();
	err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (err);
}

static int
zpl_ioctl_getxattr(struct file *filp, void __user *arg)
{
	zfsxattr_t fsx = { 0 };
	struct inode *ip = file_inode(filp);
	int err;

	fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
	fsx.fsx_projid = ITOZ(ip)->z_projid;
	err = copy_to_user(arg, &fsx, sizeof (fsx));

	return (err);
}

static int
zpl_ioctl_setxattr(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	zfsxattr_t fsx;
	cred_t *cr = CRED();
	xvattr_t xva;
	xoptattr_t *xoap;
	int err;
	fstrans_cookie_t cookie;

	if (copy_from_user(&fsx, arg, sizeof (fsx)))
		return (-EFAULT);

	if (!zpl_is_valid_projid(fsx.fsx_projid))
		return (-EINVAL);

	err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
	if (err)
		return (err);

	xoap = xva_getxoptattr(&xva);
	XVA_SET_REQ(&xva, XAT_PROJID);
	xoap->xoa_projid = fsx.fsx_projid;

	crhold(cr);
	cookie = spl_fstrans_mark();
	err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (err);
}

static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return (zpl_ioctl_getflags(filp, (void *)arg));
	case FS_IOC_SETFLAGS:
		return (zpl_ioctl_setflags(filp, (void *)arg));
	case ZFS_IOC_FSGETXATTR:
		return (zpl_ioctl_getxattr(filp, (void *)arg));
	case ZFS_IOC_FSSETXATTR:
		return (zpl_ioctl_setxattr(filp, (void *)arg));
	default:
		return (-ENOTTY);
	}
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC32_GETFLAGS:
		cmd = FS_IOC_GETFLAGS;
		break;
	case FS_IOC32_SETFLAGS:
		cmd = FS_IOC_SETFLAGS;
		break;
	default:
		return (-ENOTTY);
	}
	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */

const struct address_space_operations zpl_address_space_operations = {
	.readpages	= zpl_readpages,
	.readpage	= zpl_readpage,
	.writepage	= zpl_writepage,
	.writepages	= zpl_writepages,
};

const struct file_operations zpl_file_operations = {
	.open		= zpl_open,
	.release	= zpl_release,
	.llseek		= zpl_llseek,
#ifdef HAVE_VFS_RW_ITERATE
#ifdef HAVE_NEW_SYNC_READ
	.read		= new_sync_read,
	.write		= new_sync_write,
#endif
	.read_iter	= zpl_iter_read,
	.write_iter	= zpl_iter_write,
#else
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= zpl_aio_read,
	.aio_write	= zpl_aio_write,
#endif
	.mmap		= zpl_mmap,
	.fsync		= zpl_fsync,
#ifdef HAVE_FILE_AIO_FSYNC
	.aio_fsync	= zpl_aio_fsync,
#endif
#ifdef HAVE_FILE_FALLOCATE
	.fallocate	= zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
#if defined(HAVE_VFS_ITERATE_SHARED)
	.iterate_shared	= zpl_iterate,
#elif defined(HAVE_VFS_ITERATE)
	.iterate	= zpl_iterate,
#else
	.readdir	= zpl_readdir,
#endif
	.fsync		= zpl_fsync,
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};