/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */


#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif
#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>

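/*
 * Open a regular file. Standard VFS checks are performed by
 * generic_file_open(), then zfs_open() applies any ZFS specific
 * open-time checks.
 */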
static int
zpl_open(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = generic_file_open(ip, filp);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

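/*
 * Release (close) a file. A dirty atime is flagged on the inode so the
 * update is not lost, then zfs_close() drops the ZFS level open count.
 */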
static int
zpl_release(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	if (ITOZ(ip)->z_atime_dirty)
		zfs_mark_inode_dirty(ip);

	crhold(cr);
	error = -zfs_close(ip, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

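/*
 * Directory iteration for the .iterate()/.iterate_shared() hooks. The
 * actual work is done by zfs_readdir(), which fills in the supplied
 * dir_context.
 */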
static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_readdir(file_inode(filp), ctx, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

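/*
 * Legacy .readdir() entry point used on kernels which provide neither
 * fops->iterate() nor fops->iterate_shared(). It simply wraps
 * zpl_iterate() in a locally constructed dir_context.
 */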
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int error;

	error = zpl_iterate(filp, &ctx);
	filp->f_pos = ctx.pos;

	return (error);
}
#endif /* HAVE_VFS_ITERATE */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(dentry->d_inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	struct file *filp = kiocb->ki_filp;
	return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif

#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, datasync));
}
#endif

#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller. ZFS does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#endif

#else
#error "Unsupported fops->fsync() implementation"
#endif

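/*
 * Common read path. The supplied iovec array is wrapped in a uio_t and
 * passed to zfs_read(); on success the file position is advanced and the
 * bytes read are charged to the task's I/O accounting.
 */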
static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t read;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_read(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	read = count - uio.uio_resid;
	*ppos += read;
	task_io_account_read(read);

	return (read);
}

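/* Convenience wrapper which reads into a single kernel or user buffer. */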
inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

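/*
 * Shared helper for the iter/aio read entry points below. Credentials are
 * taken from the current task and the access time is updated via
 * file_accessed() once the read completes.
 */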
static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t read;

	crhold(cr);
	read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	file_accessed(filp);
	return (read);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;
	if (to->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (to->type & ITER_BVEC)
		seg = UIO_BVEC;
	ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
	    iov_iter_count(to), seg, to->iov_offset);
	if (ret > 0)
		iov_iter_advance(to, ret);
	return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	ssize_t ret;
	size_t count;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
	if (ret)
		return (ret);

	return (zpl_iter_read_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

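/*
 * Common write path, the mirror image of zpl_read_common_iovec() above.
 * O_APPEND is handled by resetting the offset to the current file size
 * before the uio_t is constructed and passed to zfs_write().
 */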
static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t wrote;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	if (flags & O_APPEND)
		*ppos = i_size_read(ip);

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_write(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	wrote = count - uio.uio_resid;
	*ppos += wrote;
	task_io_account_write(wrote);

	return (wrote);
}

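/* Convenience wrapper which writes from a single kernel or user buffer. */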
inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

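/*
 * Shared helper for the iter/aio write entry points below, analogous to
 * zpl_iter_read_common().
 */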
static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	return (wrote);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
	size_t count;
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;

#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);

	count = iov_iter_count(from);
	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
	if (ret)
		return (ret);
#else
	/*
	 * XXX - ideally this check should be in the same lock region as the
	 * write operations, so that there is no TOCTTOU race when doing an
	 * append while someone else grows the file.
	 */
	ret = generic_write_checks(kiocb, from);
	if (ret <= 0)
		return (ret);
	count = ret;
#endif

	if (from->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (from->type & ITER_BVEC)
		seg = UIO_BVEC;

	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
	    count, seg, from->iov_offset);
	if (ret > 0)
		iov_iter_advance(from, ret);

	return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);
	size_t count;
	ssize_t ret;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
	if (ret)
		return (ret);

	ret = generic_write_checks(file, &pos, &count, isblk);
	if (ret)
		return (ret);

	return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

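/*
 * Seek within a file. When SEEK_HOLE/SEEK_DATA are available the target
 * offset is resolved by zfs_holey(); all other whence values are handled
 * by generic_file_llseek().
 */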
static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
	fstrans_cookie_t cookie;

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		struct inode *ip = filp->f_mapping->host;
		loff_t maxbytes = ip->i_sb->s_maxbytes;
		loff_t error;

		spl_inode_lock_shared(ip);
		cookie = spl_fstrans_mark();
		error = -zfs_holey(ip, whence, &offset);
		spl_fstrans_unmark(cookie);
		if (error == 0)
			error = lseek_execute(filp, ip, offset, maxbytes);
		spl_inode_unlock_shared(ip);

		return (error);
	}
#endif /* SEEK_HOLE && SEEK_DATA */

	return (generic_file_llseek(filp, offset, whence));
}

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache. To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of an mmap'ed file
 * will always check the page cache first, correct data will still be
 * returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious is that it increases the required
 * memory footprint when accessing mmap'ed files. It also adds
 * additional complexity to the code by keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly onto the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct inode *ip = filp->f_mapping->host;
	znode_t *zp = ITOZ(ip);
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
	spl_fstrans_unmark(cookie);
	if (error)
		return (error);

	error = generic_file_mmap(filp, vma);
	if (error)
		return (error);

	mutex_enter(&zp->z_lock);
	zp->z_is_mapped = 1;
	mutex_exit(&zp->z_lock);

	return (error);
}

/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page. This works, but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	struct inode *ip;
	struct page *pl[1];
	int error = 0;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ip = pp->mapping->host;
	pl[0] = pp;

	cookie = spl_fstrans_mark();
	error = -zfs_getpage(ip, pl, 1);
	spl_fstrans_unmark(cookie);

	if (error) {
		SetPageError(pp);
		ClearPageUptodate(pp);
	} else {
		ClearPageError(pp);
		SetPageUptodate(pp);
		flush_dcache_page(pp);
	}

	unlock_page(pp);
	return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
	return (read_cache_pages(mapping, pages,
	    (filler_t *)zpl_readpage, filp));
}

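/*
 * Write a single dirty page back via zfs_putpage(). Used both as the
 * write_cache_pages() callback in zpl_writepages() and directly by
 * zpl_writepage().
 */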
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));

	cookie = spl_fstrans_mark();
	(void) zfs_putpage(mapping->host, pp, wbc);
	spl_fstrans_unmark(cookie);

	return (0);
}

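/*
 * Write back a range of dirty pages. To avoid committing each page to
 * the ZIL individually, pages are first pushed out in non-SYNC mode and
 * a single zil_commit() is issued afterwards when synchronous semantics
 * are required; see the comments in the function body below.
 */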
static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	znode_t *zp = ITOZ(mapping->host);
	zfsvfs_t *zfsvfs = ITOZSB(mapping->host);
	enum writeback_sync_modes sync_mode;
	int result;

	ZFS_ENTER(zfsvfs);
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;
	ZFS_EXIT(zfsvfs);
	sync_mode = wbc->sync_mode;

	/*
	 * We don't want to run write_cache_pages() in SYNC mode here, because
	 * that would make putpage() wait for a single page to be committed to
	 * disk every single time, resulting in atrocious performance. Instead
	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
	 * and then we commit it all in one go.
	 */
	wbc->sync_mode = WB_SYNC_NONE;
	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	if (sync_mode != wbc->sync_mode) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);

		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
		 * pages (see the implementation of write_cache_pages() for
		 * details). That being said, this is a no-op in most cases.
		 */
		wbc->sync_mode = sync_mode;
		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	}
	return (result);
}

/*
 * Write out dirty pages to the ARC; this function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;

	return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	cred_t *cr = CRED();
	flock64_t bf;
	loff_t olen;
	fstrans_cookie_t cookie;

	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return (error);

	if (offset < 0 || len <= 0)
		return (-EINVAL);

	spl_inode_lock(ip);
	olen = i_size_read(ip);

	if (offset > olen) {
		spl_inode_unlock(ip);
		return (0);
	}
	if (offset + len > olen)
		len = olen - offset;
	bf.l_type = F_WRLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	bf.l_pid = 0;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
	spl_fstrans_unmark(cookie);
	spl_inode_unlock(ip);

	crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

	ASSERT3S(error, <=, 0);
	return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
	return zpl_fallocate_common(file_inode(filp),
	    mode, offset, len);
}
#endif /* HAVE_FILE_FALLOCATE */

/*
 * Map zfs file z_pflags (xvattr_t) to Linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	unsigned int ioctl_flags = 0;
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	int error;

	if (zfs_flags & ZFS_IMMUTABLE)
		ioctl_flags |= FS_IMMUTABLE_FL;

	if (zfs_flags & ZFS_APPENDONLY)
		ioctl_flags |= FS_APPEND_FL;

	if (zfs_flags & ZFS_NODUMP)
		ioctl_flags |= FS_NODUMP_FL;

	ioctl_flags &= FS_FL_USER_VISIBLE;

	error = copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags));

	return (error);
}

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed. Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race. Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */

#define fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))

static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	unsigned int ioctl_flags;
	cred_t *cr = CRED();
	xvattr_t xva;
	xoptattr_t *xoap;
	int error;
	fstrans_cookie_t cookie;

	if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
		return (-EFAULT);

	if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
		return (-EOPNOTSUPP);

	if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
		return (-EACCES);

	if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return (-EACCES);

	if (!zpl_inode_owner_or_capable(ip))
		return (-EACCES);

	xva_init(&xva);
	xoap = xva_getxoptattr(&xva);

	XVA_SET_REQ(&xva, XAT_IMMUTABLE);
	if (ioctl_flags & FS_IMMUTABLE_FL)
		xoap->xoa_immutable = B_TRUE;

	XVA_SET_REQ(&xva, XAT_APPENDONLY);
	if (ioctl_flags & FS_APPEND_FL)
		xoap->xoa_appendonly = B_TRUE;

	XVA_SET_REQ(&xva, XAT_NODUMP);
	if (ioctl_flags & FS_NODUMP_FL)
		xoap->xoa_nodump = B_TRUE;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (error);
}

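/*
 * Top-level ioctl dispatcher. Only the standard FS_IOC_GETFLAGS and
 * FS_IOC_SETFLAGS file attribute ioctls are currently supported.
 */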
static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return (zpl_ioctl_getflags(filp, (void *)arg));
	case FS_IOC_SETFLAGS:
		return (zpl_ioctl_setflags(filp, (void *)arg));
	default:
		return (-ENOTTY);
	}
}

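/*
 * 32-bit compatibility ioctl handler which translates the FS_IOC32_*
 * commands issued by 32-bit processes to their native equivalents.
 */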
#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC32_GETFLAGS:
		cmd = FS_IOC_GETFLAGS;
		break;
	case FS_IOC32_SETFLAGS:
		cmd = FS_IOC_SETFLAGS;
		break;
	default:
		return (-ENOTTY);
	}
	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */

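/*
 * Address space and file operation tables registered with the VFS for
 * regular files and directories. Which entries are populated depends on
 * the interfaces provided by the running kernel (see the HAVE_* checks).
 */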
const struct address_space_operations zpl_address_space_operations = {
	.readpages = zpl_readpages,
	.readpage = zpl_readpage,
	.writepage = zpl_writepage,
	.writepages = zpl_writepages,
};

const struct file_operations zpl_file_operations = {
	.open = zpl_open,
	.release = zpl_release,
	.llseek = zpl_llseek,
#ifdef HAVE_VFS_RW_ITERATE
#ifdef HAVE_NEW_SYNC_READ
	.read = new_sync_read,
	.write = new_sync_write,
#endif
	.read_iter = zpl_iter_read,
	.write_iter = zpl_iter_write,
#else
	.read = do_sync_read,
	.write = do_sync_write,
	.aio_read = zpl_aio_read,
	.aio_write = zpl_aio_write,
#endif
	.mmap = zpl_mmap,
	.fsync = zpl_fsync,
#ifdef HAVE_FILE_AIO_FSYNC
	.aio_fsync = zpl_aio_fsync,
#endif
#ifdef HAVE_FILE_FALLOCATE
	.fallocate = zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
	.unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
	.llseek = generic_file_llseek,
	.read = generic_read_dir,
#ifdef HAVE_VFS_ITERATE_SHARED
	.iterate_shared = zpl_iterate,
#elif defined(HAVE_VFS_ITERATE)
	.iterate = zpl_iterate,
#else
	.readdir = zpl_readdir,
#endif
	.fsync = zpl_fsync,
	.unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = zpl_compat_ioctl,
#endif
};