/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 */


#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>

static int
zpl_open(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;

	error = generic_file_open(ip, filp);
	if (error)
		return (error);

	crhold(cr);
	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_release(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;

	if (ITOZ(ip)->z_atime_dirty)
		zfs_mark_inode_dirty(ip);

	crhold(cr);
	error = -zfs_close(ip, filp->f_flags, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
	struct dentry *dentry = filp->f_path.dentry;
	cred_t *cr = CRED();
	int error;

	crhold(cr);
	error = -zfs_readdir(dentry->d_inode, ctx, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#if !defined(HAVE_VFS_ITERATE)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int error;

	error = zpl_iterate(filp, &ctx);
	filp->f_pos = ctx.pos;

	return (error);
}
#endif /* HAVE_VFS_ITERATE */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'struct file *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	cred_t *cr = CRED();
	int error;

	crhold(cr);
	error = -zfs_fsync(dentry->d_inode, datasync, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	struct file *filp = kiocb->ki_filp;
	return (zpl_fsync(filp, filp->f_path.dentry, datasync));
}
#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;

	crhold(cr);
	error = -zfs_fsync(inode, datasync, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, datasync));
}
#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller. ZFS does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return (error);

	crhold(cr);
	error = -zfs_fsync(inode, datasync, cr);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos,
	    kiocb->ki_pos + kiocb->ki_nbytes, datasync));
}
#else
#error "Unsupported fops->fsync() implementation"
#endif

static inline ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
    int flags, cred_t *cr)
{
	ssize_t read;
	uio_t uio;
	int error;

	uio.uio_iov = (struct iovec *)iovp;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	error = -zfs_read(ip, &uio, flags, cr);
	if (error < 0)
		return (error);

	read = count - uio.uio_resid;
	*ppos += read;
	task_io_account_read(read);

	return (read);
}

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr));
}

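/*
 * Illustrative sketch (not part of the module): zpl_read_common() is the
 * kernel-internal read entry point; the comment above zpl_readpage()
 * below notes that it is reused there with the O_DIRECT flag. A
 * hypothetical in-kernel caller reading into a kernel buffer would pass
 * UIO_SYSSPACE rather than UIO_USERSPACE:
 *
 *	char kbuf[64];
 *	loff_t off = 0;
 *	cred_t *cr = CRED();
 *	ssize_t n;
 *
 *	crhold(cr);
 *	n = zpl_read_common(ip, kbuf, sizeof (kbuf), &off,
 *	    UIO_SYSSPACE, 0, cr);
 *	crfree(cr);
 *
 * On success 'n' is the number of bytes read and 'off' has been
 * advanced; a negative value is a negated errno from zfs_read().
 */
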
static ssize_t
zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	cred_t *cr = CRED();
	ssize_t read;

	crhold(cr);
	read = zpl_read_common(filp->f_mapping->host, buf, len, ppos,
	    UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	return (read);
}

static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	size_t count = kiocb->ki_nbytes;
	ssize_t read;
	size_t alloc_size = sizeof (struct iovec) * nr_segs;
	struct iovec *iov_tmp;

	ASSERT(iovp);

	/*
	 * Work on a private copy of the iovec so the caller's array is
	 * not modified as the uio is consumed.
	 */
	iov_tmp = kmem_alloc(alloc_size, KM_SLEEP);
	bcopy(iovp, iov_tmp, alloc_size);

	crhold(cr);
	read = zpl_read_common_iovec(filp->f_mapping->host, iov_tmp, count,
	    nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	kmem_free(iov_tmp, alloc_size);

	return (read);
}

static inline ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
    int flags, cred_t *cr)
{
	ssize_t wrote;
	uio_t uio;
	int error;

	uio.uio_iov = (struct iovec *)iovp;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	error = -zfs_write(ip, &uio, flags, cr);
	if (error < 0)
		return (error);

	wrote = count - uio.uio_resid;
	*ppos += wrote;
	task_io_account_write(wrote);

	return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr));
}

static ssize_t
zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	cred_t *cr = CRED();
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common(filp->f_mapping->host, buf, len, ppos,
	    UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	return (wrote);
}

static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	size_t count = kiocb->ki_nbytes;
	ssize_t wrote;
	size_t alloc_size = sizeof (struct iovec) * nr_segs;
	struct iovec *iov_tmp;

	ASSERT(iovp);

	/* As in zpl_aio_read(), consume a private copy of the iovec. */
	iov_tmp = kmem_alloc(alloc_size, KM_SLEEP);
	bcopy(iovp, iov_tmp, alloc_size);

	crhold(cr);
	wrote = zpl_write_common_iovec(filp->f_mapping->host, iov_tmp, count,
	    nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	kmem_free(iov_tmp, alloc_size);

	return (wrote);
}

static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		struct inode *ip = filp->f_mapping->host;
		loff_t maxbytes = ip->i_sb->s_maxbytes;
		loff_t error;

		spl_inode_lock(ip);
		error = -zfs_holey(ip, whence, &offset);
		if (error == 0)
			error = lseek_execute(filp, ip, offset, maxbytes);
		spl_inode_unlock(ip);

		return (error);
	}
#endif /* SEEK_HOLE && SEEK_DATA */

	return (generic_file_llseek(filp, offset, whence));
}

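/*
 * Illustrative sketch (not part of the module): the SEEK_DATA/SEEK_HOLE
 * support above lets a userspace program walk the allocated regions of
 * a sparse file, for example:
 *
 *	#define _GNU_SOURCE
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/tank/fs/sparse", O_RDONLY);	// hypothetical path
 *	off_t data = lseek(fd, 0, SEEK_DATA);	// start of first data run
 *	off_t hole = lseek(fd, data, SEEK_HOLE);	// end of that data run
 *
 * Each such call resolves through zpl_llseek() and zfs_holey(); lseek()
 * fails with ENXIO once the offset passes the last data byte.
 */
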
/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache. To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of an mmap'ed
 * file will always check the page cache first, correct data will
 * still be returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files. It
 * also adds additional complexity to the code keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly on to the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct inode *ip = filp->f_mapping->host;
	znode_t *zp = ITOZ(ip);
	int error;

	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
	if (error)
		return (error);

	error = generic_file_mmap(filp, vma);
	if (error)
		return (error);

	mutex_enter(&zp->z_lock);
	zp->z_is_mapped = 1;
	mutex_exit(&zp->z_lock);

	return (error);
}

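/*
 * Illustrative sketch (not part of the module): the dual caching
 * described above is what keeps the following userspace sequence
 * coherent, with a write(2) through a file descriptor immediately
 * visible through an existing mapping of the same file:
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tank/fs/file", O_RDWR);	// hypothetical path
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *
 *	(void) pwrite(fd, "x", 1, 0);	// updates both ARC and page cache
 *	// p[0] now reads back 'x'; the mapped page was updated in place
 */
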
/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page. This works but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	struct inode *ip;
	struct page *pl[1];
	int error = 0;

	ASSERT(PageLocked(pp));
	ip = pp->mapping->host;
	pl[0] = pp;

	error = -zfs_getpage(ip, pl, 1);

	if (error) {
		SetPageError(pp);
		ClearPageUptodate(pp);
	} else {
		ClearPageError(pp);
		SetPageUptodate(pp);
		flush_dcache_page(pp);
	}

	unlock_page(pp);
	return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
	return (read_cache_pages(mapping, pages,
	    (filler_t *)zpl_readpage, filp));
}

int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));
	ASSERT(!(current->flags & PF_NOFS));

	/*
	 * Annotate this call path with a flag that indicates that it is
	 * unsafe to use KM_SLEEP during memory allocations due to the
	 * potential for a deadlock. KM_PUSHPAGE should be used instead.
	 */
	current->flags |= PF_NOFS;
	(void) zfs_putpage(mapping->host, pp, wbc);
	current->flags &= ~PF_NOFS;

	return (0);
}

static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	znode_t *zp = ITOZ(mapping->host);
	zfs_sb_t *zsb = ITOZSB(mapping->host);
	enum writeback_sync_modes sync_mode;
	int result;

	ZFS_ENTER(zsb);
	if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;
	ZFS_EXIT(zsb);
	sync_mode = wbc->sync_mode;

	/*
	 * We don't want to run write_cache_pages() in SYNC mode here, because
	 * that would make putpage() wait for a single page to be committed to
	 * disk every single time, resulting in atrocious performance. Instead
	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
	 * and then we commit it all in one go.
	 */
	wbc->sync_mode = WB_SYNC_NONE;
	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	if (sync_mode != wbc->sync_mode) {
		ZFS_ENTER(zsb);
		ZFS_VERIFY_ZP(zp);
		if (zsb->z_log != NULL)
			zil_commit(zsb->z_log, zp->z_id);
		ZFS_EXIT(zsb);

		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
		 * pages (see the implementation of write_cache_pages() for
		 * details). That being said, this is a no-op in most cases.
		 */
		wbc->sync_mode = sync_mode;
		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	}
	return (result);
}

/*
 * Write out dirty pages to the ARC. This function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;

	return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination that matches the behavior of zfs_space()
 * is FALLOC_FL_PUNCH_HOLE. This flag was introduced in the 2.6.38 kernel.
 */
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	cred_t *cr = CRED();
	int error = -EOPNOTSUPP;

	if (mode & FALLOC_FL_KEEP_SIZE)
		return (-EOPNOTSUPP);

	crhold(cr);

#ifdef FALLOC_FL_PUNCH_HOLE
	if (mode & FALLOC_FL_PUNCH_HOLE) {
		flock64_t bf;

		bf.l_type = F_WRLCK;
		bf.l_whence = 0;
		bf.l_start = offset;
		bf.l_len = len;
		bf.l_pid = 0;

		error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
	}
#endif /* FALLOC_FL_PUNCH_HOLE */

	crfree(cr);

	ASSERT3S(error, <=, 0);
	return (error);
}

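/*
 * Illustrative sketch (not part of the module): given the checks above,
 * a request only reaches zfs_space() when FALLOC_FL_PUNCH_HOLE is set
 * and FALLOC_FL_KEEP_SIZE is not, e.g. from userspace:
 *
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	// deallocate 1 MiB starting at offset 0
 *	int rc = fallocate(fd, FALLOC_FL_PUNCH_HOLE, 0, 1 << 20);
 *
 * Any other mode, including FALLOC_FL_KEEP_SIZE, is answered with
 * EOPNOTSUPP.
 */
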
#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
	return (zpl_fallocate_common(filp->f_path.dentry->d_inode,
	    mode, offset, len));
}
#endif /* HAVE_FILE_FALLOCATE */

/*
 * Map zfs file z_pflags (xvattr_t) to Linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
	struct inode *ip = filp->f_dentry->d_inode;
	unsigned int ioctl_flags = 0;
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;

	if (zfs_flags & ZFS_IMMUTABLE)
		ioctl_flags |= FS_IMMUTABLE_FL;

	if (zfs_flags & ZFS_APPENDONLY)
		ioctl_flags |= FS_APPEND_FL;

	if (zfs_flags & ZFS_NODUMP)
		ioctl_flags |= FS_NODUMP_FL;

	ioctl_flags &= FS_FL_USER_VISIBLE;

	/*
	 * copy_to_user() returns the number of uncopied bytes, not an
	 * errno, so map any failure to -EFAULT.
	 */
	if (copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags)))
		return (-EFAULT);

	return (0);
}

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed. Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race. Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */
#define fchange(f0, f1, b0, b1) ((((f0) & (b0)) == (b0)) != \
	(((b1) & (f1)) == (f1)))

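/*
 * Worked example (illustration only): with f0 = ioctl_flags and
 * f1 = zfs_flags, fchange(f0, f1, FS_IMMUTABLE_FL, ZFS_IMMUTABLE)
 * expands to
 *
 *	(((f0) & FS_IMMUTABLE_FL) == FS_IMMUTABLE_FL) !=
 *	    ((ZFS_IMMUTABLE & (f1)) == ZFS_IMMUTABLE)
 *
 * which is true exactly when the requested Linux flag state differs
 * from the current ZFS flag state, i.e. the caller is asking for a
 * change.
 */
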
static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
	struct inode *ip = filp->f_dentry->d_inode;
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	unsigned int ioctl_flags;
	cred_t *cr = CRED();
	xvattr_t xva;
	xoptattr_t *xoap;
	int error;

	if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
		return (-EFAULT);

	if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
		return (-EOPNOTSUPP);

	if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
		return (-EACCES);

	if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return (-EACCES);

	if (!zpl_inode_owner_or_capable(ip))
		return (-EACCES);

	xva_init(&xva);
	xoap = xva_getxoptattr(&xva);

	XVA_SET_REQ(&xva, XAT_IMMUTABLE);
	if (ioctl_flags & FS_IMMUTABLE_FL)
		xoap->xoa_immutable = B_TRUE;

	XVA_SET_REQ(&xva, XAT_APPENDONLY);
	if (ioctl_flags & FS_APPEND_FL)
		xoap->xoa_appendonly = B_TRUE;

	XVA_SET_REQ(&xva, XAT_NODUMP);
	if (ioctl_flags & FS_NODUMP_FL)
		xoap->xoa_nodump = B_TRUE;

	crhold(cr);
	error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	crfree(cr);

	return (error);
}

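/*
 * Illustrative sketch (not part of the module): these handlers back the
 * standard Linux attribute ioctls used by lsattr(1) and chattr(1):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	unsigned int flags;
 *
 *	(void) ioctl(fd, FS_IOC_GETFLAGS, &flags);
 *	flags |= FS_IMMUTABLE_FL;
 *	(void) ioctl(fd, FS_IOC_SETFLAGS, &flags);	// needs CAP_LINUX_IMMUTABLE
 */
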
static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return (zpl_ioctl_getflags(filp, (void *)arg));
	case FS_IOC_SETFLAGS:
		return (zpl_ioctl_setflags(filp, (void *)arg));
	default:
		return (-ENOTTY);
	}
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	return (zpl_ioctl(filp, cmd, arg));
}
#endif /* CONFIG_COMPAT */

const struct address_space_operations zpl_address_space_operations = {
	.readpages	= zpl_readpages,
	.readpage	= zpl_readpage,
	.writepage	= zpl_writepage,
	.writepages	= zpl_writepages,
};

const struct file_operations zpl_file_operations = {
	.open		= zpl_open,
	.release	= zpl_release,
	.llseek		= zpl_llseek,
	.read		= zpl_read,
	.write		= zpl_write,
	.aio_read	= zpl_aio_read,
	.aio_write	= zpl_aio_write,
	.mmap		= zpl_mmap,
	.fsync		= zpl_fsync,
	.aio_fsync	= zpl_aio_fsync,
#ifdef HAVE_FILE_FALLOCATE
	.fallocate	= zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
#ifdef HAVE_VFS_ITERATE
	.iterate	= zpl_iterate,
#else
	.readdir	= zpl_readdir,
#endif
	.fsync		= zpl_fsync,
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};