1 // SPDX-License-Identifier: GPL-2.0
3 * (C) 2001 Clemson University and The University of Chicago
4 * Copyright 2018 Omnibond Systems, L.L.C.
6 * See COPYING in top-level directory.
10 * Linux VFS inode operations.
13 #include <linux/bvec.h>
15 #include "orangefs-kernel.h"
16 #include "orangefs-bufmap.h"
/*
 * Write the dirty byte range of a single page to the server.
 *
 * Visible steps: the page is marked under writeback, the inode size is
 * read, and when PagePrivate is set the write range is taken from the
 * page private data.  A one-segment bvec iterator of wlen bytes is built
 * and handed to wait_for_direct_io() as an ORANGEFS_IO_WRITE.  The result
 * is passed to mapping_set_error() and the private write range is
 * detached and freed.
 *
 * NOTE(review): this listing omits interior lines of the function (local
 * declarations, else branches, the conditions guarding the error path and
 * the return statement); confirm details against the complete source.
 */
18 static int orangefs_writepage_locked(struct page
*page
,
19 struct writeback_control
*wbc
)
21 struct inode
*inode
= page
->mapping
->host
;
22 struct orangefs_write_range
*wr
= NULL
;
29 set_page_writeback(page
);
31 len
= i_size_read(inode
);
32 if (PagePrivate(page
)) {
33 wr
= (struct orangefs_write_range
*)page_private(page
);
34 WARN_ON(wr
->pos
>= len
);
36 if (off
+ wr
->len
> len
)
42 off
= page_offset(page
);
43 if (off
+ PAGE_SIZE
> len
)
48 /* Should've been handled in orangefs_invalidatepage. */
49 WARN_ON(off
== len
|| off
+ wlen
> len
);
53 bv
.bv_offset
= off
% PAGE_SIZE
;
55 iov_iter_bvec(&iter
, WRITE
, &bv
, 1, wlen
);
57 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, wlen
,
61 mapping_set_error(page
->mapping
, ret
);
65 kfree(detach_page_private(page
));
/*
 * Single-page writeback entry point (installed as .writepage in
 * orangefs_address_operations).  Delegates the write to
 * orangefs_writepage_locked() and then ends writeback on the page.
 *
 * NOTE(review): some lines of the body are absent from this listing.
 */
69 static int orangefs_writepage(struct page
*page
, struct writeback_control
*wbc
)
72 ret
= orangefs_writepage_locked(page
, wbc
);
74 end_page_writeback(page
);
/*
 * Context used by the multi-page writeback helpers in this file.
 *
 * NOTE(review): the member list is absent from this listing.  Code in
 * this file accesses the members pages, bv, npages, maxpages, off, len,
 * uid and gid through pointers to this structure.
 */
78 struct orangefs_writepages
{
/*
 * Write out the pages collected in @ow with one wait_for_direct_io() call.
 *
 * Visible steps: every collected page is marked under writeback and
 * described by one entry of ow->bv; a bvec iterator over ow->npages
 * segments and ow->len bytes is built; ow->len is reduced when
 * ow->off + ow->len would extend past the inode size; the write is
 * issued as ORANGEFS_IO_WRITE.
 *
 * Two per-page loops follow the I/O.  The first additionally calls
 * SetPageError() and mapping_set_error(); both clear the page private
 * flag and drop a page reference when PagePrivate is set, then end
 * writeback and unlock the page.
 *
 * NOTE(review): the condition choosing between the two loops is not in
 * this listing (presumably failure versus success of the I/O), nor are
 * the local declarations and the return statement.
 */
89 static int orangefs_writepages_work(struct orangefs_writepages
*ow
,
90 struct writeback_control
*wbc
)
92 struct inode
*inode
= ow
->pages
[0]->mapping
->host
;
93 struct orangefs_write_range
*wrp
, wr
;
100 len
= i_size_read(inode
);
102 for (i
= 0; i
< ow
->npages
; i
++) {
103 set_page_writeback(ow
->pages
[i
]);
104 ow
->bv
[i
].bv_page
= ow
->pages
[i
];
105 ow
->bv
[i
].bv_len
= min(page_offset(ow
->pages
[i
]) + PAGE_SIZE
,
107 max(ow
->off
, page_offset(ow
->pages
[i
]));
109 ow
->bv
[i
].bv_offset
= ow
->off
-
110 page_offset(ow
->pages
[i
]);
112 ow
->bv
[i
].bv_offset
= 0;
114 iov_iter_bvec(&iter
, WRITE
, ow
->bv
, ow
->npages
, ow
->len
);
116 WARN_ON(ow
->off
>= len
);
117 if (ow
->off
+ ow
->len
> len
)
118 ow
->len
= len
- ow
->off
;
123 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, ow
->len
,
126 for (i
= 0; i
< ow
->npages
; i
++) {
127 SetPageError(ow
->pages
[i
]);
128 mapping_set_error(ow
->pages
[i
]->mapping
, ret
);
129 if (PagePrivate(ow
->pages
[i
])) {
130 wrp
= (struct orangefs_write_range
*)
131 page_private(ow
->pages
[i
]);
132 ClearPagePrivate(ow
->pages
[i
]);
133 put_page(ow
->pages
[i
]);
136 end_page_writeback(ow
->pages
[i
]);
137 unlock_page(ow
->pages
[i
]);
141 for (i
= 0; i
< ow
->npages
; i
++) {
142 if (PagePrivate(ow
->pages
[i
])) {
143 wrp
= (struct orangefs_write_range
*)
144 page_private(ow
->pages
[i
]);
145 ClearPagePrivate(ow
->pages
[i
]);
146 put_page(ow
->pages
[i
]);
149 end_page_writeback(ow
->pages
[i
]);
150 unlock_page(ow
->pages
[i
]);
/*
 * Per-page callback passed to write_cache_pages() by orangefs_writepages();
 * @data is the struct orangefs_writepages context.
 *
 * Visible behaviour:
 *  - a page without private data is reported with a printk;
 *  - when the batch is empty (ow->npages == 0) the page is stored as its
 *    first entry;
 *  - when the uid or gid of the page write range differs from the batch,
 *    the batch is flushed with orangefs_writepages_work();
 *  - when the write range starts exactly at ow->off + ow->len the page is
 *    appended to the batch;
 *  - another path flushes the batch, writes the page on its own with
 *    orangefs_writepage_locked(), records the result on the mapping and
 *    ends writeback;
 *  - the batch is flushed once ow->npages reaches ow->maxpages.
 *
 * NOTE(review): the branch structure linking these paths, the updates of
 * ow->off / ow->len / ow->uid / ow->gid and the return statements are
 * absent from this listing.
 */
156 static int orangefs_writepages_callback(struct page
*page
,
157 struct writeback_control
*wbc
, void *data
)
159 struct orangefs_writepages
*ow
= data
;
160 struct orangefs_write_range
*wr
;
163 if (!PagePrivate(page
)) {
165 /* It's not private so there's nothing to write, right? */
166 printk("writepages_callback not private!\n");
170 wr
= (struct orangefs_write_range
*)page_private(page
);
173 if (ow
->npages
== 0) {
178 ow
->pages
[ow
->npages
++] = page
;
182 if (!uid_eq(ow
->uid
, wr
->uid
) || !gid_eq(ow
->gid
, wr
->gid
)) {
183 orangefs_writepages_work(ow
, wbc
);
188 if (ow
->off
+ ow
->len
== wr
->pos
) {
190 ow
->pages
[ow
->npages
++] = page
;
197 orangefs_writepages_work(ow
, wbc
);
200 ret
= orangefs_writepage_locked(page
, wbc
);
201 mapping_set_error(page
->mapping
, ret
);
203 end_page_writeback(page
);
205 if (ow
->npages
== ow
->maxpages
) {
206 orangefs_writepages_work(ow
, wbc
);
/*
 * Multi-page writeback entry point (installed as .writepages in
 * orangefs_address_operations).
 *
 * Allocates a zeroed struct orangefs_writepages plus a page-pointer array
 * and a bio_vec array, each with ow->maxpages entries, where maxpages is
 * orangefs_bufmap_size_query() / PAGE_SIZE.  Under a block plug it runs
 * write_cache_pages() with orangefs_writepages_callback() and then calls
 * orangefs_writepages_work() on the context.
 *
 * NOTE(review): allocation-failure handling, the condition guarding the
 * final orangefs_writepages_work() call, the frees and the return are
 * absent from this listing.
 */
213 static int orangefs_writepages(struct address_space
*mapping
,
214 struct writeback_control
*wbc
)
216 struct orangefs_writepages
*ow
;
217 struct blk_plug plug
;
219 ow
= kzalloc(sizeof(struct orangefs_writepages
), GFP_KERNEL
);
222 ow
->maxpages
= orangefs_bufmap_size_query()/PAGE_SIZE
;
223 ow
->pages
= kcalloc(ow
->maxpages
, sizeof(struct page
*), GFP_KERNEL
);
228 ow
->bv
= kcalloc(ow
->maxpages
, sizeof(struct bio_vec
), GFP_KERNEL
);
234 blk_start_plug(&plug
);
235 ret
= write_cache_pages(mapping
, wbc
, orangefs_writepages_callback
, ow
);
237 ret
= orangefs_writepages_work(ow
, wbc
);
238 blk_finish_plug(&plug
);
/*
 * Forward declaration: orangefs_launder_page() is defined further down in
 * this file but is called by functions that precede its definition.
 */
245 static int orangefs_launder_page(struct page
*);
/*
 * Page read entry point (installed as .readpage in
 * orangefs_address_operations).
 *
 * Visible steps: the page is laundered first, then a one-segment bvec
 * iterator of PAGE_SIZE bytes is filled by wait_for_direct_io() as an
 * ORANGEFS_IO_READ of read_size bytes; buffer_index receives the shared
 * memory slot used.  Any part of the page the read did not fill is
 * zeroed with iov_iter_zero(), the dcache is flushed for the page and
 * the page is marked uptodate.
 *
 * As an optimization, while more than one page of data remains, the
 * following pages are looked up or created in the page cache, filled
 * from the slot with orangefs_bufmap_page_fill(), marked uptodate and
 * unlocked; the loop gives up when a next page already exists or cannot
 * be created.  The slot is released with orangefs_bufmap_put() when
 * buffer_index is not -1.
 *
 * NOTE(review): several declarations (including read_size and remaining),
 * the error branches and the return are absent from this listing.
 */
247 static int orangefs_readpage(struct file
*file
, struct page
*page
)
249 struct inode
*inode
= page
->mapping
->host
;
250 struct iov_iter iter
;
253 loff_t off
; /* offset into this page */
254 pgoff_t index
; /* which page */
255 struct page
*next_page
;
258 int buffer_index
= -1; /* orangefs shared memory slot */
259 int slot_index
; /* index into slot */
263 * Get up to this many bytes from Orangefs at a time and try
264 * to fill them into the page cache at once. Tests with dd made
265 * this seem like a reasonable static number, if there was
266 * interest perhaps this number could be made setable through
272 orangefs_launder_page(page
);
274 off
= page_offset(page
);
275 index
= off
>> PAGE_SHIFT
;
277 bv
.bv_len
= PAGE_SIZE
;
279 iov_iter_bvec(&iter
, READ
, &bv
, 1, PAGE_SIZE
);
281 ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
, &off
, &iter
,
282 read_size
, inode
->i_size
, NULL
, &buffer_index
, file
);
284 /* this will only zero remaining unread portions of the page data */
285 iov_iter_zero(~0U, &iter
);
286 /* takes care of potential aliasing */
287 flush_dcache_page(page
);
293 SetPageUptodate(page
);
295 ClearPageError(page
);
298 /* unlock the page after the ->readpage() routine completes */
301 if (remaining
> PAGE_SIZE
) {
303 while ((remaining
- PAGE_SIZE
) >= PAGE_SIZE
) {
304 remaining
-= PAGE_SIZE
;
306 * It is an optimization to try and fill more than one
307 * page... by now we've already gotten the single
308 * page we were after, if stuff doesn't seem to
309 * be going our way at this point just return
310 * and hope for the best.
312 * If we look for pages and they're already there is
313 * one reason to give up, and if they're not there
314 * and we can't create them is another reason.
319 next_page
= find_get_page(inode
->i_mapping
, index
);
321 gossip_debug(GOSSIP_FILE_DEBUG
,
322 "%s: found next page, quitting\n",
327 next_page
= find_or_create_page(inode
->i_mapping
,
331 * I've never hit this, leave it as a printk for
332 * now so it will be obvious.
335 printk("%s: can't create next page, quitting\n",
339 kaddr
= kmap_atomic(next_page
);
340 orangefs_bufmap_page_fill(kaddr
,
343 kunmap_atomic(kaddr
);
344 SetPageUptodate(next_page
);
345 unlock_page(next_page
);
351 if (buffer_index
!= -1)
352 orangefs_bufmap_put(buffer_index
);
/*
 * Prepare a page cache page for a buffered write (installed as
 * .write_begin in orangefs_address_operations).
 *
 * Visible steps: the page for pos is obtained with
 * grab_cache_page_write_begin().  A page that is dirty but carries no
 * private data is laundered.  When the page already has a write range,
 * the code tests whether the new write starts exactly where that range
 * ends and whether its uid and gid equal the current fsuid and fsgid; a
 * launder call follows that test.  A new struct orangefs_write_range is
 * allocated with kmalloc(), stamped with the current fsuid and fsgid and
 * attached to the page as private data.
 *
 * NOTE(review): the rest of the parameter list, the body of the matching
 * case, the error handling and the return are absent from this listing.
 */
356 static int orangefs_write_begin(struct file
*file
,
357 struct address_space
*mapping
,
358 loff_t pos
, unsigned len
, unsigned flags
, struct page
**pagep
,
361 struct orangefs_write_range
*wr
;
366 index
= pos
>> PAGE_SHIFT
;
368 page
= grab_cache_page_write_begin(mapping
, index
, flags
);
374 if (PageDirty(page
) && !PagePrivate(page
)) {
376 * Should be impossible. If it happens, launder the page
377 * since we don't know what's dirty. This will WARN in
378 * orangefs_writepage_locked.
380 ret
= orangefs_launder_page(page
);
384 if (PagePrivate(page
)) {
385 struct orangefs_write_range
*wr
;
386 wr
= (struct orangefs_write_range
*)page_private(page
);
387 if (wr
->pos
+ wr
->len
== pos
&&
388 uid_eq(wr
->uid
, current_fsuid()) &&
389 gid_eq(wr
->gid
, current_fsgid())) {
393 ret
= orangefs_launder_page(page
);
399 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
405 wr
->uid
= current_fsuid();
406 wr
->gid
= current_fsgid();
407 attach_page_private(page
, wr
);
/*
 * Finish a buffered write (installed as .write_end in
 * orangefs_address_operations).
 *
 * Visible steps: i_size is raised to pos + copied when the write ends
 * past the current size.  For a page that is not uptodate, the part of
 * the requested range that was not copied is zeroed, and a page written
 * from its start either in full or up to i_size has its tail zeroed and
 * is marked uptodate.  The page is then marked dirty and the inode of
 * @file is marked dirty with mark_inode_dirty_sync().
 *
 * NOTE(review): the unlock/put of the page and the return value are not
 * part of this listing.
 */
412 static int orangefs_write_end(struct file
*file
, struct address_space
*mapping
,
413 loff_t pos
, unsigned len
, unsigned copied
, struct page
*page
, void *fsdata
)
415 struct inode
*inode
= page
->mapping
->host
;
416 loff_t last_pos
= pos
+ copied
;
419 * No need to use i_size_read() here, the i_size
420 * cannot change under us because we hold the i_mutex.
422 if (last_pos
> inode
->i_size
)
423 i_size_write(inode
, last_pos
);
425 /* zero the stale part of the page if we did a short copy */
426 if (!PageUptodate(page
)) {
427 unsigned from
= pos
& (PAGE_SIZE
- 1);
429 zero_user(page
, from
+ copied
, len
- copied
);
431 /* Set fully written pages uptodate. */
432 if (pos
== page_offset(page
) &&
433 (len
== PAGE_SIZE
|| pos
+ len
== inode
->i_size
)) {
434 zero_user_segment(page
, from
+ copied
, PAGE_SIZE
);
435 SetPageUptodate(page
);
439 set_page_dirty(page
);
443 mark_inode_dirty_sync(file_inode(file
));
/*
 * Invalidate part or all of a page (installed as .invalidatepage in
 * orangefs_address_operations) and reconcile the page write range with
 * the invalidated byte range starting at page_offset(page) + offset and
 * spanning length bytes.
 *
 * Cases tested, in order:
 *  - the whole page is invalidated: the write range is detached and freed;
 *  - the write range lies entirely inside the invalidated range: it is
 *    detached and freed and the dirty state is cancelled;
 *  - the invalidated range chops off the end of the write range;
 *  - the invalidated range chops off the beginning of the write range;
 *    in both chop cases the uid and gid are reset to the current fsuid
 *    and fsgid;
 *  - the invalidated range is strictly inside the write range;
 *  - otherwise the ranges should not overlap, and a diagnostic is printed
 *    when the overlap test fails.
 * When the write range was modified, the page is laundered at the end.
 *
 * NOTE(review): the remaining parameters, the adjustments of wr->pos and
 * wr->len and the early returns are absent from this listing.
 */
447 static void orangefs_invalidatepage(struct page
*page
,
451 struct orangefs_write_range
*wr
;
452 wr
= (struct orangefs_write_range
*)page_private(page
);
454 if (offset
== 0 && length
== PAGE_SIZE
) {
455 kfree(detach_page_private(page
));
457 /* write range entirely within invalidate range (or equal) */
458 } else if (page_offset(page
) + offset
<= wr
->pos
&&
459 wr
->pos
+ wr
->len
<= page_offset(page
) + offset
+ length
) {
460 kfree(detach_page_private(page
));
461 /* XXX is this right? only caller in fs */
462 cancel_dirty_page(page
);
464 /* invalidate range chops off end of write range */
465 } else if (wr
->pos
< page_offset(page
) + offset
&&
466 wr
->pos
+ wr
->len
<= page_offset(page
) + offset
+ length
&&
467 page_offset(page
) + offset
< wr
->pos
+ wr
->len
) {
469 x
= wr
->pos
+ wr
->len
- (page_offset(page
) + offset
);
470 WARN_ON(x
> wr
->len
);
472 wr
->uid
= current_fsuid();
473 wr
->gid
= current_fsgid();
474 /* invalidate range chops off beginning of write range */
475 } else if (page_offset(page
) + offset
<= wr
->pos
&&
476 page_offset(page
) + offset
+ length
< wr
->pos
+ wr
->len
&&
477 wr
->pos
< page_offset(page
) + offset
+ length
) {
479 x
= page_offset(page
) + offset
+ length
- wr
->pos
;
480 WARN_ON(x
> wr
->len
);
483 wr
->uid
= current_fsuid();
484 wr
->gid
= current_fsgid();
485 /* invalidate range entirely within write range (punch hole) */
486 } else if (wr
->pos
< page_offset(page
) + offset
&&
487 page_offset(page
) + offset
+ length
< wr
->pos
+ wr
->len
) {
488 /* XXX what do we do here... should not WARN_ON */
492 * should we just ignore this and write it out anyway?
493 * it hardly makes sense
496 /* non-overlapping ranges */
498 /* WARN if they do overlap */
499 if (!((page_offset(page
) + offset
+ length
<= wr
->pos
) ^
500 (wr
->pos
+ wr
->len
<= page_offset(page
) + offset
))) {
502 printk("invalidate range offset %llu length %u\n",
503 page_offset(page
) + offset
, length
);
504 printk("write range offset %llu length %zu\n",
511 * Above there are returns where wr is freed or where we WARN.
512 * Thus the following runs if wr was modified above.
515 orangefs_launder_page(page
);
518 static int orangefs_releasepage(struct page
*page
, gfp_t foo
)
520 return !PagePrivate(page
);
/*
 * Release the private data of a page that is being freed (installed as
 * .freepage in orangefs_address_operations).
 *
 * Whatever detach_page_private() returns for the page is handed to
 * kfree(); elsewhere in this file the private data attached to a page is
 * a kmalloc-ed struct orangefs_write_range.
 */
static void orangefs_freepage(struct page *page)
{
	kfree(detach_page_private(page));
}
/*
 * Synchronously write back one page (installed as .launder_page in
 * orangefs_address_operations and also called directly within this file).
 *
 * Waits for any writeback already in flight on the page.  If
 * clear_page_dirty_for_io() reports the page was dirty, the page is
 * written with orangefs_writepage_locked() using a writeback_control
 * whose sync_mode is WB_SYNC_ALL, and writeback is then ended.
 *
 * NOTE(review): the declaration of r, the remaining initializer fields
 * and the return are absent from this listing.
 */
528 static int orangefs_launder_page(struct page
*page
)
531 struct writeback_control wbc
= {
532 .sync_mode
= WB_SYNC_ALL
,
535 wait_on_page_writeback(page
);
536 if (clear_page_dirty_for_io(page
)) {
537 r
= orangefs_writepage_locked(page
, &wbc
);
538 end_page_writeback(page
);
/*
 * Direct I/O entry point (installed as .direct_IO in
 * orangefs_address_operations).
 *
 * The transfer direction is derived from iov_iter_rw(iter).  While the
 * iterator still holds data, each pass transfers at most
 * orangefs_bufmap_size_query() bytes through wait_for_direct_io(),
 * starting at the running offset that begins at iocb->ki_pos.  The loop
 * is left early after a short transfer (amt_complete < each_count).
 * After the loop the code branches on the I/O type; file_update_time()
 * and the extension of i_size to *offset are visible there.
 *
 * NOTE(review): the accumulation of total_count, the error handling, the
 * exact branch structure after the loop and the return are absent from
 * this listing.
 */
543 static ssize_t
orangefs_direct_IO(struct kiocb
*iocb
,
544 struct iov_iter
*iter
)
547 * Comment from original do_readv_writev:
548 * Common entry point for read/write/readv/writev
549 * This function will dispatch it to either the direct I/O
550 * or buffered I/O path depending on the mount options and/or
551 * augmented/extended metadata attached to the file.
552 * Note: File extended attributes override any mount options.
554 struct file
*file
= iocb
->ki_filp
;
555 loff_t pos
= iocb
->ki_pos
;
556 enum ORANGEFS_io_type type
= iov_iter_rw(iter
) == WRITE
?
557 ORANGEFS_IO_WRITE
: ORANGEFS_IO_READ
;
558 loff_t
*offset
= &pos
;
559 struct inode
*inode
= file
->f_mapping
->host
;
560 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
561 struct orangefs_khandle
*handle
= &orangefs_inode
->refn
.khandle
;
562 size_t count
= iov_iter_count(iter
);
563 ssize_t total_count
= 0;
564 ssize_t ret
= -EINVAL
;
567 gossip_debug(GOSSIP_FILE_DEBUG
,
568 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
573 if (type
== ORANGEFS_IO_WRITE
) {
574 gossip_debug(GOSSIP_FILE_DEBUG
,
575 "%s(%pU): proceeding with offset : %llu, "
588 while (iov_iter_count(iter
)) {
589 size_t each_count
= iov_iter_count(iter
);
593 /* how much to transfer in this loop iteration */
594 if (each_count
> orangefs_bufmap_size_query())
595 each_count
= orangefs_bufmap_size_query();
597 gossip_debug(GOSSIP_FILE_DEBUG
,
598 "%s(%pU): size of each_count(%d)\n",
602 gossip_debug(GOSSIP_FILE_DEBUG
,
603 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
608 ret
= wait_for_direct_io(type
, inode
, offset
, iter
,
609 each_count
, 0, NULL
, NULL
, file
);
610 gossip_debug(GOSSIP_FILE_DEBUG
,
611 "%s(%pU): return from wait_for_io:%d\n",
623 gossip_debug(GOSSIP_FILE_DEBUG
,
624 "%s(%pU): AFTER wait_for_io: offset is %d\n",
630 * if we got a short I/O operations,
631 * fall out and return what we got so far
633 if (amt_complete
< each_count
)
641 if (type
== ORANGEFS_IO_READ
) {
644 file_update_time(file
);
645 if (*offset
> i_size_read(inode
))
646 i_size_write(inode
, *offset
);
650 gossip_debug(GOSSIP_FILE_DEBUG
,
651 "%s(%pU): Value(%d) returned.\n",
659 /** ORANGEFS2 implementation of address space operations */
660 static const struct address_space_operations orangefs_address_operations
= {
661 .writepage
= orangefs_writepage
,
662 .readpage
= orangefs_readpage
,
663 .writepages
= orangefs_writepages
,
664 .set_page_dirty
= __set_page_dirty_nobuffers
,
665 .write_begin
= orangefs_write_begin
,
666 .write_end
= orangefs_write_end
,
667 .invalidatepage
= orangefs_invalidatepage
,
668 .releasepage
= orangefs_releasepage
,
669 .freepage
= orangefs_freepage
,
670 .launder_page
= orangefs_launder_page
,
671 .direct_IO
= orangefs_direct_IO
,
/*
 * Handle a write fault on a mapped page so that the page becomes
 * writable and is tracked as dirty.
 *
 * Visible steps, bracketed by sb_start_pagefault() and sb_end_pagefault():
 *  - wait (killable) on bit 1 of the inode bitlock; a nonzero result
 *    yields VM_FAULT_RETRY;
 *  - launder a page that is dirty without private data; a launder
 *    failure yields VM_FAULT_LOCKED|VM_FAULT_RETRY;
 *  - when the page already has a write range whose uid and gid equal the
 *    current fsuid and fsgid, its position is reset to the start of the
 *    page; a further launder call with the same failure result is
 *    visible after that test;
 *  - a new write range starting at the page offset is allocated, stamped
 *    with the current fsuid and fsgid and attached as private data;
 *  - the file times are updated; if the page no longer belongs to the
 *    inode mapping the result is VM_FAULT_LOCKED|VM_FAULT_NOPAGE;
 *  - otherwise the page is marked dirty, the code waits for the page to
 *    become stable and the result is VM_FAULT_LOCKED.
 *
 * NOTE(review): the page locking calls, the length assignments, the goto
 * labels and the return are absent from this listing.
 */
674 vm_fault_t
orangefs_page_mkwrite(struct vm_fault
*vmf
)
676 struct page
*page
= vmf
->page
;
677 struct inode
*inode
= file_inode(vmf
->vma
->vm_file
);
678 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
679 unsigned long *bitlock
= &orangefs_inode
->bitlock
;
681 struct orangefs_write_range
*wr
;
683 sb_start_pagefault(inode
->i_sb
);
685 if (wait_on_bit(bitlock
, 1, TASK_KILLABLE
)) {
686 ret
= VM_FAULT_RETRY
;
691 if (PageDirty(page
) && !PagePrivate(page
)) {
693 * Should be impossible. If it happens, launder the page
694 * since we don't know what's dirty. This will WARN in
695 * orangefs_writepage_locked.
697 if (orangefs_launder_page(page
)) {
698 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
702 if (PagePrivate(page
)) {
703 wr
= (struct orangefs_write_range
*)page_private(page
);
704 if (uid_eq(wr
->uid
, current_fsuid()) &&
705 gid_eq(wr
->gid
, current_fsgid())) {
706 wr
->pos
= page_offset(page
);
710 if (orangefs_launder_page(page
)) {
711 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
716 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
718 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
721 wr
->pos
= page_offset(page
);
723 wr
->uid
= current_fsuid();
724 wr
->gid
= current_fsgid();
725 attach_page_private(page
, wr
);
728 file_update_time(vmf
->vma
->vm_file
);
729 if (page
->mapping
!= inode
->i_mapping
) {
731 ret
= VM_FAULT_LOCKED
|VM_FAULT_NOPAGE
;
736 * We mark the page dirty already here so that when freeze is in
737 * progress, we are guaranteed that writeback during freezing will
738 * see the dirty page and writeprotect it again.
740 set_page_dirty(page
);
741 wait_for_stable_page(page
);
742 ret
= VM_FAULT_LOCKED
;
744 sb_end_pagefault(inode
->i_sb
);
/*
 * Apply a size change (truncate or extend) requested through @iattr.
 *
 * Visible steps: the size is refreshed from the server with
 * orangefs_inode_getattr(ORANGEFS_GETATTR_SIZE) and remembered as
 * orig_size.  The page cache is truncated to the new size, i_size is
 * written, and pagecache_isize_extended() is called when the file grows.
 * An ORANGEFS_VFS_OP_TRUNCATE operation carrying the inode reference and
 * the new size is then sent with service_operation().  When the size
 * differs from orig_size, ATTR_CTIME and ATTR_MTIME are added to
 * iattr->ia_valid.
 *
 * NOTE(review): error handling, release of the operation and the return
 * are absent from this listing.
 */
748 static int orangefs_setattr_size(struct inode
*inode
, struct iattr
*iattr
)
750 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
751 struct orangefs_kernel_op_s
*new_op
;
755 gossip_debug(GOSSIP_INODE_DEBUG
,
756 "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
758 get_khandle_from_ino(inode
),
759 &orangefs_inode
->refn
.khandle
,
760 orangefs_inode
->refn
.fs_id
,
763 /* Ensure that we have a up to date size, so we know if it changed. */
764 ret
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_SIZE
);
768 gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
772 orig_size
= i_size_read(inode
);
774 /* This is truncate_setsize in a different order. */
775 truncate_pagecache(inode
, iattr
->ia_size
);
776 i_size_write(inode
, iattr
->ia_size
);
777 if (iattr
->ia_size
> orig_size
)
778 pagecache_isize_extended(inode
, orig_size
, iattr
->ia_size
);
780 new_op
= op_alloc(ORANGEFS_VFS_OP_TRUNCATE
);
784 new_op
->upcall
.req
.truncate
.refn
= orangefs_inode
->refn
;
785 new_op
->upcall
.req
.truncate
.size
= (__s64
) iattr
->ia_size
;
787 ret
= service_operation(new_op
,
789 get_interruptible_flag(inode
));
792 * the truncate has no downcall members to retrieve, but
793 * the status value tells us if it went through ok or not
795 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: ret:%d:\n", __func__
, ret
);
802 if (orig_size
!= i_size_read(inode
))
803 iattr
->ia_valid
|= ATTR_CTIME
| ATTR_MTIME
;
/*
 * Apply the attribute changes described by @iattr to @inode.
 *
 * Visible steps:
 *  - for ATTR_MODE, the sticky bit is stripped from ia_mode on the root
 *    handle; per the debug messages, a sticky bit on any other object and
 *    the setuid bit lead to EINVAL;
 *  - for ATTR_SIZE, orangefs_setattr_size() is called;
 *  - under inode->i_lock the pending attribute state of the inode
 *    (attr_valid, attr_uid, attr_gid) is recorded from ia_valid and the
 *    current fsuid and fsgid; a path that drops the lock and calls
 *    write_inode_now() is also visible;
 *  - setattr_copy() applies the changes, the lock is dropped and the
 *    inode is marked dirty;
 *  - for ATTR_MODE, posix_acl_chmod() is called with the new mode.
 *
 * NOTE(review): the else branches, the gotos or retry logic around
 * write_inode_now() and the return are absent from this listing.
 */
808 int __orangefs_setattr(struct inode
*inode
, struct iattr
*iattr
)
812 if (iattr
->ia_valid
& ATTR_MODE
) {
813 if (iattr
->ia_mode
& (S_ISVTX
)) {
814 if (is_root_handle(inode
)) {
816 * allow sticky bit to be set on root (since
817 * it shows up that way by default anyhow),
818 * but don't show it to the server
820 iattr
->ia_mode
-= S_ISVTX
;
822 gossip_debug(GOSSIP_UTILS_DEBUG
,
823 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
828 if (iattr
->ia_mode
& (S_ISUID
)) {
829 gossip_debug(GOSSIP_UTILS_DEBUG
,
830 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
836 if (iattr
->ia_valid
& ATTR_SIZE
) {
837 ret
= orangefs_setattr_size(inode
, iattr
);
843 spin_lock(&inode
->i_lock
);
844 if (ORANGEFS_I(inode
)->attr_valid
) {
845 if (uid_eq(ORANGEFS_I(inode
)->attr_uid
, current_fsuid()) &&
846 gid_eq(ORANGEFS_I(inode
)->attr_gid
, current_fsgid())) {
847 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
849 spin_unlock(&inode
->i_lock
);
850 write_inode_now(inode
, 1);
854 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
855 ORANGEFS_I(inode
)->attr_uid
= current_fsuid();
856 ORANGEFS_I(inode
)->attr_gid
= current_fsgid();
858 setattr_copy(inode
, iattr
);
859 spin_unlock(&inode
->i_lock
);
860 mark_inode_dirty(inode
);
862 if (iattr
->ia_valid
& ATTR_MODE
)
863 /* change mod on a file that has ACLs */
864 ret
= posix_acl_chmod(inode
, inode
->i_mode
);
872 * Change attributes of an object referenced by dentry.
/*
 * setattr inode operation (installed as .setattr in
 * orangefs_file_inode_operations).
 *
 * Validates the request with setattr_prepare(), applies it with
 * __orangefs_setattr() on the dentry inode and then calls
 * sync_inode_metadata() with the wait argument set to 1.
 *
 * NOTE(review): the error check after setattr_prepare() and the return
 * are absent from this listing.
 */
874 int orangefs_setattr(struct dentry
*dentry
, struct iattr
*iattr
)
877 gossip_debug(GOSSIP_INODE_DEBUG
, "__orangefs_setattr: called on %pd\n",
879 ret
= setattr_prepare(dentry
, iattr
);
882 ret
= __orangefs_setattr(d_inode(dentry
), iattr
);
883 sync_inode_metadata(d_inode(dentry
), 1);
885 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_setattr: returning %d\n",
891 * Obtain attributes of an object given a dentry
/*
 * getattr inode operation (installed as .getattr in
 * orangefs_file_inode_operations).
 *
 * Refreshes the inode with orangefs_inode_getattr(), asking for the size
 * only when STATX_SIZE is set in @request_mask, then fills @stat with
 * generic_fillattr().  STATX_SIZE is cleared from stat->result_mask when
 * the size was not requested.  STATX_ATTR_IMMUTABLE and STATX_ATTR_APPEND
 * are reported in stat->attributes according to the S_IMMUTABLE and
 * S_APPEND inode flags.
 *
 * NOTE(review): the check of the getattr result, the remainder of the
 * attributes_mask assignment and the return are absent from this listing.
 */
893 int orangefs_getattr(const struct path
*path
, struct kstat
*stat
,
894 u32 request_mask
, unsigned int flags
)
897 struct inode
*inode
= path
->dentry
->d_inode
;
899 gossip_debug(GOSSIP_INODE_DEBUG
,
900 "orangefs_getattr: called on %pd mask %u\n",
901 path
->dentry
, request_mask
);
903 ret
= orangefs_inode_getattr(inode
,
904 request_mask
& STATX_SIZE
? ORANGEFS_GETATTR_SIZE
: 0);
906 generic_fillattr(inode
, stat
);
908 /* override block size reported to stat */
909 if (!(request_mask
& STATX_SIZE
))
910 stat
->result_mask
&= ~STATX_SIZE
;
912 stat
->attributes_mask
= STATX_ATTR_IMMUTABLE
|
914 if (inode
->i_flags
& S_IMMUTABLE
)
915 stat
->attributes
|= STATX_ATTR_IMMUTABLE
;
916 if (inode
->i_flags
& S_APPEND
)
917 stat
->attributes
|= STATX_ATTR_APPEND
;
/*
 * permission inode operation (installed as .permission in
 * orangefs_file_inode_operations).
 *
 * Tests @mask for MAY_NOT_BLOCK, refreshes the common attributes with
 * orangefs_inode_getattr(inode, 0) and returns the verdict of
 * generic_permission().
 *
 * NOTE(review): the action taken for MAY_NOT_BLOCK and the check of the
 * getattr result are absent from this listing.
 */
922 int orangefs_permission(struct inode
*inode
, int mask
)
926 if (mask
& MAY_NOT_BLOCK
)
929 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: refreshing\n", __func__
);
931 /* Make sure the permission (and other common attrs) are up to date. */
932 ret
= orangefs_inode_getattr(inode
, 0);
936 return generic_permission(inode
, mask
);
/*
 * update_time inode operation (installed as .update_time in
 * orangefs_file_inode_operations).
 *
 * Updates the in-core times with generic_update_time(), then builds a
 * zeroed struct iattr whose ia_valid receives ATTR_ATIME, ATTR_CTIME
 * and/or ATTR_MTIME, and returns the result of __orangefs_setattr() on it.
 *
 * NOTE(review): the declaration of iattr and the conditions selecting
 * each ATTR_* bit (presumably tests on @flags) are absent from this
 * listing.
 */
939 int orangefs_update_time(struct inode
*inode
, struct timespec64
*time
, int flags
)
942 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_update_time: %pU\n",
943 get_khandle_from_ino(inode
));
944 generic_update_time(inode
, time
, flags
);
945 memset(&iattr
, 0, sizeof iattr
);
947 iattr
.ia_valid
|= ATTR_ATIME
;
949 iattr
.ia_valid
|= ATTR_CTIME
;
951 iattr
.ia_valid
|= ATTR_MTIME
;
952 return __orangefs_setattr(inode
, &iattr
);
955 /* ORANGEFS2 implementation of VFS inode operations for files */
956 static const struct inode_operations orangefs_file_inode_operations
= {
957 .get_acl
= orangefs_get_acl
,
958 .set_acl
= orangefs_set_acl
,
959 .setattr
= orangefs_setattr
,
960 .getattr
= orangefs_getattr
,
961 .listxattr
= orangefs_listxattr
,
962 .permission
= orangefs_permission
,
963 .update_time
= orangefs_update_time
,
/*
 * Install the operation tables on a freshly set up inode.
 *
 * The mapping always receives orangefs_address_operations.  A switch on
 * the file type bits (inode->i_mode & S_IFMT) then selects the file,
 * symlink or directory inode operations, with matching file operations
 * for files and directories; any other type is logged as an unsupported
 * mode.
 *
 * NOTE(review): the case labels and the return values are absent from
 * this listing.
 */
966 static int orangefs_init_iops(struct inode
*inode
)
968 inode
->i_mapping
->a_ops
= &orangefs_address_operations
;
970 switch (inode
->i_mode
& S_IFMT
) {
972 inode
->i_op
= &orangefs_file_inode_operations
;
973 inode
->i_fop
= &orangefs_file_operations
;
976 inode
->i_op
= &orangefs_symlink_inode_operations
;
979 inode
->i_op
= &orangefs_dir_inode_operations
;
980 inode
->i_fop
= &orangefs_dir_operations
;
983 gossip_debug(GOSSIP_INODE_DEBUG
,
984 "%s: unsupported mode\n",
993 * Given an ORANGEFS object identifier (fsid, handle), convert it into
994 * an ino_t type that will be used as a hash-index from where the handle will
995 * be searched for in the VFS hash table of inodes.
/*
 * Derive the inode number used as the inode hash key from the khandle of
 * @ref, via orangefs_khandle_to_ino().
 *
 * NOTE(review): lines between the signature and the return are absent
 * from this listing.
 */
997 static inline ino_t
orangefs_handle_hash(struct orangefs_object_kref
*ref
)
1001 return orangefs_khandle_to_ino(&(ref
->khandle
));
1005 * Called to set up an inode from iget5_locked.
/*
 * Initialise the OrangeFS part of an inode from the object reference
 * passed in @data (a struct orangefs_object_kref).
 *
 * Copies fs_id and khandle into the inode reference, clears attr_valid,
 * initialises the xattr_cache hash table, sets mapping_time to
 * jiffies - 1 and clears bitlock.
 *
 * NOTE(review): the return statement is absent from this listing.
 */
1007 static int orangefs_set_inode(struct inode
*inode
, void *data
)
1009 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1010 ORANGEFS_I(inode
)->refn
.fs_id
= ref
->fs_id
;
1011 ORANGEFS_I(inode
)->refn
.khandle
= ref
->khandle
;
1012 ORANGEFS_I(inode
)->attr_valid
= 0;
1013 hash_init(ORANGEFS_I(inode
)->xattr_cache
);
1014 ORANGEFS_I(inode
)->mapping_time
= jiffies
- 1;
1015 ORANGEFS_I(inode
)->bitlock
= 0;
1020 * Called to determine if handles match.
/*
 * Inode comparison callback: decide whether @inode refers to the object
 * described by @data (a struct orangefs_object_kref).
 *
 * The result combines a khandle comparison through ORANGEFS_khandle_cmp()
 * with an equality test of the fs_id values.
 *
 * NOTE(review): the second argument of the khandle comparison and the
 * operator joining the two tests are on a line absent from this listing.
 */
1022 static int orangefs_test_inode(struct inode
*inode
, void *data
)
1024 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1025 struct orangefs_inode_s
*orangefs_inode
= NULL
;
1027 orangefs_inode
= ORANGEFS_I(inode
);
1028 /* test handles and fs_ids... */
1029 return (!ORANGEFS_khandle_cmp(&(orangefs_inode
->refn
.khandle
),
1031 orangefs_inode
->refn
.fs_id
== ref
->fs_id
);
1035 * Front-end to look up the inode-cache maintained by the VFS using the ORANGEFS
1038 * @sb: the file system super block instance.
1039 * @ref: The ORANGEFS object for which we are trying to locate an inode.
/*
 * Find or create the in-core inode for the object described by @ref on
 * superblock @sb.
 *
 * The hash key comes from orangefs_handle_hash() and the lookup is done
 * with iget5_locked(), using orangefs_test_inode() for matching.  An
 * ERR_PTR(-ENOMEM) return is visible after the lookup.  For an inode in
 * state I_NEW the attributes are fetched with
 * orangefs_inode_getattr(ORANGEFS_GETATTR_NEW), with an ERR_PTR(error)
 * return visible after it; then i_ino is set to the hash, the operation
 * tables are installed with orangefs_init_iops() and the inode is
 * unlocked with unlock_new_inode().
 *
 * NOTE(review): the remaining iget5_locked() arguments, the conditions
 * guarding the error returns, the handling of an already cached inode
 * and the final return are absent from this listing.
 */
1041 struct inode
*orangefs_iget(struct super_block
*sb
,
1042 struct orangefs_object_kref
*ref
)
1044 struct inode
*inode
= NULL
;
1048 hash
= orangefs_handle_hash(ref
);
1049 inode
= iget5_locked(sb
,
1051 orangefs_test_inode
,
1056 return ERR_PTR(-ENOMEM
);
1058 if (!(inode
->i_state
& I_NEW
))
1061 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1064 return ERR_PTR(error
);
1067 inode
->i_ino
= hash
; /* needed for stat etc */
1068 orangefs_init_iops(inode
);
1069 unlock_new_inode(inode
);
1071 gossip_debug(GOSSIP_INODE_DEBUG
,
1072 "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
1082 * Allocate an inode for a newly created file and insert it into the inode hash.
/*
 * Allocate an inode for a newly created object and insert it into the
 * inode hash.
 *
 * Visible steps: the hash key is computed from @ref, an inode is obtained
 * with new_inode() (an ERR_PTR(-ENOMEM) return is visible after it), the
 * OrangeFS fields are initialised with orangefs_set_inode(), i_ino is set
 * to the hash and the attributes are fetched with
 * orangefs_inode_getattr(ORANGEFS_GETATTR_NEW).  The operation tables are
 * installed, i_rdev is set to @dev and the inode is inserted with
 * insert_inode_locked4() using orangefs_test_inode().  ACLs are then
 * initialised from @dir with orangefs_init_acl().  An ERR_PTR(error)
 * return is visible at the end.
 *
 * NOTE(review): the error checks, any cleanup on the error path and the
 * success return are absent from this listing, which may also end before
 * the function does.
 */
1084 struct inode
*orangefs_new_inode(struct super_block
*sb
, struct inode
*dir
,
1085 int mode
, dev_t dev
, struct orangefs_object_kref
*ref
)
1087 unsigned long hash
= orangefs_handle_hash(ref
);
1088 struct inode
*inode
;
1091 gossip_debug(GOSSIP_INODE_DEBUG
,
1092 "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
1099 inode
= new_inode(sb
);
1101 return ERR_PTR(-ENOMEM
);
1103 orangefs_set_inode(inode
, ref
);
1104 inode
->i_ino
= hash
; /* needed for stat etc */
1106 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1110 orangefs_init_iops(inode
);
1111 inode
->i_rdev
= dev
;
1113 error
= insert_inode_locked4(inode
, hash
, orangefs_test_inode
, ref
);
1117 gossip_debug(GOSSIP_INODE_DEBUG
,
1118 "Initializing ACL's for inode %pU\n",
1119 get_khandle_from_ino(inode
));
1120 orangefs_init_acl(inode
, dir
);
1125 return ERR_PTR(error
);