4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * Author: Peter Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
36 * Author: Andreas Dilger <adilger@clusterfs.com>
39 #define DEBUG_SUBSYSTEM S_LLITE
40 #include "../include/lustre_dlm.h"
41 #include <linux/pagemap.h>
42 #include <linux/file.h>
43 #include <linux/sched.h>
44 #include <linux/mount.h>
45 #include "../include/lustre/ll_fiemap.h"
46 #include "../include/lustre/lustre_ioctl.h"
48 #include "../include/cl_object.h"
49 #include "llite_internal.h"
52 ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
);
54 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
57 static enum llioc_iter
58 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
59 unsigned int cmd
, unsigned long arg
, int *rcp
);
61 static struct ll_file_data
*ll_file_data_get(void)
63 struct ll_file_data
*fd
;
65 fd
= kmem_cache_zalloc(ll_file_data_slab
, GFP_NOFS
);
68 fd
->fd_write_failed
= false;
72 static void ll_file_data_put(struct ll_file_data
*fd
)
75 kmem_cache_free(ll_file_data_slab
, fd
);
78 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
79 struct lustre_handle
*fh
)
81 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
82 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
83 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
84 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
85 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
86 op_data
->op_attr
.ia_size
= i_size_read(inode
);
87 op_data
->op_attr_blocks
= inode
->i_blocks
;
88 op_data
->op_attr_flags
= ll_inode_to_ext_flags(inode
->i_flags
);
89 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
91 op_data
->op_handle
= *fh
;
93 if (ll_i2info(inode
)->lli_flags
& LLIF_DATA_MODIFIED
)
94 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
98 * Closes the IO epoch and packs all the attributes into @op_data for
101 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
102 struct obd_client_handle
*och
)
104 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
105 ATTR_MTIME
| ATTR_MTIME_SET
|
106 ATTR_CTIME
| ATTR_CTIME_SET
;
108 if (!(och
->och_flags
& FMODE_WRITE
))
111 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
112 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
114 ll_ioepoch_close(inode
, op_data
, &och
, 0);
117 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
118 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
119 0, 0, LUSTRE_OPC_ANY
, NULL
);
122 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
124 struct obd_client_handle
*och
,
125 const __u64
*data_version
)
127 struct obd_export
*exp
= ll_i2mdexp(inode
);
128 struct md_op_data
*op_data
;
129 struct ptlrpc_request
*req
= NULL
;
130 struct obd_device
*obd
= class_exp2obd(exp
);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle %#llx\n",
140 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
145 op_data
= kzalloc(sizeof(*op_data
), GFP_NOFS
);
147 /* XXX We leak openhandle and request here. */
152 ll_prepare_close(inode
, op_data
, och
);
154 /* Pass in data_version implies release. */
155 op_data
->op_bias
|= MDS_HSM_RELEASE
;
156 op_data
->op_data_version
= *data_version
;
157 op_data
->op_lease_handle
= och
->och_lease_handle
;
158 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
160 epoch_close
= op_data
->op_flags
& MF_EPOCH_CLOSE
;
161 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
163 /* This close must have the epoch closed. */
164 LASSERT(epoch_close
);
165 /* MDS has instructed us to obtain Size-on-MDS attribute from
166 * OSTs and send setattr to back to MDS.
168 rc
= ll_som_update(inode
, op_data
);
170 CERROR("%s: inode "DFID
" mdc Size-on-MDS update failed: rc = %d\n",
171 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
172 PFID(ll_inode2fid(inode
)), rc
);
176 CERROR("%s: inode "DFID
" mdc close failed: rc = %d\n",
177 ll_i2mdexp(inode
)->exp_obd
->obd_name
,
178 PFID(ll_inode2fid(inode
)), rc
);
181 /* DATA_MODIFIED flag was successfully sent on close, cancel data
184 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
185 struct ll_inode_info
*lli
= ll_i2info(inode
);
187 spin_lock(&lli
->lli_lock
);
188 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
189 spin_unlock(&lli
->lli_lock
);
192 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
193 struct mdt_body
*body
;
195 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
196 if (!(body
->mbo_valid
& OBD_MD_FLRELEASED
))
200 ll_finish_md_op_data(op_data
);
203 if (exp_connect_som(exp
) && !epoch_close
&&
204 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
205 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
207 md_clear_open_replay_data(md_exp
, och
);
208 /* Free @och if it is not waiting for DONE_WRITING. */
209 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
212 if (req
) /* This is close request */
213 ptlrpc_req_finished(req
);
217 int ll_md_real_close(struct inode
*inode
, fmode_t fmode
)
219 struct ll_inode_info
*lli
= ll_i2info(inode
);
220 struct obd_client_handle
**och_p
;
221 struct obd_client_handle
*och
;
225 if (fmode
& FMODE_WRITE
) {
226 och_p
= &lli
->lli_mds_write_och
;
227 och_usecount
= &lli
->lli_open_fd_write_count
;
228 } else if (fmode
& FMODE_EXEC
) {
229 och_p
= &lli
->lli_mds_exec_och
;
230 och_usecount
= &lli
->lli_open_fd_exec_count
;
232 LASSERT(fmode
& FMODE_READ
);
233 och_p
= &lli
->lli_mds_read_och
;
234 och_usecount
= &lli
->lli_open_fd_read_count
;
237 mutex_lock(&lli
->lli_och_mutex
);
238 if (*och_usecount
> 0) {
239 /* There are still users of this handle, so skip
242 mutex_unlock(&lli
->lli_och_mutex
);
248 mutex_unlock(&lli
->lli_och_mutex
);
251 /* There might be a race and this handle may already
254 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
261 static int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
264 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
265 struct ll_inode_info
*lli
= ll_i2info(inode
);
267 __u64 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
268 struct lustre_handle lockh
;
269 ldlm_policy_data_t policy
= {.l_inodebits
= {MDS_INODELOCK_OPEN
} };
272 /* clear group lock, if present */
273 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
274 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.lg_gid
);
276 if (fd
->fd_lease_och
) {
279 /* Usually the lease is not released when the
280 * application crashed, we need to release here.
282 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
283 CDEBUG(rc
? D_ERROR
: D_INODE
,
284 "Clean up lease " DFID
" %d/%d\n",
285 PFID(&lli
->lli_fid
), rc
, lease_broken
);
287 fd
->fd_lease_och
= NULL
;
291 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
296 /* Let's see if we have good enough OPEN lock on the file and if
297 * we can skip talking to MDS
300 mutex_lock(&lli
->lli_och_mutex
);
301 if (fd
->fd_omode
& FMODE_WRITE
) {
303 LASSERT(lli
->lli_open_fd_write_count
);
304 lli
->lli_open_fd_write_count
--;
305 } else if (fd
->fd_omode
& FMODE_EXEC
) {
307 LASSERT(lli
->lli_open_fd_exec_count
);
308 lli
->lli_open_fd_exec_count
--;
311 LASSERT(lli
->lli_open_fd_read_count
);
312 lli
->lli_open_fd_read_count
--;
314 mutex_unlock(&lli
->lli_och_mutex
);
316 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
317 LDLM_IBITS
, &policy
, lockmode
, &lockh
))
318 rc
= ll_md_real_close(inode
, fd
->fd_omode
);
321 LUSTRE_FPRIVATE(file
) = NULL
;
322 ll_file_data_put(fd
);
327 /* While this returns an error code, fput() the caller does not, so we need
328 * to make every effort to clean up all of our state here. Also, applications
329 * rarely check close errors and even if an error is returned they will not
330 * re-try the close call.
332 int ll_file_release(struct inode
*inode
, struct file
*file
)
334 struct ll_file_data
*fd
;
335 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
336 struct ll_inode_info
*lli
= ll_i2info(inode
);
339 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
340 PFID(ll_inode2fid(inode
)), inode
);
342 if (!is_root_inode(inode
))
343 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
344 fd
= LUSTRE_FPRIVATE(file
);
347 /* The last ref on @file, maybe not be the owner pid of statahead,
348 * because parent and child process can share the same file handle.
350 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
)
351 ll_deauthorize_statahead(inode
, fd
);
353 if (is_root_inode(inode
)) {
354 LUSTRE_FPRIVATE(file
) = NULL
;
355 ll_file_data_put(fd
);
359 if (!S_ISDIR(inode
->i_mode
)) {
361 lov_read_and_clear_async_rc(lli
->lli_clob
);
362 lli
->lli_async_rc
= 0;
365 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
367 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
368 libcfs_debug_dumplog();
373 static int ll_intent_file_open(struct dentry
*de
, void *lmm
, int lmmsize
,
374 struct lookup_intent
*itp
)
376 struct inode
*inode
= d_inode(de
);
377 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
378 struct dentry
*parent
= de
->d_parent
;
379 const char *name
= NULL
;
380 struct md_op_data
*op_data
;
381 struct ptlrpc_request
*req
= NULL
;
385 LASSERT(itp
->it_flags
& MDS_OPEN_BY_FID
);
388 * if server supports open-by-fid, or file name is invalid, don't pack
389 * name in open request
391 if (!(exp_connect_flags(sbi
->ll_md_exp
) & OBD_CONNECT_OPEN_BY_FID
) &&
392 lu_name_is_valid_2(de
->d_name
.name
, de
->d_name
.len
)) {
393 name
= de
->d_name
.name
;
394 len
= de
->d_name
.len
;
397 op_data
= ll_prep_md_op_data(NULL
, d_inode(parent
), inode
, name
, len
,
398 O_RDWR
, LUSTRE_OPC_ANY
, NULL
);
400 return PTR_ERR(op_data
);
401 op_data
->op_data
= lmm
;
402 op_data
->op_data_size
= lmmsize
;
404 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, itp
, &req
,
405 &ll_md_blocking_ast
, 0);
406 ll_finish_md_op_data(op_data
);
408 /* reason for keep own exit path - don`t flood log
409 * with messages with -ESTALE errors.
411 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
412 it_open_error(DISP_OPEN_OPEN
, itp
))
414 ll_release_openhandle(inode
, itp
);
418 if (it_disposition(itp
, DISP_LOOKUP_NEG
)) {
423 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
424 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
425 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
429 rc
= ll_prep_inode(&inode
, req
, NULL
, itp
);
430 if (!rc
&& itp
->it_lock_mode
)
431 ll_set_lock_data(sbi
->ll_md_exp
, inode
, itp
, NULL
);
434 ptlrpc_req_finished(req
);
435 ll_intent_drop_lock(itp
);
441 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
442 * not believe attributes if a few ioepoch holders exist. Attributes for
443 * previous ioepoch if new one is opened are also skipped by MDS.
445 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
447 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
448 lli
->lli_ioepoch
= ioepoch
;
449 CDEBUG(D_INODE
, "Epoch %llu opened on "DFID
"\n",
450 ioepoch
, PFID(&lli
->lli_fid
));
454 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
455 struct obd_client_handle
*och
)
457 struct mdt_body
*body
;
459 body
= req_capsule_server_get(&it
->it_request
->rq_pill
, &RMF_MDT_BODY
);
460 och
->och_fh
= body
->mbo_handle
;
461 och
->och_fid
= body
->mbo_fid1
;
462 och
->och_lease_handle
.cookie
= it
->it_lock_handle
;
463 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
464 och
->och_flags
= it
->it_flags
;
466 return md_set_open_replay_data(md_exp
, och
, it
);
469 static int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
470 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
472 struct inode
*inode
= file_inode(file
);
473 struct ll_inode_info
*lli
= ll_i2info(inode
);
475 LASSERT(!LUSTRE_FPRIVATE(file
));
480 struct mdt_body
*body
;
483 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
487 body
= req_capsule_server_get(&it
->it_request
->rq_pill
,
489 ll_ioepoch_open(lli
, body
->mbo_ioepoch
);
492 LUSTRE_FPRIVATE(file
) = fd
;
493 ll_readahead_init(inode
, &fd
->fd_ras
);
494 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
496 /* ll_cl_context initialize */
497 rwlock_init(&fd
->fd_lock
);
498 INIT_LIST_HEAD(&fd
->fd_lccs
);
503 /* Open a file, and (for the very first open) create objects on the OSTs at
504 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
505 * creation or open until ll_lov_setstripe() ioctl is called.
507 * If we already have the stripe MD locally then we don't request it in
508 * md_open(), by passing a lmm_size = 0.
510 * It is up to the application to ensure no other processes open this file
511 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
512 * used. We might be able to avoid races of that sort by getting lli_open_sem
513 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
514 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
516 int ll_file_open(struct inode
*inode
, struct file
*file
)
518 struct ll_inode_info
*lli
= ll_i2info(inode
);
519 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
520 .it_flags
= file
->f_flags
};
521 struct obd_client_handle
**och_p
= NULL
;
522 __u64
*och_usecount
= NULL
;
523 struct ll_file_data
*fd
;
526 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), flags %o\n",
527 PFID(ll_inode2fid(inode
)), inode
, file
->f_flags
);
529 it
= file
->private_data
; /* XXX: compat macro */
530 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
532 fd
= ll_file_data_get();
539 if (S_ISDIR(inode
->i_mode
))
540 ll_authorize_statahead(inode
, fd
);
542 if (is_root_inode(inode
)) {
543 LUSTRE_FPRIVATE(file
) = fd
;
547 if (!it
|| !it
->it_disposition
) {
548 /* Convert f_flags into access mode. We cannot use file->f_mode,
549 * because everything but O_ACCMODE mask was stripped from
552 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
554 if (file
->f_flags
& O_TRUNC
)
555 oit
.it_flags
|= FMODE_WRITE
;
557 /* kernel only call f_op->open in dentry_open. filp_open calls
558 * dentry_open after call to open_namei that checks permissions.
559 * Only nfsd_open call dentry_open directly without checking
560 * permissions and because of that this code below is safe.
562 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
563 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
565 /* We do not want O_EXCL here, presumably we opened the file
566 * already? XXX - NFS implications?
568 oit
.it_flags
&= ~O_EXCL
;
570 /* bug20584, if "it_flags" contains O_CREAT, the file will be
571 * created if necessary, then "IT_CREAT" should be set to keep
574 if (oit
.it_flags
& O_CREAT
)
575 oit
.it_op
|= IT_CREAT
;
581 /* Let's see if we have file open on MDS already. */
582 if (it
->it_flags
& FMODE_WRITE
) {
583 och_p
= &lli
->lli_mds_write_och
;
584 och_usecount
= &lli
->lli_open_fd_write_count
;
585 } else if (it
->it_flags
& FMODE_EXEC
) {
586 och_p
= &lli
->lli_mds_exec_och
;
587 och_usecount
= &lli
->lli_open_fd_exec_count
;
589 och_p
= &lli
->lli_mds_read_och
;
590 och_usecount
= &lli
->lli_open_fd_read_count
;
593 mutex_lock(&lli
->lli_och_mutex
);
594 if (*och_p
) { /* Open handle is present */
595 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
596 /* Well, there's extra open request that we do not need,
597 * let's close it somehow. This will decref request.
599 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
601 mutex_unlock(&lli
->lli_och_mutex
);
605 ll_release_openhandle(inode
, it
);
609 rc
= ll_local_open(file
, it
, fd
, NULL
);
612 mutex_unlock(&lli
->lli_och_mutex
);
616 LASSERT(*och_usecount
== 0);
617 if (!it
->it_disposition
) {
618 /* We cannot just request lock handle now, new ELC code
619 * means that one of other OPEN locks for this file
620 * could be cancelled, and since blocking ast handler
621 * would attempt to grab och_mutex as well, that would
622 * result in a deadlock
624 mutex_unlock(&lli
->lli_och_mutex
);
626 * Normally called under two situations:
628 * 2. revalidate with IT_OPEN (revalidate doesn't
629 * execute this intent any more).
631 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
633 * Always specify MDS_OPEN_BY_FID because we don't want
634 * to get file with different fid.
636 it
->it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
;
637 rc
= ll_intent_file_open(file
->f_path
.dentry
, NULL
, 0, it
);
643 *och_p
= kzalloc(sizeof(struct obd_client_handle
), GFP_NOFS
);
651 /* md_intent_lock() didn't get a request ref if there was an
652 * open error, so don't do cleanup on the request here
655 /* XXX (green): Should not we bail out on any error here, not
658 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
662 LASSERTF(it_disposition(it
, DISP_ENQ_OPEN_REF
),
663 "inode %p: disposition %x, status %d\n", inode
,
664 it_disposition(it
, ~0), it
->it_status
);
666 rc
= ll_local_open(file
, it
, fd
, *och_p
);
670 mutex_unlock(&lli
->lli_och_mutex
);
673 /* Must do this outside lli_och_mutex lock to prevent deadlock where
674 * different kind of OPEN lock for this same inode gets cancelled
677 if (!S_ISREG(inode
->i_mode
))
680 if (!lli
->lli_has_smd
&&
681 (cl_is_lov_delay_create(file
->f_flags
) ||
682 (file
->f_mode
& FMODE_WRITE
) == 0)) {
683 CDEBUG(D_INODE
, "object creation was delayed\n");
686 cl_lov_delay_create_clear(&file
->f_flags
);
691 if (och_p
&& *och_p
) {
696 mutex_unlock(&lli
->lli_och_mutex
);
699 if (lli
->lli_opendir_key
== fd
)
700 ll_deauthorize_statahead(inode
, fd
);
702 ll_file_data_put(fd
);
704 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
707 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
708 ptlrpc_req_finished(it
->it_request
);
709 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
715 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
716 struct ldlm_lock_desc
*desc
,
717 void *data
, int flag
)
720 struct lustre_handle lockh
;
723 case LDLM_CB_BLOCKING
:
724 ldlm_lock2handle(lock
, &lockh
);
725 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
727 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
731 case LDLM_CB_CANCELING
:
739 * Acquire a lease and open the file.
741 static struct obd_client_handle
*
742 ll_lease_open(struct inode
*inode
, struct file
*file
, fmode_t fmode
,
745 struct lookup_intent it
= { .it_op
= IT_OPEN
};
746 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
747 struct md_op_data
*op_data
;
748 struct ptlrpc_request
*req
= NULL
;
749 struct lustre_handle old_handle
= { 0 };
750 struct obd_client_handle
*och
= NULL
;
754 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
755 return ERR_PTR(-EINVAL
);
758 struct ll_inode_info
*lli
= ll_i2info(inode
);
759 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
760 struct obd_client_handle
**och_p
;
763 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
764 return ERR_PTR(-EPERM
);
766 /* Get the openhandle of the file */
768 mutex_lock(&lli
->lli_och_mutex
);
769 if (fd
->fd_lease_och
) {
770 mutex_unlock(&lli
->lli_och_mutex
);
775 if (file
->f_mode
& FMODE_WRITE
) {
776 LASSERT(lli
->lli_mds_write_och
);
777 och_p
= &lli
->lli_mds_write_och
;
778 och_usecount
= &lli
->lli_open_fd_write_count
;
780 LASSERT(lli
->lli_mds_read_och
);
781 och_p
= &lli
->lli_mds_read_och
;
782 och_usecount
= &lli
->lli_open_fd_read_count
;
784 if (*och_usecount
== 1) {
791 mutex_unlock(&lli
->lli_och_mutex
);
792 if (rc
< 0) /* more than 1 opener */
796 old_handle
= fd
->fd_och
->och_fh
;
799 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
801 return ERR_PTR(-ENOMEM
);
803 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
804 LUSTRE_OPC_ANY
, NULL
);
805 if (IS_ERR(op_data
)) {
806 rc
= PTR_ERR(op_data
);
810 /* To tell the MDT this openhandle is from the same owner */
811 op_data
->op_handle
= old_handle
;
813 it
.it_flags
= fmode
| open_flags
;
814 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
815 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, &it
, &req
,
816 &ll_md_blocking_lease_ast
,
817 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
818 * it can be cancelled which may mislead applications that the lease is
820 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
821 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
822 * doesn't deal with openhandle, so normal openhandle will be leaked.
824 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
825 ll_finish_md_op_data(op_data
);
826 ptlrpc_req_finished(req
);
830 if (it_disposition(&it
, DISP_LOOKUP_NEG
)) {
835 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
839 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
840 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
842 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */ {
847 /* already get lease, handle lease lock */
848 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
849 if (it
.it_lock_mode
== 0 ||
850 it
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
851 /* open lock must return for lease */
852 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
853 PFID(ll_inode2fid(inode
)), it
.it_lock_mode
,
859 ll_intent_release(&it
);
863 /* Cancel open lock */
864 if (it
.it_lock_mode
!= 0) {
865 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
868 och
->och_lease_handle
.cookie
= 0ULL;
870 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
872 CERROR("%s: error closing file "DFID
": %d\n",
873 ll_get_fsname(inode
->i_sb
, NULL
, 0),
874 PFID(&ll_i2info(inode
)->lli_fid
), rc2
);
875 och
= NULL
; /* och has been freed in ll_close_inode_openhandle() */
877 ll_intent_release(&it
);
884 * Release lease and close the file.
885 * It will check if the lease has ever broken.
887 static int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
890 struct ldlm_lock
*lock
;
891 bool cancelled
= true;
893 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
895 lock_res_and_lock(lock
);
896 cancelled
= ldlm_is_cancel(lock
);
897 unlock_res_and_lock(lock
);
901 CDEBUG(D_INODE
, "lease for " DFID
" broken? %d\n",
902 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
905 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
907 *lease_broken
= cancelled
;
909 return ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
913 /* Fills the obdo with the attributes for the lsm */
914 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
915 struct obdo
*obdo
, __u64 ioepoch
, int dv_flags
)
917 struct ptlrpc_request_set
*set
;
918 struct obd_info oinfo
= { };
925 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
926 oinfo
.oi_oa
->o_mode
= S_IFREG
;
927 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
928 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
929 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
930 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
931 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
932 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
933 OBD_MD_FLDATAVERSION
;
934 if (dv_flags
& (LL_DV_WR_FLUSH
| LL_DV_RD_FLUSH
)) {
935 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
936 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
937 if (dv_flags
& LL_DV_WR_FLUSH
)
938 oinfo
.oi_oa
->o_flags
|= OBD_FL_FLUSH
;
941 set
= ptlrpc_prep_set();
943 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM
);
946 rc
= obd_getattr_async(exp
, &oinfo
, set
);
948 rc
= ptlrpc_set_wait(set
);
949 ptlrpc_set_destroy(set
);
952 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
953 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
954 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
955 OBD_MD_FLDATAVERSION
| OBD_MD_FLFLAGS
);
956 if (dv_flags
& LL_DV_WR_FLUSH
&&
957 !(oinfo
.oi_oa
->o_valid
& OBD_MD_FLFLAGS
&&
958 oinfo
.oi_oa
->o_flags
& OBD_FL_FLUSH
))
965 * Performs the getattr on the inode and updates its fields.
966 * If @sync != 0, perform the getattr under the server-side lock.
968 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
969 __u64 ioepoch
, int sync
)
971 struct lov_stripe_md
*lsm
;
974 lsm
= ccc_inode_lsm_get(inode
);
975 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
976 obdo
, ioepoch
, sync
? LL_DV_RD_FLUSH
: 0);
978 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
980 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
981 CDEBUG(D_INODE
, "objid " DOSTID
" size %llu, blocks %llu, blksize %lu\n",
982 POSTID(oi
), i_size_read(inode
),
983 (unsigned long long)inode
->i_blocks
,
984 1UL << inode
->i_blkbits
);
986 ccc_inode_lsm_put(inode
, lsm
);
990 int ll_merge_attr(const struct lu_env
*env
, struct inode
*inode
)
992 struct ll_inode_info
*lli
= ll_i2info(inode
);
993 struct cl_object
*obj
= lli
->lli_clob
;
994 struct cl_attr
*attr
= vvp_env_thread_attr(env
);
1000 ll_inode_size_lock(inode
);
1002 /* merge timestamps the most recently obtained from mds with
1003 * timestamps obtained from osts
1005 LTIME_S(inode
->i_atime
) = lli
->lli_atime
;
1006 LTIME_S(inode
->i_mtime
) = lli
->lli_mtime
;
1007 LTIME_S(inode
->i_ctime
) = lli
->lli_ctime
;
1009 mtime
= LTIME_S(inode
->i_mtime
);
1010 atime
= LTIME_S(inode
->i_atime
);
1011 ctime
= LTIME_S(inode
->i_ctime
);
1013 cl_object_attr_lock(obj
);
1014 rc
= cl_object_attr_get(env
, obj
, attr
);
1015 cl_object_attr_unlock(obj
);
1018 goto out_size_unlock
;
1020 if (atime
< attr
->cat_atime
)
1021 atime
= attr
->cat_atime
;
1023 if (ctime
< attr
->cat_ctime
)
1024 ctime
= attr
->cat_ctime
;
1026 if (mtime
< attr
->cat_mtime
)
1027 mtime
= attr
->cat_mtime
;
1029 CDEBUG(D_VFSTRACE
, DFID
" updating i_size %llu\n",
1030 PFID(&lli
->lli_fid
), attr
->cat_size
);
1032 i_size_write(inode
, attr
->cat_size
);
1034 inode
->i_blocks
= attr
->cat_blocks
;
1036 LTIME_S(inode
->i_mtime
) = mtime
;
1037 LTIME_S(inode
->i_atime
) = atime
;
1038 LTIME_S(inode
->i_ctime
) = ctime
;
1041 ll_inode_size_unlock(inode
);
1046 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1049 struct obdo obdo
= { 0 };
1052 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, &obdo
, 0, 0);
1054 st
->st_size
= obdo
.o_size
;
1055 st
->st_blocks
= obdo
.o_blocks
;
1056 st
->st_mtime
= obdo
.o_mtime
;
1057 st
->st_atime
= obdo
.o_atime
;
1058 st
->st_ctime
= obdo
.o_ctime
;
1063 static bool file_is_noatime(const struct file
*file
)
1065 const struct vfsmount
*mnt
= file
->f_path
.mnt
;
1066 const struct inode
*inode
= file_inode(file
);
1068 /* Adapted from file_accessed() and touch_atime().*/
1069 if (file
->f_flags
& O_NOATIME
)
1072 if (inode
->i_flags
& S_NOATIME
)
1075 if (IS_NOATIME(inode
))
1078 if (mnt
->mnt_flags
& (MNT_NOATIME
| MNT_READONLY
))
1081 if ((mnt
->mnt_flags
& MNT_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1084 if ((inode
->i_sb
->s_flags
& MS_NODIRATIME
) && S_ISDIR(inode
->i_mode
))
1090 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1092 struct inode
*inode
= file_inode(file
);
1094 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1096 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1097 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1098 file
->f_flags
& O_DIRECT
||
1101 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1102 io
->ci_lockreq
= CILR_MAYBE
;
1103 if (ll_file_nolock(file
)) {
1104 io
->ci_lockreq
= CILR_NEVER
;
1105 io
->ci_no_srvlock
= 1;
1106 } else if (file
->f_flags
& O_APPEND
) {
1107 io
->ci_lockreq
= CILR_MANDATORY
;
1110 io
->ci_noatime
= file_is_noatime(file
);
1114 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1115 struct file
*file
, enum cl_io_type iot
,
1116 loff_t
*ppos
, size_t count
)
1118 struct ll_inode_info
*lli
= ll_i2info(file_inode(file
));
1119 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1120 struct range_lock range
;
1124 CDEBUG(D_VFSTRACE
, "file: %s, type: %d ppos: %llu, count: %zu\n",
1125 file
->f_path
.dentry
->d_name
.name
, iot
, *ppos
, count
);
1128 io
= vvp_env_thread_io(env
);
1129 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1131 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1132 struct vvp_io
*vio
= vvp_env_io(env
);
1133 bool range_locked
= false;
1135 if (file
->f_flags
& O_APPEND
)
1136 range_lock_init(&range
, 0, LUSTRE_EOF
);
1138 range_lock_init(&range
, *ppos
, *ppos
+ count
- 1);
1140 vio
->vui_fd
= LUSTRE_FPRIVATE(file
);
1141 vio
->vui_io_subtype
= args
->via_io_subtype
;
1143 switch (vio
->vui_io_subtype
) {
1145 vio
->vui_iter
= args
->u
.normal
.via_iter
;
1146 vio
->vui_iocb
= args
->u
.normal
.via_iocb
;
1148 * Direct IO reads must also take range lock,
1149 * or multiple reads will try to work on the same pages
1150 * See LU-6227 for details.
1152 if (((iot
== CIT_WRITE
) ||
1153 (iot
== CIT_READ
&& (file
->f_flags
& O_DIRECT
))) &&
1154 !(vio
->vui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1155 CDEBUG(D_VFSTRACE
, "Range lock [%llu, %llu]\n",
1156 range
.rl_node
.in_extent
.start
,
1157 range
.rl_node
.in_extent
.end
);
1158 result
= range_lock(&lli
->lli_write_tree
,
1163 range_locked
= true;
1165 down_read(&lli
->lli_trunc_sem
);
1168 vio
->u
.splice
.vui_pipe
= args
->u
.splice
.via_pipe
;
1169 vio
->u
.splice
.vui_flags
= args
->u
.splice
.via_flags
;
1172 CERROR("Unknown IO type - %u\n", vio
->vui_io_subtype
);
1175 ll_cl_add(file
, env
, io
);
1176 result
= cl_io_loop(env
, io
);
1177 ll_cl_remove(file
, env
);
1178 if (args
->via_io_subtype
== IO_NORMAL
)
1179 up_read(&lli
->lli_trunc_sem
);
1181 CDEBUG(D_VFSTRACE
, "Range unlock [%llu, %llu]\n",
1182 range
.rl_node
.in_extent
.start
,
1183 range
.rl_node
.in_extent
.end
);
1184 range_unlock(&lli
->lli_write_tree
, &range
);
1187 /* cl_io_rw_init() handled IO */
1188 result
= io
->ci_result
;
1191 if (io
->ci_nob
> 0) {
1192 result
= io
->ci_nob
;
1193 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1197 cl_io_fini(env
, io
);
1198 /* If any bit been read/written (result != 0), we just return
1199 * short read/write instead of restart io.
1201 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1202 CDEBUG(D_VFSTRACE
, "Restart %s on %pD from %lld, count:%zu\n",
1203 iot
== CIT_READ
? "read" : "write",
1204 file
, *ppos
, count
);
1205 LASSERTF(io
->ci_nob
== 0, "%zd\n", io
->ci_nob
);
1209 if (iot
== CIT_READ
) {
1211 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1212 LPROC_LL_READ_BYTES
, result
);
1213 } else if (iot
== CIT_WRITE
) {
1215 ll_stats_ops_tally(ll_i2sbi(file_inode(file
)),
1216 LPROC_LL_WRITE_BYTES
, result
);
1217 fd
->fd_write_failed
= false;
1218 } else if (result
!= -ERESTARTSYS
) {
1219 fd
->fd_write_failed
= true;
1222 CDEBUG(D_VFSTRACE
, "iot: %d, result: %zd\n", iot
, result
);
1227 static ssize_t
ll_file_read_iter(struct kiocb
*iocb
, struct iov_iter
*to
)
1230 struct vvp_io_args
*args
;
1234 env
= cl_env_get(&refcheck
);
1236 return PTR_ERR(env
);
1238 args
= ll_env_args(env
, IO_NORMAL
);
1239 args
->u
.normal
.via_iter
= to
;
1240 args
->u
.normal
.via_iocb
= iocb
;
1242 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1243 &iocb
->ki_pos
, iov_iter_count(to
));
1244 cl_env_put(env
, &refcheck
);
1249 * Write to a file (through the page cache).
1251 static ssize_t
ll_file_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
)
1254 struct vvp_io_args
*args
;
1258 env
= cl_env_get(&refcheck
);
1260 return PTR_ERR(env
);
1262 args
= ll_env_args(env
, IO_NORMAL
);
1263 args
->u
.normal
.via_iter
= from
;
1264 args
->u
.normal
.via_iocb
= iocb
;
1266 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1267 &iocb
->ki_pos
, iov_iter_count(from
));
1268 cl_env_put(env
, &refcheck
);
1273 * Send file content (through pagecache) somewhere with helper
1275 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1276 struct pipe_inode_info
*pipe
, size_t count
,
1280 struct vvp_io_args
*args
;
1284 env
= cl_env_get(&refcheck
);
1286 return PTR_ERR(env
);
1288 args
= ll_env_args(env
, IO_SPLICE
);
1289 args
->u
.splice
.via_pipe
= pipe
;
1290 args
->u
.splice
.via_flags
= flags
;
1292 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1293 cl_env_put(env
, &refcheck
);
1297 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct dentry
*dentry
,
1298 __u64 flags
, struct lov_user_md
*lum
,
1301 struct lov_stripe_md
*lsm
= NULL
;
1302 struct lookup_intent oit
= {
1304 .it_flags
= flags
| MDS_OPEN_BY_FID
,
1308 lsm
= ccc_inode_lsm_get(inode
);
1310 ccc_inode_lsm_put(inode
, lsm
);
1311 CDEBUG(D_IOCTL
, "stripe already exists for inode "DFID
"\n",
1312 PFID(ll_inode2fid(inode
)));
1317 ll_inode_size_lock(inode
);
1318 rc
= ll_intent_file_open(dentry
, lum
, lum_size
, &oit
);
1325 ll_release_openhandle(inode
, &oit
);
1328 ll_inode_size_unlock(inode
);
1329 ll_intent_release(&oit
);
1330 ccc_inode_lsm_put(inode
, lsm
);
1335 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1336 struct lov_mds_md
**lmmp
, int *lmm_size
,
1337 struct ptlrpc_request
**request
)
1339 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1340 struct mdt_body
*body
;
1341 struct lov_mds_md
*lmm
= NULL
;
1342 struct ptlrpc_request
*req
= NULL
;
1343 struct md_op_data
*op_data
;
1346 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
1350 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1351 strlen(filename
), lmmsize
,
1352 LUSTRE_OPC_ANY
, NULL
);
1353 if (IS_ERR(op_data
))
1354 return PTR_ERR(op_data
);
1356 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1357 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1358 ll_finish_md_op_data(op_data
);
1360 CDEBUG(D_INFO
, "md_getattr_name failed on %s: rc %d\n",
1365 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1367 lmmsize
= body
->mbo_eadatasize
;
1369 if (!(body
->mbo_valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1375 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1377 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1378 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1384 * This is coming from the MDS, so is probably in
1385 * little endian. We convert it to host endian before
1386 * passing it to userspace.
1388 if (cpu_to_le32(LOV_MAGIC
) != LOV_MAGIC
) {
1391 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1392 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1395 /* if function called for directory - we should
1396 * avoid swab not existent lsm objects
1398 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1399 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1400 if (S_ISREG(body
->mbo_mode
))
1401 lustre_swab_lov_user_md_objects(
1402 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1404 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1405 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1406 if (S_ISREG(body
->mbo_mode
))
1407 lustre_swab_lov_user_md_objects(
1408 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1415 *lmm_size
= lmmsize
;
1420 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1423 __u64 flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1424 struct lov_user_md
*lump
;
1425 int lum_size
= sizeof(struct lov_user_md
) +
1426 sizeof(struct lov_user_ost_data
);
1429 if (!capable(CFS_CAP_SYS_ADMIN
))
1432 lump
= libcfs_kvzalloc(lum_size
, GFP_NOFS
);
1436 if (copy_from_user(lump
, (struct lov_user_md __user
*)arg
, lum_size
)) {
1441 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, lump
,
1443 cl_lov_delay_create_clear(&file
->f_flags
);
1449 static int ll_file_getstripe(struct inode
*inode
,
1450 struct lov_user_md __user
*lum
)
1456 env
= cl_env_get(&refcheck
);
1458 return PTR_ERR(env
);
1460 rc
= cl_object_getstripe(env
, ll_i2info(inode
)->lli_clob
, lum
);
1461 cl_env_put(env
, &refcheck
);
1465 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1468 struct lov_user_md __user
*lum
= (struct lov_user_md __user
*)arg
;
1469 struct lov_user_md
*klum
;
1471 __u64 flags
= FMODE_WRITE
;
1473 rc
= ll_copy_user_md(lum
, &klum
);
1478 rc
= ll_lov_setstripe_ea_info(inode
, file
->f_path
.dentry
, flags
, klum
,
1480 cl_lov_delay_create_clear(&file
->f_flags
);
1484 put_user(0, &lum
->lmm_stripe_count
);
1486 ll_layout_refresh(inode
, &gen
);
1487 rc
= ll_file_getstripe(inode
, (struct lov_user_md __user
*)arg
);
1495 ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1497 struct ll_inode_info
*lli
= ll_i2info(inode
);
1498 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1499 struct ll_grouplock grouplock
;
1503 CWARN("group id for group lock must not be 0\n");
1507 if (ll_file_nolock(file
))
1510 spin_lock(&lli
->lli_lock
);
1511 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1512 CWARN("group lock already existed with gid %lu\n",
1513 fd
->fd_grouplock
.lg_gid
);
1514 spin_unlock(&lli
->lli_lock
);
1517 LASSERT(!fd
->fd_grouplock
.lg_lock
);
1518 spin_unlock(&lli
->lli_lock
);
1520 rc
= cl_get_grouplock(ll_i2info(inode
)->lli_clob
,
1521 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
1525 spin_lock(&lli
->lli_lock
);
1526 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1527 spin_unlock(&lli
->lli_lock
);
1528 CERROR("another thread just won the race\n");
1529 cl_put_grouplock(&grouplock
);
1533 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1534 fd
->fd_grouplock
= grouplock
;
1535 spin_unlock(&lli
->lli_lock
);
1537 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
1541 static int ll_put_grouplock(struct inode
*inode
, struct file
*file
,
1544 struct ll_inode_info
*lli
= ll_i2info(inode
);
1545 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1546 struct ll_grouplock grouplock
;
1548 spin_lock(&lli
->lli_lock
);
1549 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1550 spin_unlock(&lli
->lli_lock
);
1551 CWARN("no group lock held\n");
1554 LASSERT(fd
->fd_grouplock
.lg_lock
);
1556 if (fd
->fd_grouplock
.lg_gid
!= arg
) {
1557 CWARN("group lock %lu doesn't match current id %lu\n",
1558 arg
, fd
->fd_grouplock
.lg_gid
);
1559 spin_unlock(&lli
->lli_lock
);
1563 grouplock
= fd
->fd_grouplock
;
1564 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1565 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1566 spin_unlock(&lli
->lli_lock
);
1568 cl_put_grouplock(&grouplock
);
1569 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
1574 * Close inode open handle
1576 * \param inode [in] inode in question
1577 * \param it [in,out] intent which contains open info and result
1580 * \retval <0 failure
1582 int ll_release_openhandle(struct inode
*inode
, struct lookup_intent
*it
)
1584 struct obd_client_handle
*och
;
1589 /* Root ? Do nothing. */
1590 if (is_root_inode(inode
))
1593 /* No open handle to close? Move away */
1594 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1597 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1599 och
= kzalloc(sizeof(*och
), GFP_NOFS
);
1605 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1607 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1610 /* this one is in place of ll_file_open */
1611 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1612 ptlrpc_req_finished(it
->it_request
);
1613 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
1619 * Get size for inode for which FIEMAP mapping is requested.
1620 * Make the FIEMAP get_info call and returns the result.
1622 static int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1625 struct obd_export
*exp
= ll_i2dtexp(inode
);
1626 struct lov_stripe_md
*lsm
= NULL
;
1627 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1628 __u32 vallen
= num_bytes
;
1631 /* Checks for fiemap flags */
1632 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1633 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1637 /* Check for FIEMAP_FLAG_SYNC */
1638 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1639 rc
= filemap_fdatawrite(inode
->i_mapping
);
1644 lsm
= ccc_inode_lsm_get(inode
);
1648 /* If the stripe_count > 1 and the application does not understand
1649 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1651 if (lsm
->lsm_stripe_count
> 1 &&
1652 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
)) {
1657 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1658 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1660 if (i_size_read(inode
) == 0) {
1661 rc
= ll_glimpse_size(inode
);
1666 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1667 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1668 /* If filesize is 0, then there would be no objects for mapping */
1669 if (fm_key
.oa
.o_size
== 0) {
1670 fiemap
->fm_mapped_extents
= 0;
1675 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1677 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1680 CERROR("obd_get_info failed: rc = %d\n", rc
);
1683 ccc_inode_lsm_put(inode
, lsm
);
1687 int ll_fid2path(struct inode
*inode
, void __user
*arg
)
1689 struct obd_export
*exp
= ll_i2mdexp(inode
);
1690 const struct getinfo_fid2path __user
*gfin
= arg
;
1691 struct getinfo_fid2path
*gfout
;
1696 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1697 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1700 /* Only need to get the buflen */
1701 if (get_user(pathlen
, &gfin
->gf_pathlen
))
1704 if (pathlen
> PATH_MAX
)
1707 outsize
= sizeof(*gfout
) + pathlen
;
1709 gfout
= kzalloc(outsize
, GFP_NOFS
);
1713 if (copy_from_user(gfout
, arg
, sizeof(*gfout
))) {
1718 /* Call mdc_iocontrol */
1719 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1723 if (copy_to_user(arg
, gfout
, outsize
))
1731 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1733 struct ll_user_fiemap
*fiemap_s
;
1734 size_t num_bytes
, ret_bytes
;
1735 unsigned int extent_count
;
1738 /* Get the extent count so we can calculate the size of
1739 * required fiemap buffer
1741 if (get_user(extent_count
,
1742 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1746 (SIZE_MAX
- sizeof(*fiemap_s
)) / sizeof(struct ll_fiemap_extent
))
1748 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1749 sizeof(struct ll_fiemap_extent
));
1751 fiemap_s
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
1755 /* get the fiemap value */
1756 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1757 sizeof(*fiemap_s
))) {
1762 /* If fm_extent_count is non-zero, read the first extent since
1763 * it is used to calculate end_offset and device from previous
1767 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1768 (char __user
*)arg
+ sizeof(*fiemap_s
),
1769 sizeof(struct ll_fiemap_extent
))) {
1775 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1779 ret_bytes
= sizeof(struct ll_user_fiemap
);
1781 if (extent_count
!= 0)
1782 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1783 sizeof(struct ll_fiemap_extent
));
1785 if (copy_to_user((void __user
*)arg
, fiemap_s
, ret_bytes
))
1794 * Read the data_version for inode.
1796 * This value is computed using stripe object version on OST.
1797 * Version is computed using server side locking.
1799 * @param sync if do sync on the OST side;
1801 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1802 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1804 int ll_data_version(struct inode
*inode
, __u64
*data_version
, int flags
)
1806 struct lov_stripe_md
*lsm
= NULL
;
1807 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1808 struct obdo
*obdo
= NULL
;
1811 /* If no stripe, we consider version is 0. */
1812 lsm
= ccc_inode_lsm_get(inode
);
1813 if (!lsm_has_objects(lsm
)) {
1815 CDEBUG(D_INODE
, "No object for inode\n");
1820 obdo
= kzalloc(sizeof(*obdo
), GFP_NOFS
);
1826 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, obdo
, 0, flags
);
1828 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1831 *data_version
= obdo
->o_data_version
;
1836 ccc_inode_lsm_put(inode
, lsm
);
1841 * Trigger a HSM release request for the provided inode.
1843 int ll_hsm_release(struct inode
*inode
)
1845 struct cl_env_nest nest
;
1847 struct obd_client_handle
*och
= NULL
;
1848 __u64 data_version
= 0;
1851 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1852 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1853 PFID(&ll_i2info(inode
)->lli_fid
));
1855 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1861 /* Grab latest data_version and [am]time values */
1862 rc
= ll_data_version(inode
, &data_version
, LL_DV_WR_FLUSH
);
1866 env
= cl_env_nested_get(&nest
);
1872 ll_merge_attr(env
, inode
);
1873 cl_env_nested_put(&nest
, env
);
1875 /* Release the file.
1876 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1877 * we still need it to pack l_remote_handle to MDT.
1879 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1884 if (och
&& !IS_ERR(och
)) /* close the file */
1885 ll_lease_close(och
, inode
, NULL
);
1890 struct ll_swap_stack
{
1891 struct iattr ia1
, ia2
;
1893 struct inode
*inode1
, *inode2
;
1894 bool check_dv1
, check_dv2
;
1897 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1898 struct lustre_swap_layouts
*lsl
)
1900 struct mdc_swap_layouts msl
;
1901 struct md_op_data
*op_data
;
1904 struct ll_swap_stack
*llss
= NULL
;
1907 llss
= kzalloc(sizeof(*llss
), GFP_NOFS
);
1911 llss
->inode1
= file_inode(file1
);
1912 llss
->inode2
= file_inode(file2
);
1914 if (!S_ISREG(llss
->inode2
->i_mode
)) {
1919 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
1920 inode_permission(llss
->inode2
, MAY_WRITE
)) {
1925 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
) {
1930 /* we use 2 bool because it is easier to swap than 2 bits */
1931 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
1932 llss
->check_dv1
= true;
1934 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
1935 llss
->check_dv2
= true;
1937 /* we cannot use lsl->sl_dvX directly because we may swap them */
1938 llss
->dv1
= lsl
->sl_dv1
;
1939 llss
->dv2
= lsl
->sl_dv2
;
1941 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
1942 if (rc
== 0) /* same file, done! */ {
1947 if (rc
< 0) { /* sequentialize it */
1948 swap(llss
->inode1
, llss
->inode2
);
1950 swap(llss
->dv1
, llss
->dv2
);
1951 swap(llss
->check_dv1
, llss
->check_dv2
);
1955 if (gid
!= 0) { /* application asks to flush dirty cache */
1956 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
1960 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
1962 ll_put_grouplock(llss
->inode1
, file1
, gid
);
1967 /* to be able to restore mtime and atime after swap
1968 * we need to first save them
1971 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
1972 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
1973 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
1974 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
1975 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
1976 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
1977 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
1980 /* ultimate check, before swapping the layouts we check if
1981 * dataversion has changed (if requested)
1983 if (llss
->check_dv1
) {
1984 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
1987 if (dv
!= llss
->dv1
) {
1993 if (llss
->check_dv2
) {
1994 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
1997 if (dv
!= llss
->dv2
) {
2003 /* struct md_op_data is used to send the swap args to the mdt
2004 * only flags is missing, so we use struct mdc_swap_layouts
2005 * through the md_op_data->op_data
2007 /* flags from user space have to be converted before they are send to
2008 * server, no flag is sent today, they are only used on the client
2012 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2013 0, LUSTRE_OPC_ANY
, &msl
);
2014 if (IS_ERR(op_data
)) {
2015 rc
= PTR_ERR(op_data
);
2019 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2020 sizeof(*op_data
), op_data
, NULL
);
2021 ll_finish_md_op_data(op_data
);
2025 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2026 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2029 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2033 /* clear useless flags */
2034 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2035 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2036 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2039 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2040 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2041 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2044 /* update time if requested */
2046 if (llss
->ia2
.ia_valid
!= 0) {
2047 inode_lock(llss
->inode1
);
2048 rc
= ll_setattr(file1
->f_path
.dentry
, &llss
->ia2
);
2049 inode_unlock(llss
->inode1
);
2052 if (llss
->ia1
.ia_valid
!= 0) {
2055 inode_lock(llss
->inode2
);
2056 rc1
= ll_setattr(file2
->f_path
.dentry
, &llss
->ia1
);
2057 inode_unlock(llss
->inode2
);
2068 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2070 struct md_op_data
*op_data
;
2073 /* Detect out-of range masks */
2074 if ((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_FLAGS_MASK
)
2077 /* Non-root users are forbidden to set or clear flags which are
2078 * NOT defined in HSM_USER_MASK.
2080 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2081 !capable(CFS_CAP_SYS_ADMIN
))
2084 /* Detect out-of range archive id */
2085 if ((hss
->hss_valid
& HSS_ARCHIVE_ID
) &&
2086 (hss
->hss_archive_id
> LL_HSM_MAX_ARCHIVE
))
2089 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2090 LUSTRE_OPC_ANY
, hss
);
2091 if (IS_ERR(op_data
))
2092 return PTR_ERR(op_data
);
2094 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2095 sizeof(*op_data
), op_data
, NULL
);
2097 ll_finish_md_op_data(op_data
);
2102 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2103 struct hsm_user_import
*hui
)
2105 struct hsm_state_set
*hss
= NULL
;
2106 struct iattr
*attr
= NULL
;
2109 if (!S_ISREG(inode
->i_mode
))
2113 hss
= kzalloc(sizeof(*hss
), GFP_NOFS
);
2117 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2118 hss
->hss_archive_id
= hui
->hui_archive_id
;
2119 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2120 rc
= ll_hsm_state_set(inode
, hss
);
2124 attr
= kzalloc(sizeof(*attr
), GFP_NOFS
);
2130 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2131 attr
->ia_mode
|= S_IFREG
;
2132 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2133 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2134 attr
->ia_size
= hui
->hui_size
;
2135 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2136 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2137 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2138 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2140 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2141 ATTR_UID
| ATTR_GID
|
2142 ATTR_MTIME
| ATTR_MTIME_SET
|
2143 ATTR_ATIME
| ATTR_ATIME_SET
;
2147 rc
= ll_setattr_raw(file
->f_path
.dentry
, attr
, true);
2151 inode_unlock(inode
);
2159 static inline long ll_lease_type_from_fmode(fmode_t fmode
)
2161 return ((fmode
& FMODE_READ
) ? LL_LEASE_RDLCK
: 0) |
2162 ((fmode
& FMODE_WRITE
) ? LL_LEASE_WRLCK
: 0);
2166 ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2168 struct inode
*inode
= file_inode(file
);
2169 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2172 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),cmd=%x\n",
2173 PFID(ll_inode2fid(inode
)), inode
, cmd
);
2174 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2176 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2177 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2181 case LL_IOC_GETFLAGS
:
2182 /* Get the current value of the file flags */
2183 return put_user(fd
->fd_flags
, (int __user
*)arg
);
2184 case LL_IOC_SETFLAGS
:
2185 case LL_IOC_CLRFLAGS
:
2186 /* Set or clear specific file flags */
2187 /* XXX This probably needs checks to ensure the flags are
2188 * not abused, and to handle any flag side effects.
2190 if (get_user(flags
, (int __user
*)arg
))
2193 if (cmd
== LL_IOC_SETFLAGS
) {
2194 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2195 !(file
->f_flags
& O_DIRECT
)) {
2196 CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
2201 fd
->fd_flags
|= flags
;
2203 fd
->fd_flags
&= ~flags
;
2206 case LL_IOC_LOV_SETSTRIPE
:
2207 return ll_lov_setstripe(inode
, file
, arg
);
2208 case LL_IOC_LOV_SETEA
:
2209 return ll_lov_setea(inode
, file
, arg
);
2210 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2212 struct lustre_swap_layouts lsl
;
2214 if (copy_from_user(&lsl
, (char __user
*)arg
,
2215 sizeof(struct lustre_swap_layouts
)))
2218 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
2221 file2
= fget(lsl
.sl_fd
);
2226 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2227 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2231 case LL_IOC_LOV_GETSTRIPE
:
2232 return ll_file_getstripe(inode
,
2233 (struct lov_user_md __user
*)arg
);
2234 case FSFILT_IOC_FIEMAP
:
2235 return ll_ioctl_fiemap(inode
, arg
);
2236 case FSFILT_IOC_GETFLAGS
:
2237 case FSFILT_IOC_SETFLAGS
:
2238 return ll_iocontrol(inode
, file
, cmd
, arg
);
2239 case FSFILT_IOC_GETVERSION_OLD
:
2240 case FSFILT_IOC_GETVERSION
:
2241 return put_user(inode
->i_generation
, (int __user
*)arg
);
2242 case LL_IOC_GROUP_LOCK
:
2243 return ll_get_grouplock(inode
, file
, arg
);
2244 case LL_IOC_GROUP_UNLOCK
:
2245 return ll_put_grouplock(inode
, file
, arg
);
2246 case IOC_OBD_STATFS
:
2247 return ll_obd_statfs(inode
, (void __user
*)arg
);
2249 /* We need to special case any other ioctls we want to handle,
2250 * to send them to the MDS/OST as appropriate and to properly
2251 * network encode the arg field.
2252 case FSFILT_IOC_SETVERSION_OLD:
2253 case FSFILT_IOC_SETVERSION:
2255 case LL_IOC_FLUSHCTX
:
2256 return ll_flush_ctx(inode
);
2257 case LL_IOC_PATH2FID
: {
2258 if (copy_to_user((void __user
*)arg
, ll_inode2fid(inode
),
2259 sizeof(struct lu_fid
)))
2264 case LL_IOC_GETPARENT
:
2265 return ll_getparent(file
, (struct getparent __user
*)arg
);
2266 case OBD_IOC_FID2PATH
:
2267 return ll_fid2path(inode
, (void __user
*)arg
);
2268 case LL_IOC_DATA_VERSION
: {
2269 struct ioc_data_version idv
;
2272 if (copy_from_user(&idv
, (char __user
*)arg
, sizeof(idv
)))
2275 idv
.idv_flags
&= LL_DV_RD_FLUSH
| LL_DV_WR_FLUSH
;
2276 rc
= ll_data_version(inode
, &idv
.idv_version
, idv
.idv_flags
);
2277 if (rc
== 0 && copy_to_user((char __user
*)arg
, &idv
,
2284 case LL_IOC_GET_MDTIDX
: {
2287 mdtidx
= ll_get_mdt_idx(inode
);
2291 if (put_user(mdtidx
, (int __user
*)arg
))
2296 case OBD_IOC_GETDTNAME
:
2297 case OBD_IOC_GETMDNAME
:
2298 return ll_get_obd_name(inode
, cmd
, arg
);
2299 case LL_IOC_HSM_STATE_GET
: {
2300 struct md_op_data
*op_data
;
2301 struct hsm_user_state
*hus
;
2304 hus
= kzalloc(sizeof(*hus
), GFP_NOFS
);
2308 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2309 LUSTRE_OPC_ANY
, hus
);
2310 if (IS_ERR(op_data
)) {
2312 return PTR_ERR(op_data
);
2315 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2318 if (copy_to_user((void __user
*)arg
, hus
, sizeof(*hus
)))
2321 ll_finish_md_op_data(op_data
);
2325 case LL_IOC_HSM_STATE_SET
: {
2326 struct hsm_state_set
*hss
;
2329 hss
= memdup_user((char __user
*)arg
, sizeof(*hss
));
2331 return PTR_ERR(hss
);
2333 rc
= ll_hsm_state_set(inode
, hss
);
2338 case LL_IOC_HSM_ACTION
: {
2339 struct md_op_data
*op_data
;
2340 struct hsm_current_action
*hca
;
2343 hca
= kzalloc(sizeof(*hca
), GFP_NOFS
);
2347 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2348 LUSTRE_OPC_ANY
, hca
);
2349 if (IS_ERR(op_data
)) {
2351 return PTR_ERR(op_data
);
2354 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2357 if (copy_to_user((char __user
*)arg
, hca
, sizeof(*hca
)))
2360 ll_finish_md_op_data(op_data
);
2364 case LL_IOC_SET_LEASE
: {
2365 struct ll_inode_info
*lli
= ll_i2info(inode
);
2366 struct obd_client_handle
*och
= NULL
;
2371 case LL_LEASE_WRLCK
:
2372 if (!(file
->f_mode
& FMODE_WRITE
))
2374 fmode
= FMODE_WRITE
;
2376 case LL_LEASE_RDLCK
:
2377 if (!(file
->f_mode
& FMODE_READ
))
2381 case LL_LEASE_UNLCK
:
2382 mutex_lock(&lli
->lli_och_mutex
);
2383 if (fd
->fd_lease_och
) {
2384 och
= fd
->fd_lease_och
;
2385 fd
->fd_lease_och
= NULL
;
2387 mutex_unlock(&lli
->lli_och_mutex
);
2392 fmode
= och
->och_flags
;
2393 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2400 return ll_lease_type_from_fmode(fmode
);
2405 CDEBUG(D_INODE
, "Set lease with mode %u\n", fmode
);
2407 /* apply for lease */
2408 och
= ll_lease_open(inode
, file
, fmode
, 0);
2410 return PTR_ERR(och
);
2413 mutex_lock(&lli
->lli_och_mutex
);
2414 if (!fd
->fd_lease_och
) {
2415 fd
->fd_lease_och
= och
;
2418 mutex_unlock(&lli
->lli_och_mutex
);
2420 /* impossible now that only excl is supported for now */
2421 ll_lease_close(och
, inode
, &lease_broken
);
2426 case LL_IOC_GET_LEASE
: {
2427 struct ll_inode_info
*lli
= ll_i2info(inode
);
2428 struct ldlm_lock
*lock
= NULL
;
2431 mutex_lock(&lli
->lli_och_mutex
);
2432 if (fd
->fd_lease_och
) {
2433 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2435 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2437 lock_res_and_lock(lock
);
2438 if (!ldlm_is_cancel(lock
))
2439 fmode
= och
->och_flags
;
2440 unlock_res_and_lock(lock
);
2441 LDLM_LOCK_PUT(lock
);
2444 mutex_unlock(&lli
->lli_och_mutex
);
2445 return ll_lease_type_from_fmode(fmode
);
2447 case LL_IOC_HSM_IMPORT
: {
2448 struct hsm_user_import
*hui
;
2450 hui
= memdup_user((void __user
*)arg
, sizeof(*hui
));
2452 return PTR_ERR(hui
);
2454 rc
= ll_hsm_import(inode
, file
, hui
);
2462 if (ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
) ==
2466 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
2467 (void __user
*)arg
);
2472 static loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2474 struct inode
*inode
= file_inode(file
);
2475 loff_t retval
, eof
= 0;
2477 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2478 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2479 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), to=%llu=%#llx(%d)\n",
2480 PFID(ll_inode2fid(inode
)), inode
, retval
, retval
, origin
);
2481 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
2483 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2484 retval
= ll_glimpse_size(inode
);
2487 eof
= i_size_read(inode
);
2490 return generic_file_llseek_size(file
, offset
, origin
,
2491 ll_file_maxbytes(inode
), eof
);
2494 static int ll_flush(struct file
*file
, fl_owner_t id
)
2496 struct inode
*inode
= file_inode(file
);
2497 struct ll_inode_info
*lli
= ll_i2info(inode
);
2498 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2501 LASSERT(!S_ISDIR(inode
->i_mode
));
2503 /* catch async errors that were recorded back when async writeback
2504 * failed for pages in this mapping.
2506 rc
= lli
->lli_async_rc
;
2507 lli
->lli_async_rc
= 0;
2508 if (lli
->lli_clob
) {
2509 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2514 /* The application has been told about write failure already.
2515 * Do not report failure again.
2517 if (fd
->fd_write_failed
)
2519 return rc
? -EIO
: 0;
2523 * Called to make sure a portion of file has been written out.
2524 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2526 * Return how many pages have been written.
2528 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2529 enum cl_fsync_mode mode
, int ignore_layout
)
2531 struct cl_env_nest nest
;
2534 struct cl_fsync_io
*fio
;
2537 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2538 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2541 env
= cl_env_nested_get(&nest
);
2543 return PTR_ERR(env
);
2545 io
= vvp_env_thread_io(env
);
2546 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
2547 io
->ci_ignore_layout
= ignore_layout
;
2549 /* initialize parameters for sync */
2550 fio
= &io
->u
.ci_fsync
;
2551 fio
->fi_start
= start
;
2553 fio
->fi_fid
= ll_inode2fid(inode
);
2554 fio
->fi_mode
= mode
;
2555 fio
->fi_nr_written
= 0;
2557 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2558 result
= cl_io_loop(env
, io
);
2560 result
= io
->ci_result
;
2562 result
= fio
->fi_nr_written
;
2563 cl_io_fini(env
, io
);
2564 cl_env_nested_put(&nest
, env
);
2569 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2571 struct inode
*inode
= file_inode(file
);
2572 struct ll_inode_info
*lli
= ll_i2info(inode
);
2573 struct ptlrpc_request
*req
;
2576 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p)\n",
2577 PFID(ll_inode2fid(inode
)), inode
);
2578 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2580 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2583 /* catch async errors that were recorded back when async writeback
2584 * failed for pages in this mapping.
2586 if (!S_ISDIR(inode
->i_mode
)) {
2587 err
= lli
->lli_async_rc
;
2588 lli
->lli_async_rc
= 0;
2591 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2596 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), &req
);
2600 ptlrpc_req_finished(req
);
2602 if (S_ISREG(inode
->i_mode
)) {
2603 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2605 err
= cl_sync_file_range(inode
, start
, end
, CL_FSYNC_ALL
, 0);
2606 if (rc
== 0 && err
< 0)
2609 fd
->fd_write_failed
= true;
2611 fd
->fd_write_failed
= false;
2614 inode_unlock(inode
);
2619 ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2621 struct inode
*inode
= file_inode(file
);
2622 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2623 struct ldlm_enqueue_info einfo
= {
2624 .ei_type
= LDLM_FLOCK
,
2625 .ei_cb_cp
= ldlm_flock_completion_ast
,
2626 .ei_cbdata
= file_lock
,
2628 struct md_op_data
*op_data
;
2629 struct lustre_handle lockh
= {0};
2630 ldlm_policy_data_t flock
= { {0} };
2631 int fl_type
= file_lock
->fl_type
;
2636 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
" file_lock=%p\n",
2637 PFID(ll_inode2fid(inode
)), file_lock
);
2639 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
2641 if (file_lock
->fl_flags
& FL_FLOCK
)
2642 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2643 else if (!(file_lock
->fl_flags
& FL_POSIX
))
2646 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2647 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2648 flock
.l_flock
.start
= file_lock
->fl_start
;
2649 flock
.l_flock
.end
= file_lock
->fl_end
;
2651 /* Somewhat ugly workaround for svc lockd.
2652 * lockd installs custom fl_lmops->lm_compare_owner that checks
2653 * for the fl_owner to be the same (which it always is on local node
2654 * I guess between lockd processes) and then compares pid.
2655 * As such we assign pid to the owner field to make it all work,
2656 * conflict with normal locks is unlikely since pid space and
2657 * pointer space for current->files are not intersecting
2659 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2660 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
2664 einfo
.ei_mode
= LCK_PR
;
2667 /* An unlock request may or may not have any relation to
2668 * existing locks so we may not be able to pass a lock handle
2669 * via a normal ldlm_lock_cancel() request. The request may even
2670 * unlock a byte range in the middle of an existing lock. In
2671 * order to process an unlock request we need all of the same
2672 * information that is given with a normal read or write record
2673 * lock request. To avoid creating another ldlm unlock (cancel)
2674 * message we'll treat a LCK_NL flock request as an unlock.
2676 einfo
.ei_mode
= LCK_NL
;
2679 einfo
.ei_mode
= LCK_PW
;
2682 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n", fl_type
);
2697 flags
= LDLM_FL_BLOCK_NOWAIT
;
2703 flags
= LDLM_FL_TEST_LOCK
;
2706 CERROR("unknown fcntl lock command: %d\n", cmd
);
2711 * Save the old mode so that if the mode in the lock changes we
2712 * can decrement the appropriate reader or writer refcount.
2714 file_lock
->fl_type
= einfo
.ei_mode
;
2716 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2717 LUSTRE_OPC_ANY
, NULL
);
2718 if (IS_ERR(op_data
))
2719 return PTR_ERR(op_data
);
2721 CDEBUG(D_DLMTRACE
, "inode="DFID
", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
2722 PFID(ll_inode2fid(inode
)), flock
.l_flock
.pid
, flags
,
2723 einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
2725 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &flock
, NULL
, op_data
, &lockh
,
2728 /* Restore the file lock type if not TEST lock. */
2729 if (!(flags
& LDLM_FL_TEST_LOCK
))
2730 file_lock
->fl_type
= fl_type
;
2732 if ((rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2733 !(flags
& LDLM_FL_TEST_LOCK
))
2734 rc2
= locks_lock_file_wait(file
, file_lock
);
2736 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2737 einfo
.ei_mode
= LCK_NL
;
2738 md_enqueue(sbi
->ll_md_exp
, &einfo
, &flock
, NULL
, op_data
,
2743 ll_finish_md_op_data(op_data
);
2748 int ll_get_fid_by_name(struct inode
*parent
, const char *name
,
2749 int namelen
, struct lu_fid
*fid
)
2751 struct md_op_data
*op_data
= NULL
;
2752 struct ptlrpc_request
*req
;
2753 struct mdt_body
*body
;
2756 op_data
= ll_prep_md_op_data(NULL
, parent
, NULL
, name
, namelen
, 0,
2757 LUSTRE_OPC_ANY
, NULL
);
2758 if (IS_ERR(op_data
))
2759 return PTR_ERR(op_data
);
2761 op_data
->op_valid
= OBD_MD_FLID
;
2762 rc
= md_getattr_name(ll_i2sbi(parent
)->ll_md_exp
, op_data
, &req
);
2763 ll_finish_md_op_data(op_data
);
2767 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
2773 *fid
= body
->mbo_fid1
;
2775 ptlrpc_req_finished(req
);
2779 int ll_migrate(struct inode
*parent
, struct file
*file
, int mdtidx
,
2780 const char *name
, int namelen
)
2782 struct ptlrpc_request
*request
= NULL
;
2783 struct inode
*child_inode
= NULL
;
2784 struct dentry
*dchild
= NULL
;
2785 struct md_op_data
*op_data
;
2789 CDEBUG(D_VFSTRACE
, "migrate %s under "DFID
" to MDT%d\n",
2790 name
, PFID(ll_inode2fid(parent
)), mdtidx
);
2792 op_data
= ll_prep_md_op_data(NULL
, parent
, NULL
, name
, namelen
,
2793 0, LUSTRE_OPC_ANY
, NULL
);
2794 if (IS_ERR(op_data
))
2795 return PTR_ERR(op_data
);
2797 /* Get child FID first */
2798 qstr
.hash
= full_name_hash(parent
, name
, namelen
);
2801 dchild
= d_lookup(file_dentry(file
), &qstr
);
2803 op_data
->op_fid3
= *ll_inode2fid(dchild
->d_inode
);
2804 if (dchild
->d_inode
) {
2805 child_inode
= igrab(dchild
->d_inode
);
2807 inode_lock(child_inode
);
2808 op_data
->op_fid3
= *ll_inode2fid(child_inode
);
2809 ll_invalidate_aliases(child_inode
);
2814 rc
= ll_get_fid_by_name(parent
, name
, namelen
,
2820 if (!fid_is_sane(&op_data
->op_fid3
)) {
2821 CERROR("%s: migrate %s, but fid "DFID
" is insane\n",
2822 ll_get_fsname(parent
->i_sb
, NULL
, 0), name
,
2823 PFID(&op_data
->op_fid3
));
2828 rc
= ll_get_mdt_idx_by_fid(ll_i2sbi(parent
), &op_data
->op_fid3
);
2833 CDEBUG(D_INFO
, "%s:"DFID
" is already on MDT%d.\n", name
,
2834 PFID(&op_data
->op_fid3
), mdtidx
);
2839 op_data
->op_mds
= mdtidx
;
2840 op_data
->op_cli_flags
= CLI_MIGRATE
;
2841 rc
= md_rename(ll_i2sbi(parent
)->ll_md_exp
, op_data
, name
,
2842 namelen
, name
, namelen
, &request
);
2844 ll_update_times(request
, parent
);
2846 ptlrpc_req_finished(request
);
2850 clear_nlink(child_inode
);
2851 inode_unlock(child_inode
);
2855 ll_finish_md_op_data(op_data
);
2860 ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2866 * test if some locks matching bits and l_req_mode are acquired
2867 * - bits can be in different locks
2868 * - if found clear the common lock bits in *bits
2869 * - the bits not found, are kept in *bits
2871 * \param bits [IN] searched lock bits [IN]
2872 * \param l_req_mode [IN] searched lock mode
2873 * \retval boolean, true iff all bits are found
2875 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
,
2876 enum ldlm_mode l_req_mode
)
2878 struct lustre_handle lockh
;
2879 ldlm_policy_data_t policy
;
2880 enum ldlm_mode mode
= (l_req_mode
== LCK_MINMODE
) ?
2881 (LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
) : l_req_mode
;
2889 fid
= &ll_i2info(inode
)->lli_fid
;
2890 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2891 ldlm_lockname
[mode
]);
2893 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
2894 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2895 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2896 if (policy
.l_inodebits
.bits
== 0)
2899 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2900 &policy
, mode
, &lockh
)) {
2901 struct ldlm_lock
*lock
;
2903 lock
= ldlm_handle2lock(&lockh
);
2906 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2907 LDLM_LOCK_PUT(lock
);
2909 *bits
&= ~policy
.l_inodebits
.bits
;
2916 enum ldlm_mode
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2917 struct lustre_handle
*lockh
, __u64 flags
,
2918 enum ldlm_mode mode
)
2920 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
} };
2923 fid
= &ll_i2info(inode
)->lli_fid
;
2924 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
2926 return md_lock_match(ll_i2mdexp(inode
), flags
| LDLM_FL_BLOCK_GRANTED
,
2927 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
2930 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
2932 /* Already unlinked. Just update nlink and return success */
2933 if (rc
== -ENOENT
) {
2935 /* This path cannot be hit for regular files unless in
2936 * case of obscure races, so no need to validate size.
2938 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
2940 } else if (rc
!= 0) {
2941 CDEBUG_LIMIT((rc
== -EACCES
|| rc
== -EIDRM
) ? D_INFO
: D_ERROR
,
2942 "%s: revalidate FID "DFID
" error: rc = %d\n",
2943 ll_get_fsname(inode
->i_sb
, NULL
, 0),
2944 PFID(ll_inode2fid(inode
)), rc
);
2950 static int __ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
2952 struct inode
*inode
= d_inode(dentry
);
2953 struct ptlrpc_request
*req
= NULL
;
2954 struct obd_export
*exp
;
2957 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p),name=%pd\n",
2958 PFID(ll_inode2fid(inode
)), inode
, dentry
);
2960 exp
= ll_i2mdexp(inode
);
2962 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2963 * But under CMD case, it caused some lock issues, should be fixed
2964 * with new CMD ibits lock. See bug 12718
2966 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
2967 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
2968 struct md_op_data
*op_data
;
2970 if (ibits
== MDS_INODELOCK_LOOKUP
)
2971 oit
.it_op
= IT_LOOKUP
;
2973 /* Call getattr by fid, so do not provide name at all. */
2974 op_data
= ll_prep_md_op_data(NULL
, inode
,
2976 LUSTRE_OPC_ANY
, NULL
);
2977 if (IS_ERR(op_data
))
2978 return PTR_ERR(op_data
);
2980 rc
= md_intent_lock(exp
, op_data
, &oit
, &req
,
2981 &ll_md_blocking_ast
, 0);
2982 ll_finish_md_op_data(op_data
);
2984 rc
= ll_inode_revalidate_fini(inode
, rc
);
2988 rc
= ll_revalidate_it_finish(req
, &oit
, inode
);
2990 ll_intent_release(&oit
);
2994 /* Unlinked? Unhash dentry, so it is not picked up later by
2995 * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2996 * here to preserve get_cwd functionality on 2.6.
2999 if (!d_inode(dentry
)->i_nlink
) {
3000 spin_lock(&inode
->i_lock
);
3001 d_lustre_invalidate(dentry
, 0);
3002 spin_unlock(&inode
->i_lock
);
3005 ll_lookup_finish_locks(&oit
, inode
);
3006 } else if (!ll_have_md_lock(d_inode(dentry
), &ibits
, LCK_MINMODE
)) {
3007 struct ll_sb_info
*sbi
= ll_i2sbi(d_inode(dentry
));
3008 u64 valid
= OBD_MD_FLGETATTR
;
3009 struct md_op_data
*op_data
;
3012 if (S_ISREG(inode
->i_mode
)) {
3013 rc
= ll_get_default_mdsize(sbi
, &ealen
);
3016 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
3019 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
3020 0, ealen
, LUSTRE_OPC_ANY
,
3022 if (IS_ERR(op_data
))
3023 return PTR_ERR(op_data
);
3025 op_data
->op_valid
= valid
;
3026 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
3027 ll_finish_md_op_data(op_data
);
3029 return ll_inode_revalidate_fini(inode
, rc
);
3031 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
3034 ptlrpc_req_finished(req
);
3038 static int ll_merge_md_attr(struct inode
*inode
)
3040 struct cl_attr attr
= { 0 };
3043 LASSERT(ll_i2info(inode
)->lli_lsm_md
);
3044 rc
= md_merge_attr(ll_i2mdexp(inode
), ll_i2info(inode
)->lli_lsm_md
,
3045 &attr
, ll_md_blocking_ast
);
3049 set_nlink(inode
, attr
.cat_nlink
);
3050 inode
->i_blocks
= attr
.cat_blocks
;
3051 i_size_write(inode
, attr
.cat_size
);
3053 ll_i2info(inode
)->lli_atime
= attr
.cat_atime
;
3054 ll_i2info(inode
)->lli_mtime
= attr
.cat_mtime
;
3055 ll_i2info(inode
)->lli_ctime
= attr
.cat_ctime
;
3060 static int ll_inode_revalidate(struct dentry
*dentry
, __u64 ibits
)
3062 struct inode
*inode
= d_inode(dentry
);
3065 rc
= __ll_inode_revalidate(dentry
, ibits
);
3069 /* if object isn't regular file, don't validate size */
3070 if (!S_ISREG(inode
->i_mode
)) {
3071 if (S_ISDIR(inode
->i_mode
) &&
3072 ll_i2info(inode
)->lli_lsm_md
) {
3073 rc
= ll_merge_md_attr(inode
);
3078 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_atime
;
3079 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_mtime
;
3080 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_ctime
;
3082 /* In case of restore, the MDT has the right size and has
3083 * already send it back without granting the layout lock,
3084 * inode is up-to-date so glimpse is useless.
3085 * Also to glimpse we need the layout, in case of a running
3086 * restore the MDT holds the layout lock so the glimpse will
3087 * block up to the end of restore (getattr will block)
3089 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
3090 rc
= ll_glimpse_size(inode
);
3095 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3097 struct inode
*inode
= d_inode(de
);
3098 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3099 struct ll_inode_info
*lli
= ll_i2info(inode
);
3102 res
= ll_inode_revalidate(de
, MDS_INODELOCK_UPDATE
|
3103 MDS_INODELOCK_LOOKUP
);
3104 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3109 OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY
, 30);
3111 stat
->dev
= inode
->i_sb
->s_dev
;
3112 if (ll_need_32bit_api(sbi
))
3113 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3115 stat
->ino
= inode
->i_ino
;
3116 stat
->mode
= inode
->i_mode
;
3117 stat
->uid
= inode
->i_uid
;
3118 stat
->gid
= inode
->i_gid
;
3119 stat
->rdev
= inode
->i_rdev
;
3120 stat
->atime
= inode
->i_atime
;
3121 stat
->mtime
= inode
->i_mtime
;
3122 stat
->ctime
= inode
->i_ctime
;
3123 stat
->blksize
= 1 << inode
->i_blkbits
;
3125 stat
->nlink
= inode
->i_nlink
;
3126 stat
->size
= i_size_read(inode
);
3127 stat
->blocks
= inode
->i_blocks
;
3132 static int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3133 __u64 start
, __u64 len
)
3137 struct ll_user_fiemap
*fiemap
;
3138 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3140 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3141 sizeof(struct ll_fiemap_extent
));
3142 fiemap
= libcfs_kvzalloc(num_bytes
, GFP_NOFS
);
3147 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3148 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3149 fiemap
->fm_start
= start
;
3150 fiemap
->fm_length
= len
;
3151 if (extent_count
> 0 &&
3152 copy_from_user(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3153 sizeof(struct ll_fiemap_extent
)) != 0) {
3158 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3160 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3161 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3162 if (extent_count
> 0 &&
3163 copy_to_user(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3164 fiemap
->fm_mapped_extents
*
3165 sizeof(struct ll_fiemap_extent
)) != 0) {
3175 struct posix_acl
*ll_get_acl(struct inode
*inode
, int type
)
3177 struct ll_inode_info
*lli
= ll_i2info(inode
);
3178 struct posix_acl
*acl
= NULL
;
3180 spin_lock(&lli
->lli_lock
);
3181 /* VFS' acl_permission_check->check_acl will release the refcount */
3182 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3183 #ifdef CONFIG_FS_POSIX_ACL
3184 forget_cached_acl(inode
, type
);
3186 spin_unlock(&lli
->lli_lock
);
3191 int ll_inode_permission(struct inode
*inode
, int mask
)
3193 struct ll_sb_info
*sbi
;
3194 struct root_squash_info
*squash
;
3195 const struct cred
*old_cred
= NULL
;
3196 struct cred
*cred
= NULL
;
3197 bool squash_id
= false;
3201 if (mask
& MAY_NOT_BLOCK
)
3204 /* as root inode are NOT getting validated in lookup operation,
3205 * need to do it before permission check.
3208 if (is_root_inode(inode
)) {
3209 rc
= __ll_inode_revalidate(inode
->i_sb
->s_root
,
3210 MDS_INODELOCK_LOOKUP
);
3215 CDEBUG(D_VFSTRACE
, "VFS Op:inode="DFID
"(%p), inode mode %x mask %o\n",
3216 PFID(ll_inode2fid(inode
)), inode
, inode
->i_mode
, mask
);
3218 /* squash fsuid/fsgid if needed */
3219 sbi
= ll_i2sbi(inode
);
3220 squash
= &sbi
->ll_squash
;
3221 if (unlikely(squash
->rsi_uid
&&
3222 uid_eq(current_fsuid(), GLOBAL_ROOT_UID
) &&
3223 !(sbi
->ll_flags
& LL_SBI_NOROOTSQUASH
))) {
3228 CDEBUG(D_OTHER
, "squash creds (%d:%d)=>(%d:%d)\n",
3229 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3230 squash
->rsi_uid
, squash
->rsi_gid
);
3233 * update current process's credentials
3236 cred
= prepare_creds();
3240 cred
->fsuid
= make_kuid(&init_user_ns
, squash
->rsi_uid
);
3241 cred
->fsgid
= make_kgid(&init_user_ns
, squash
->rsi_gid
);
3242 for (cap
= 0; cap
< sizeof(cfs_cap_t
) * 8; cap
++) {
3243 if ((1 << cap
) & CFS_CAP_FS_MASK
)
3244 cap_lower(cred
->cap_effective
, cap
);
3246 old_cred
= override_creds(cred
);
3249 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3250 rc
= generic_permission(inode
, mask
);
3252 /* restore current process's credentials and FS capability */
3254 revert_creds(old_cred
);
3261 /* -o localflock - only provides locally consistent flock locks */
3262 struct file_operations ll_file_operations
= {
3263 .read_iter
= ll_file_read_iter
,
3264 .write_iter
= ll_file_write_iter
,
3265 .unlocked_ioctl
= ll_file_ioctl
,
3266 .open
= ll_file_open
,
3267 .release
= ll_file_release
,
3268 .mmap
= ll_file_mmap
,
3269 .llseek
= ll_file_seek
,
3270 .splice_read
= ll_file_splice_read
,
3275 struct file_operations ll_file_operations_flock
= {
3276 .read_iter
= ll_file_read_iter
,
3277 .write_iter
= ll_file_write_iter
,
3278 .unlocked_ioctl
= ll_file_ioctl
,
3279 .open
= ll_file_open
,
3280 .release
= ll_file_release
,
3281 .mmap
= ll_file_mmap
,
3282 .llseek
= ll_file_seek
,
3283 .splice_read
= ll_file_splice_read
,
3286 .flock
= ll_file_flock
,
3287 .lock
= ll_file_flock
3290 /* These are for -o noflock - to return ENOSYS on flock calls */
3291 struct file_operations ll_file_operations_noflock
= {
3292 .read_iter
= ll_file_read_iter
,
3293 .write_iter
= ll_file_write_iter
,
3294 .unlocked_ioctl
= ll_file_ioctl
,
3295 .open
= ll_file_open
,
3296 .release
= ll_file_release
,
3297 .mmap
= ll_file_mmap
,
3298 .llseek
= ll_file_seek
,
3299 .splice_read
= ll_file_splice_read
,
3302 .flock
= ll_file_noflock
,
3303 .lock
= ll_file_noflock
3306 const struct inode_operations ll_file_inode_operations
= {
3307 .setattr
= ll_setattr
,
3308 .getattr
= ll_getattr
,
3309 .permission
= ll_inode_permission
,
3310 .setxattr
= generic_setxattr
,
3311 .getxattr
= generic_getxattr
,
3312 .listxattr
= ll_listxattr
,
3313 .removexattr
= generic_removexattr
,
3314 .fiemap
= ll_fiemap
,
3315 .get_acl
= ll_get_acl
,
3318 /* dynamic ioctl number support routines */
3319 static struct llioc_ctl_data
{
3320 struct rw_semaphore ioc_sem
;
3321 struct list_head ioc_head
;
3323 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3324 LIST_HEAD_INIT(llioc
.ioc_head
)
3328 struct list_head iocd_list
;
3329 unsigned int iocd_size
;
3330 llioc_callback_t iocd_cb
;
3331 unsigned int iocd_count
;
3332 unsigned int iocd_cmd
[0];
3335 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3338 struct llioc_data
*in_data
= NULL
;
3340 if (!cb
|| !cmd
|| count
> LLIOC_MAX_CMD
|| count
< 0)
3343 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3344 in_data
= kzalloc(size
, GFP_NOFS
);
3348 in_data
->iocd_size
= size
;
3349 in_data
->iocd_cb
= cb
;
3350 in_data
->iocd_count
= count
;
3351 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3353 down_write(&llioc
.ioc_sem
);
3354 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3355 up_write(&llioc
.ioc_sem
);
3359 EXPORT_SYMBOL(ll_iocontrol_register
);
3361 void ll_iocontrol_unregister(void *magic
)
3363 struct llioc_data
*tmp
;
3368 down_write(&llioc
.ioc_sem
);
3369 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3371 list_del(&tmp
->iocd_list
);
3372 up_write(&llioc
.ioc_sem
);
3378 up_write(&llioc
.ioc_sem
);
3380 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
3382 EXPORT_SYMBOL(ll_iocontrol_unregister
);
3384 static enum llioc_iter
3385 ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3386 unsigned int cmd
, unsigned long arg
, int *rcp
)
3388 enum llioc_iter ret
= LLIOC_CONT
;
3389 struct llioc_data
*data
;
3390 int rc
= -EINVAL
, i
;
3392 down_read(&llioc
.ioc_sem
);
3393 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3394 for (i
= 0; i
< data
->iocd_count
; i
++) {
3395 if (cmd
!= data
->iocd_cmd
[i
])
3398 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3402 if (ret
== LLIOC_STOP
)
3405 up_read(&llioc
.ioc_sem
);
3412 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3414 struct ll_inode_info
*lli
= ll_i2info(inode
);
3415 struct cl_env_nest nest
;
3422 env
= cl_env_nested_get(&nest
);
3424 return PTR_ERR(env
);
3426 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3427 cl_env_nested_put(&nest
, env
);
3429 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3430 struct ldlm_lock
*lock
= conf
->coc_lock
;
3433 LASSERT(ldlm_has_layout(lock
));
3435 /* it can only be allowed to match after layout is
3436 * applied to inode otherwise false layout would be
3437 * seen. Applying layout should happen before dropping
3440 ldlm_lock_allow_match(lock
);
3446 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3447 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3450 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3451 struct ptlrpc_request
*req
;
3452 struct mdt_body
*body
;
3458 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3459 PFID(ll_inode2fid(inode
)), ldlm_is_lvb_ready(lock
),
3460 lock
->l_lvb_data
, lock
->l_lvb_len
);
3462 if (lock
->l_lvb_data
&& ldlm_is_lvb_ready(lock
))
3465 /* if layout lock was granted right away, the layout is returned
3466 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3467 * blocked and then granted via completion ast, we have to fetch
3468 * layout here. Please note that we can't use the LVB buffer in
3469 * completion AST because it doesn't have a large enough buffer
3471 rc
= ll_get_default_mdsize(sbi
, &lmmsize
);
3473 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
),
3474 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3479 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3485 lmmsize
= body
->mbo_eadatasize
;
3486 if (lmmsize
== 0) /* empty layout */ {
3491 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3497 lvbdata
= libcfs_kvzalloc(lmmsize
, GFP_NOFS
);
3503 memcpy(lvbdata
, lmm
, lmmsize
);
3504 lock_res_and_lock(lock
);
3505 if (lock
->l_lvb_data
)
3506 kvfree(lock
->l_lvb_data
);
3508 lock
->l_lvb_data
= lvbdata
;
3509 lock
->l_lvb_len
= lmmsize
;
3510 unlock_res_and_lock(lock
);
3513 ptlrpc_req_finished(req
);
3518 * Apply the layout to the inode. Layout lock is held and will be released
3521 static int ll_layout_lock_set(struct lustre_handle
*lockh
, enum ldlm_mode mode
,
3522 struct inode
*inode
, __u32
*gen
, bool reconf
)
3524 struct ll_inode_info
*lli
= ll_i2info(inode
);
3525 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3526 struct ldlm_lock
*lock
;
3527 struct lustre_md md
= { NULL
};
3528 struct cl_object_conf conf
;
3531 bool wait_layout
= false;
3533 LASSERT(lustre_handle_is_used(lockh
));
3535 lock
= ldlm_handle2lock(lockh
);
3537 LASSERT(ldlm_has_layout(lock
));
3539 LDLM_DEBUG(lock
, "File "DFID
"(%p) being reconfigured: %d",
3540 PFID(&lli
->lli_fid
), inode
, reconf
);
3542 /* in case this is a caching lock and reinstate with new inode */
3543 md_set_lock_data(sbi
->ll_md_exp
, lockh
, inode
, NULL
);
3545 lock_res_and_lock(lock
);
3546 lvb_ready
= ldlm_is_lvb_ready(lock
);
3547 unlock_res_and_lock(lock
);
3548 /* checking lvb_ready is racy but this is okay. The worst case is
3549 * that multi processes may configure the file on the same time.
3551 if (lvb_ready
|| !reconf
) {
3554 /* layout_gen must be valid if layout lock is not
3555 * cancelled and stripe has already set
3557 *gen
= ll_layout_version_get(lli
);
3563 rc
= ll_layout_fetch(inode
, lock
);
3567 /* for layout lock, lmm is returned in lock's lvb.
3568 * lvb_data is immutable if the lock is held so it's safe to access it
3569 * without res lock. See the description in ldlm_lock_decref_internal()
3570 * for the condition to free lvb_data of layout lock
3572 if (lock
->l_lvb_data
) {
3573 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3574 lock
->l_lvb_data
, lock
->l_lvb_len
);
3576 *gen
= LL_LAYOUT_GEN_EMPTY
;
3578 *gen
= md
.lsm
->lsm_layout_gen
;
3581 CERROR("%s: file " DFID
" unpackmd error: %d\n",
3582 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3583 PFID(&lli
->lli_fid
), rc
);
3589 /* set layout to file. Unlikely this will fail as old layout was
3592 memset(&conf
, 0, sizeof(conf
));
3593 conf
.coc_opc
= OBJECT_CONF_SET
;
3594 conf
.coc_inode
= inode
;
3595 conf
.coc_lock
= lock
;
3596 conf
.u
.coc_md
= &md
;
3597 rc
= ll_layout_conf(inode
, &conf
);
3600 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3602 /* refresh layout failed, need to wait */
3603 wait_layout
= rc
== -EBUSY
;
3606 LDLM_LOCK_PUT(lock
);
3607 ldlm_lock_decref(lockh
, mode
);
3609 /* wait for IO to complete if it's still being used. */
3611 CDEBUG(D_INODE
, "%s: "DFID
"(%p) wait for layout reconf\n",
3612 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3613 PFID(&lli
->lli_fid
), inode
);
3615 memset(&conf
, 0, sizeof(conf
));
3616 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3617 conf
.coc_inode
= inode
;
3618 rc
= ll_layout_conf(inode
, &conf
);
3622 CDEBUG(D_INODE
, "%s: file="DFID
" waiting layout return: %d.\n",
3623 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3624 PFID(&lli
->lli_fid
), rc
);
3630 * This function checks if there exists a LAYOUT lock on the client side,
3631 * or enqueues it if it doesn't have one in cache.
3633 * This function will not hold layout lock so it may be revoked any time after
3634 * this function returns. Any operations depend on layout should be redone
3637 * This function should be called before lov_io_init() to get an uptodate
3638 * layout version, the caller should save the version number and after IO
3639 * is finished, this function should be called again to verify that layout
3640 * is not changed during IO time.
3642 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3644 struct ll_inode_info
*lli
= ll_i2info(inode
);
3645 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3646 struct md_op_data
*op_data
;
3647 struct lookup_intent it
;
3648 struct lustre_handle lockh
;
3649 enum ldlm_mode mode
;
3650 struct ldlm_enqueue_info einfo
= {
3651 .ei_type
= LDLM_IBITS
,
3653 .ei_cb_bl
= &ll_md_blocking_ast
,
3654 .ei_cb_cp
= &ldlm_completion_ast
,
3658 *gen
= ll_layout_version_get(lli
);
3659 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
) || *gen
!= LL_LAYOUT_GEN_NONE
)
3663 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3664 LASSERT(S_ISREG(inode
->i_mode
));
3666 /* take layout lock mutex to enqueue layout lock exclusively. */
3667 mutex_lock(&lli
->lli_layout_mutex
);
3670 /* mostly layout lock is caching on the local side, so try to match
3671 * it before grabbing layout lock mutex.
3673 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3674 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3675 if (mode
!= 0) { /* hit cached lock */
3676 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3680 mutex_unlock(&lli
->lli_layout_mutex
);
3684 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3685 0, 0, LUSTRE_OPC_ANY
, NULL
);
3686 if (IS_ERR(op_data
)) {
3687 mutex_unlock(&lli
->lli_layout_mutex
);
3688 return PTR_ERR(op_data
);
3691 /* have to enqueue one */
3692 memset(&it
, 0, sizeof(it
));
3693 it
.it_op
= IT_LAYOUT
;
3694 lockh
.cookie
= 0ULL;
3696 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID
"(%p)",
3697 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3698 PFID(&lli
->lli_fid
), inode
);
3700 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
, &it
, op_data
, &lockh
, 0);
3701 ptlrpc_req_finished(it
.it_request
);
3702 it
.it_request
= NULL
;
3704 ll_finish_md_op_data(op_data
);
3706 mode
= it
.it_lock_mode
;
3707 it
.it_lock_mode
= 0;
3708 ll_intent_drop_lock(&it
);
3711 /* set lock data in case this is a new lock */
3712 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3713 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3717 mutex_unlock(&lli
->lli_layout_mutex
);
3723 * This function send a restore request to the MDT
3725 int ll_layout_restore(struct inode
*inode
, loff_t offset
, __u64 length
)
3727 struct hsm_user_request
*hur
;
3730 len
= sizeof(struct hsm_user_request
) +
3731 sizeof(struct hsm_user_item
);
3732 hur
= kzalloc(len
, GFP_NOFS
);
3736 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3737 hur
->hur_request
.hr_archive_id
= 0;
3738 hur
->hur_request
.hr_flags
= 0;
3739 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3740 sizeof(hur
->hur_user_item
[0].hui_fid
));
3741 hur
->hur_user_item
[0].hui_extent
.offset
= offset
;
3742 hur
->hur_user_item
[0].hui_extent
.length
= length
;
3743 hur
->hur_request
.hr_itemcount
= 1;
3744 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, ll_i2sbi(inode
)->ll_md_exp
,