4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
53 struct ll_file_data
*ll_file_data_get(void)
55 struct ll_file_data
*fd
;
57 OBD_SLAB_ALLOC_PTR_GFP(fd
, ll_file_data_slab
, __GFP_IO
);
60 fd
->fd_write_failed
= false;
64 static void ll_file_data_put(struct ll_file_data
*fd
)
67 OBD_SLAB_FREE_PTR(fd
, ll_file_data_slab
);
70 void ll_pack_inode2opdata(struct inode
*inode
, struct md_op_data
*op_data
,
71 struct lustre_handle
*fh
)
73 op_data
->op_fid1
= ll_i2info(inode
)->lli_fid
;
74 op_data
->op_attr
.ia_mode
= inode
->i_mode
;
75 op_data
->op_attr
.ia_atime
= inode
->i_atime
;
76 op_data
->op_attr
.ia_mtime
= inode
->i_mtime
;
77 op_data
->op_attr
.ia_ctime
= inode
->i_ctime
;
78 op_data
->op_attr
.ia_size
= i_size_read(inode
);
79 op_data
->op_attr_blocks
= inode
->i_blocks
;
80 ((struct ll_iattr
*)&op_data
->op_attr
)->ia_attr_flags
=
81 ll_inode_to_ext_flags(inode
->i_flags
);
82 op_data
->op_ioepoch
= ll_i2info(inode
)->lli_ioepoch
;
84 op_data
->op_handle
= *fh
;
85 op_data
->op_capa1
= ll_mdscapa_get(inode
);
87 if (LLIF_DATA_MODIFIED
& ll_i2info(inode
)->lli_flags
)
88 op_data
->op_bias
|= MDS_DATA_MODIFIED
;
92 * Closes the IO epoch and packs all the attributes into @op_data for
95 static void ll_prepare_close(struct inode
*inode
, struct md_op_data
*op_data
,
96 struct obd_client_handle
*och
)
98 op_data
->op_attr
.ia_valid
= ATTR_MODE
| ATTR_ATIME
| ATTR_ATIME_SET
|
99 ATTR_MTIME
| ATTR_MTIME_SET
|
100 ATTR_CTIME
| ATTR_CTIME_SET
;
102 if (!(och
->och_flags
& FMODE_WRITE
))
105 if (!exp_connect_som(ll_i2mdexp(inode
)) || !S_ISREG(inode
->i_mode
))
106 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
108 ll_ioepoch_close(inode
, op_data
, &och
, 0);
111 ll_pack_inode2opdata(inode
, op_data
, &och
->och_fh
);
112 ll_prep_md_op_data(op_data
, inode
, NULL
, NULL
,
113 0, 0, LUSTRE_OPC_ANY
, NULL
);
116 static int ll_close_inode_openhandle(struct obd_export
*md_exp
,
118 struct obd_client_handle
*och
,
119 const __u64
*data_version
)
121 struct obd_export
*exp
= ll_i2mdexp(inode
);
122 struct md_op_data
*op_data
;
123 struct ptlrpc_request
*req
= NULL
;
124 struct obd_device
*obd
= class_exp2obd(exp
);
130 * XXX: in case of LMV, is this correct to access
133 CERROR("Invalid MDC connection handle "LPX64
"\n",
134 ll_i2mdexp(inode
)->exp_handle
.h_cookie
);
138 OBD_ALLOC_PTR(op_data
);
140 GOTO(out
, rc
= -ENOMEM
); // XXX We leak openhandle and request here.
142 ll_prepare_close(inode
, op_data
, och
);
143 if (data_version
!= NULL
) {
144 /* Pass in data_version implies release. */
145 op_data
->op_bias
|= MDS_HSM_RELEASE
;
146 op_data
->op_data_version
= *data_version
;
147 op_data
->op_lease_handle
= och
->och_lease_handle
;
148 op_data
->op_attr
.ia_valid
|= ATTR_SIZE
| ATTR_BLOCKS
;
150 epoch_close
= (op_data
->op_flags
& MF_EPOCH_CLOSE
);
151 rc
= md_close(md_exp
, op_data
, och
->och_mod
, &req
);
153 /* This close must have the epoch closed. */
154 LASSERT(epoch_close
);
155 /* MDS has instructed us to obtain Size-on-MDS attribute from
156 * OSTs and send setattr to back to MDS. */
157 rc
= ll_som_update(inode
, op_data
);
159 CERROR("inode %lu mdc Size-on-MDS update failed: "
160 "rc = %d\n", inode
->i_ino
, rc
);
164 CERROR("inode %lu mdc close failed: rc = %d\n",
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc
== 0 && (op_data
->op_bias
& MDS_DATA_MODIFIED
)) {
171 struct ll_inode_info
*lli
= ll_i2info(inode
);
173 spin_lock(&lli
->lli_lock
);
174 lli
->lli_flags
&= ~LLIF_DATA_MODIFIED
;
175 spin_unlock(&lli
->lli_lock
);
179 rc
= ll_objects_destroy(req
, inode
);
181 CERROR("inode %lu ll_objects destroy: rc = %d\n",
184 if (rc
== 0 && op_data
->op_bias
& MDS_HSM_RELEASE
) {
185 struct mdt_body
*body
;
186 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
187 if (!(body
->valid
& OBD_MD_FLRELEASED
))
191 ll_finish_md_op_data(op_data
);
194 if (exp_connect_som(exp
) && !epoch_close
&&
195 S_ISREG(inode
->i_mode
) && (och
->och_flags
& FMODE_WRITE
)) {
196 ll_queue_done_writing(inode
, LLIF_DONE_WRITING
);
198 md_clear_open_replay_data(md_exp
, och
);
199 /* Free @och if it is not waiting for DONE_WRITING. */
200 och
->och_fh
.cookie
= DEAD_HANDLE_MAGIC
;
203 if (req
) /* This is close request */
204 ptlrpc_req_finished(req
);
208 int ll_md_real_close(struct inode
*inode
, int flags
)
210 struct ll_inode_info
*lli
= ll_i2info(inode
);
211 struct obd_client_handle
**och_p
;
212 struct obd_client_handle
*och
;
216 if (flags
& FMODE_WRITE
) {
217 och_p
= &lli
->lli_mds_write_och
;
218 och_usecount
= &lli
->lli_open_fd_write_count
;
219 } else if (flags
& FMODE_EXEC
) {
220 och_p
= &lli
->lli_mds_exec_och
;
221 och_usecount
= &lli
->lli_open_fd_exec_count
;
223 LASSERT(flags
& FMODE_READ
);
224 och_p
= &lli
->lli_mds_read_och
;
225 och_usecount
= &lli
->lli_open_fd_read_count
;
228 mutex_lock(&lli
->lli_och_mutex
);
229 if (*och_usecount
) { /* There are still users of this handle, so
231 mutex_unlock(&lli
->lli_och_mutex
);
236 mutex_unlock(&lli
->lli_och_mutex
);
238 if (och
) { /* There might be a race and somebody have freed this och
240 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
247 int ll_md_close(struct obd_export
*md_exp
, struct inode
*inode
,
250 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
251 struct ll_inode_info
*lli
= ll_i2info(inode
);
254 /* clear group lock, if present */
255 if (unlikely(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
))
256 ll_put_grouplock(inode
, file
, fd
->fd_grouplock
.cg_gid
);
258 if (fd
->fd_lease_och
!= NULL
) {
261 /* Usually the lease is not released when the
262 * application crashed, we need to release here. */
263 rc
= ll_lease_close(fd
->fd_lease_och
, inode
, &lease_broken
);
264 CDEBUG(rc
? D_ERROR
: D_INODE
, "Clean up lease "DFID
" %d/%d\n",
265 PFID(&lli
->lli_fid
), rc
, lease_broken
);
267 fd
->fd_lease_och
= NULL
;
270 if (fd
->fd_och
!= NULL
) {
271 rc
= ll_close_inode_openhandle(md_exp
, inode
, fd
->fd_och
, NULL
);
276 /* Let's see if we have good enough OPEN lock on the file and if
277 we can skip talking to MDS */
278 if (file
->f_dentry
->d_inode
) { /* Can this ever be false? */
280 int flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_TEST_LOCK
;
281 struct lustre_handle lockh
;
282 struct inode
*inode
= file
->f_dentry
->d_inode
;
283 ldlm_policy_data_t policy
= {.l_inodebits
={MDS_INODELOCK_OPEN
}};
285 mutex_lock(&lli
->lli_och_mutex
);
286 if (fd
->fd_omode
& FMODE_WRITE
) {
288 LASSERT(lli
->lli_open_fd_write_count
);
289 lli
->lli_open_fd_write_count
--;
290 } else if (fd
->fd_omode
& FMODE_EXEC
) {
292 LASSERT(lli
->lli_open_fd_exec_count
);
293 lli
->lli_open_fd_exec_count
--;
296 LASSERT(lli
->lli_open_fd_read_count
);
297 lli
->lli_open_fd_read_count
--;
299 mutex_unlock(&lli
->lli_och_mutex
);
301 if (!md_lock_match(md_exp
, flags
, ll_inode2fid(inode
),
302 LDLM_IBITS
, &policy
, lockmode
,
304 rc
= ll_md_real_close(file
->f_dentry
->d_inode
,
308 CERROR("Releasing a file %p with negative dentry %p. Name %s",
309 file
, file
->f_dentry
, file
->f_dentry
->d_name
.name
);
313 LUSTRE_FPRIVATE(file
) = NULL
;
314 ll_file_data_put(fd
);
315 ll_capa_close(inode
);
320 /* While this returns an error code, fput() the caller does not, so we need
321 * to make every effort to clean up all of our state here. Also, applications
322 * rarely check close errors and even if an error is returned they will not
323 * re-try the close call.
325 int ll_file_release(struct inode
*inode
, struct file
*file
)
327 struct ll_file_data
*fd
;
328 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
329 struct ll_inode_info
*lli
= ll_i2info(inode
);
332 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
333 inode
->i_generation
, inode
);
335 #ifdef CONFIG_FS_POSIX_ACL
336 if (sbi
->ll_flags
& LL_SBI_RMT_CLIENT
&&
337 inode
== inode
->i_sb
->s_root
->d_inode
) {
338 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
341 if (unlikely(fd
->fd_flags
& LL_FILE_RMTACL
)) {
342 fd
->fd_flags
&= ~LL_FILE_RMTACL
;
343 rct_del(&sbi
->ll_rct
, current_pid());
344 et_search_free(&sbi
->ll_et
, current_pid());
349 if (inode
->i_sb
->s_root
!= file
->f_dentry
)
350 ll_stats_ops_tally(sbi
, LPROC_LL_RELEASE
, 1);
351 fd
= LUSTRE_FPRIVATE(file
);
354 /* The last ref on @file, maybe not the the owner pid of statahead.
355 * Different processes can open the same dir, "ll_opendir_key" means:
356 * it is me that should stop the statahead thread. */
357 if (S_ISDIR(inode
->i_mode
) && lli
->lli_opendir_key
== fd
&&
358 lli
->lli_opendir_pid
!= 0)
359 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
361 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
362 LUSTRE_FPRIVATE(file
) = NULL
;
363 ll_file_data_put(fd
);
367 if (!S_ISDIR(inode
->i_mode
)) {
368 lov_read_and_clear_async_rc(lli
->lli_clob
);
369 lli
->lli_async_rc
= 0;
372 rc
= ll_md_close(sbi
->ll_md_exp
, inode
, file
);
374 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG
, cfs_fail_val
))
375 libcfs_debug_dumplog();
380 static int ll_intent_file_open(struct file
*file
, void *lmm
,
381 int lmmsize
, struct lookup_intent
*itp
)
383 struct ll_sb_info
*sbi
= ll_i2sbi(file
->f_dentry
->d_inode
);
384 struct dentry
*parent
= file
->f_dentry
->d_parent
;
385 const char *name
= file
->f_dentry
->d_name
.name
;
386 const int len
= file
->f_dentry
->d_name
.len
;
387 struct md_op_data
*op_data
;
388 struct ptlrpc_request
*req
;
389 __u32 opc
= LUSTRE_OPC_ANY
;
395 /* Usually we come here only for NFSD, and we want open lock.
396 But we can also get here with pre 2.6.15 patchless kernels, and in
397 that case that lock is also ok */
398 /* We can also get here if there was cached open handle in revalidate_it
399 * but it disappeared while we were getting from there to ll_file_open.
400 * But this means this file was closed and immediately opened which
401 * makes a good candidate for using OPEN lock */
402 /* If lmmsize & lmm are not 0, we are just setting stripe info
403 * parameters. No need for the open lock */
404 if (lmm
== NULL
&& lmmsize
== 0) {
405 itp
->it_flags
|= MDS_OPEN_LOCK
;
406 if (itp
->it_flags
& FMODE_WRITE
)
407 opc
= LUSTRE_OPC_CREATE
;
410 op_data
= ll_prep_md_op_data(NULL
, parent
->d_inode
,
411 file
->f_dentry
->d_inode
, name
, len
,
414 return PTR_ERR(op_data
);
416 itp
->it_flags
|= MDS_OPEN_BY_FID
;
417 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, lmm
, lmmsize
, itp
,
418 0 /*unused */, &req
, ll_md_blocking_ast
, 0);
419 ll_finish_md_op_data(op_data
);
421 /* reason for keep own exit path - don`t flood log
422 * with messages with -ESTALE errors.
424 if (!it_disposition(itp
, DISP_OPEN_OPEN
) ||
425 it_open_error(DISP_OPEN_OPEN
, itp
))
427 ll_release_openhandle(file
->f_dentry
, itp
);
431 if (it_disposition(itp
, DISP_LOOKUP_NEG
))
432 GOTO(out
, rc
= -ENOENT
);
434 if (rc
!= 0 || it_open_error(DISP_OPEN_OPEN
, itp
)) {
435 rc
= rc
? rc
: it_open_error(DISP_OPEN_OPEN
, itp
);
436 CDEBUG(D_VFSTRACE
, "lock enqueue: err: %d\n", rc
);
440 rc
= ll_prep_inode(&file
->f_dentry
->d_inode
, req
, NULL
, itp
);
441 if (!rc
&& itp
->d
.lustre
.it_lock_mode
)
442 ll_set_lock_data(sbi
->ll_md_exp
, file
->f_dentry
->d_inode
,
446 ptlrpc_req_finished(itp
->d
.lustre
.it_data
);
447 it_clear_disposition(itp
, DISP_ENQ_COMPLETE
);
448 ll_intent_drop_lock(itp
);
454 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
455 * not believe attributes if a few ioepoch holders exist. Attributes for
456 * previous ioepoch if new one is opened are also skipped by MDS.
458 void ll_ioepoch_open(struct ll_inode_info
*lli
, __u64 ioepoch
)
460 if (ioepoch
&& lli
->lli_ioepoch
!= ioepoch
) {
461 lli
->lli_ioepoch
= ioepoch
;
462 CDEBUG(D_INODE
, "Epoch "LPU64
" opened on "DFID
"\n",
463 ioepoch
, PFID(&lli
->lli_fid
));
467 static int ll_och_fill(struct obd_export
*md_exp
, struct lookup_intent
*it
,
468 struct obd_client_handle
*och
)
470 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
471 struct mdt_body
*body
;
473 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
474 och
->och_fh
= body
->handle
;
475 och
->och_fid
= body
->fid1
;
476 och
->och_lease_handle
.cookie
= it
->d
.lustre
.it_lock_handle
;
477 och
->och_magic
= OBD_CLIENT_HANDLE_MAGIC
;
478 och
->och_flags
= it
->it_flags
;
480 return md_set_open_replay_data(md_exp
, och
, req
);
483 int ll_local_open(struct file
*file
, struct lookup_intent
*it
,
484 struct ll_file_data
*fd
, struct obd_client_handle
*och
)
486 struct inode
*inode
= file
->f_dentry
->d_inode
;
487 struct ll_inode_info
*lli
= ll_i2info(inode
);
489 LASSERT(!LUSTRE_FPRIVATE(file
));
494 struct ptlrpc_request
*req
= it
->d
.lustre
.it_data
;
495 struct mdt_body
*body
;
498 rc
= ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
502 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
503 ll_ioepoch_open(lli
, body
->ioepoch
);
506 LUSTRE_FPRIVATE(file
) = fd
;
507 ll_readahead_init(inode
, &fd
->fd_ras
);
508 fd
->fd_omode
= it
->it_flags
& (FMODE_READ
| FMODE_WRITE
| FMODE_EXEC
);
512 /* Open a file, and (for the very first open) create objects on the OSTs at
513 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
514 * creation or open until ll_lov_setstripe() ioctl is called.
516 * If we already have the stripe MD locally then we don't request it in
517 * md_open(), by passing a lmm_size = 0.
519 * It is up to the application to ensure no other processes open this file
520 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
521 * used. We might be able to avoid races of that sort by getting lli_open_sem
522 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
523 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
525 int ll_file_open(struct inode
*inode
, struct file
*file
)
527 struct ll_inode_info
*lli
= ll_i2info(inode
);
528 struct lookup_intent
*it
, oit
= { .it_op
= IT_OPEN
,
529 .it_flags
= file
->f_flags
};
530 struct obd_client_handle
**och_p
= NULL
;
531 __u64
*och_usecount
= NULL
;
532 struct ll_file_data
*fd
;
533 int rc
= 0, opendir_set
= 0;
535 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode
->i_ino
,
536 inode
->i_generation
, inode
, file
->f_flags
);
538 it
= file
->private_data
; /* XXX: compat macro */
539 file
->private_data
= NULL
; /* prevent ll_local_open assertion */
541 fd
= ll_file_data_get();
543 GOTO(out_openerr
, rc
= -ENOMEM
);
546 if (S_ISDIR(inode
->i_mode
)) {
547 spin_lock(&lli
->lli_sa_lock
);
548 if (lli
->lli_opendir_key
== NULL
&& lli
->lli_sai
== NULL
&&
549 lli
->lli_opendir_pid
== 0) {
550 lli
->lli_opendir_key
= fd
;
551 lli
->lli_opendir_pid
= current_pid();
554 spin_unlock(&lli
->lli_sa_lock
);
557 if (inode
->i_sb
->s_root
== file
->f_dentry
) {
558 LUSTRE_FPRIVATE(file
) = fd
;
562 if (!it
|| !it
->d
.lustre
.it_disposition
) {
563 /* Convert f_flags into access mode. We cannot use file->f_mode,
564 * because everything but O_ACCMODE mask was stripped from
566 if ((oit
.it_flags
+ 1) & O_ACCMODE
)
568 if (file
->f_flags
& O_TRUNC
)
569 oit
.it_flags
|= FMODE_WRITE
;
571 /* kernel only call f_op->open in dentry_open. filp_open calls
572 * dentry_open after call to open_namei that checks permissions.
573 * Only nfsd_open call dentry_open directly without checking
574 * permissions and because of that this code below is safe. */
575 if (oit
.it_flags
& (FMODE_WRITE
| FMODE_READ
))
576 oit
.it_flags
|= MDS_OPEN_OWNEROVERRIDE
;
578 /* We do not want O_EXCL here, presumably we opened the file
579 * already? XXX - NFS implications? */
580 oit
.it_flags
&= ~O_EXCL
;
582 /* bug20584, if "it_flags" contains O_CREAT, the file will be
583 * created if necessary, then "IT_CREAT" should be set to keep
584 * consistent with it */
585 if (oit
.it_flags
& O_CREAT
)
586 oit
.it_op
|= IT_CREAT
;
592 /* Let's see if we have file open on MDS already. */
593 if (it
->it_flags
& FMODE_WRITE
) {
594 och_p
= &lli
->lli_mds_write_och
;
595 och_usecount
= &lli
->lli_open_fd_write_count
;
596 } else if (it
->it_flags
& FMODE_EXEC
) {
597 och_p
= &lli
->lli_mds_exec_och
;
598 och_usecount
= &lli
->lli_open_fd_exec_count
;
600 och_p
= &lli
->lli_mds_read_och
;
601 och_usecount
= &lli
->lli_open_fd_read_count
;
604 mutex_lock(&lli
->lli_och_mutex
);
605 if (*och_p
) { /* Open handle is present */
606 if (it_disposition(it
, DISP_OPEN_OPEN
)) {
607 /* Well, there's extra open request that we do not need,
608 let's close it somehow. This will decref request. */
609 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
611 mutex_unlock(&lli
->lli_och_mutex
);
612 GOTO(out_openerr
, rc
);
615 ll_release_openhandle(file
->f_dentry
, it
);
619 rc
= ll_local_open(file
, it
, fd
, NULL
);
622 mutex_unlock(&lli
->lli_och_mutex
);
623 GOTO(out_openerr
, rc
);
626 LASSERT(*och_usecount
== 0);
627 if (!it
->d
.lustre
.it_disposition
) {
628 /* We cannot just request lock handle now, new ELC code
629 means that one of other OPEN locks for this file
630 could be cancelled, and since blocking ast handler
631 would attempt to grab och_mutex as well, that would
632 result in a deadlock */
633 mutex_unlock(&lli
->lli_och_mutex
);
634 it
->it_create_mode
|= M_CHECK_STALE
;
635 rc
= ll_intent_file_open(file
, NULL
, 0, it
);
636 it
->it_create_mode
&= ~M_CHECK_STALE
;
638 GOTO(out_openerr
, rc
);
642 OBD_ALLOC(*och_p
, sizeof (struct obd_client_handle
));
644 GOTO(out_och_free
, rc
= -ENOMEM
);
648 /* md_intent_lock() didn't get a request ref if there was an
649 * open error, so don't do cleanup on the request here
651 /* XXX (green): Should not we bail out on any error here, not
652 * just open error? */
653 rc
= it_open_error(DISP_OPEN_OPEN
, it
);
655 GOTO(out_och_free
, rc
);
657 LASSERT(it_disposition(it
, DISP_ENQ_OPEN_REF
));
659 rc
= ll_local_open(file
, it
, fd
, *och_p
);
661 GOTO(out_och_free
, rc
);
663 mutex_unlock(&lli
->lli_och_mutex
);
666 /* Must do this outside lli_och_mutex lock to prevent deadlock where
667 different kind of OPEN lock for this same inode gets cancelled
668 by ldlm_cancel_lru */
669 if (!S_ISREG(inode
->i_mode
))
670 GOTO(out_och_free
, rc
);
674 if (!lli
->lli_has_smd
&&
675 (cl_is_lov_delay_create(file
->f_flags
) ||
676 (file
->f_mode
& FMODE_WRITE
) == 0)) {
677 CDEBUG(D_INODE
, "object creation was delayed\n");
678 GOTO(out_och_free
, rc
);
680 cl_lov_delay_create_clear(&file
->f_flags
);
681 GOTO(out_och_free
, rc
);
685 if (och_p
&& *och_p
) {
686 OBD_FREE(*och_p
, sizeof (struct obd_client_handle
));
687 *och_p
= NULL
; /* OBD_FREE writes some magic there */
690 mutex_unlock(&lli
->lli_och_mutex
);
693 if (opendir_set
!= 0)
694 ll_stop_statahead(inode
, lli
->lli_opendir_key
);
696 ll_file_data_put(fd
);
698 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_OPEN
, 1);
701 if (it
&& it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
702 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
703 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
709 static int ll_md_blocking_lease_ast(struct ldlm_lock
*lock
,
710 struct ldlm_lock_desc
*desc
, void *data
, int flag
)
713 struct lustre_handle lockh
;
716 case LDLM_CB_BLOCKING
:
717 ldlm_lock2handle(lock
, &lockh
);
718 rc
= ldlm_cli_cancel(&lockh
, LCF_ASYNC
);
720 CDEBUG(D_INODE
, "ldlm_cli_cancel: %d\n", rc
);
724 case LDLM_CB_CANCELING
:
732 * Acquire a lease and open the file.
734 struct obd_client_handle
*ll_lease_open(struct inode
*inode
, struct file
*file
,
735 fmode_t fmode
, __u64 open_flags
)
737 struct lookup_intent it
= { .it_op
= IT_OPEN
};
738 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
739 struct md_op_data
*op_data
;
740 struct ptlrpc_request
*req
;
741 struct lustre_handle old_handle
= { 0 };
742 struct obd_client_handle
*och
= NULL
;
746 if (fmode
!= FMODE_WRITE
&& fmode
!= FMODE_READ
)
747 return ERR_PTR(-EINVAL
);
750 struct ll_inode_info
*lli
= ll_i2info(inode
);
751 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
752 struct obd_client_handle
**och_p
;
755 if (!(fmode
& file
->f_mode
) || (file
->f_mode
& FMODE_EXEC
))
756 return ERR_PTR(-EPERM
);
758 /* Get the openhandle of the file */
760 mutex_lock(&lli
->lli_och_mutex
);
761 if (fd
->fd_lease_och
!= NULL
) {
762 mutex_unlock(&lli
->lli_och_mutex
);
766 if (fd
->fd_och
== NULL
) {
767 if (file
->f_mode
& FMODE_WRITE
) {
768 LASSERT(lli
->lli_mds_write_och
!= NULL
);
769 och_p
= &lli
->lli_mds_write_och
;
770 och_usecount
= &lli
->lli_open_fd_write_count
;
772 LASSERT(lli
->lli_mds_read_och
!= NULL
);
773 och_p
= &lli
->lli_mds_read_och
;
774 och_usecount
= &lli
->lli_open_fd_read_count
;
776 if (*och_usecount
== 1) {
783 mutex_unlock(&lli
->lli_och_mutex
);
784 if (rc
< 0) /* more than 1 opener */
787 LASSERT(fd
->fd_och
!= NULL
);
788 old_handle
= fd
->fd_och
->och_fh
;
793 return ERR_PTR(-ENOMEM
);
795 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
, 0, 0,
796 LUSTRE_OPC_ANY
, NULL
);
798 GOTO(out
, rc
= PTR_ERR(op_data
));
800 /* To tell the MDT this openhandle is from the same owner */
801 op_data
->op_handle
= old_handle
;
803 it
.it_flags
= fmode
| open_flags
;
804 it
.it_flags
|= MDS_OPEN_LOCK
| MDS_OPEN_BY_FID
| MDS_OPEN_LEASE
;
805 rc
= md_intent_lock(sbi
->ll_md_exp
, op_data
, NULL
, 0, &it
, 0, &req
,
806 ll_md_blocking_lease_ast
,
807 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
808 * it can be cancelled which may mislead applications that the lease is
810 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
811 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
812 * doesn't deal with openhandle, so normal openhandle will be leaked. */
813 LDLM_FL_NO_LRU
| LDLM_FL_EXCL
);
814 ll_finish_md_op_data(op_data
);
816 ptlrpc_req_finished(req
);
817 it_clear_disposition(&it
, DISP_ENQ_COMPLETE
);
820 GOTO(out_release_it
, rc
);
822 if (it_disposition(&it
, DISP_LOOKUP_NEG
))
823 GOTO(out_release_it
, rc
= -ENOENT
);
825 rc
= it_open_error(DISP_OPEN_OPEN
, &it
);
827 GOTO(out_release_it
, rc
);
829 LASSERT(it_disposition(&it
, DISP_ENQ_OPEN_REF
));
830 ll_och_fill(sbi
->ll_md_exp
, &it
, och
);
832 if (!it_disposition(&it
, DISP_OPEN_LEASE
)) /* old server? */
833 GOTO(out_close
, rc
= -EOPNOTSUPP
);
835 /* already get lease, handle lease lock */
836 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
837 if (it
.d
.lustre
.it_lock_mode
== 0 ||
838 it
.d
.lustre
.it_lock_bits
!= MDS_INODELOCK_OPEN
) {
839 /* open lock must return for lease */
840 CERROR(DFID
"lease granted but no open lock, %d/%llu.\n",
841 PFID(ll_inode2fid(inode
)), it
.d
.lustre
.it_lock_mode
,
842 it
.d
.lustre
.it_lock_bits
);
843 GOTO(out_close
, rc
= -EPROTO
);
846 ll_intent_release(&it
);
850 rc2
= ll_close_inode_openhandle(sbi
->ll_md_exp
, inode
, och
, NULL
);
852 CERROR("Close openhandle returned %d\n", rc2
);
854 /* cancel open lock */
855 if (it
.d
.lustre
.it_lock_mode
!= 0) {
856 ldlm_lock_decref_and_cancel(&och
->och_lease_handle
,
857 it
.d
.lustre
.it_lock_mode
);
858 it
.d
.lustre
.it_lock_mode
= 0;
861 ll_intent_release(&it
);
866 EXPORT_SYMBOL(ll_lease_open
);
869 * Release lease and close the file.
870 * It will check if the lease has ever broken.
872 int ll_lease_close(struct obd_client_handle
*och
, struct inode
*inode
,
875 struct ldlm_lock
*lock
;
876 bool cancelled
= true;
879 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
881 lock_res_and_lock(lock
);
882 cancelled
= ldlm_is_cancel(lock
);
883 unlock_res_and_lock(lock
);
887 CDEBUG(D_INODE
, "lease for "DFID
" broken? %d\n",
888 PFID(&ll_i2info(inode
)->lli_fid
), cancelled
);
891 ldlm_cli_cancel(&och
->och_lease_handle
, 0);
892 if (lease_broken
!= NULL
)
893 *lease_broken
= cancelled
;
895 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
899 EXPORT_SYMBOL(ll_lease_close
);
901 /* Fills the obdo with the attributes for the lsm */
902 static int ll_lsm_getattr(struct lov_stripe_md
*lsm
, struct obd_export
*exp
,
903 struct obd_capa
*capa
, struct obdo
*obdo
,
904 __u64 ioepoch
, int sync
)
906 struct ptlrpc_request_set
*set
;
907 struct obd_info oinfo
= { { { 0 } } };
910 LASSERT(lsm
!= NULL
);
914 oinfo
.oi_oa
->o_oi
= lsm
->lsm_oi
;
915 oinfo
.oi_oa
->o_mode
= S_IFREG
;
916 oinfo
.oi_oa
->o_ioepoch
= ioepoch
;
917 oinfo
.oi_oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLTYPE
|
918 OBD_MD_FLSIZE
| OBD_MD_FLBLOCKS
|
919 OBD_MD_FLBLKSZ
| OBD_MD_FLATIME
|
920 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
|
921 OBD_MD_FLGROUP
| OBD_MD_FLEPOCH
|
922 OBD_MD_FLDATAVERSION
;
923 oinfo
.oi_capa
= capa
;
925 oinfo
.oi_oa
->o_valid
|= OBD_MD_FLFLAGS
;
926 oinfo
.oi_oa
->o_flags
|= OBD_FL_SRVLOCK
;
929 set
= ptlrpc_prep_set();
931 CERROR("can't allocate ptlrpc set\n");
934 rc
= obd_getattr_async(exp
, &oinfo
, set
);
936 rc
= ptlrpc_set_wait(set
);
937 ptlrpc_set_destroy(set
);
940 oinfo
.oi_oa
->o_valid
&= (OBD_MD_FLBLOCKS
| OBD_MD_FLBLKSZ
|
941 OBD_MD_FLATIME
| OBD_MD_FLMTIME
|
942 OBD_MD_FLCTIME
| OBD_MD_FLSIZE
|
943 OBD_MD_FLDATAVERSION
);
948 * Performs the getattr on the inode and updates its fields.
949 * If @sync != 0, perform the getattr under the server-side lock.
951 int ll_inode_getattr(struct inode
*inode
, struct obdo
*obdo
,
952 __u64 ioepoch
, int sync
)
954 struct obd_capa
*capa
= ll_mdscapa_get(inode
);
955 struct lov_stripe_md
*lsm
;
958 lsm
= ccc_inode_lsm_get(inode
);
959 rc
= ll_lsm_getattr(lsm
, ll_i2dtexp(inode
),
960 capa
, obdo
, ioepoch
, sync
);
963 struct ost_id
*oi
= lsm
? &lsm
->lsm_oi
: &obdo
->o_oi
;
965 obdo_refresh_inode(inode
, obdo
, obdo
->o_valid
);
966 CDEBUG(D_INODE
, "objid "DOSTID
" size %llu, blocks %llu,"
967 " blksize %lu\n", POSTID(oi
), i_size_read(inode
),
968 (unsigned long long)inode
->i_blocks
,
969 (unsigned long)ll_inode_blksize(inode
));
971 ccc_inode_lsm_put(inode
, lsm
);
975 int ll_merge_lvb(const struct lu_env
*env
, struct inode
*inode
)
977 struct ll_inode_info
*lli
= ll_i2info(inode
);
978 struct cl_object
*obj
= lli
->lli_clob
;
979 struct cl_attr
*attr
= ccc_env_thread_attr(env
);
983 ll_inode_size_lock(inode
);
984 /* merge timestamps the most recently obtained from mds with
985 timestamps obtained from osts */
986 LTIME_S(inode
->i_atime
) = lli
->lli_lvb
.lvb_atime
;
987 LTIME_S(inode
->i_mtime
) = lli
->lli_lvb
.lvb_mtime
;
988 LTIME_S(inode
->i_ctime
) = lli
->lli_lvb
.lvb_ctime
;
989 inode_init_lvb(inode
, &lvb
);
991 cl_object_attr_lock(obj
);
992 rc
= cl_object_attr_get(env
, obj
, attr
);
993 cl_object_attr_unlock(obj
);
996 if (lvb
.lvb_atime
< attr
->cat_atime
)
997 lvb
.lvb_atime
= attr
->cat_atime
;
998 if (lvb
.lvb_ctime
< attr
->cat_ctime
)
999 lvb
.lvb_ctime
= attr
->cat_ctime
;
1000 if (lvb
.lvb_mtime
< attr
->cat_mtime
)
1001 lvb
.lvb_mtime
= attr
->cat_mtime
;
1003 CDEBUG(D_VFSTRACE
, DFID
" updating i_size "LPU64
"\n",
1004 PFID(&lli
->lli_fid
), attr
->cat_size
);
1005 cl_isize_write_nolock(inode
, attr
->cat_size
);
1007 inode
->i_blocks
= attr
->cat_blocks
;
1009 LTIME_S(inode
->i_mtime
) = lvb
.lvb_mtime
;
1010 LTIME_S(inode
->i_atime
) = lvb
.lvb_atime
;
1011 LTIME_S(inode
->i_ctime
) = lvb
.lvb_ctime
;
1013 ll_inode_size_unlock(inode
);
1018 int ll_glimpse_ioctl(struct ll_sb_info
*sbi
, struct lov_stripe_md
*lsm
,
1021 struct obdo obdo
= { 0 };
1024 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, &obdo
, 0, 0);
1026 st
->st_size
= obdo
.o_size
;
1027 st
->st_blocks
= obdo
.o_blocks
;
1028 st
->st_mtime
= obdo
.o_mtime
;
1029 st
->st_atime
= obdo
.o_atime
;
1030 st
->st_ctime
= obdo
.o_ctime
;
1035 void ll_io_init(struct cl_io
*io
, const struct file
*file
, int write
)
1037 struct inode
*inode
= file
->f_dentry
->d_inode
;
1039 io
->u
.ci_rw
.crw_nonblock
= file
->f_flags
& O_NONBLOCK
;
1041 io
->u
.ci_wr
.wr_append
= !!(file
->f_flags
& O_APPEND
);
1042 io
->u
.ci_wr
.wr_sync
= file
->f_flags
& O_SYNC
||
1043 file
->f_flags
& O_DIRECT
||
1046 io
->ci_obj
= ll_i2info(inode
)->lli_clob
;
1047 io
->ci_lockreq
= CILR_MAYBE
;
1048 if (ll_file_nolock(file
)) {
1049 io
->ci_lockreq
= CILR_NEVER
;
1050 io
->ci_no_srvlock
= 1;
1051 } else if (file
->f_flags
& O_APPEND
) {
1052 io
->ci_lockreq
= CILR_MANDATORY
;
1057 ll_file_io_generic(const struct lu_env
*env
, struct vvp_io_args
*args
,
1058 struct file
*file
, enum cl_io_type iot
,
1059 loff_t
*ppos
, size_t count
)
1061 struct ll_inode_info
*lli
= ll_i2info(file
->f_dentry
->d_inode
);
1062 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1067 io
= ccc_env_thread_io(env
);
1068 ll_io_init(io
, file
, iot
== CIT_WRITE
);
1070 if (cl_io_rw_init(env
, io
, iot
, *ppos
, count
) == 0) {
1071 struct vvp_io
*vio
= vvp_env_io(env
);
1072 struct ccc_io
*cio
= ccc_env_io(env
);
1073 int write_mutex_locked
= 0;
1075 cio
->cui_fd
= LUSTRE_FPRIVATE(file
);
1076 vio
->cui_io_subtype
= args
->via_io_subtype
;
1078 switch (vio
->cui_io_subtype
) {
1080 cio
->cui_iov
= args
->u
.normal
.via_iov
;
1081 cio
->cui_nrsegs
= args
->u
.normal
.via_nrsegs
;
1082 cio
->cui_tot_nrsegs
= cio
->cui_nrsegs
;
1083 cio
->cui_iocb
= args
->u
.normal
.via_iocb
;
1084 if ((iot
== CIT_WRITE
) &&
1085 !(cio
->cui_fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1086 if (mutex_lock_interruptible(&lli
->
1088 GOTO(out
, result
= -ERESTARTSYS
);
1089 write_mutex_locked
= 1;
1090 } else if (iot
== CIT_READ
) {
1091 down_read(&lli
->lli_trunc_sem
);
1095 vio
->u
.sendfile
.cui_actor
= args
->u
.sendfile
.via_actor
;
1096 vio
->u
.sendfile
.cui_target
= args
->u
.sendfile
.via_target
;
1099 vio
->u
.splice
.cui_pipe
= args
->u
.splice
.via_pipe
;
1100 vio
->u
.splice
.cui_flags
= args
->u
.splice
.via_flags
;
1103 CERROR("Unknow IO type - %u\n", vio
->cui_io_subtype
);
1106 result
= cl_io_loop(env
, io
);
1107 if (write_mutex_locked
)
1108 mutex_unlock(&lli
->lli_write_mutex
);
1109 else if (args
->via_io_subtype
== IO_NORMAL
&& iot
== CIT_READ
)
1110 up_read(&lli
->lli_trunc_sem
);
1112 /* cl_io_rw_init() handled IO */
1113 result
= io
->ci_result
;
1116 if (io
->ci_nob
> 0) {
1117 result
= io
->ci_nob
;
1118 *ppos
= io
->u
.ci_wr
.wr
.crw_pos
;
1122 cl_io_fini(env
, io
);
1123 /* If any bit been read/written (result != 0), we just return
1124 * short read/write instead of restart io. */
1125 if ((result
== 0 || result
== -ENODATA
) && io
->ci_need_restart
) {
1126 CDEBUG(D_VFSTRACE
, "Restart %s on %s from %lld, count:%zd\n",
1127 iot
== CIT_READ
? "read" : "write",
1128 file
->f_dentry
->d_name
.name
, *ppos
, count
);
1129 LASSERTF(io
->ci_nob
== 0, "%zd", io
->ci_nob
);
1133 if (iot
== CIT_READ
) {
1135 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1136 LPROC_LL_READ_BYTES
, result
);
1137 } else if (iot
== CIT_WRITE
) {
1139 ll_stats_ops_tally(ll_i2sbi(file
->f_dentry
->d_inode
),
1140 LPROC_LL_WRITE_BYTES
, result
);
1141 fd
->fd_write_failed
= false;
1142 } else if (result
!= -ERESTARTSYS
) {
1143 fd
->fd_write_failed
= true;
1150 static ssize_t
ll_file_aio_read(struct kiocb
*iocb
, const struct iovec
*iov
,
1151 unsigned long nr_segs
, loff_t pos
)
1154 struct vvp_io_args
*args
;
1159 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_WRITE
);
1163 env
= cl_env_get(&refcheck
);
1165 return PTR_ERR(env
);
1167 args
= vvp_env_args(env
, IO_NORMAL
);
1168 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1169 args
->u
.normal
.via_nrsegs
= nr_segs
;
1170 args
->u
.normal
.via_iocb
= iocb
;
1172 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_READ
,
1173 &iocb
->ki_pos
, count
);
1174 cl_env_put(env
, &refcheck
);
1178 static ssize_t
ll_file_read(struct file
*file
, char *buf
, size_t count
,
1182 struct iovec
*local_iov
;
1183 struct kiocb
*kiocb
;
1187 env
= cl_env_get(&refcheck
);
1189 return PTR_ERR(env
);
1191 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1192 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1193 local_iov
->iov_base
= (void __user
*)buf
;
1194 local_iov
->iov_len
= count
;
1195 init_sync_kiocb(kiocb
, file
);
1196 kiocb
->ki_pos
= *ppos
;
1197 kiocb
->ki_nbytes
= count
;
1199 result
= ll_file_aio_read(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1200 *ppos
= kiocb
->ki_pos
;
1202 cl_env_put(env
, &refcheck
);
1207 * Write to a file (through the page cache).
1209 static ssize_t
ll_file_aio_write(struct kiocb
*iocb
, const struct iovec
*iov
,
1210 unsigned long nr_segs
, loff_t pos
)
1213 struct vvp_io_args
*args
;
1218 result
= generic_segment_checks(iov
, &nr_segs
, &count
, VERIFY_READ
);
1222 env
= cl_env_get(&refcheck
);
1224 return PTR_ERR(env
);
1226 args
= vvp_env_args(env
, IO_NORMAL
);
1227 args
->u
.normal
.via_iov
= (struct iovec
*)iov
;
1228 args
->u
.normal
.via_nrsegs
= nr_segs
;
1229 args
->u
.normal
.via_iocb
= iocb
;
1231 result
= ll_file_io_generic(env
, args
, iocb
->ki_filp
, CIT_WRITE
,
1232 &iocb
->ki_pos
, count
);
1233 cl_env_put(env
, &refcheck
);
1237 static ssize_t
ll_file_write(struct file
*file
, const char *buf
, size_t count
,
1241 struct iovec
*local_iov
;
1242 struct kiocb
*kiocb
;
1246 env
= cl_env_get(&refcheck
);
1248 return PTR_ERR(env
);
1250 local_iov
= &vvp_env_info(env
)->vti_local_iov
;
1251 kiocb
= &vvp_env_info(env
)->vti_kiocb
;
1252 local_iov
->iov_base
= (void __user
*)buf
;
1253 local_iov
->iov_len
= count
;
1254 init_sync_kiocb(kiocb
, file
);
1255 kiocb
->ki_pos
= *ppos
;
1256 kiocb
->ki_nbytes
= count
;
1258 result
= ll_file_aio_write(kiocb
, local_iov
, 1, kiocb
->ki_pos
);
1259 *ppos
= kiocb
->ki_pos
;
1261 cl_env_put(env
, &refcheck
);
1268 * Send file content (through pagecache) somewhere with helper
1270 static ssize_t
ll_file_splice_read(struct file
*in_file
, loff_t
*ppos
,
1271 struct pipe_inode_info
*pipe
, size_t count
,
1275 struct vvp_io_args
*args
;
1279 env
= cl_env_get(&refcheck
);
1281 return PTR_ERR(env
);
1283 args
= vvp_env_args(env
, IO_SPLICE
);
1284 args
->u
.splice
.via_pipe
= pipe
;
1285 args
->u
.splice
.via_flags
= flags
;
1287 result
= ll_file_io_generic(env
, args
, in_file
, CIT_READ
, ppos
, count
);
1288 cl_env_put(env
, &refcheck
);
/*
 * Ask the data export to re-create an object for this file.
 *
 * A scratch copy (lsm2) of the inode's stripe descriptor is handed to
 * obd_create() together with an obdo that has OBD_FL_RECREATE_OBJS set
 * and carries ost_idx in o_nlink.  obd_create() is called with the inode
 * size lock held.  The function bails out with -ENOENT when the layout
 * has no objects, and with -ENOMEM on the path following the lsm2
 * allocation.
 */
1292 static int ll_lov_recreate(struct inode
*inode
, struct ost_id
*oi
,
1295 struct obd_export
*exp
= ll_i2dtexp(inode
);
1296 struct obd_trans_info oti
= { 0 };
1297 struct obdo
*oa
= NULL
;
1300 struct lov_stripe_md
*lsm
= NULL
, *lsm2
;
/* take a reference on the layout; dropped by ccc_inode_lsm_put() below */
1306 lsm
= ccc_inode_lsm_get(inode
);
1307 if (!lsm_has_objects(lsm
))
1308 GOTO(out
, rc
= -ENOENT
);
/* descriptor header plus one lov_oinfo per stripe */
1310 lsm_size
= sizeof(*lsm
) + (sizeof(struct lov_oinfo
) *
1311 (lsm
->lsm_stripe_count
));
1313 OBD_ALLOC_LARGE(lsm2
, lsm_size
);
1315 GOTO(out
, rc
= -ENOMEM
);
/* the target index travels to the lower layer in o_nlink */
1318 oa
->o_nlink
= ost_idx
;
1319 oa
->o_flags
|= OBD_FL_RECREATE_OBJS
;
1320 oa
->o_valid
= OBD_MD_FLID
| OBD_MD_FLFLAGS
| OBD_MD_FLGROUP
;
1321 obdo_from_inode(oa
, inode
, OBD_MD_FLTYPE
| OBD_MD_FLATIME
|
1322 OBD_MD_FLMTIME
| OBD_MD_FLCTIME
);
1323 obdo_set_parent_fid(oa
, &ll_i2info(inode
)->lli_fid
);
/* obd_create() works on the private copy, not on the live layout */
1324 memcpy(lsm2
, lsm
, lsm_size
);
1325 ll_inode_size_lock(inode
);
1326 rc
= obd_create(NULL
, exp
, oa
, &lsm2
, &oti
);
1327 ll_inode_size_unlock(inode
);
1329 OBD_FREE_LARGE(lsm2
, lsm_size
);
1332 ccc_inode_lsm_put(inode
, lsm
);
1337 static int ll_lov_recreate_obj(struct inode
*inode
, unsigned long arg
)
1339 struct ll_recreate_obj ucreat
;
1342 if (!capable(CFS_CAP_SYS_ADMIN
))
1345 if (copy_from_user(&ucreat
, (struct ll_recreate_obj
*)arg
,
1349 ostid_set_seq_mdt0(&oi
);
1350 ostid_set_id(&oi
, ucreat
.lrc_id
);
1351 return ll_lov_recreate(inode
, &oi
, ucreat
.lrc_ost_idx
);
1354 static int ll_lov_recreate_fid(struct inode
*inode
, unsigned long arg
)
1360 if (!capable(CFS_CAP_SYS_ADMIN
))
1363 if (copy_from_user(&fid
, (struct lu_fid
*)arg
, sizeof(fid
)))
1366 fid_to_ostid(&fid
, &oi
);
1367 ost_idx
= (fid_seq(&fid
) >> 16) & 0xffff;
1368 return ll_lov_recreate(inode
, &oi
, ost_idx
);
/*
 * Set the striping of a file by re-opening it with an IT_OPEN intent
 * that carries the user layout @lum (@lum_size bytes) and open @flags.
 *
 * Fails with -EEXIST on the path that logs "stripe already exists".
 * The intent open is issued with the inode size lock held; the open
 * handle obtained from the intent is closed again through
 * ll_release_openhandle().  cl_lov_delay_create_clear() is applied to
 * file->f_flags before returning.
 *
 * NOTE(review): the jump targets (out, out_unlock, out_req_free) are
 * referenced but their positions should be re-checked; confirm that the
 * it_status < 0 path releases the size lock taken here.
 */
1371 int ll_lov_setstripe_ea_info(struct inode
*inode
, struct file
*file
,
1372 int flags
, struct lov_user_md
*lum
, int lum_size
)
1374 struct lov_stripe_md
*lsm
= NULL
;
1375 struct lookup_intent oit
= {.it_op
= IT_OPEN
, .it_flags
= flags
};
1378 lsm
= ccc_inode_lsm_get(inode
);
1380 ccc_inode_lsm_put(inode
, lsm
);
1381 CDEBUG(D_IOCTL
, "stripe already exists for ino %lu\n",
1383 GOTO(out
, rc
= -EEXIST
);
1386 ll_inode_size_lock(inode
);
1387 rc
= ll_intent_file_open(file
, lum
, lum_size
, &oit
);
1389 GOTO(out_unlock
, rc
);
/* the RPC itself succeeded; now look at the status stored in the intent */
1390 rc
= oit
.d
.lustre
.it_status
;
1392 GOTO(out_req_free
, rc
);
1394 ll_release_openhandle(file
->f_dentry
, &oit
);
1397 ll_inode_size_unlock(inode
);
1398 ll_intent_release(&oit
);
1399 ccc_inode_lsm_put(inode
, lsm
);
1401 cl_lov_delay_create_clear(&file
->f_flags
);
1404 ptlrpc_req_finished((struct ptlrpc_request
*) oit
.d
.lustre
.it_data
);
/*
 * Fetch the LOV EA (striping metadata) of @filename, looked up relative
 * to @inode, with md_getattr_name().
 *
 * The reply must carry OBD_MD_FLEASIZE or OBD_MD_FLDIREA, otherwise the
 * result is -ENODATA; an EA whose magic is neither LOV_MAGIC_V1 nor
 * LOV_MAGIC_V3 yields -EPROTO.  On hosts where LOV_MAGIC differs from
 * its little-endian encoding the EA is byte-swapped in place (object
 * entries only for regular files).  The EA size from the reply body is
 * stored in *lmm_size.
 */
1408 int ll_lov_getstripe_ea_info(struct inode
*inode
, const char *filename
,
1409 struct lov_mds_md
**lmmp
, int *lmm_size
,
1410 struct ptlrpc_request
**request
)
1412 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1413 struct mdt_body
*body
;
1414 struct lov_mds_md
*lmm
= NULL
;
1415 struct ptlrpc_request
*req
= NULL
;
1416 struct md_op_data
*op_data
;
/* size the reply buffer for the largest EA the MDS may return */
1419 rc
= ll_get_max_mdsize(sbi
, &lmmsize
);
1423 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, filename
,
1424 strlen(filename
), lmmsize
,
1425 LUSTRE_OPC_ANY
, NULL
);
1426 if (IS_ERR(op_data
))
1427 return PTR_ERR(op_data
);
1429 op_data
->op_valid
= OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
;
1430 rc
= md_getattr_name(sbi
->ll_md_exp
, op_data
, &req
);
1431 ll_finish_md_op_data(op_data
);
1433 CDEBUG(D_INFO
, "md_getattr_name failed "
1434 "on %s: rc %d\n", filename
, rc
);
1438 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
1439 LASSERT(body
!= NULL
); /* checked by mdc_getattr_name */
/* from here on lmmsize is the actual EA size reported by the server */
1441 lmmsize
= body
->eadatasize
;
1443 if (!(body
->valid
& (OBD_MD_FLEASIZE
| OBD_MD_FLDIREA
)) ||
1445 GOTO(out
, rc
= -ENODATA
);
1448 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_MDT_MD
, lmmsize
);
1449 LASSERT(lmm
!= NULL
);
/* only V1 and V3 layouts are understood here */
1451 if ((lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V1
)) &&
1452 (lmm
->lmm_magic
!= cpu_to_le32(LOV_MAGIC_V3
))) {
1453 GOTO(out
, rc
= -EPROTO
);
1457 * This is coming from the MDS, so is probably in
1458 * little endian. We convert it to host endian before
1459 * passing it to userspace.
1461 if (LOV_MAGIC
!= cpu_to_le32(LOV_MAGIC
)) {
1464 stripe_count
= le16_to_cpu(lmm
->lmm_stripe_count
);
1465 if (le32_to_cpu(lmm
->lmm_pattern
) & LOV_PATTERN_F_RELEASED
)
1468 /* if function called for directory - we should
1469 * avoid swab not existent lsm objects */
1470 if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V1
)) {
1471 lustre_swab_lov_user_md_v1((struct lov_user_md_v1
*)lmm
);
1472 if (S_ISREG(body
->mode
))
1473 lustre_swab_lov_user_md_objects(
1474 ((struct lov_user_md_v1
*)lmm
)->lmm_objects
,
1476 } else if (lmm
->lmm_magic
== cpu_to_le32(LOV_MAGIC_V3
)) {
1477 lustre_swab_lov_user_md_v3((struct lov_user_md_v3
*)lmm
);
1478 if (S_ISREG(body
->mode
))
1479 lustre_swab_lov_user_md_objects(
1480 ((struct lov_user_md_v3
*)lmm
)->lmm_objects
,
1487 *lmm_size
= lmmsize
;
1492 static int ll_lov_setea(struct inode
*inode
, struct file
*file
,
1495 int flags
= MDS_OPEN_HAS_OBJS
| FMODE_WRITE
;
1496 struct lov_user_md
*lump
;
1497 int lum_size
= sizeof(struct lov_user_md
) +
1498 sizeof(struct lov_user_ost_data
);
1501 if (!capable(CFS_CAP_SYS_ADMIN
))
1504 OBD_ALLOC_LARGE(lump
, lum_size
);
1508 if (copy_from_user(lump
, (struct lov_user_md
*)arg
, lum_size
)) {
1509 OBD_FREE_LARGE(lump
, lum_size
);
1513 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lump
, lum_size
);
1515 OBD_FREE_LARGE(lump
, lum_size
);
1519 static int ll_lov_setstripe(struct inode
*inode
, struct file
*file
,
1522 struct lov_user_md_v3 lumv3
;
1523 struct lov_user_md_v1
*lumv1
= (struct lov_user_md_v1
*)&lumv3
;
1524 struct lov_user_md_v1
*lumv1p
= (struct lov_user_md_v1
*)arg
;
1525 struct lov_user_md_v3
*lumv3p
= (struct lov_user_md_v3
*)arg
;
1527 int flags
= FMODE_WRITE
;
1529 /* first try with v1 which is smaller than v3 */
1530 lum_size
= sizeof(struct lov_user_md_v1
);
1531 if (copy_from_user(lumv1
, lumv1p
, lum_size
))
1534 if (lumv1
->lmm_magic
== LOV_USER_MAGIC_V3
) {
1535 lum_size
= sizeof(struct lov_user_md_v3
);
1536 if (copy_from_user(&lumv3
, lumv3p
, lum_size
))
1540 rc
= ll_lov_setstripe_ea_info(inode
, file
, flags
, lumv1
, lum_size
);
1542 struct lov_stripe_md
*lsm
;
1545 put_user(0, &lumv1p
->lmm_stripe_count
);
1547 ll_layout_refresh(inode
, &gen
);
1548 lsm
= ccc_inode_lsm_get(inode
);
1549 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
),
1550 0, lsm
, (void *)arg
);
1551 ccc_inode_lsm_put(inode
, lsm
);
/*
 * LL_IOC_LOV_GETSTRIPE handler: takes a reference on the inode's stripe
 * descriptor, passes the ioctl down to the data export and drops the
 * reference again.
 */
1556 static int ll_lov_getstripe(struct inode
*inode
, unsigned long arg
)
1558 struct lov_stripe_md
*lsm
;
1561 lsm
= ccc_inode_lsm_get(inode
);
1563 rc
= obd_iocontrol(LL_IOC_LOV_GETSTRIPE
, ll_i2dtexp(inode
), 0,
1565 ccc_inode_lsm_put(inode
, lsm
);
/*
 * Take a group lock with group id @arg on behalf of the open file.
 *
 * The LL_FILE_GROUP_LOCKED flag in fd->fd_flags is checked under
 * lli_lock both before and after cl_get_grouplock(); the spinlock is
 * dropped around that call.  If the flag turns out to be set on the
 * second check, the freshly obtained lock is put back.  On success the
 * lock is recorded in fd->fd_grouplock and the flag is set.
 */
1569 int ll_get_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1571 struct ll_inode_info
*lli
= ll_i2info(inode
);
1572 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1573 struct ccc_grouplock grouplock
;
1576 if (ll_file_nolock(file
))
/* first check: refuse if this file descriptor already holds a group lock */
1579 spin_lock(&lli
->lli_lock
);
1580 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1581 CWARN("group lock already existed with gid %lu\n",
1582 fd
->fd_grouplock
.cg_gid
);
1583 spin_unlock(&lli
->lli_lock
);
1586 LASSERT(fd
->fd_grouplock
.cg_lock
== NULL
);
1587 spin_unlock(&lli
->lli_lock
);
/* the enqueue is done without lli_lock held */
1589 rc
= cl_get_grouplock(cl_i2info(inode
)->lli_clob
,
1590 arg
, (file
->f_flags
& O_NONBLOCK
), &grouplock
);
/* second check: someone may have set the flag while the spinlock was dropped */
1594 spin_lock(&lli
->lli_lock
);
1595 if (fd
->fd_flags
& LL_FILE_GROUP_LOCKED
) {
1596 spin_unlock(&lli
->lli_lock
);
1597 CERROR("another thread just won the race\n");
1598 cl_put_grouplock(&grouplock
);
1602 fd
->fd_flags
|= LL_FILE_GROUP_LOCKED
;
1603 fd
->fd_grouplock
= grouplock
;
1604 spin_unlock(&lli
->lli_lock
);
1606 CDEBUG(D_INFO
, "group lock %lu obtained\n", arg
);
/*
 * Release the group lock held through this open file.
 *
 * Under lli_lock the function verifies that LL_FILE_GROUP_LOCKED is set
 * and that the recorded group id equals @arg; it then copies the lock
 * out of fd->fd_grouplock, clears that field and the flag, and only
 * calls cl_put_grouplock() on the copy after the spinlock is dropped.
 */
1610 int ll_put_grouplock(struct inode
*inode
, struct file
*file
, unsigned long arg
)
1612 struct ll_inode_info
*lli
= ll_i2info(inode
);
1613 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
1614 struct ccc_grouplock grouplock
;
1616 spin_lock(&lli
->lli_lock
);
1617 if (!(fd
->fd_flags
& LL_FILE_GROUP_LOCKED
)) {
1618 spin_unlock(&lli
->lli_lock
);
1619 CWARN("no group lock held\n");
1622 LASSERT(fd
->fd_grouplock
.cg_lock
!= NULL
);
/* only the group id that took the lock may release it */
1624 if (fd
->fd_grouplock
.cg_gid
!= arg
) {
1625 CWARN("group lock %lu doesn't match current id %lu\n",
1626 arg
, fd
->fd_grouplock
.cg_gid
);
1627 spin_unlock(&lli
->lli_lock
);
/* detach the lock from the file data while still holding lli_lock */
1631 grouplock
= fd
->fd_grouplock
;
1632 memset(&fd
->fd_grouplock
, 0, sizeof(fd
->fd_grouplock
));
1633 fd
->fd_flags
&= ~LL_FILE_GROUP_LOCKED
;
1634 spin_unlock(&lli
->lli_lock
);
1636 cl_put_grouplock(&grouplock
);
1637 CDEBUG(D_INFO
, "group lock %lu released\n", arg
);
/**
 * Close inode open handle
 *
 * \param dentry [in]     dentry which contains the inode
 * \param it     [in,out] intent which contains open info and result
 *
 * \retval <0     failure
 */
/*
 * Close the MDS open handle that an open intent left behind.
 *
 * Nothing is closed for the filesystem root dentry or when the intent
 * does not have DISP_OPEN_OPEN set.  Otherwise a client handle is
 * allocated (-ENOMEM on the failure path), filled from the intent with
 * ll_och_fill() and closed through ll_close_inode_openhandle().  If the
 * intent still has DISP_ENQ_OPEN_REF set, that request reference is
 * dropped and the disposition bit cleared.
 */
1650 int ll_release_openhandle(struct dentry
*dentry
, struct lookup_intent
*it
)
1652 struct inode
*inode
= dentry
->d_inode
;
1653 struct obd_client_handle
*och
;
1658 /* Root ? Do nothing. */
1659 if (dentry
->d_inode
->i_sb
->s_root
== dentry
)
1662 /* No open handle to close? Move away */
1663 if (!it_disposition(it
, DISP_OPEN_OPEN
))
1666 LASSERT(it_open_error(DISP_OPEN_OPEN
, it
) == 0);
1668 OBD_ALLOC(och
, sizeof(*och
));
1670 GOTO(out
, rc
= -ENOMEM
);
1672 ll_och_fill(ll_i2sbi(inode
)->ll_md_exp
, it
, och
);
1674 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
,
1677 /* this one is in place of ll_file_open */
1678 if (it_disposition(it
, DISP_ENQ_OPEN_REF
)) {
1679 ptlrpc_req_finished(it
->d
.lustre
.it_data
);
1680 it_clear_disposition(it
, DISP_ENQ_OPEN_REF
);
/**
 * Get size for inode for which FIEMAP mapping is requested.
 * Make the FIEMAP get_info call and return the result.
 */
/*
 * Run a FIEMAP request against the data export.
 *
 * Flags outside LUSTRE_FIEMAP_FLAGS_COMPAT are reported back in
 * fiemap->fm_flags.  With FIEMAP_FLAG_SYNC the mapping's dirty pages are
 * written out first.  A file striped over more than one object is only
 * mapped when the caller passed FIEMAP_FLAG_DEVICE_ORDER (-EOPNOTSUPP
 * otherwise).  A zero-sized file reports zero mapped extents.  The
 * request itself is an obd_get_info() call keyed by KEY_FIEMAP, with the
 * value length taken from num_bytes.
 */
1689 int ll_do_fiemap(struct inode
*inode
, struct ll_user_fiemap
*fiemap
,
1692 struct obd_export
*exp
= ll_i2dtexp(inode
);
1693 struct lov_stripe_md
*lsm
= NULL
;
1694 struct ll_fiemap_info_key fm_key
= { .name
= KEY_FIEMAP
, };
1695 int vallen
= num_bytes
;
1698 /* Checks for fiemap flags */
1699 if (fiemap
->fm_flags
& ~LUSTRE_FIEMAP_FLAGS_COMPAT
) {
1700 fiemap
->fm_flags
&= ~LUSTRE_FIEMAP_FLAGS_COMPAT
;
1704 /* Check for FIEMAP_FLAG_SYNC */
1705 if (fiemap
->fm_flags
& FIEMAP_FLAG_SYNC
) {
1706 rc
= filemap_fdatawrite(inode
->i_mapping
);
1711 lsm
= ccc_inode_lsm_get(inode
);
1715 /* If the stripe_count > 1 and the application does not understand
1716 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1718 if (lsm
->lsm_stripe_count
> 1 &&
1719 !(fiemap
->fm_flags
& FIEMAP_FLAG_DEVICE_ORDER
))
1720 GOTO(out
, rc
= -EOPNOTSUPP
);
1722 fm_key
.oa
.o_oi
= lsm
->lsm_oi
;
1723 fm_key
.oa
.o_valid
= OBD_MD_FLID
| OBD_MD_FLGROUP
;
1725 obdo_from_inode(&fm_key
.oa
, inode
, OBD_MD_FLSIZE
);
1726 obdo_set_parent_fid(&fm_key
.oa
, &ll_i2info(inode
)->lli_fid
);
1727 /* If filesize is 0, then there would be no objects for mapping */
1728 if (fm_key
.oa
.o_size
== 0) {
1729 fiemap
->fm_mapped_extents
= 0;
/* the request header travels inside the lookup key */
1733 memcpy(&fm_key
.fiemap
, fiemap
, sizeof(*fiemap
));
1735 rc
= obd_get_info(NULL
, exp
, sizeof(fm_key
), &fm_key
, &vallen
,
1738 CERROR("obd_get_info failed: rc = %d\n", rc
);
1741 ccc_inode_lsm_put(inode
, lsm
);
1745 int ll_fid2path(struct inode
*inode
, void *arg
)
1747 struct obd_export
*exp
= ll_i2mdexp(inode
);
1748 struct getinfo_fid2path
*gfout
, *gfin
;
1751 if (!capable(CFS_CAP_DAC_READ_SEARCH
) &&
1752 !(ll_i2sbi(inode
)->ll_flags
& LL_SBI_USER_FID2PATH
))
1755 /* Need to get the buflen */
1756 OBD_ALLOC_PTR(gfin
);
1759 if (copy_from_user(gfin
, arg
, sizeof(*gfin
))) {
1764 outsize
= sizeof(*gfout
) + gfin
->gf_pathlen
;
1765 OBD_ALLOC(gfout
, outsize
);
1766 if (gfout
== NULL
) {
1770 memcpy(gfout
, gfin
, sizeof(*gfout
));
1773 /* Call mdc_iocontrol */
1774 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, exp
, outsize
, gfout
, NULL
);
1778 if (copy_to_user(arg
, gfout
, outsize
))
1782 OBD_FREE(gfout
, outsize
);
1786 static int ll_ioctl_fiemap(struct inode
*inode
, unsigned long arg
)
1788 struct ll_user_fiemap
*fiemap_s
;
1789 size_t num_bytes
, ret_bytes
;
1790 unsigned int extent_count
;
1793 /* Get the extent count so we can calculate the size of
1794 * required fiemap buffer */
1795 if (get_user(extent_count
,
1796 &((struct ll_user_fiemap __user
*)arg
)->fm_extent_count
))
1798 num_bytes
= sizeof(*fiemap_s
) + (extent_count
*
1799 sizeof(struct ll_fiemap_extent
));
1801 OBD_ALLOC_LARGE(fiemap_s
, num_bytes
);
1802 if (fiemap_s
== NULL
)
1805 /* get the fiemap value */
1806 if (copy_from_user(fiemap_s
, (struct ll_user_fiemap __user
*)arg
,
1808 GOTO(error
, rc
= -EFAULT
);
1810 /* If fm_extent_count is non-zero, read the first extent since
1811 * it is used to calculate end_offset and device from previous
1814 if (copy_from_user(&fiemap_s
->fm_extents
[0],
1815 (char __user
*)arg
+ sizeof(*fiemap_s
),
1816 sizeof(struct ll_fiemap_extent
)))
1817 GOTO(error
, rc
= -EFAULT
);
1820 rc
= ll_do_fiemap(inode
, fiemap_s
, num_bytes
);
1824 ret_bytes
= sizeof(struct ll_user_fiemap
);
1826 if (extent_count
!= 0)
1827 ret_bytes
+= (fiemap_s
->fm_mapped_extents
*
1828 sizeof(struct ll_fiemap_extent
));
1830 if (copy_to_user((void *)arg
, fiemap_s
, ret_bytes
))
1834 OBD_FREE_LARGE(fiemap_s
, num_bytes
);
/**
 * Read the data_version for inode.
 *
 * This value is computed using stripe object version on OST.
 * Version is computed using server side locking.
 *
 * @param extent_lock  Take extent lock. Not needed if a process is already
 *		       holding the OST object group locks.
 */
/*
 * Fetch the data version of the file's objects into *data_version.
 *
 * A layout without objects is handled on a separate path that logs
 * "No object for inode".  Otherwise an obdo is allocated (-ENOMEM on the
 * failure path), filled by ll_lsm_getattr() on the data export, and its
 * o_data_version is returned provided the reply has
 * OBD_MD_FLDATAVERSION set in o_valid.
 */
1847 int ll_data_version(struct inode
*inode
, __u64
*data_version
,
1850 struct lov_stripe_md
*lsm
= NULL
;
1851 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
1852 struct obdo
*obdo
= NULL
;
1855 /* If no stripe, we consider version is 0. */
1856 lsm
= ccc_inode_lsm_get(inode
);
1857 if (!lsm_has_objects(lsm
)) {
1859 CDEBUG(D_INODE
, "No object for inode\n");
1863 OBD_ALLOC_PTR(obdo
);
1865 GOTO(out
, rc
= -ENOMEM
);
1867 rc
= ll_lsm_getattr(lsm
, sbi
->ll_dt_exp
, NULL
, obdo
, 0, extent_lock
);
/* the server must have filled in the version for it to be usable */
1869 if (!(obdo
->o_valid
& OBD_MD_FLDATAVERSION
))
1872 *data_version
= obdo
->o_data_version
;
1877 ccc_inode_lsm_put(inode
, lsm
);
/**
 * Trigger an HSM release request for the provided inode.
 */
/*
 * Release the file's data as part of HSM.
 *
 * Opens a write lease with MDS_OPEN_RELEASE, reads the current data
 * version (with extent locking), merges the lock value block into the
 * inode with ll_merge_lvb() and then closes the lease handle through
 * ll_close_inode_openhandle().  On the exit path a handle that is
 * neither NULL nor an error pointer is closed with ll_lease_close().
 */
1884 int ll_hsm_release(struct inode
*inode
)
1886 struct cl_env_nest nest
;
1888 struct obd_client_handle
*och
= NULL
;
1889 __u64 data_version
= 0;
1893 CDEBUG(D_INODE
, "%s: Releasing file "DFID
".\n",
1894 ll_get_fsname(inode
->i_sb
, NULL
, 0),
1895 PFID(&ll_i2info(inode
)->lli_fid
));
1897 och
= ll_lease_open(inode
, NULL
, FMODE_WRITE
, MDS_OPEN_RELEASE
);
1899 GOTO(out
, rc
= PTR_ERR(och
));
1901 /* Grab latest data_version and [am]time values */
1902 rc
= ll_data_version(inode
, &data_version
, 1);
1906 env
= cl_env_nested_get(&nest
);
1908 GOTO(out
, rc
= PTR_ERR(env
));
1910 ll_merge_lvb(env
, inode
);
1911 cl_env_nested_put(&nest
, env
);
1913 /* Release the file.
1914 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1915 * we still need it to pack l_remote_handle to MDT. */
1916 rc
= ll_close_inode_openhandle(ll_i2sbi(inode
)->ll_md_exp
, inode
, och
,
1922 if (och
!= NULL
&& !IS_ERR(och
)) /* close the file */
1923 ll_lease_close(och
, inode
, NULL
);
/*
 * Scratch state used by ll_swap_layouts(): the two inodes being swapped,
 * the saved time attributes to re-apply to each of them afterwards, and
 * one flag per file saying whether its data version has to be checked.
 */
1928 struct ll_swap_stack
{
1929 struct iattr ia1
, ia2
;
1931 struct inode
*inode1
, *inode2
;
1932 bool check_dv1
, check_dv2
;
1935 static int ll_swap_layouts(struct file
*file1
, struct file
*file2
,
1936 struct lustre_swap_layouts
*lsl
)
1938 struct mdc_swap_layouts msl
;
1939 struct md_op_data
*op_data
;
1942 struct ll_swap_stack
*llss
= NULL
;
1945 OBD_ALLOC_PTR(llss
);
1949 llss
->inode1
= file1
->f_dentry
->d_inode
;
1950 llss
->inode2
= file2
->f_dentry
->d_inode
;
1952 if (!S_ISREG(llss
->inode2
->i_mode
))
1953 GOTO(free
, rc
= -EINVAL
);
1955 if (inode_permission(llss
->inode1
, MAY_WRITE
) ||
1956 inode_permission(llss
->inode2
, MAY_WRITE
))
1957 GOTO(free
, rc
= -EPERM
);
1959 if (llss
->inode2
->i_sb
!= llss
->inode1
->i_sb
)
1960 GOTO(free
, rc
= -EXDEV
);
1962 /* we use 2 bool because it is easier to swap than 2 bits */
1963 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV1
)
1964 llss
->check_dv1
= true;
1966 if (lsl
->sl_flags
& SWAP_LAYOUTS_CHECK_DV2
)
1967 llss
->check_dv2
= true;
1969 /* we cannot use lsl->sl_dvX directly because we may swap them */
1970 llss
->dv1
= lsl
->sl_dv1
;
1971 llss
->dv2
= lsl
->sl_dv2
;
1973 rc
= lu_fid_cmp(ll_inode2fid(llss
->inode1
), ll_inode2fid(llss
->inode2
));
1974 if (rc
== 0) /* same file, done! */
1977 if (rc
< 0) { /* sequentialize it */
1978 swap(llss
->inode1
, llss
->inode2
);
1980 swap(llss
->dv1
, llss
->dv2
);
1981 swap(llss
->check_dv1
, llss
->check_dv2
);
1985 if (gid
!= 0) { /* application asks to flush dirty cache */
1986 rc
= ll_get_grouplock(llss
->inode1
, file1
, gid
);
1990 rc
= ll_get_grouplock(llss
->inode2
, file2
, gid
);
1992 ll_put_grouplock(llss
->inode1
, file1
, gid
);
1997 /* to be able to restore mtime and atime after swap
1998 * we need to first save them */
2000 (SWAP_LAYOUTS_KEEP_MTIME
| SWAP_LAYOUTS_KEEP_ATIME
)) {
2001 llss
->ia1
.ia_mtime
= llss
->inode1
->i_mtime
;
2002 llss
->ia1
.ia_atime
= llss
->inode1
->i_atime
;
2003 llss
->ia1
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2004 llss
->ia2
.ia_mtime
= llss
->inode2
->i_mtime
;
2005 llss
->ia2
.ia_atime
= llss
->inode2
->i_atime
;
2006 llss
->ia2
.ia_valid
= ATTR_MTIME
| ATTR_ATIME
;
2009 /* ultimate check, before swaping the layouts we check if
2010 * dataversion has changed (if requested) */
2011 if (llss
->check_dv1
) {
2012 rc
= ll_data_version(llss
->inode1
, &dv
, 0);
2015 if (dv
!= llss
->dv1
)
2016 GOTO(putgl
, rc
= -EAGAIN
);
2019 if (llss
->check_dv2
) {
2020 rc
= ll_data_version(llss
->inode2
, &dv
, 0);
2023 if (dv
!= llss
->dv2
)
2024 GOTO(putgl
, rc
= -EAGAIN
);
2027 /* struct md_op_data is used to send the swap args to the mdt
2028 * only flags is missing, so we use struct mdc_swap_layouts
2029 * through the md_op_data->op_data */
2030 /* flags from user space have to be converted before they are send to
2031 * server, no flag is sent today, they are only used on the client */
2034 op_data
= ll_prep_md_op_data(NULL
, llss
->inode1
, llss
->inode2
, NULL
, 0,
2035 0, LUSTRE_OPC_ANY
, &msl
);
2036 if (IS_ERR(op_data
))
2037 GOTO(free
, rc
= PTR_ERR(op_data
));
2039 rc
= obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS
, ll_i2mdexp(llss
->inode1
),
2040 sizeof(*op_data
), op_data
, NULL
);
2041 ll_finish_md_op_data(op_data
);
2045 ll_put_grouplock(llss
->inode2
, file2
, gid
);
2046 ll_put_grouplock(llss
->inode1
, file1
, gid
);
2049 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2053 /* clear useless flags */
2054 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_MTIME
)) {
2055 llss
->ia1
.ia_valid
&= ~ATTR_MTIME
;
2056 llss
->ia2
.ia_valid
&= ~ATTR_MTIME
;
2059 if (!(lsl
->sl_flags
& SWAP_LAYOUTS_KEEP_ATIME
)) {
2060 llss
->ia1
.ia_valid
&= ~ATTR_ATIME
;
2061 llss
->ia2
.ia_valid
&= ~ATTR_ATIME
;
2064 /* update time if requested */
2066 if (llss
->ia2
.ia_valid
!= 0) {
2067 mutex_lock(&llss
->inode1
->i_mutex
);
2068 rc
= ll_setattr(file1
->f_dentry
, &llss
->ia2
);
2069 mutex_unlock(&llss
->inode1
->i_mutex
);
2072 if (llss
->ia1
.ia_valid
!= 0) {
2075 mutex_lock(&llss
->inode2
->i_mutex
);
2076 rc1
= ll_setattr(file2
->f_dentry
, &llss
->ia1
);
2077 mutex_unlock(&llss
->inode2
->i_mutex
);
2089 static int ll_hsm_state_set(struct inode
*inode
, struct hsm_state_set
*hss
)
2091 struct md_op_data
*op_data
;
2094 /* Non-root users are forbidden to set or clear flags which are
2095 * NOT defined in HSM_USER_MASK. */
2096 if (((hss
->hss_setmask
| hss
->hss_clearmask
) & ~HSM_USER_MASK
) &&
2097 !capable(CFS_CAP_SYS_ADMIN
))
2100 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2101 LUSTRE_OPC_ANY
, hss
);
2102 if (IS_ERR(op_data
))
2103 return PTR_ERR(op_data
);
2105 rc
= obd_iocontrol(LL_IOC_HSM_STATE_SET
, ll_i2mdexp(inode
),
2106 sizeof(*op_data
), op_data
, NULL
);
2108 ll_finish_md_op_data(op_data
);
/*
 * Import an archived file described by @hui.
 *
 * The HSM state of the inode is first set to
 * HS_ARCHIVED | HS_EXISTS | HS_RELEASED together with the archive id
 * from @hui.  Then an iattr is built from @hui (permission bits of
 * hui_mode forced to a regular file, uid/gid mapped through
 * init_user_ns, size, mtime and atime) and applied with
 * ll_setattr_raw().  Both temporary allocations have an -ENOMEM failure
 * path.
 */
2113 static int ll_hsm_import(struct inode
*inode
, struct file
*file
,
2114 struct hsm_user_import
*hui
)
2116 struct hsm_state_set
*hss
= NULL
;
2117 struct iattr
*attr
= NULL
;
2121 if (!S_ISREG(inode
->i_mode
))
2127 GOTO(out
, rc
= -ENOMEM
);
/* step 1: mark the file as archived, existing and released */
2129 hss
->hss_valid
= HSS_SETMASK
| HSS_ARCHIVE_ID
;
2130 hss
->hss_archive_id
= hui
->hui_archive_id
;
2131 hss
->hss_setmask
= HS_ARCHIVED
| HS_EXISTS
| HS_RELEASED
;
2132 rc
= ll_hsm_state_set(inode
, hss
);
/* step 2: apply the attributes supplied by the caller */
2136 OBD_ALLOC_PTR(attr
);
2138 GOTO(out
, rc
= -ENOMEM
);
2140 attr
->ia_mode
= hui
->hui_mode
& (S_IRWXU
| S_IRWXG
| S_IRWXO
);
2141 attr
->ia_mode
|= S_IFREG
;
2142 attr
->ia_uid
= make_kuid(&init_user_ns
, hui
->hui_uid
);
2143 attr
->ia_gid
= make_kgid(&init_user_ns
, hui
->hui_gid
);
2144 attr
->ia_size
= hui
->hui_size
;
2145 attr
->ia_mtime
.tv_sec
= hui
->hui_mtime
;
2146 attr
->ia_mtime
.tv_nsec
= hui
->hui_mtime_ns
;
2147 attr
->ia_atime
.tv_sec
= hui
->hui_atime
;
2148 attr
->ia_atime
.tv_nsec
= hui
->hui_atime_ns
;
2150 attr
->ia_valid
= ATTR_SIZE
| ATTR_MODE
| ATTR_FORCE
|
2151 ATTR_UID
| ATTR_GID
|
2152 ATTR_MTIME
| ATTR_MTIME_SET
|
2153 ATTR_ATIME
| ATTR_ATIME_SET
;
2155 rc
= ll_setattr_raw(file
->f_dentry
, attr
, true);
/*
 * ioctl entry point for regular files.
 *
 * Every call is logged and counted under LPROC_LL_IOCTL.  Commands whose
 * ioctl type is 'T' or 't' (tty ioctls) are singled out before the
 * dispatch.  The remaining commands are dispatched on cmd; most cases
 * delegate to a dedicated helper (striping, layout swap, object
 * re-creation, FIEMAP, group locks, FID/path translation, data version,
 * HSM state/action/import, leases).  Commands that match no case are
 * offered to ll_iocontrol_call() and otherwise forwarded to the data
 * export with obd_iocontrol().
 *
 * arg is either a plain value or a user-space pointer, depending on cmd.
 */
2169 long ll_file_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
2171 struct inode
*inode
= file
->f_dentry
->d_inode
;
2172 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2175 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode
->i_ino
,
2176 inode
->i_generation
, inode
, cmd
);
2177 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_IOCTL
, 1);
2179 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2180 if (_IOC_TYPE(cmd
) == 'T' || _IOC_TYPE(cmd
) == 't') /* tty ioctls */
2184 case LL_IOC_GETFLAGS
:
2185 /* Get the current value of the file flags */
2186 return put_user(fd
->fd_flags
, (int *)arg
);
2187 case LL_IOC_SETFLAGS
:
2188 case LL_IOC_CLRFLAGS
:
2189 /* Set or clear specific file flags */
2190 /* XXX This probably needs checks to ensure the flags are
2191 * not abused, and to handle any flag side effects.
2193 if (get_user(flags
, (int *) arg
))
2196 if (cmd
== LL_IOC_SETFLAGS
) {
2197 if ((flags
& LL_FILE_IGNORE_LOCK
) &&
2198 !(file
->f_flags
& O_DIRECT
)) {
2199 CERROR("%s: unable to disable locking on "
2200 "non-O_DIRECT file\n", current
->comm
);
2204 fd
->fd_flags
|= flags
;
2206 fd
->fd_flags
&= ~flags
;
2209 case LL_IOC_LOV_SETSTRIPE
:
2210 return ll_lov_setstripe(inode
, file
, arg
);
2211 case LL_IOC_LOV_SETEA
:
2212 return ll_lov_setea(inode
, file
, arg
);
2213 case LL_IOC_LOV_SWAP_LAYOUTS
: {
2215 struct lustre_swap_layouts lsl
;
2217 if (copy_from_user(&lsl
, (char *)arg
,
2218 sizeof(struct lustre_swap_layouts
)))
2221 if ((file
->f_flags
& O_ACCMODE
) == 0) /* O_RDONLY */
/* the second file of the swap is named by descriptor in the request */
2224 file2
= fget(lsl
.sl_fd
);
2229 if ((file2
->f_flags
& O_ACCMODE
) != 0) /* O_WRONLY or O_RDWR */
2230 rc
= ll_swap_layouts(file
, file2
, &lsl
);
2234 case LL_IOC_LOV_GETSTRIPE
:
2235 return ll_lov_getstripe(inode
, arg
);
2236 case LL_IOC_RECREATE_OBJ
:
2237 return ll_lov_recreate_obj(inode
, arg
);
2238 case LL_IOC_RECREATE_FID
:
2239 return ll_lov_recreate_fid(inode
, arg
);
2240 case FSFILT_IOC_FIEMAP
:
2241 return ll_ioctl_fiemap(inode
, arg
);
2242 case FSFILT_IOC_GETFLAGS
:
2243 case FSFILT_IOC_SETFLAGS
:
2244 return ll_iocontrol(inode
, file
, cmd
, arg
);
2245 case FSFILT_IOC_GETVERSION_OLD
:
2246 case FSFILT_IOC_GETVERSION
:
2247 return put_user(inode
->i_generation
, (int *)arg
);
2248 case LL_IOC_GROUP_LOCK
:
2249 return ll_get_grouplock(inode
, file
, arg
);
2250 case LL_IOC_GROUP_UNLOCK
:
2251 return ll_put_grouplock(inode
, file
, arg
);
2252 case IOC_OBD_STATFS
:
2253 return ll_obd_statfs(inode
, (void *)arg
);
2255 /* We need to special case any other ioctls we want to handle,
2256 * to send them to the MDS/OST as appropriate and to properly
2257 * network encode the arg field.
2258 case FSFILT_IOC_SETVERSION_OLD:
2259 case FSFILT_IOC_SETVERSION:
2261 case LL_IOC_FLUSHCTX
:
2262 return ll_flush_ctx(inode
);
2263 case LL_IOC_PATH2FID
: {
2264 if (copy_to_user((void *)arg
, ll_inode2fid(inode
),
2265 sizeof(struct lu_fid
)))
2270 case OBD_IOC_FID2PATH
:
2271 return ll_fid2path(inode
, (void *)arg
);
2272 case LL_IOC_DATA_VERSION
: {
2273 struct ioc_data_version idv
;
2276 if (copy_from_user(&idv
, (char *)arg
, sizeof(idv
)))
2279 rc
= ll_data_version(inode
, &idv
.idv_version
,
2280 !(idv
.idv_flags
& LL_DV_NOFLUSH
));
2282 if (rc
== 0 && copy_to_user((char *) arg
, &idv
, sizeof(idv
)))
2288 case LL_IOC_GET_MDTIDX
: {
2291 mdtidx
= ll_get_mdt_idx(inode
);
2295 if (put_user((int)mdtidx
, (int*)arg
))
2300 case OBD_IOC_GETDTNAME
:
2301 case OBD_IOC_GETMDNAME
:
2302 return ll_get_obd_name(inode
, cmd
, arg
);
2303 case LL_IOC_HSM_STATE_GET
: {
2304 struct md_op_data
*op_data
;
2305 struct hsm_user_state
*hus
;
2312 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2313 LUSTRE_OPC_ANY
, hus
);
2314 if (IS_ERR(op_data
)) {
2316 return PTR_ERR(op_data
);
2319 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2322 if (copy_to_user((void *)arg
, hus
, sizeof(*hus
)))
2325 ll_finish_md_op_data(op_data
);
2329 case LL_IOC_HSM_STATE_SET
: {
2330 struct hsm_state_set
*hss
;
2337 if (copy_from_user(hss
, (char *)arg
, sizeof(*hss
))) {
2342 rc
= ll_hsm_state_set(inode
, hss
);
2347 case LL_IOC_HSM_ACTION
: {
2348 struct md_op_data
*op_data
;
2349 struct hsm_current_action
*hca
;
2356 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2357 LUSTRE_OPC_ANY
, hca
);
2358 if (IS_ERR(op_data
)) {
2360 return PTR_ERR(op_data
);
2363 rc
= obd_iocontrol(cmd
, ll_i2mdexp(inode
), sizeof(*op_data
),
2366 if (copy_to_user((char *)arg
, hca
, sizeof(*hca
)))
2369 ll_finish_md_op_data(op_data
);
2373 case LL_IOC_SET_LEASE
: {
2374 struct ll_inode_info
*lli
= ll_i2info(inode
);
2375 struct obd_client_handle
*och
= NULL
;
2381 if (!(file
->f_mode
& FMODE_WRITE
))
2386 if (!(file
->f_mode
& FMODE_READ
))
2391 mutex_lock(&lli
->lli_och_mutex
);
2392 if (fd
->fd_lease_och
!= NULL
) {
2393 och
= fd
->fd_lease_och
;
2394 fd
->fd_lease_och
= NULL
;
2396 mutex_unlock(&lli
->lli_och_mutex
);
2399 mode
= och
->och_flags
&
2400 (FMODE_READ
|FMODE_WRITE
);
2401 rc
= ll_lease_close(och
, inode
, &lease_broken
);
2402 if (rc
== 0 && lease_broken
)
2408 /* return the type of lease or error */
2409 return rc
< 0 ? rc
: (int)mode
;
2414 CDEBUG(D_INODE
, "Set lease with mode %d\n", mode
);
2416 /* apply for lease */
2417 och
= ll_lease_open(inode
, file
, mode
, 0);
2419 return PTR_ERR(och
);
/* publish the new lease handle only if the file has none yet */
2422 mutex_lock(&lli
->lli_och_mutex
);
2423 if (fd
->fd_lease_och
== NULL
) {
2424 fd
->fd_lease_och
= och
;
2427 mutex_unlock(&lli
->lli_och_mutex
);
2429 /* impossible now that only excl is supported for now */
2430 ll_lease_close(och
, inode
, &lease_broken
);
2435 case LL_IOC_GET_LEASE
: {
2436 struct ll_inode_info
*lli
= ll_i2info(inode
);
2437 struct ldlm_lock
*lock
= NULL
;
2440 mutex_lock(&lli
->lli_och_mutex
);
2441 if (fd
->fd_lease_och
!= NULL
) {
2442 struct obd_client_handle
*och
= fd
->fd_lease_och
;
2444 lock
= ldlm_handle2lock(&och
->och_lease_handle
);
2446 lock_res_and_lock(lock
);
/* the lease mode is reported only while the lock is not being cancelled */
2447 if (!ldlm_is_cancel(lock
))
2448 rc
= och
->och_flags
&
2449 (FMODE_READ
| FMODE_WRITE
);
2450 unlock_res_and_lock(lock
);
2451 ldlm_lock_put(lock
);
2454 mutex_unlock(&lli
->lli_och_mutex
);
2457 case LL_IOC_HSM_IMPORT
: {
2458 struct hsm_user_import
*hui
;
2464 if (copy_from_user(hui
, (void *)arg
, sizeof(*hui
))) {
2469 rc
= ll_hsm_import(inode
, file
, hui
);
/* unknown commands: try the registered handlers, then the data export */
2478 ll_iocontrol_call(inode
, file
, cmd
, arg
, &err
))
2481 return obd_iocontrol(cmd
, ll_i2dtexp(inode
), 0, NULL
,
/*
 * llseek implementation.
 *
 * The target offset is pre-computed only for the trace message.  For
 * SEEK_END, SEEK_HOLE and SEEK_DATA the file size is refreshed with
 * ll_glimpse_size() and then read from the inode as the end-of-file
 * value; for other origins eof stays 0.  The actual positioning is done
 * by generic_file_llseek_size(), bounded by ll_file_maxbytes().
 */
2488 loff_t
ll_file_seek(struct file
*file
, loff_t offset
, int origin
)
2490 struct inode
*inode
= file
->f_dentry
->d_inode
;
2491 loff_t retval
, eof
= 0;
2493 retval
= offset
+ ((origin
== SEEK_END
) ? i_size_read(inode
) :
2494 (origin
== SEEK_CUR
) ? file
->f_pos
: 0);
2495 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2496 inode
->i_ino
, inode
->i_generation
, inode
, retval
, retval
,
2498 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_LLSEEK
, 1);
/* these origins depend on the current size, so refresh it first */
2500 if (origin
== SEEK_END
|| origin
== SEEK_HOLE
|| origin
== SEEK_DATA
) {
2501 retval
= ll_glimpse_size(inode
);
2504 eof
= i_size_read(inode
);
2507 retval
= generic_file_llseek_size(file
, offset
, origin
,
2508 ll_file_maxbytes(inode
), eof
);
/*
 * flush() file operation.
 *
 * Collects asynchronous write-back errors: the value recorded in
 * lli->lli_async_rc is read and reset, and the error kept in the cl
 * object is read and cleared with lov_read_and_clear_async_rc().  The
 * fd_write_failed flag is consulted so that a failure the application
 * has already been told about is not reported a second time.  Any
 * remaining error is reported as -EIO.  Must not be called on a
 * directory.
 */
2512 int ll_flush(struct file
*file
, fl_owner_t id
)
2514 struct inode
*inode
= file
->f_dentry
->d_inode
;
2515 struct ll_inode_info
*lli
= ll_i2info(inode
);
2516 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2519 LASSERT(!S_ISDIR(inode
->i_mode
));
2521 /* catch async errors that were recorded back when async writeback
2522 * failed for pages in this mapping. */
2523 rc
= lli
->lli_async_rc
;
2524 lli
->lli_async_rc
= 0;
2525 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
2529 /* The application has been told write failure already.
2530 * Do not report failure again. */
2531 if (fd
->fd_write_failed
)
2533 return rc
? -EIO
: 0;
2537 * Called to make sure a portion of file has been written out.
2538 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2540 * Return how many pages have been written.
/*
 * Run a CIT_FSYNC cl_io over a byte range of \a inode.
 *
 * \param inode          inode whose cl_object (lli_clob) is synced
 * \param start          first byte of the range (stored in fi_start)
 * \param end            end of the range
 * \param mode           one of CL_FSYNC_NONE, CL_FSYNC_LOCAL,
 *                       CL_FSYNC_DISCARD or CL_FSYNC_ALL; other values
 *                       are rejected by the check at the top
 * \param ignore_layout  copied into io->ci_ignore_layout
 *
 * A nested cl environment and an OSS write capability are obtained, the
 * fsync parameters are filled into io->u.ci_fsync, and the io is run
 * with cl_io_init()/cl_io_loop().  The result is taken from
 * io->ci_result or from fio->fi_nr_written (the header comment says the
 * function returns how many pages have been written).  The io and the
 * environment are released before returning.
 */
2542 int cl_sync_file_range(struct inode
*inode
, loff_t start
, loff_t end
,
2543 enum cl_fsync_mode mode
, int ignore_layout
)
2545 struct cl_env_nest nest
;
2548 struct obd_capa
*capa
= NULL
;
2549 struct cl_fsync_io
*fio
;
/* reject unknown sync modes up front */
2552 if (mode
!= CL_FSYNC_NONE
&& mode
!= CL_FSYNC_LOCAL
&&
2553 mode
!= CL_FSYNC_DISCARD
&& mode
!= CL_FSYNC_ALL
)
2556 env
= cl_env_nested_get(&nest
);
2558 return PTR_ERR(env
);
2560 capa
= ll_osscapa_get(inode
, CAPA_OPC_OSS_WRITE
);
2562 io
= ccc_env_thread_io(env
);
2563 io
->ci_obj
= cl_i2info(inode
)->lli_clob
;
2564 io
->ci_ignore_layout
= ignore_layout
;
2566 /* initialize parameters for sync */
2567 fio
= &io
->u
.ci_fsync
;
2568 fio
->fi_capa
= capa
;
2569 fio
->fi_start
= start
;
2571 fio
->fi_fid
= ll_inode2fid(inode
);
2572 fio
->fi_mode
= mode
;
2573 fio
->fi_nr_written
= 0;
2575 if (cl_io_init(env
, io
, CIT_FSYNC
, io
->ci_obj
) == 0)
2576 result
= cl_io_loop(env
, io
);
2578 result
= io
->ci_result
;
2580 result
= fio
->fi_nr_written
;
2581 cl_io_fini(env
, io
);
2582 cl_env_nested_put(&nest
, env
);
2590 * When dentry is provided (the 'else' case), *file->f_dentry may be
2591 * null and dentry must be used directly rather than pulled from
2592 * *file->f_dentry as is done otherwise.
/*
 * fsync handler.
 *
 * \param file      file to sync
 * \param start     start of the byte range passed to
 *                  filemap_write_and_wait_range()
 * \param end       end of that byte range
 * \param datasync  when set (and the inode is a regular file) the data
 *                  is also synced through cl_sync_file_range()
 *
 * Steps visible here, in order:
 *  - write back and wait for the page-cache range;
 *  - under inode->i_mutex, for non-directories, collect and clear the
 *    async write-back errors (lli_async_rc and the cl_object's rc);
 *  - send md_sync() to the metadata export and release the reply;
 *  - for datasync on regular files, call cl_sync_file_range() over
 *    [0, OBD_OBJECT_EOF] and record the outcome in fd->fd_write_failed
 *    (true on failure, false otherwise).
 */
2595 int ll_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
2597 struct dentry
*dentry
= file
->f_dentry
;
2598 struct inode
*inode
= dentry
->d_inode
;
2599 struct ll_inode_info
*lli
= ll_i2info(inode
);
2600 struct ptlrpc_request
*req
;
2601 struct obd_capa
*oc
;
2604 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p)\n", inode
->i_ino
,
2605 inode
->i_generation
, inode
);
2606 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FSYNC
, 1);
2608 rc
= filemap_write_and_wait_range(inode
->i_mapping
, start
, end
);
2609 mutex_lock(&inode
->i_mutex
);
2611 /* catch async errors that were recorded back when async writeback
2612 * failed for pages in this mapping. */
2613 if (!S_ISDIR(inode
->i_mode
)) {
2614 err
= lli
->lli_async_rc
;
2615 lli
->lli_async_rc
= 0;
2618 err
= lov_read_and_clear_async_rc(lli
->lli_clob
);
/* sync the metadata side */
2623 oc
= ll_mdscapa_get(inode
);
2624 err
= md_sync(ll_i2sbi(inode
)->ll_md_exp
, ll_inode2fid(inode
), oc
,
2630 ptlrpc_req_finished(req
);
2632 if (datasync
&& S_ISREG(inode
->i_mode
)) {
2633 struct ll_file_data
*fd
= LUSTRE_FPRIVATE(file
);
2635 err
= cl_sync_file_range(inode
, 0, OBD_OBJECT_EOF
,
2637 if (rc
== 0 && err
< 0)
/* remembered so that ll_flush() does not report the error again */
2640 fd
->fd_write_failed
= true;
2642 fd
->fd_write_failed
= false;
2645 mutex_unlock(&inode
->i_mutex
);
/*
 * flock/lock handler used by ll_file_operations_flock: takes or
 * releases a cluster-wide LDLM_FLOCK lock and mirrors it in the local
 * VFS lock tables.
 *
 * \param file       file the lock applies to
 * \param cmd        fcntl lock command (F_SETLK and F_SETLKW are
 *                   asserted for FL_FLOCK requests; unknown commands
 *                   are logged with CERROR)
 * \param file_lock  VFS lock description
 *
 * Outline of the visible code:
 *  - build the flock policy: FL_FLOCK locks cover the whole file and are
 *    owned by fl_file; FL_POSIX locks use fl_owner and
 *    [fl_start, fl_end];
 *  - translate fl_type into an LDLM mode (LCK_PR, LCK_PW, or LCK_NL,
 *    which is how an unlock is expressed) and \a cmd into enqueue flags
 *    (LDLM_FL_BLOCK_NOWAIT, LDLM_FL_TEST_LOCK);
 *  - enqueue on the metadata export with md_enqueue();
 *  - on success, or for an unlock, update the local lock tables with
 *    flock_lock_file_wait() / posix_lock_file_wait();
 *  - if the local update failed for a non-unlock request, enqueue
 *    LCK_NL to drop the lock that was just obtained.
 */
2649 int ll_file_flock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2651 struct inode
*inode
= file
->f_dentry
->d_inode
;
2652 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2653 struct ldlm_enqueue_info einfo
= {
2654 .ei_type
= LDLM_FLOCK
,
2655 .ei_cb_cp
= ldlm_flock_completion_ast
,
2656 .ei_cbdata
= file_lock
,
2658 struct md_op_data
*op_data
;
2659 struct lustre_handle lockh
= {0};
2660 ldlm_policy_data_t flock
= {{0}};
2665 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu file_lock=%p\n",
2666 inode
->i_ino
, file_lock
);
2668 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_FLOCK
, 1);
2670 if (file_lock
->fl_flags
& FL_FLOCK
) {
2671 LASSERT((cmd
== F_SETLKW
) || (cmd
== F_SETLK
));
2672 /* flocks are whole-file locks */
2673 flock
.l_flock
.end
= OFFSET_MAX
;
2674 /* For flocks owner is determined by the local file descriptor */
2675 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_file
;
2676 } else if (file_lock
->fl_flags
& FL_POSIX
) {
2677 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_owner
;
2678 flock
.l_flock
.start
= file_lock
->fl_start
;
2679 flock
.l_flock
.end
= file_lock
->fl_end
;
2683 flock
.l_flock
.pid
= file_lock
->fl_pid
;
2685 /* Somewhat ugly workaround for svc lockd.
2686 * lockd installs custom fl_lmops->lm_compare_owner that checks
2687 * for the fl_owner to be the same (which it always is on local node
2688 * I guess between lockd processes) and then compares pid.
2689 * As such we assign pid to the owner field to make it all work,
2690 * conflict with normal locks is unlikely since pid space and
2691 * pointer space for current->files are not intersecting */
2692 if (file_lock
->fl_lmops
&& file_lock
->fl_lmops
->lm_compare_owner
)
2693 flock
.l_flock
.owner
= (unsigned long)file_lock
->fl_pid
;
/* map the VFS lock type onto an LDLM lock mode */
2695 switch (file_lock
->fl_type
) {
2697 einfo
.ei_mode
= LCK_PR
;
2700 /* An unlock request may or may not have any relation to
2701 * existing locks so we may not be able to pass a lock handle
2702 * via a normal ldlm_lock_cancel() request. The request may even
2703 * unlock a byte range in the middle of an existing lock. In
2704 * order to process an unlock request we need all of the same
2705 * information that is given with a normal read or write record
2706 * lock request. To avoid creating another ldlm unlock (cancel)
2707 * message we'll treat a LCK_NL flock request as an unlock. */
2708 einfo
.ei_mode
= LCK_NL
;
2711 einfo
.ei_mode
= LCK_PW
;
2714 CDEBUG(D_INFO
, "Unknown fcntl lock type: %d\n",
2715 file_lock
->fl_type
);
2730 flags
= LDLM_FL_BLOCK_NOWAIT
;
2736 flags
= LDLM_FL_TEST_LOCK
;
2737 /* Save the old mode so that if the mode in the lock changes we
2738 * can decrement the appropriate reader or writer refcount. */
2739 file_lock
->fl_type
= einfo
.ei_mode
;
2742 CERROR("unknown fcntl lock command: %d\n", cmd
);
2746 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
, 0, 0,
2747 LUSTRE_OPC_ANY
, NULL
);
2748 if (IS_ERR(op_data
))
2749 return PTR_ERR(op_data
);
2751 CDEBUG(D_DLMTRACE
, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2752 "start="LPU64
", end="LPU64
"\n", inode
->i_ino
, flock
.l_flock
.pid
,
2753 flags
, einfo
.ei_mode
, flock
.l_flock
.start
, flock
.l_flock
.end
);
2755 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2756 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
/* mirror the result in the local VFS lock tables */
2758 if ((file_lock
->fl_flags
& FL_FLOCK
) &&
2759 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
))
2760 rc2
= flock_lock_file_wait(file
, file_lock
);
2761 if ((file_lock
->fl_flags
& FL_POSIX
) &&
2762 (rc
== 0 || file_lock
->fl_type
== F_UNLCK
) &&
2763 !(flags
& LDLM_FL_TEST_LOCK
))
2764 rc2
= posix_lock_file_wait(file
, file_lock
);
/* local bookkeeping failed: give the cluster lock back (LCK_NL == unlock) */
2766 if (rc2
&& file_lock
->fl_type
!= F_UNLCK
) {
2767 einfo
.ei_mode
= LCK_NL
;
2768 md_enqueue(sbi
->ll_md_exp
, &einfo
, NULL
,
2769 op_data
, &lockh
, &flock
, 0, NULL
/* req */, flags
);
2773 ll_finish_md_op_data(op_data
);
/*
 * flock/lock handler installed by ll_file_operations_noflock.  The
 * comment on that table says it is used for "-o noflock" so that flock
 * calls return ENOSYS.
 * NOTE(review): the function body is not visible here; confirm the
 * return value against the full source.
 */
2778 int ll_file_noflock(struct file
*file
, int cmd
, struct file_lock
*file_lock
)
2784 * test if some locks matching bits and l_req_mode are acquired
2785 * - bits can be in different locks
2786 * - if found clear the common lock bits in *bits
2787 * - the bits not found, are kept in *bits
2789 * \param bits [IN/OUT] searched lock bits; bits that are found are cleared
2790 * \param l_req_mode [IN] searched lock mode
2791 * \retval boolean, true iff all bits are found
/*
 * Check which of the inodebits in *bits are covered by locks matched
 * on the metadata export.
 *
 * \param inode       inode whose FID (lli_fid) names the resource
 * \param bits        in: bits to look for; out: bits still not found
 * \param l_req_mode  lock mode to match; LCK_MINMODE means any of
 *                    CR, CW, PR or PW
 *
 * Each bit position up to MDS_INODELOCK_MAXSHIFT is tried separately
 * with md_lock_match() using LDLM_FL_TEST_LOCK among its flags.  When a
 * lock matches, the bits held by that lock and the bit just searched
 * are cleared from *bits.  The loop stops early once *bits is zero.
 */
2793 int ll_have_md_lock(struct inode
*inode
, __u64
*bits
, ldlm_mode_t l_req_mode
)
2795 struct lustre_handle lockh
;
2796 ldlm_policy_data_t policy
;
2797 ldlm_mode_t mode
= (l_req_mode
== LCK_MINMODE
) ?
2798 (LCK_CR
|LCK_CW
|LCK_PR
|LCK_PW
) : l_req_mode
;
2806 fid
= &ll_i2info(inode
)->lli_fid
;
2807 CDEBUG(D_INFO
, "trying to match res "DFID
" mode %s\n", PFID(fid
),
2808 ldlm_lockname
[mode
]);
2810 flags
= LDLM_FL_BLOCK_GRANTED
| LDLM_FL_CBPENDING
| LDLM_FL_TEST_LOCK
;
/* probe one inodebit at a time; a single lock may cover several bits */
2811 for (i
= 0; i
<= MDS_INODELOCK_MAXSHIFT
&& *bits
!= 0; i
++) {
2812 policy
.l_inodebits
.bits
= *bits
& (1 << i
);
2813 if (policy
.l_inodebits
.bits
== 0)
2816 if (md_lock_match(ll_i2mdexp(inode
), flags
, fid
, LDLM_IBITS
,
2817 &policy
, mode
, &lockh
)) {
2818 struct ldlm_lock
*lock
;
2820 lock
= ldlm_handle2lock(&lockh
);
2823 ~(lock
->l_policy_data
.l_inodebits
.bits
);
2824 LDLM_LOCK_PUT(lock
);
2826 *bits
&= ~policy
.l_inodebits
.bits
;
/*
 * Try to match an inodebits lock on \a inode on the metadata export.
 *
 * \param inode  inode whose FID (lli_fid) names the resource
 * \param bits   inodebits the lock must cover
 * \param lockh  handle passed to md_lock_match() for the matched lock
 * \param flags  extra match flags, OR-ed with LDLM_FL_BLOCK_GRANTED
 *
 * The result of md_lock_match() is stored in rc.  ll_layout_refresh()
 * treats a non-zero return as "hit cached lock" and later hands the
 * returned mode to ldlm_lock_decref().
 */
2833 ldlm_mode_t
ll_take_md_lock(struct inode
*inode
, __u64 bits
,
2834 struct lustre_handle
*lockh
, __u64 flags
,
2837 ldlm_policy_data_t policy
= { .l_inodebits
= {bits
}};
2841 fid
= &ll_i2info(inode
)->lli_fid
;
2842 CDEBUG(D_INFO
, "trying to match res "DFID
"\n", PFID(fid
));
2844 rc
= md_lock_match(ll_i2mdexp(inode
), LDLM_FL_BLOCK_GRANTED
|flags
,
2845 fid
, LDLM_IBITS
, &policy
, mode
, lockh
);
/*
 * Post-process the return code of a revalidation request.
 *
 * \param inode  inode that was being revalidated
 * \param rc     result of the getattr/intent request
 *
 * -ENOENT means the object has already been unlinked; per the comment
 * below this is handled by updating nlink and returning success, with a
 * separate check for inodes that are neither regular files nor
 * directories.  Any other non-zero rc is logged with the filesystem
 * name and FID.
 */
2850 static int ll_inode_revalidate_fini(struct inode
*inode
, int rc
)
2852 /* Already unlinked. Just update nlink and return success */
2853 if (rc
== -ENOENT
) {
2855 /* This path cannot be hit for regular files unless in
2856 * case of obscure races, so no need to validate size.
2858 if (!S_ISREG(inode
->i_mode
) && !S_ISDIR(inode
->i_mode
))
2860 } else if (rc
!= 0) {
2861 CERROR("%s: revalidate FID "DFID
" error: rc = %d\n",
2862 ll_get_fsname(inode
->i_sb
, NULL
, 0),
2863 PFID(ll_inode2fid(inode
)), rc
);
/*
 * Revalidate the attributes of dentry->d_inode against the MDS.
 *
 * \param dentry  dentry to revalidate; its inode must be non-NULL
 * \param it      caller's lookup intent
 * \param ibits   inodebits that must be covered (used as \a ibits in
 *                the body)
 *
 * Two paths are visible:
 *  - if the export advertises OBD_CONNECT_ATTRFID, a getattr-by-FID
 *    intent (IT_GETATTR, or IT_LOOKUP when only MDS_INODELOCK_LOOKUP is
 *    wanted) is sent with md_intent_lock(), the result is post-processed
 *    by ll_inode_revalidate_fini() and ll_revalidate_it_finish(), and a
 *    dentry whose inode has i_nlink == 0 is invalidated;
 *  - otherwise, when ll_have_md_lock() reports that the requested bits
 *    are not all cached, a plain md_getattr() is issued (asking for EA
 *    size on regular files) and the inode is refreshed with
 *    ll_prep_inode().
 */
2869 int __ll_inode_revalidate_it(struct dentry
*dentry
, struct lookup_intent
*it
,
2872 struct inode
*inode
= dentry
->d_inode
;
2873 struct ptlrpc_request
*req
= NULL
;
2874 struct obd_export
*exp
;
2877 LASSERT(inode
!= NULL
);
2879 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2880 inode
->i_ino
, inode
->i_generation
, inode
, dentry
->d_name
.name
);
2882 exp
= ll_i2mdexp(inode
);
2884 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2885 * But under CMD case, it caused some lock issues, should be fixed
2886 * with new CMD ibits lock. See bug 12718 */
2887 if (exp_connect_flags(exp
) & OBD_CONNECT_ATTRFID
) {
2888 struct lookup_intent oit
= { .it_op
= IT_GETATTR
};
2889 struct md_op_data
*op_data
;
2891 if (ibits
== MDS_INODELOCK_LOOKUP
)
2892 oit
.it_op
= IT_LOOKUP
;
2894 /* Call getattr by fid, so do not provide name at all. */
2895 op_data
= ll_prep_md_op_data(NULL
, dentry
->d_parent
->d_inode
,
2896 dentry
->d_inode
, NULL
, 0, 0,
2897 LUSTRE_OPC_ANY
, NULL
);
2898 if (IS_ERR(op_data
))
2899 return PTR_ERR(op_data
);
2901 oit
.it_create_mode
|= M_CHECK_STALE
;
2902 rc
= md_intent_lock(exp
, op_data
, NULL
, 0,
2903 /* we are not interested in name
2906 ll_md_blocking_ast
, 0);
2907 ll_finish_md_op_data(op_data
);
2908 oit
.it_create_mode
&= ~M_CHECK_STALE
;
2910 rc
= ll_inode_revalidate_fini(inode
, rc
);
2914 rc
= ll_revalidate_it_finish(req
, &oit
, dentry
);
2916 ll_intent_release(&oit
);
2920 /* Unlinked? Unhash dentry, so it is not picked up later by
2921 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2922 here to preserve get_cwd functionality on 2.6.
2924 if (!dentry
->d_inode
->i_nlink
)
2925 d_lustre_invalidate(dentry
, 0);
2927 ll_lookup_finish_locks(&oit
, dentry
);
2928 } else if (!ll_have_md_lock(dentry
->d_inode
, &ibits
, LCK_MINMODE
)) {
2929 struct ll_sb_info
*sbi
= ll_i2sbi(dentry
->d_inode
);
2930 obd_valid valid
= OBD_MD_FLGETATTR
;
2931 struct md_op_data
*op_data
;
2934 if (S_ISREG(inode
->i_mode
)) {
2935 rc
= ll_get_max_mdsize(sbi
, &ealen
);
2938 valid
|= OBD_MD_FLEASIZE
| OBD_MD_FLMODEASIZE
;
2941 op_data
= ll_prep_md_op_data(NULL
, inode
, NULL
, NULL
,
2942 0, ealen
, LUSTRE_OPC_ANY
,
2944 if (IS_ERR(op_data
))
2945 return PTR_ERR(op_data
);
2947 op_data
->op_valid
= valid
;
2948 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2949 * capa for this inode. Because we only keep capas of dirs
2951 rc
= md_getattr(sbi
->ll_md_exp
, op_data
, &req
);
2952 ll_finish_md_op_data(op_data
);
2954 rc
= ll_inode_revalidate_fini(inode
, rc
);
2958 rc
= ll_prep_inode(&inode
, req
, NULL
, NULL
);
2961 ptlrpc_req_finished(req
);
/*
 * Revalidate an inode's metadata and, for regular files, its size.
 *
 * \param dentry  dentry whose inode is revalidated
 * \param it      lookup intent forwarded to __ll_inode_revalidate_it()
 * \param ibits   inodebits forwarded as \a ibits
 *
 * After the metadata revalidation, inodes that are not regular files
 * get their atime/mtime/ctime copied from the cached lli_lvb.  For
 * regular files the size is glimpsed with ll_glimpse_size(), except
 * while LLIF_FILE_RESTORING is set (see the comment in the body).
 */
2965 int ll_inode_revalidate_it(struct dentry
*dentry
, struct lookup_intent
*it
,
2968 struct inode
*inode
= dentry
->d_inode
;
2971 rc
= __ll_inode_revalidate_it(dentry
, it
, ibits
);
2975 /* if object isn't regular file, don't validate size */
2976 if (!S_ISREG(inode
->i_mode
)) {
2977 LTIME_S(inode
->i_atime
) = ll_i2info(inode
)->lli_lvb
.lvb_atime
;
2978 LTIME_S(inode
->i_mtime
) = ll_i2info(inode
)->lli_lvb
.lvb_mtime
;
2979 LTIME_S(inode
->i_ctime
) = ll_i2info(inode
)->lli_lvb
.lvb_ctime
;
2981 /* In case of restore, the MDT has the right size and has
2982 * already send it back without granting the layout lock,
2983 * inode is up-to-date so glimpse is useless.
2984 * Also to glimpse we need the layout, in case of a running
2985 * restore the MDT holds the layout lock so the glimpse will
2986 * block up to the end of restore (getattr will block)
2988 if (!(ll_i2info(inode
)->lli_flags
& LLIF_FILE_RESTORING
))
2989 rc
= ll_glimpse_size(inode
);
/*
 * Fill \a stat for a dentry after revalidating its inode.
 *
 * \param mnt   mount the dentry belongs to; not referenced by the
 *              visible code
 * \param de    dentry being stat-ed
 * \param it    lookup intent passed to ll_inode_revalidate_it()
 * \param stat  kstat to fill
 *
 * The inode is revalidated for the UPDATE and LOOKUP inodebits and the
 * LPROC_LL_GETATTR counter is bumped.  The kstat fields are then copied
 * from the inode.  When ll_need_32bit_api() is true the inode number is
 * rebuilt from the FID with cl_fid_build_ino(..., 1) instead of using
 * inode->i_ino.
 */
2994 int ll_getattr_it(struct vfsmount
*mnt
, struct dentry
*de
,
2995 struct lookup_intent
*it
, struct kstat
*stat
)
2997 struct inode
*inode
= de
->d_inode
;
2998 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
2999 struct ll_inode_info
*lli
= ll_i2info(inode
);
3002 res
= ll_inode_revalidate_it(de
, it
, MDS_INODELOCK_UPDATE
|
3003 MDS_INODELOCK_LOOKUP
);
3004 ll_stats_ops_tally(sbi
, LPROC_LL_GETATTR
, 1);
3009 stat
->dev
= inode
->i_sb
->s_dev
;
3010 if (ll_need_32bit_api(sbi
))
3011 stat
->ino
= cl_fid_build_ino(&lli
->lli_fid
, 1);
3013 stat
->ino
= inode
->i_ino
;
3014 stat
->mode
= inode
->i_mode
;
3015 stat
->nlink
= inode
->i_nlink
;
3016 stat
->uid
= inode
->i_uid
;
3017 stat
->gid
= inode
->i_gid
;
3018 stat
->rdev
= inode
->i_rdev
;
3019 stat
->atime
= inode
->i_atime
;
3020 stat
->mtime
= inode
->i_mtime
;
3021 stat
->ctime
= inode
->i_ctime
;
3022 stat
->blksize
= 1 << inode
->i_blkbits
;
3024 stat
->size
= i_size_read(inode
);
3025 stat
->blocks
= inode
->i_blocks
;
/*
 * getattr inode operation (see ll_file_inode_operations): calls
 * ll_getattr_it() with a fresh IT_GETATTR intent and returns its result.
 */
3029 int ll_getattr(struct vfsmount
*mnt
, struct dentry
*de
, struct kstat
*stat
)
3031 struct lookup_intent it
= { .it_op
= IT_GETATTR
};
3033 return ll_getattr_it(mnt
, de
, &it
, stat
);
/*
 * fiemap inode operation: translate a VFS fiemap request into a
 * struct ll_user_fiemap, run ll_do_fiemap(), and copy the result back.
 *
 * \param inode    inode to map
 * \param fieinfo  VFS request/response descriptor
 * \param start    first byte of the range (fm_start)
 * \param len      length of the range (fm_length)
 *
 * The temporary buffer holds the header plus fi_extents_max extents.
 * The first extent is copied in from fi_extents_start before the call,
 * and fm_mapped_extents extents are copied back afterwards.
 *
 * NOTE(review): fi_extents_start is accessed with plain memcpy() in both
 * directions; if it is a user-space pointer this should go through
 * copy_from_user()/copy_to_user() - confirm against the kernel's
 * struct fiemap_extent_info.
 * NOTE(review): num_bytes is derived from the caller-supplied
 * fi_extents_max without a visible overflow check - confirm the type of
 * num_bytes and any limit enforced by the caller.
 */
3036 int ll_fiemap(struct inode
*inode
, struct fiemap_extent_info
*fieinfo
,
3037 __u64 start
, __u64 len
)
3041 struct ll_user_fiemap
*fiemap
;
3042 unsigned int extent_count
= fieinfo
->fi_extents_max
;
3044 num_bytes
= sizeof(*fiemap
) + (extent_count
*
3045 sizeof(struct ll_fiemap_extent
));
3046 OBD_ALLOC_LARGE(fiemap
, num_bytes
);
3051 fiemap
->fm_flags
= fieinfo
->fi_flags
;
3052 fiemap
->fm_extent_count
= fieinfo
->fi_extents_max
;
3053 fiemap
->fm_start
= start
;
3054 fiemap
->fm_length
= len
;
3055 memcpy(&fiemap
->fm_extents
[0], fieinfo
->fi_extents_start
,
3056 sizeof(struct ll_fiemap_extent
));
3058 rc
= ll_do_fiemap(inode
, fiemap
, num_bytes
);
3060 fieinfo
->fi_flags
= fiemap
->fm_flags
;
3061 fieinfo
->fi_extents_mapped
= fiemap
->fm_mapped_extents
;
3062 memcpy(fieinfo
->fi_extents_start
, &fiemap
->fm_extents
[0],
3063 fiemap
->fm_mapped_extents
* sizeof(struct ll_fiemap_extent
));
3065 OBD_FREE_LARGE(fiemap
, num_bytes
);
/*
 * get_acl inode operation: take a new reference, under lli_lock, on the
 * POSIX ACL cached in the Lustre inode info.
 *
 * \param inode  inode whose cached ACL is wanted
 * \param type   ACL type requested by the VFS; not referenced by the
 *               visible code
 *
 * The reference is obtained with posix_acl_dup(); per the comment below
 * the VFS permission-check path drops it.
 */
3069 struct posix_acl
* ll_get_acl(struct inode
*inode
, int type
)
3071 struct ll_inode_info
*lli
= ll_i2info(inode
);
3072 struct posix_acl
*acl
= NULL
;
3074 spin_lock(&lli
->lli_lock
);
3075 /* VFS' acl_permission_check->check_acl will release the refcount */
3076 acl
= posix_acl_dup(lli
->lli_posix_acl
);
3077 spin_unlock(&lli
->lli_lock
);
/*
 * permission inode operation.
 *
 * \param inode  inode being accessed
 * \param mask   MAY_* access mask
 *
 * When MAY_NOT_BLOCK is defined, requests carrying that flag are
 * special-cased first (branch body not visible here).  The filesystem
 * root inode is revalidated for MDS_INODELOCK_LOOKUP because, as the
 * comment below notes, it is not validated by lookup.  Remote-client
 * mounts (LL_SBI_RMT_CLIENT) are checked with
 * lustre_check_remote_perm(); every other mount bumps the
 * LPROC_LL_INODE_PERM counter and uses generic_permission().
 */
3083 int ll_inode_permission(struct inode
*inode
, int mask
)
3087 #ifdef MAY_NOT_BLOCK
3088 if (mask
& MAY_NOT_BLOCK
)
3092 /* as root inode are NOT getting validated in lookup operation,
3093 * need to do it before permission check. */
3095 if (inode
== inode
->i_sb
->s_root
->d_inode
) {
3096 struct lookup_intent it
= { .it_op
= IT_LOOKUP
};
3098 rc
= __ll_inode_revalidate_it(inode
->i_sb
->s_root
, &it
,
3099 MDS_INODELOCK_LOOKUP
);
3104 CDEBUG(D_VFSTRACE
, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3105 inode
->i_ino
, inode
->i_generation
, inode
, inode
->i_mode
, mask
);
3107 if (ll_i2sbi(inode
)->ll_flags
& LL_SBI_RMT_CLIENT
)
3108 return lustre_check_remote_perm(inode
, mask
);
3110 ll_stats_ops_tally(ll_i2sbi(inode
), LPROC_LL_INODE_PERM
, 1);
3111 rc
= generic_permission(inode
, mask
);
/*
 * Default file_operations table for regular Lustre files.  No .flock or
 * .lock handler appears among the visible entries, in contrast to the
 * _flock and _noflock variants.
 */
3116 /* -o localflock - only provides locally consistent flock locks */
3117 struct file_operations ll_file_operations
= {
3118 .read
= ll_file_read
,
3119 .aio_read
= ll_file_aio_read
,
3120 .write
= ll_file_write
,
3121 .aio_write
= ll_file_aio_write
,
3122 .unlocked_ioctl
= ll_file_ioctl
,
3123 .open
= ll_file_open
,
3124 .release
= ll_file_release
,
3125 .mmap
= ll_file_mmap
,
3126 .llseek
= ll_file_seek
,
3127 .splice_read
= ll_file_splice_read
,
/*
 * file_operations table that routes both .flock and .lock to
 * ll_file_flock(), which takes the lock through LDLM on the metadata
 * export.  The other entries match ll_file_operations.
 */
3132 struct file_operations ll_file_operations_flock
= {
3133 .read
= ll_file_read
,
3134 .aio_read
= ll_file_aio_read
,
3135 .write
= ll_file_write
,
3136 .aio_write
= ll_file_aio_write
,
3137 .unlocked_ioctl
= ll_file_ioctl
,
3138 .open
= ll_file_open
,
3139 .release
= ll_file_release
,
3140 .mmap
= ll_file_mmap
,
3141 .llseek
= ll_file_seek
,
3142 .splice_read
= ll_file_splice_read
,
3145 .flock
= ll_file_flock
,
3146 .lock
= ll_file_flock
/*
 * file_operations table that routes both .flock and .lock to
 * ll_file_noflock().  The other entries match ll_file_operations.
 */
3149 /* These are for -o noflock - to return ENOSYS on flock calls */
3150 struct file_operations ll_file_operations_noflock
= {
3151 .read
= ll_file_read
,
3152 .aio_read
= ll_file_aio_read
,
3153 .write
= ll_file_write
,
3154 .aio_write
= ll_file_aio_write
,
3155 .unlocked_ioctl
= ll_file_ioctl
,
3156 .open
= ll_file_open
,
3157 .release
= ll_file_release
,
3158 .mmap
= ll_file_mmap
,
3159 .llseek
= ll_file_seek
,
3160 .splice_read
= ll_file_splice_read
,
3163 .flock
= ll_file_noflock
,
3164 .lock
= ll_file_noflock
/*
 * inode_operations table for Lustre files: attribute get/set,
 * permission checking, extended attributes, fiemap and ACL lookup.
 */
3167 struct inode_operations ll_file_inode_operations
= {
3168 .setattr
= ll_setattr
,
3169 .getattr
= ll_getattr
,
3170 .permission
= ll_inode_permission
,
3171 .setxattr
= ll_setxattr
,
3172 .getxattr
= ll_getxattr
,
3173 .listxattr
= ll_listxattr
,
3174 .removexattr
= ll_removexattr
,
3175 .fiemap
= ll_fiemap
,
3176 .get_acl
= ll_get_acl
,
3179 /* dynamic ioctl number support routines */
/*
 * Registry of dynamically registered ioctl handlers.
 *   ioc_sem   rw-semaphore guarding ioc_head: taken for write by
 *             register/unregister and for read by ll_iocontrol_call()
 *   ioc_head  list of struct llioc_data entries, linked via iocd_list
 * The single static instance is named llioc.
 */
3180 static struct llioc_ctl_data
{
3181 struct rw_semaphore ioc_sem
;
3182 struct list_head ioc_head
;
3184 __RWSEM_INITIALIZER(llioc
.ioc_sem
),
3185 LIST_HEAD_INIT(llioc
.ioc_head
)
/*
 * Fields of one registered ioctl handler block (used as
 * struct llioc_data by the register/unregister/call routines):
 *   iocd_list   linkage on llioc.ioc_head
 *   iocd_size   allocation size in bytes, needed again for OBD_FREE()
 *   iocd_cb     callback invoked for a matching command
 *   iocd_count  number of entries in iocd_cmd[]
 *   iocd_cmd    trailing array of ioctl command numbers handled
 */
3190 struct list_head iocd_list
;
3191 unsigned int iocd_size
;
3192 llioc_callback_t iocd_cb
;
3193 unsigned int iocd_count
;
3194 unsigned int iocd_cmd
[0];
/*
 * Register a callback for a set of dynamic ioctl command numbers.
 *
 * \param cb     callback to invoke; must not be NULL
 * \param count  number of entries in \a cmd; must be within
 *               [0, LLIOC_MAX_CMD]
 * \param cmd    array of ioctl command numbers; must not be NULL
 *
 * A struct llioc_data with room for \a count commands is allocated,
 * filled in, and appended to llioc.ioc_head under the write side of
 * llioc.ioc_sem.  ll_iocontrol_unregister() takes a "magic" pointer to
 * identify the block to remove.
 * NOTE(review): the return statements are not visible here; presumably
 * the allocated block is returned as that magic, and NULL on bad
 * arguments or allocation failure - confirm.
 */
3197 void *ll_iocontrol_register(llioc_callback_t cb
, int count
, unsigned int *cmd
)
3200 struct llioc_data
*in_data
= NULL
;
3202 if (cb
== NULL
|| cmd
== NULL
||
3203 count
> LLIOC_MAX_CMD
|| count
< 0)
/* header plus one command slot per registered ioctl number */
3206 size
= sizeof(*in_data
) + count
* sizeof(unsigned int);
3207 OBD_ALLOC(in_data
, size
);
3208 if (in_data
== NULL
)
3211 memset(in_data
, 0, sizeof(*in_data
));
3212 in_data
->iocd_size
= size
;
3213 in_data
->iocd_cb
= cb
;
3214 in_data
->iocd_count
= count
;
3215 memcpy(in_data
->iocd_cmd
, cmd
, sizeof(unsigned int) * count
);
3217 down_write(&llioc
.ioc_sem
);
3218 list_add_tail(&in_data
->iocd_list
, &llioc
.ioc_head
);
3219 up_write(&llioc
.ioc_sem
);
/*
 * Remove and free a handler block previously added by
 * ll_iocontrol_register().
 *
 * \param magic  pointer identifying the block to remove
 *
 * The list is walked under the write side of llioc.ioc_sem.  The entry
 * to remove is unlinked, the semaphore is dropped, and the block is
 * freed using its recorded iocd_size.  If the walk completes without
 * that happening, the semaphore is released and a warning naming
 * \a magic is logged.
 * NOTE(review): the comparison against \a magic and the early return
 * after OBD_FREE() are not visible here - confirm in the full source.
 */
3224 void ll_iocontrol_unregister(void *magic
)
3226 struct llioc_data
*tmp
;
3231 down_write(&llioc
.ioc_sem
);
3232 list_for_each_entry(tmp
, &llioc
.ioc_head
, iocd_list
) {
3234 unsigned int size
= tmp
->iocd_size
;
3236 list_del(&tmp
->iocd_list
);
3237 up_write(&llioc
.ioc_sem
);
3239 OBD_FREE(tmp
, size
);
3243 up_write(&llioc
.ioc_sem
);
3245 CWARN("didn't find iocontrol register block with magic: %p\n", magic
);
/* export the dynamic-ioctl registration API to other kernel modules */
3248 EXPORT_SYMBOL(ll_iocontrol_register
);
3249 EXPORT_SYMBOL(ll_iocontrol_unregister
);
/*
 * Dispatch an ioctl to the dynamically registered handlers.
 *
 * \param inode  inode the ioctl is issued on
 * \param file   open file the ioctl is issued on
 * \param cmd    ioctl command number
 * \param arg    ioctl argument
 * \param rcp    passed by ll_file_ioctl() as &err; it is not
 *               referenced by the visible code
 *
 * Under the read side of llioc.ioc_sem, every registered block is
 * scanned for \a cmd and the block's callback is invoked on a match.
 * rc starts as -EINVAL and is handed to the callback by address.  The
 * value LLIOC_STOP from a callback is checked to end the scan; the
 * initial iterator value is LLIOC_CONT.
 */
3251 enum llioc_iter
ll_iocontrol_call(struct inode
*inode
, struct file
*file
,
3252 unsigned int cmd
, unsigned long arg
, int *rcp
)
3254 enum llioc_iter ret
= LLIOC_CONT
;
3255 struct llioc_data
*data
;
3256 int rc
= -EINVAL
, i
;
3258 down_read(&llioc
.ioc_sem
);
3259 list_for_each_entry(data
, &llioc
.ioc_head
, iocd_list
) {
3260 for (i
= 0; i
< data
->iocd_count
; i
++) {
3261 if (cmd
!= data
->iocd_cmd
[i
])
3264 ret
= data
->iocd_cb(inode
, file
, cmd
, arg
, data
, &rc
);
3268 if (ret
== LLIOC_STOP
)
3271 up_read(&llioc
.ioc_sem
);
/*
 * Apply a configuration request to the inode's cl_object.
 *
 * \param inode  inode whose lli_clob is configured
 * \param conf   request; coc_opc selects the operation
 *
 * An inode without a cl_object is special-cased at the top (branch body
 * not visible here).  Otherwise cl_conf_set() is called inside a nested
 * cl environment.  For OBJECT_CONF_SET the layout lock carried in
 * conf->coc_lock is afterwards made matchable with
 * ldlm_lock_allow_match(), so that other threads can only match the
 * lock once the layout has been applied.
 */
3278 int ll_layout_conf(struct inode
*inode
, const struct cl_object_conf
*conf
)
3280 struct ll_inode_info
*lli
= ll_i2info(inode
);
3281 struct cl_env_nest nest
;
3285 if (lli
->lli_clob
== NULL
)
3288 env
= cl_env_nested_get(&nest
);
3290 return PTR_ERR(env
);
3292 result
= cl_conf_set(env
, lli
->lli_clob
, conf
);
3293 cl_env_nested_put(&nest
, env
);
3295 if (conf
->coc_opc
== OBJECT_CONF_SET
) {
3296 struct ldlm_lock
*lock
= conf
->coc_lock
;
3298 LASSERT(lock
!= NULL
);
3299 LASSERT(ldlm_has_layout(lock
));
3301 /* it can only be allowed to match after layout is
3302 * applied to inode otherwise false layout would be
3303 * seen. Applying layout should happen before dropping
3304 * the intent lock. */
3305 ldlm_lock_allow_match(lock
);
/*
 * \param inode  inode whose layout is wanted
 * \param lock   layout lock whose LVB buffer (l_lvb_data / l_lvb_len)
 *               receives the layout
 *
 * The lock is checked first for an LVB that is already present and
 * marked LDLM_FL_LVB_READY (branch body not visible here).  Otherwise
 * the XATTR_NAME_LOV extended attribute is requested from the MDS with
 * md_getxattr(), with the reply size bounded by ll_get_max_mdsize().
 * The returned EA is copied into a freshly allocated buffer, which
 * replaces any previous l_lvb_data under the resource lock.  Error
 * codes visible here: -EPROTO for a missing or oversized reply body,
 * -EFAULT and -ENOMEM via GOTO(out, ...).
 */
3311 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3312 static int ll_layout_fetch(struct inode
*inode
, struct ldlm_lock
*lock
)
3315 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3316 struct obd_capa
*oc
;
3317 struct ptlrpc_request
*req
;
3318 struct mdt_body
*body
;
3324 CDEBUG(D_INODE
, DFID
" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3325 PFID(ll_inode2fid(inode
)), !!(lock
->l_flags
& LDLM_FL_LVB_READY
),
3326 lock
->l_lvb_data
, lock
->l_lvb_len
);
3328 if ((lock
->l_lvb_data
!= NULL
) && (lock
->l_flags
& LDLM_FL_LVB_READY
))
3331 /* if layout lock was granted right away, the layout is returned
3332 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3333 * blocked and then granted via completion ast, we have to fetch
3334 * layout here. Please note that we can't use the LVB buffer in
3335 * completion AST because it doesn't have a large enough buffer */
3336 oc
= ll_mdscapa_get(inode
);
3337 rc
= ll_get_max_mdsize(sbi
, &lmmsize
);
3339 rc
= md_getxattr(sbi
->ll_md_exp
, ll_inode2fid(inode
), oc
,
3340 OBD_MD_FLXATTR
, XATTR_NAME_LOV
, NULL
, 0,
3346 body
= req_capsule_server_get(&req
->rq_pill
, &RMF_MDT_BODY
);
3347 if (body
== NULL
|| body
->eadatasize
> lmmsize
)
3348 GOTO(out
, rc
= -EPROTO
);
3350 lmmsize
= body
->eadatasize
;
3351 if (lmmsize
== 0) /* empty layout */
3354 lmm
= req_capsule_server_sized_get(&req
->rq_pill
, &RMF_EADATA
, lmmsize
);
3356 GOTO(out
, rc
= -EFAULT
);
3358 OBD_ALLOC_LARGE(lvbdata
, lmmsize
);
3359 if (lvbdata
== NULL
)
3360 GOTO(out
, rc
= -ENOMEM
);
3362 memcpy(lvbdata
, lmm
, lmmsize
);
/* swap the new buffer in under the resource lock, freeing the old one */
3363 lock_res_and_lock(lock
);
3364 if (lock
->l_lvb_data
!= NULL
)
3365 OBD_FREE_LARGE(lock
->l_lvb_data
, lock
->l_lvb_len
);
3367 lock
->l_lvb_data
= lvbdata
;
3368 lock
->l_lvb_len
= lmmsize
;
3369 unlock_res_and_lock(lock
);
3372 ptlrpc_req_finished(req
);
3377 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * Apply the layout carried by a held layout lock to the inode.
 *
 * \param lockh   handle of the layout lock; must be in use (asserted)
 * \param mode    mode the lock is held in; dropped again with
 *                ldlm_lock_decref() before the optional wait
 * \param inode   inode to configure
 * \param gen     out: layout generation
 * \param reconf  whether the file may be reconfigured when the LVB is
 *                not ready yet
 *
 * Outline of the visible code:
 *  - attach the inode to the lock with md_set_lock_data();
 *  - sample LDLM_FL_LVB_READY; if it is set, or \a reconf is false,
 *    report lli_layout_gen in *gen;
 *  - otherwise fetch the layout with ll_layout_fetch(), unpack the LVB
 *    with obd_unpackmd(), and set *gen to the unpacked lsm_layout_gen
 *    or to LL_LAYOUT_GEN_EMPTY;
 *  - apply the layout with ll_layout_conf(OBJECT_CONF_SET) and free the
 *    unpacked md;
 *  - -EBUSY from the apply step sets wait_layout; after the lock
 *    reference is released, ll_layout_conf(OBJECT_CONF_WAIT) is issued
 *    to wait for the reconfiguration.
 */
3380 static int ll_layout_lock_set(struct lustre_handle
*lockh
, ldlm_mode_t mode
,
3381 struct inode
*inode
, __u32
*gen
, bool reconf
)
3383 struct ll_inode_info
*lli
= ll_i2info(inode
);
3384 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3385 struct ldlm_lock
*lock
;
3386 struct lustre_md md
= { NULL
};
3387 struct cl_object_conf conf
;
3390 bool wait_layout
= false;
3392 LASSERT(lustre_handle_is_used(lockh
));
3394 lock
= ldlm_handle2lock(lockh
);
3395 LASSERT(lock
!= NULL
);
3396 LASSERT(ldlm_has_layout(lock
));
3398 LDLM_DEBUG(lock
, "File %p/"DFID
" being reconfigured: %d.\n",
3399 inode
, PFID(&lli
->lli_fid
), reconf
);
3401 /* in case this is a caching lock and reinstate with new inode */
3402 md_set_lock_data(sbi
->ll_md_exp
, &lockh
->cookie
, inode
, NULL
);
3404 lock_res_and_lock(lock
);
3405 lvb_ready
= !!(lock
->l_flags
& LDLM_FL_LVB_READY
);
3406 unlock_res_and_lock(lock
);
3407 /* checking lvb_ready is racy but this is okay. The worst case is
3408 * that multiple processes may configure the file at the same time. */
3409 if (lvb_ready
|| !reconf
) {
3412 /* layout_gen must be valid if layout lock is not
3413 * cancelled and stripe has already set */
3414 *gen
= lli
->lli_layout_gen
;
3420 rc
= ll_layout_fetch(inode
, lock
);
3424 /* for layout lock, lmm is returned in lock's lvb.
3425 * lvb_data is immutable if the lock is held so it's safe to access it
3426 * without res lock. See the description in ldlm_lock_decref_internal()
3427 * for the condition to free lvb_data of layout lock */
3428 if (lock
->l_lvb_data
!= NULL
) {
3429 rc
= obd_unpackmd(sbi
->ll_dt_exp
, &md
.lsm
,
3430 lock
->l_lvb_data
, lock
->l_lvb_len
);
3432 *gen
= LL_LAYOUT_GEN_EMPTY
;
3434 *gen
= md
.lsm
->lsm_layout_gen
;
3437 CERROR("%s: file "DFID
" unpackmd error: %d\n",
3438 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3439 PFID(&lli
->lli_fid
), rc
);
3445 /* set layout to file. Unlikely this will fail as old layout was
3446 * surely eliminated */
3447 memset(&conf
, 0, sizeof(conf
));
3448 conf
.coc_opc
= OBJECT_CONF_SET
;
3449 conf
.coc_inode
= inode
;
3450 conf
.coc_lock
= lock
;
3451 conf
.u
.coc_md
= &md
;
3452 rc
= ll_layout_conf(inode
, &conf
);
3455 obd_free_memmd(sbi
->ll_dt_exp
, &md
.lsm
);
3457 /* refresh layout failed, need to wait */
3458 wait_layout
= rc
== -EBUSY
;
3461 LDLM_LOCK_PUT(lock
);
3462 ldlm_lock_decref(lockh
, mode
);
3464 /* wait for IO to complete if it's still being used. */
3466 CDEBUG(D_INODE
, "%s: %p/"DFID
" wait for layout reconf.\n",
3467 ll_get_fsname(inode
->i_sb
, NULL
, 0),
3468 inode
, PFID(&lli
->lli_fid
));
3470 memset(&conf
, 0, sizeof(conf
));
3471 conf
.coc_opc
= OBJECT_CONF_WAIT
;
3472 conf
.coc_inode
= inode
;
3473 rc
= ll_layout_conf(inode
, &conf
);
3477 CDEBUG(D_INODE
, "file: "DFID
" waiting layout return: %d.\n",
3478 PFID(&lli
->lli_fid
), rc
);
3484 * This function checks if there exists a LAYOUT lock on the client side,
3485 * or enqueues it if it doesn't have one in cache.
3487 * This function will not hold layout lock so it may be revoked any time after
3488 * this function returns. Any operations depend on layout should be redone
3491 * This function should be called before lov_io_init() to get an uptodate
3492 * layout version, the caller should save the version number and after IO
3493 * is finished, this function should be called again to verify that layout
3494 * is not changed during IO time.
/*
 * Make sure the client holds an up-to-date layout for \a inode and
 * report its generation.
 *
 * \param inode  regular file (asserted) with a sane FID
 * \param gen    out: layout generation; preset to lli_layout_gen
 *
 * Mounts without LL_SBI_LAYOUT_LOCK are special-cased right after *gen
 * is preset (branch body not visible here).  Otherwise:
 *  1. try to match a cached MDS_INODELOCK_LAYOUT lock without taking the
 *     mutex and apply it with ll_layout_lock_set(..., false);
 *  2. take lli_layout_mutex and try the match again, this time allowing
 *     reconfiguration;
 *  3. if there is still no lock, enqueue an IT_LAYOUT intent with
 *     md_enqueue(), release the reply, bind the new lock to the inode
 *     with ll_set_lock_data(), and apply it with ll_layout_lock_set().
 * lli_layout_mutex is released on each exit path visible here.
 */
3496 int ll_layout_refresh(struct inode
*inode
, __u32
*gen
)
3498 struct ll_inode_info
*lli
= ll_i2info(inode
);
3499 struct ll_sb_info
*sbi
= ll_i2sbi(inode
);
3500 struct md_op_data
*op_data
;
3501 struct lookup_intent it
;
3502 struct lustre_handle lockh
;
3504 struct ldlm_enqueue_info einfo
= {
3505 .ei_type
= LDLM_IBITS
,
3507 .ei_cb_bl
= ll_md_blocking_ast
,
3508 .ei_cb_cp
= ldlm_completion_ast
,
3512 *gen
= lli
->lli_layout_gen
;
3513 if (!(sbi
->ll_flags
& LL_SBI_LAYOUT_LOCK
))
3517 LASSERT(fid_is_sane(ll_inode2fid(inode
)));
3518 LASSERT(S_ISREG(inode
->i_mode
));
3520 /* mostly layout lock is caching on the local side, so try to match
3521 * it before grabbing layout lock mutex. */
3522 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3523 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3524 if (mode
!= 0) { /* hit cached lock */
3525 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, false);
3529 /* better hold lli_layout_mutex to try again otherwise
3530 * it will have starvation problem. */
3533 /* take layout lock mutex to enqueue layout lock exclusively. */
3534 mutex_lock(&lli
->lli_layout_mutex
);
3537 /* try again. Maybe somebody else has done this. */
3538 mode
= ll_take_md_lock(inode
, MDS_INODELOCK_LAYOUT
, &lockh
, 0,
3539 LCK_CR
| LCK_CW
| LCK_PR
| LCK_PW
);
3540 if (mode
!= 0) { /* hit cached lock */
3541 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3545 mutex_unlock(&lli
->lli_layout_mutex
);
3549 op_data
= ll_prep_md_op_data(NULL
, inode
, inode
, NULL
,
3550 0, 0, LUSTRE_OPC_ANY
, NULL
);
3551 if (IS_ERR(op_data
)) {
3552 mutex_unlock(&lli
->lli_layout_mutex
);
3553 return PTR_ERR(op_data
);
3556 /* have to enqueue one */
3557 memset(&it
, 0, sizeof(it
));
3558 it
.it_op
= IT_LAYOUT
;
3559 lockh
.cookie
= 0ULL;
3561 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID
".\n",
3562 ll_get_fsname(inode
->i_sb
, NULL
, 0), inode
,
3563 PFID(&lli
->lli_fid
));
3565 rc
= md_enqueue(sbi
->ll_md_exp
, &einfo
, &it
, op_data
, &lockh
,
3567 if (it
.d
.lustre
.it_data
!= NULL
)
3568 ptlrpc_req_finished(it
.d
.lustre
.it_data
);
3569 it
.d
.lustre
.it_data
= NULL
;
3571 ll_finish_md_op_data(op_data
);
/* keep the granted mode locally and clear it in the intent before
 * ll_intent_drop_lock() is called */
3573 mode
= it
.d
.lustre
.it_lock_mode
;
3574 it
.d
.lustre
.it_lock_mode
= 0;
3575 ll_intent_drop_lock(&it
);
3578 /* set lock data in case this is a new lock */
3579 ll_set_lock_data(sbi
->ll_md_exp
, inode
, &it
, NULL
);
3580 rc
= ll_layout_lock_set(&lockh
, mode
, inode
, gen
, true);
3584 mutex_unlock(&lli
->lli_layout_mutex
);
3590 * This function sends a restore request to the MDT
3592 int ll_layout_restore(struct inode
*inode
)
3594 struct hsm_user_request
*hur
;
3597 len
= sizeof(struct hsm_user_request
) +
3598 sizeof(struct hsm_user_item
);
3599 OBD_ALLOC(hur
, len
);
3603 hur
->hur_request
.hr_action
= HUA_RESTORE
;
3604 hur
->hur_request
.hr_archive_id
= 0;
3605 hur
->hur_request
.hr_flags
= 0;
3606 memcpy(&hur
->hur_user_item
[0].hui_fid
, &ll_i2info(inode
)->lli_fid
,
3607 sizeof(hur
->hur_user_item
[0].hui_fid
));
3608 hur
->hur_user_item
[0].hui_extent
.length
= -1;
3609 hur
->hur_request
.hr_itemcount
= 1;
3610 rc
= obd_iocontrol(LL_IOC_HSM_REQUEST
, cl_i2sbi(inode
)->ll_md_exp
,