4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
33 #define DEBUG_SUBSYSTEM S_LMV
34 #include <linux/slab.h>
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/pagemap.h>
39 #include <asm/div64.h>
40 #include <linux/seq_file.h>
41 #include <linux/namei.h>
42 #include <linux/uaccess.h>
44 #include "../include/lustre/lustre_idl.h"
45 #include "../include/obd_support.h"
46 #include "../include/lustre_net.h"
47 #include "../include/obd_class.h"
48 #include "../include/lustre_lmv.h"
49 #include "../include/lprocfs_status.h"
50 #include "../include/cl_object.h"
51 #include "../include/lustre_fid.h"
52 #include "../include/lustre/lustre_ioctl.h"
53 #include "../include/lustre_kernelcomm.h"
54 #include "lmv_internal.h"
56 static void lmv_activate_target(struct lmv_obd
*lmv
,
57 struct lmv_tgt_desc
*tgt
,
60 if (tgt
->ltd_active
== activate
)
63 tgt
->ltd_active
= activate
;
64 lmv
->desc
.ld_active_tgt_count
+= (activate
? 1 : -1);
65 tgt
->ltd_exp
->exp_obd
->obd_inactive
= !activate
;
71 * -EINVAL : UUID can't be found in the LMV's target list
72 * -ENOTCONN: The UUID is found, but the target connection is bad (!)
73 * -EBADF : The UUID is found, but the OBD of the wrong type (!)
75 static int lmv_set_mdc_active(struct lmv_obd
*lmv
, const struct obd_uuid
*uuid
,
78 struct lmv_tgt_desc
*uninitialized_var(tgt
);
79 struct obd_device
*obd
;
83 CDEBUG(D_INFO
, "Searching in lmv %p for uuid %s (activate=%d)\n",
84 lmv
, uuid
->uuid
, activate
);
86 spin_lock(&lmv
->lmv_lock
);
87 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
89 if (!tgt
|| !tgt
->ltd_exp
)
92 CDEBUG(D_INFO
, "Target idx %d is %s conn %#llx\n", i
,
93 tgt
->ltd_uuid
.uuid
, tgt
->ltd_exp
->exp_handle
.h_cookie
);
95 if (obd_uuid_equals(uuid
, &tgt
->ltd_uuid
))
99 if (i
== lmv
->desc
.ld_tgt_count
) {
104 obd
= class_exp2obd(tgt
->ltd_exp
);
110 CDEBUG(D_INFO
, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
111 obd
->obd_name
, obd
->obd_uuid
.uuid
, obd
->obd_minor
, obd
,
112 obd
->obd_type
->typ_name
, i
);
113 LASSERT(strcmp(obd
->obd_type
->typ_name
, LUSTRE_MDC_NAME
) == 0);
115 if (tgt
->ltd_active
== activate
) {
116 CDEBUG(D_INFO
, "OBD %p already %sactive!\n", obd
,
117 activate
? "" : "in");
121 CDEBUG(D_INFO
, "Marking OBD %p %sactive\n", obd
,
122 activate
? "" : "in");
123 lmv_activate_target(lmv
, tgt
, activate
);
126 spin_unlock(&lmv
->lmv_lock
);
130 static struct obd_uuid
*lmv_get_uuid(struct obd_export
*exp
)
132 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
133 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
135 return tgt
? obd_get_uuid(tgt
->ltd_exp
) : NULL
;
138 static int lmv_notify(struct obd_device
*obd
, struct obd_device
*watched
,
139 enum obd_notify_event ev
, void *data
)
141 struct obd_connect_data
*conn_data
;
142 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
143 struct obd_uuid
*uuid
;
146 if (strcmp(watched
->obd_type
->typ_name
, LUSTRE_MDC_NAME
)) {
147 CERROR("unexpected notification of %s %s!\n",
148 watched
->obd_type
->typ_name
,
153 uuid
= &watched
->u
.cli
.cl_target_uuid
;
154 if (ev
== OBD_NOTIFY_ACTIVE
|| ev
== OBD_NOTIFY_INACTIVE
) {
156 * Set MDC as active before notifying the observer, so the
157 * observer can use the MDC normally.
159 rc
= lmv_set_mdc_active(lmv
, uuid
,
160 ev
== OBD_NOTIFY_ACTIVE
);
162 CERROR("%sactivation of %s failed: %d\n",
163 ev
== OBD_NOTIFY_ACTIVE
? "" : "de",
167 } else if (ev
== OBD_NOTIFY_OCD
) {
168 conn_data
= &watched
->u
.cli
.cl_import
->imp_connect_data
;
170 * XXX: Make sure that ocd_connect_flags from all targets are
171 * the same. Otherwise one of MDTs runs wrong version or
172 * something like this. --umka
174 obd
->obd_self_export
->exp_connect_data
= *conn_data
;
178 * Pass the notification up the chain.
180 if (obd
->obd_observer
)
181 rc
= obd_notify(obd
->obd_observer
, watched
, ev
, data
);
187 * This is fake connect function. Its purpose is to initialize lmv and say
188 * caller that everything is okay. Real connection will be performed later.
190 static int lmv_connect(const struct lu_env
*env
,
191 struct obd_export
**exp
, struct obd_device
*obd
,
192 struct obd_uuid
*cluuid
, struct obd_connect_data
*data
,
195 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
196 struct lustre_handle conn
= { 0 };
200 * We don't want to actually do the underlying connections more than
201 * once, so keep track.
204 if (lmv
->refcount
> 1) {
209 rc
= class_connect(&conn
, obd
, cluuid
);
211 CERROR("class_connection() returned %d\n", rc
);
215 *exp
= class_conn2export(&conn
);
216 class_export_get(*exp
);
220 lmv
->cluuid
= *cluuid
;
223 lmv
->conn_data
= *data
;
225 lmv
->lmv_tgts_kobj
= kobject_create_and_add("target_obds",
228 * All real clients should perform actual connection right away, because
229 * it is possible, that LMV will not have opportunity to connect targets
230 * and MDC stuff will be called directly, for instance while reading
231 * ../mdc/../kbytesfree procfs file, etc.
233 if (data
&& data
->ocd_connect_flags
& OBD_CONNECT_REAL
)
234 rc
= lmv_check_connect(obd
);
236 if (rc
&& lmv
->lmv_tgts_kobj
)
237 kobject_put(lmv
->lmv_tgts_kobj
);
242 static int lmv_init_ea_size(struct obd_export
*exp
, u32 easize
, u32 def_easize
)
244 struct obd_device
*obd
= exp
->exp_obd
;
245 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
250 if (lmv
->max_easize
< easize
) {
251 lmv
->max_easize
= easize
;
254 if (lmv
->max_def_easize
< def_easize
) {
255 lmv
->max_def_easize
= def_easize
;
262 if (lmv
->connected
== 0)
265 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
266 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[i
];
268 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
) {
269 CWARN("%s: NULL export for %d\n", obd
->obd_name
, i
);
273 rc
= md_init_ea_size(tgt
->ltd_exp
, easize
, def_easize
);
275 CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
276 obd
->obd_name
, i
, rc
);
283 #define MAX_STRING_SIZE 128
285 static int lmv_connect_mdc(struct obd_device
*obd
, struct lmv_tgt_desc
*tgt
)
287 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
288 struct obd_uuid
*cluuid
= &lmv
->cluuid
;
289 struct obd_uuid lmv_mdc_uuid
= { "LMV_MDC_UUID" };
290 struct obd_device
*mdc_obd
;
291 struct obd_export
*mdc_exp
;
292 struct lu_fld_target target
;
295 mdc_obd
= class_find_client_obd(&tgt
->ltd_uuid
, LUSTRE_MDC_NAME
,
298 CERROR("target %s not attached\n", tgt
->ltd_uuid
.uuid
);
302 CDEBUG(D_CONFIG
, "connect to %s(%s) - %s, %s FOR %s\n",
303 mdc_obd
->obd_name
, mdc_obd
->obd_uuid
.uuid
,
304 tgt
->ltd_uuid
.uuid
, obd
->obd_uuid
.uuid
, cluuid
->uuid
);
306 if (!mdc_obd
->obd_set_up
) {
307 CERROR("target %s is not set up\n", tgt
->ltd_uuid
.uuid
);
311 rc
= obd_connect(NULL
, &mdc_exp
, mdc_obd
, &lmv_mdc_uuid
,
312 &lmv
->conn_data
, NULL
);
314 CERROR("target %s connect error %d\n", tgt
->ltd_uuid
.uuid
, rc
);
319 * Init fid sequence client for this mdc and add new fld target.
321 rc
= obd_fid_init(mdc_obd
, mdc_exp
, LUSTRE_SEQ_METADATA
);
325 target
.ft_srv
= NULL
;
326 target
.ft_exp
= mdc_exp
;
327 target
.ft_idx
= tgt
->ltd_idx
;
329 fld_client_add_target(&lmv
->lmv_fld
, &target
);
331 rc
= obd_register_observer(mdc_obd
, obd
);
333 obd_disconnect(mdc_exp
);
334 CERROR("target %s register_observer error %d\n",
335 tgt
->ltd_uuid
.uuid
, rc
);
339 if (obd
->obd_observer
) {
341 * Tell the observer about the new target.
343 rc
= obd_notify(obd
->obd_observer
, mdc_exp
->exp_obd
,
345 (void *)(tgt
- lmv
->tgts
[0]));
347 obd_disconnect(mdc_exp
);
353 tgt
->ltd_exp
= mdc_exp
;
354 lmv
->desc
.ld_active_tgt_count
++;
356 md_init_ea_size(tgt
->ltd_exp
, lmv
->max_easize
, lmv
->max_def_easize
);
358 CDEBUG(D_CONFIG
, "Connected to %s(%s) successfully (%d)\n",
359 mdc_obd
->obd_name
, mdc_obd
->obd_uuid
.uuid
,
360 atomic_read(&obd
->obd_refcount
));
362 if (lmv
->lmv_tgts_kobj
)
363 /* Even if we failed to create the link, that's fine */
364 rc
= sysfs_create_link(lmv
->lmv_tgts_kobj
, &mdc_obd
->obd_kobj
,
369 static void lmv_del_target(struct lmv_obd
*lmv
, int index
)
371 if (!lmv
->tgts
[index
])
374 kfree(lmv
->tgts
[index
]);
375 lmv
->tgts
[index
] = NULL
;
378 static int lmv_add_target(struct obd_device
*obd
, struct obd_uuid
*uuidp
,
379 __u32 index
, int gen
)
381 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
382 struct obd_device
*mdc_obd
;
383 struct lmv_tgt_desc
*tgt
;
384 int orig_tgt_count
= 0;
387 CDEBUG(D_CONFIG
, "Target uuid: %s. index %d\n", uuidp
->uuid
, index
);
389 mdc_obd
= class_find_client_obd(uuidp
, LUSTRE_MDC_NAME
,
392 CERROR("%s: Target %s not attached: rc = %d\n",
393 obd
->obd_name
, uuidp
->uuid
, -EINVAL
);
397 mutex_lock(&lmv
->lmv_init_mutex
);
399 if ((index
< lmv
->tgts_size
) && lmv
->tgts
[index
]) {
400 tgt
= lmv
->tgts
[index
];
401 CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
403 obd_uuid2str(&tgt
->ltd_uuid
), index
, -EEXIST
);
404 mutex_unlock(&lmv
->lmv_init_mutex
);
408 if (index
>= lmv
->tgts_size
) {
409 /* We need to reallocate the lmv target array. */
410 struct lmv_tgt_desc
**newtgts
, **old
= NULL
;
414 while (newsize
< index
+ 1)
416 newtgts
= kcalloc(newsize
, sizeof(*newtgts
), GFP_NOFS
);
418 mutex_unlock(&lmv
->lmv_init_mutex
);
422 if (lmv
->tgts_size
) {
423 memcpy(newtgts
, lmv
->tgts
,
424 sizeof(*newtgts
) * lmv
->tgts_size
);
426 oldsize
= lmv
->tgts_size
;
430 lmv
->tgts_size
= newsize
;
434 CDEBUG(D_CONFIG
, "tgts: %p size: %d\n", lmv
->tgts
,
438 tgt
= kzalloc(sizeof(*tgt
), GFP_NOFS
);
440 mutex_unlock(&lmv
->lmv_init_mutex
);
444 mutex_init(&tgt
->ltd_fid_mutex
);
445 tgt
->ltd_idx
= index
;
446 tgt
->ltd_uuid
= *uuidp
;
448 lmv
->tgts
[index
] = tgt
;
449 if (index
>= lmv
->desc
.ld_tgt_count
) {
450 orig_tgt_count
= lmv
->desc
.ld_tgt_count
;
451 lmv
->desc
.ld_tgt_count
= index
+ 1;
454 if (!lmv
->connected
) {
455 /* lmv_check_connect() will connect this target. */
456 mutex_unlock(&lmv
->lmv_init_mutex
);
460 /* Otherwise let's connect it ourselves */
461 mutex_unlock(&lmv
->lmv_init_mutex
);
462 rc
= lmv_connect_mdc(obd
, tgt
);
464 spin_lock(&lmv
->lmv_lock
);
465 if (lmv
->desc
.ld_tgt_count
== index
+ 1)
466 lmv
->desc
.ld_tgt_count
= orig_tgt_count
;
467 memset(tgt
, 0, sizeof(*tgt
));
468 spin_unlock(&lmv
->lmv_lock
);
470 int easize
= sizeof(struct lmv_stripe_md
) +
471 lmv
->desc
.ld_tgt_count
* sizeof(struct lu_fid
);
472 lmv_init_ea_size(obd
->obd_self_export
, easize
, 0);
478 int lmv_check_connect(struct obd_device
*obd
)
480 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
481 struct lmv_tgt_desc
*tgt
;
489 mutex_lock(&lmv
->lmv_init_mutex
);
490 if (lmv
->connected
) {
491 mutex_unlock(&lmv
->lmv_init_mutex
);
495 if (lmv
->desc
.ld_tgt_count
== 0) {
496 mutex_unlock(&lmv
->lmv_init_mutex
);
497 CERROR("%s: no targets configured.\n", obd
->obd_name
);
504 mutex_unlock(&lmv
->lmv_init_mutex
);
505 CERROR("%s: no target configured for index 0.\n",
510 CDEBUG(D_CONFIG
, "Time to connect %s to %s\n",
511 lmv
->cluuid
.uuid
, obd
->obd_name
);
513 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
517 rc
= lmv_connect_mdc(obd
, tgt
);
522 class_export_put(lmv
->exp
);
524 easize
= lmv_mds_md_size(lmv
->desc
.ld_tgt_count
, LMV_MAGIC
);
525 lmv_init_ea_size(obd
->obd_self_export
, easize
, 0);
526 mutex_unlock(&lmv
->lmv_init_mutex
);
538 --lmv
->desc
.ld_active_tgt_count
;
539 rc2
= obd_disconnect(tgt
->ltd_exp
);
541 CERROR("LMV target %s disconnect on MDC idx %d: error %d\n",
542 tgt
->ltd_uuid
.uuid
, i
, rc2
);
546 class_disconnect(lmv
->exp
);
547 mutex_unlock(&lmv
->lmv_init_mutex
);
551 static int lmv_disconnect_mdc(struct obd_device
*obd
, struct lmv_tgt_desc
*tgt
)
553 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
554 struct obd_device
*mdc_obd
;
557 mdc_obd
= class_exp2obd(tgt
->ltd_exp
);
560 mdc_obd
->obd_force
= obd
->obd_force
;
561 mdc_obd
->obd_fail
= obd
->obd_fail
;
562 mdc_obd
->obd_no_recov
= obd
->obd_no_recov
;
564 if (lmv
->lmv_tgts_kobj
)
565 sysfs_remove_link(lmv
->lmv_tgts_kobj
,
569 rc
= obd_fid_fini(tgt
->ltd_exp
->exp_obd
);
571 CERROR("Can't finalize fids factory\n");
573 CDEBUG(D_INFO
, "Disconnected from %s(%s) successfully\n",
574 tgt
->ltd_exp
->exp_obd
->obd_name
,
575 tgt
->ltd_exp
->exp_obd
->obd_uuid
.uuid
);
577 obd_register_observer(tgt
->ltd_exp
->exp_obd
, NULL
);
578 rc
= obd_disconnect(tgt
->ltd_exp
);
580 if (tgt
->ltd_active
) {
581 CERROR("Target %s disconnect error %d\n",
582 tgt
->ltd_uuid
.uuid
, rc
);
586 lmv_activate_target(lmv
, tgt
, 0);
591 static int lmv_disconnect(struct obd_export
*exp
)
593 struct obd_device
*obd
= class_exp2obd(exp
);
594 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
602 * Only disconnect the underlying layers on the final disconnect.
605 if (lmv
->refcount
!= 0)
608 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
609 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
612 lmv_disconnect_mdc(obd
, lmv
->tgts
[i
]);
615 if (lmv
->lmv_tgts_kobj
)
616 kobject_put(lmv
->lmv_tgts_kobj
);
620 * This is the case when no real connection is established by
621 * lmv_check_connect().
624 class_export_put(exp
);
625 rc
= class_disconnect(exp
);
626 if (lmv
->refcount
== 0)
631 static int lmv_fid2path(struct obd_export
*exp
, int len
, void *karg
,
634 struct obd_device
*obddev
= class_exp2obd(exp
);
635 struct lmv_obd
*lmv
= &obddev
->u
.lmv
;
636 struct getinfo_fid2path
*gf
;
637 struct lmv_tgt_desc
*tgt
;
638 struct getinfo_fid2path
*remote_gf
= NULL
;
639 int remote_gf_size
= 0;
643 tgt
= lmv_find_target(lmv
, &gf
->gf_fid
);
648 rc
= obd_iocontrol(OBD_IOC_FID2PATH
, tgt
->ltd_exp
, len
, gf
, uarg
);
649 if (rc
!= 0 && rc
!= -EREMOTE
)
652 /* If remote_gf != NULL, it means just building the
653 * path on the remote MDT, copy this path segment to gf
656 struct getinfo_fid2path
*ori_gf
;
660 if (strlen(ori_gf
->gf_path
) +
661 strlen(gf
->gf_path
) > ori_gf
->gf_pathlen
) {
666 ptr
= ori_gf
->gf_path
;
668 memmove(ptr
+ strlen(gf
->gf_path
) + 1, ptr
,
669 strlen(ori_gf
->gf_path
));
671 strncpy(ptr
, gf
->gf_path
, strlen(gf
->gf_path
));
672 ptr
+= strlen(gf
->gf_path
);
676 CDEBUG(D_INFO
, "%s: get path %s "DFID
" rec: %llu ln: %u\n",
677 tgt
->ltd_exp
->exp_obd
->obd_name
,
678 gf
->gf_path
, PFID(&gf
->gf_fid
), gf
->gf_recno
,
684 /* sigh, has to go to another MDT to do path building further */
686 remote_gf_size
= sizeof(*remote_gf
) + PATH_MAX
;
687 remote_gf
= kzalloc(remote_gf_size
, GFP_NOFS
);
692 remote_gf
->gf_pathlen
= PATH_MAX
;
695 if (!fid_is_sane(&gf
->gf_fid
)) {
696 CERROR("%s: invalid FID "DFID
": rc = %d\n",
697 tgt
->ltd_exp
->exp_obd
->obd_name
,
698 PFID(&gf
->gf_fid
), -EINVAL
);
703 tgt
= lmv_find_target(lmv
, &gf
->gf_fid
);
709 remote_gf
->gf_fid
= gf
->gf_fid
;
710 remote_gf
->gf_recno
= -1;
711 remote_gf
->gf_linkno
= -1;
712 memset(remote_gf
->gf_path
, 0, remote_gf
->gf_pathlen
);
714 goto repeat_fid2path
;
721 static int lmv_hsm_req_count(struct lmv_obd
*lmv
,
722 const struct hsm_user_request
*hur
,
723 const struct lmv_tgt_desc
*tgt_mds
)
726 struct lmv_tgt_desc
*curr_tgt
;
728 /* count how many requests must be sent to the given target */
729 for (i
= 0; i
< hur
->hur_request
.hr_itemcount
; i
++) {
730 curr_tgt
= lmv_find_target(lmv
, &hur
->hur_user_item
[i
].hui_fid
);
731 if (IS_ERR(curr_tgt
))
732 return PTR_ERR(curr_tgt
);
733 if (obd_uuid_equals(&curr_tgt
->ltd_uuid
, &tgt_mds
->ltd_uuid
))
739 static int lmv_hsm_req_build(struct lmv_obd
*lmv
,
740 struct hsm_user_request
*hur_in
,
741 const struct lmv_tgt_desc
*tgt_mds
,
742 struct hsm_user_request
*hur_out
)
745 struct lmv_tgt_desc
*curr_tgt
;
747 /* build the hsm_user_request for the given target */
748 hur_out
->hur_request
= hur_in
->hur_request
;
750 for (i
= 0; i
< hur_in
->hur_request
.hr_itemcount
; i
++) {
751 curr_tgt
= lmv_find_target(lmv
,
752 &hur_in
->hur_user_item
[i
].hui_fid
);
753 if (IS_ERR(curr_tgt
))
754 return PTR_ERR(curr_tgt
);
755 if (obd_uuid_equals(&curr_tgt
->ltd_uuid
, &tgt_mds
->ltd_uuid
)) {
756 hur_out
->hur_user_item
[nr_out
] =
757 hur_in
->hur_user_item
[i
];
761 hur_out
->hur_request
.hr_itemcount
= nr_out
;
762 memcpy(hur_data(hur_out
), hur_data(hur_in
),
763 hur_in
->hur_request
.hr_data_len
);
768 static int lmv_hsm_ct_unregister(struct lmv_obd
*lmv
, unsigned int cmd
, int len
,
769 struct lustre_kernelcomm
*lk
,
774 /* unregister request (call from llapi_hsm_copytool_fini) */
775 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
776 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[i
];
778 if (!tgt
|| !tgt
->ltd_exp
)
781 /* best effort: try to clean as much as possible
782 * (continue on error)
784 obd_iocontrol(cmd
, lmv
->tgts
[i
]->ltd_exp
, len
, lk
, uarg
);
787 /* Whatever the result, remove copytool from kuc groups.
788 * Unreached coordinators will get EPIPE on next requests
789 * and will unregister automatically.
791 return libcfs_kkuc_group_rem(lk
->lk_uid
, lk
->lk_group
);
794 static int lmv_hsm_ct_register(struct lmv_obd
*lmv
, unsigned int cmd
, int len
,
795 struct lustre_kernelcomm
*lk
, void __user
*uarg
)
800 bool any_set
= false;
801 struct kkuc_ct_data kcd
= { 0 };
803 /* All or nothing: try to register to all MDS.
804 * In case of failure, unregister from previous MDS,
805 * except if it because of inactive target.
807 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
808 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[i
];
810 if (!tgt
|| !tgt
->ltd_exp
)
813 err
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, lk
, uarg
);
815 if (tgt
->ltd_active
) {
816 /* permanent error */
817 CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
818 tgt
->ltd_uuid
.uuid
, i
, cmd
, err
);
820 lk
->lk_flags
|= LK_FLG_STOP
;
821 /* unregister from previous MDS */
822 for (j
= 0; j
< i
; j
++) {
825 if (!tgt
|| !tgt
->ltd_exp
)
827 obd_iocontrol(cmd
, tgt
->ltd_exp
, len
,
832 /* else: transient error.
833 * kuc will register to the missing MDT when it is back
841 /* no registration done: return error */
844 /* at least one registration done, with no failure */
845 filp
= fget(lk
->lk_wfd
);
849 kcd
.kcd_magic
= KKUC_CT_DATA_MAGIC
;
850 kcd
.kcd_uuid
= lmv
->cluuid
;
851 kcd
.kcd_archive
= lk
->lk_data
;
853 rc
= libcfs_kkuc_group_add(filp
, lk
->lk_uid
, lk
->lk_group
,
861 static int lmv_iocontrol(unsigned int cmd
, struct obd_export
*exp
,
862 int len
, void *karg
, void __user
*uarg
)
864 struct obd_device
*obddev
= class_exp2obd(exp
);
865 struct lmv_obd
*lmv
= &obddev
->u
.lmv
;
866 struct lmv_tgt_desc
*tgt
= NULL
;
870 u32 count
= lmv
->desc
.ld_tgt_count
;
876 case IOC_OBD_STATFS
: {
877 struct obd_ioctl_data
*data
= karg
;
878 struct obd_device
*mdc_obd
;
879 struct obd_statfs stat_buf
= {0};
882 memcpy(&index
, data
->ioc_inlbuf2
, sizeof(__u32
));
886 tgt
= lmv
->tgts
[index
];
887 if (!tgt
|| !tgt
->ltd_active
)
890 mdc_obd
= class_exp2obd(tgt
->ltd_exp
);
895 if (copy_to_user(data
->ioc_pbuf2
, obd2cli_tgt(mdc_obd
),
896 min((int)data
->ioc_plen2
,
897 (int)sizeof(struct obd_uuid
))))
900 rc
= obd_statfs(NULL
, tgt
->ltd_exp
, &stat_buf
,
901 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS
),
905 if (copy_to_user(data
->ioc_pbuf1
, &stat_buf
,
906 min((int)data
->ioc_plen1
,
907 (int)sizeof(stat_buf
))))
911 case OBD_IOC_QUOTACTL
: {
912 struct if_quotactl
*qctl
= karg
;
913 struct obd_quotactl
*oqctl
;
915 if (qctl
->qc_valid
== QC_MDTIDX
) {
916 if (count
<= qctl
->qc_idx
)
919 tgt
= lmv
->tgts
[qctl
->qc_idx
];
920 if (!tgt
|| !tgt
->ltd_exp
)
922 } else if (qctl
->qc_valid
== QC_UUID
) {
923 for (i
= 0; i
< count
; i
++) {
927 if (!obd_uuid_equals(&tgt
->ltd_uuid
,
943 LASSERT(tgt
&& tgt
->ltd_exp
);
944 oqctl
= kzalloc(sizeof(*oqctl
), GFP_NOFS
);
948 QCTL_COPY(oqctl
, qctl
);
949 rc
= obd_quotactl(tgt
->ltd_exp
, oqctl
);
951 QCTL_COPY(qctl
, oqctl
);
952 qctl
->qc_valid
= QC_MDTIDX
;
953 qctl
->obd_uuid
= tgt
->ltd_uuid
;
958 case OBD_IOC_CHANGELOG_SEND
:
959 case OBD_IOC_CHANGELOG_CLEAR
: {
960 struct ioc_changelog
*icc
= karg
;
962 if (icc
->icc_mdtindex
>= count
)
965 tgt
= lmv
->tgts
[icc
->icc_mdtindex
];
966 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
)
968 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, sizeof(*icc
), icc
, NULL
);
971 case LL_IOC_GET_CONNECT_FLAGS
: {
974 if (!tgt
|| !tgt
->ltd_exp
)
976 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
979 case LL_IOC_FID2MDTIDX
: {
980 struct lu_fid
*fid
= karg
;
983 rc
= lmv_fld_lookup(lmv
, fid
, &mdt_index
);
988 * Note: this is from llite(see ll_dir_ioctl()), @uarg does not
989 * point to user space memory for FID2MDTIDX.
991 *(__u32
*)uarg
= mdt_index
;
994 case OBD_IOC_FID2PATH
: {
995 rc
= lmv_fid2path(exp
, len
, karg
, uarg
);
998 case LL_IOC_HSM_STATE_GET
:
999 case LL_IOC_HSM_STATE_SET
:
1000 case LL_IOC_HSM_ACTION
: {
1001 struct md_op_data
*op_data
= karg
;
1003 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1005 return PTR_ERR(tgt
);
1010 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1013 case LL_IOC_HSM_PROGRESS
: {
1014 const struct hsm_progress_kernel
*hpk
= karg
;
1016 tgt
= lmv_find_target(lmv
, &hpk
->hpk_fid
);
1018 return PTR_ERR(tgt
);
1019 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1022 case LL_IOC_HSM_REQUEST
: {
1023 struct hsm_user_request
*hur
= karg
;
1024 unsigned int reqcount
= hur
->hur_request
.hr_itemcount
;
1029 /* if the request is about a single fid
1030 * or if there is a single MDS, no need to split
1033 if (reqcount
== 1 || count
== 1) {
1034 tgt
= lmv_find_target(lmv
,
1035 &hur
->hur_user_item
[0].hui_fid
);
1037 return PTR_ERR(tgt
);
1038 rc
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1040 /* split fid list to their respective MDS */
1041 for (i
= 0; i
< count
; i
++) {
1042 struct hsm_user_request
*req
;
1047 if (!tgt
|| !tgt
->ltd_exp
)
1050 nr
= lmv_hsm_req_count(lmv
, hur
, tgt
);
1053 if (nr
== 0) /* nothing for this MDS */
1056 /* build a request with fids for this MDS */
1057 reqlen
= offsetof(typeof(*hur
),
1059 + hur
->hur_request
.hr_data_len
;
1060 req
= libcfs_kvzalloc(reqlen
, GFP_NOFS
);
1064 rc1
= lmv_hsm_req_build(lmv
, hur
, tgt
, req
);
1068 rc1
= obd_iocontrol(cmd
, tgt
->ltd_exp
, reqlen
,
1071 if (rc1
!= 0 && rc
== 0)
1078 case LL_IOC_LOV_SWAP_LAYOUTS
: {
1079 struct md_op_data
*op_data
= karg
;
1080 struct lmv_tgt_desc
*tgt1
, *tgt2
;
1082 tgt1
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1084 return PTR_ERR(tgt1
);
1086 tgt2
= lmv_find_target(lmv
, &op_data
->op_fid2
);
1088 return PTR_ERR(tgt2
);
1090 if (!tgt1
->ltd_exp
|| !tgt2
->ltd_exp
)
1093 /* only files on same MDT can have their layouts swapped */
1094 if (tgt1
->ltd_idx
!= tgt2
->ltd_idx
)
1097 rc
= obd_iocontrol(cmd
, tgt1
->ltd_exp
, len
, karg
, uarg
);
1100 case LL_IOC_HSM_CT_START
: {
1101 struct lustre_kernelcomm
*lk
= karg
;
1103 if (lk
->lk_flags
& LK_FLG_STOP
)
1104 rc
= lmv_hsm_ct_unregister(lmv
, cmd
, len
, lk
, uarg
);
1106 rc
= lmv_hsm_ct_register(lmv
, cmd
, len
, lk
, uarg
);
1110 for (i
= 0; i
< count
; i
++) {
1111 struct obd_device
*mdc_obd
;
1115 if (!tgt
|| !tgt
->ltd_exp
)
1117 /* ll_umount_begin() sets force flag but for lmv, not
1118 * mdc. Let's pass it through
1120 mdc_obd
= class_exp2obd(tgt
->ltd_exp
);
1121 mdc_obd
->obd_force
= obddev
->obd_force
;
1122 err
= obd_iocontrol(cmd
, tgt
->ltd_exp
, len
, karg
, uarg
);
1124 if (tgt
->ltd_active
) {
1125 CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
1126 tgt
->ltd_uuid
.uuid
, i
, cmd
, err
);
1141 * This is _inode_ placement policy function (not name).
1143 static int lmv_placement_policy(struct obd_device
*obd
,
1144 struct md_op_data
*op_data
, u32
*mds
)
1146 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1150 if (lmv
->desc
.ld_tgt_count
== 1) {
1155 if (op_data
->op_default_stripe_offset
!= -1) {
1156 *mds
= op_data
->op_default_stripe_offset
;
1161 * If stripe_offset is provided during setdirstripe
1162 * (setdirstripe -i xx), xx MDS will be chosen.
1164 if (op_data
->op_cli_flags
& CLI_SET_MEA
&& op_data
->op_data
) {
1165 struct lmv_user_md
*lum
;
1167 lum
= op_data
->op_data
;
1168 if (le32_to_cpu(lum
->lum_stripe_offset
) != (__u32
)-1) {
1169 *mds
= le32_to_cpu(lum
->lum_stripe_offset
);
1172 * -1 means default, which will be in the same MDT with
1175 *mds
= op_data
->op_mds
;
1176 lum
->lum_stripe_offset
= cpu_to_le32(op_data
->op_mds
);
1180 * Allocate new fid on target according to operation type and
1183 *mds
= op_data
->op_mds
;
1189 int __lmv_fid_alloc(struct lmv_obd
*lmv
, struct lu_fid
*fid
, u32 mds
)
1191 struct lmv_tgt_desc
*tgt
;
1194 tgt
= lmv_get_target(lmv
, mds
, NULL
);
1196 return PTR_ERR(tgt
);
1199 * New seq alloc and FLD setup should be atomic. Otherwise we may find
1200 * on server that seq in new allocated fid is not yet known.
1202 mutex_lock(&tgt
->ltd_fid_mutex
);
1204 if (tgt
->ltd_active
== 0 || !tgt
->ltd_exp
) {
1210 * Asking underlaying tgt layer to allocate new fid.
1212 rc
= obd_fid_alloc(NULL
, tgt
->ltd_exp
, fid
, NULL
);
1214 LASSERT(fid_is_sane(fid
));
1219 mutex_unlock(&tgt
->ltd_fid_mutex
);
1223 int lmv_fid_alloc(const struct lu_env
*env
, struct obd_export
*exp
,
1224 struct lu_fid
*fid
, struct md_op_data
*op_data
)
1226 struct obd_device
*obd
= class_exp2obd(exp
);
1227 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1234 rc
= lmv_placement_policy(obd
, op_data
, &mds
);
1236 CERROR("Can't get target for allocating fid, rc %d\n",
1241 rc
= __lmv_fid_alloc(lmv
, fid
, mds
);
1243 CERROR("Can't alloc new fid, rc %d\n", rc
);
1250 static int lmv_setup(struct obd_device
*obd
, struct lustre_cfg
*lcfg
)
1252 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1253 struct lprocfs_static_vars lvars
= { NULL
};
1254 struct lmv_desc
*desc
;
1257 if (LUSTRE_CFG_BUFLEN(lcfg
, 1) < 1) {
1258 CERROR("LMV setup requires a descriptor\n");
1262 desc
= (struct lmv_desc
*)lustre_cfg_buf(lcfg
, 1);
1263 if (sizeof(*desc
) > LUSTRE_CFG_BUFLEN(lcfg
, 1)) {
1264 CERROR("Lmv descriptor size wrong: %d > %d\n",
1265 (int)sizeof(*desc
), LUSTRE_CFG_BUFLEN(lcfg
, 1));
1269 lmv
->tgts_size
= 32U;
1270 lmv
->tgts
= kcalloc(lmv
->tgts_size
, sizeof(*lmv
->tgts
), GFP_NOFS
);
1274 obd_str2uuid(&lmv
->desc
.ld_uuid
, desc
->ld_uuid
.uuid
);
1275 lmv
->desc
.ld_tgt_count
= 0;
1276 lmv
->desc
.ld_active_tgt_count
= 0;
1277 lmv
->max_def_easize
= 0;
1278 lmv
->max_easize
= 0;
1280 spin_lock_init(&lmv
->lmv_lock
);
1281 mutex_init(&lmv
->lmv_init_mutex
);
1283 lprocfs_lmv_init_vars(&lvars
);
1285 lprocfs_obd_setup(obd
, lvars
.obd_vars
, lvars
.sysfs_vars
);
1286 rc
= ldebugfs_seq_create(obd
->obd_debugfs_entry
, "target_obd",
1287 0444, &lmv_proc_target_fops
, obd
);
1289 CWARN("%s: error adding LMV target_obd file: rc = %d\n",
1291 rc
= fld_client_init(&lmv
->lmv_fld
, obd
->obd_name
,
1292 LUSTRE_CLI_FLD_HASH_DHT
);
1294 CERROR("Can't init FLD, err %d\n", rc
);
1304 static int lmv_cleanup(struct obd_device
*obd
)
1306 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1308 fld_client_fini(&lmv
->lmv_fld
);
1312 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1315 lmv_del_target(lmv
, i
);
1323 static int lmv_process_config(struct obd_device
*obd
, u32 len
, void *buf
)
1325 struct lustre_cfg
*lcfg
= buf
;
1326 struct obd_uuid obd_uuid
;
1331 switch (lcfg
->lcfg_command
) {
1333 /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID
1334 * 2:0 3:1 4:lustre-MDT0000-mdc_UUID
1336 if (LUSTRE_CFG_BUFLEN(lcfg
, 1) > sizeof(obd_uuid
.uuid
)) {
1341 obd_str2uuid(&obd_uuid
, lustre_cfg_buf(lcfg
, 1));
1343 if (sscanf(lustre_cfg_buf(lcfg
, 2), "%u", &index
) != 1) {
1347 if (sscanf(lustre_cfg_buf(lcfg
, 3), "%d", &gen
) != 1) {
1351 rc
= lmv_add_target(obd
, &obd_uuid
, index
, gen
);
1354 CERROR("Unknown command: %d\n", lcfg
->lcfg_command
);
1362 static int lmv_statfs(const struct lu_env
*env
, struct obd_export
*exp
,
1363 struct obd_statfs
*osfs
, __u64 max_age
, __u32 flags
)
1365 struct obd_device
*obd
= class_exp2obd(exp
);
1366 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1367 struct obd_statfs
*temp
;
1371 rc
= lmv_check_connect(obd
);
1375 temp
= kzalloc(sizeof(*temp
), GFP_NOFS
);
1379 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1380 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1383 rc
= obd_statfs(env
, lmv
->tgts
[i
]->ltd_exp
, temp
,
1386 CERROR("can't stat MDS #%d (%s), error %d\n", i
,
1387 lmv
->tgts
[i
]->ltd_exp
->exp_obd
->obd_name
,
1394 /* If the statfs is from mount, it will needs
1395 * retrieve necessary information from MDT0.
1396 * i.e. mount does not need the merged osfs
1398 * And also clients can be mounted as long as
1399 * MDT0 is in service
1401 if (flags
& OBD_STATFS_FOR_MDT0
)
1404 osfs
->os_bavail
+= temp
->os_bavail
;
1405 osfs
->os_blocks
+= temp
->os_blocks
;
1406 osfs
->os_ffree
+= temp
->os_ffree
;
1407 osfs
->os_files
+= temp
->os_files
;
1416 static int lmv_getstatus(struct obd_export
*exp
,
1419 struct obd_device
*obd
= exp
->exp_obd
;
1420 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1423 rc
= lmv_check_connect(obd
);
1427 return md_getstatus(lmv
->tgts
[0]->ltd_exp
, fid
);
1430 static int lmv_getxattr(struct obd_export
*exp
, const struct lu_fid
*fid
,
1431 u64 valid
, const char *name
,
1432 const char *input
, int input_size
, int output_size
,
1433 int flags
, struct ptlrpc_request
**request
)
1435 struct obd_device
*obd
= exp
->exp_obd
;
1436 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1437 struct lmv_tgt_desc
*tgt
;
1440 rc
= lmv_check_connect(obd
);
1444 tgt
= lmv_find_target(lmv
, fid
);
1446 return PTR_ERR(tgt
);
1448 return md_getxattr(tgt
->ltd_exp
, fid
, valid
, name
, input
,
1449 input_size
, output_size
, flags
, request
);
1452 static int lmv_setxattr(struct obd_export
*exp
, const struct lu_fid
*fid
,
1453 u64 valid
, const char *name
,
1454 const char *input
, int input_size
, int output_size
,
1455 int flags
, __u32 suppgid
,
1456 struct ptlrpc_request
**request
)
1458 struct obd_device
*obd
= exp
->exp_obd
;
1459 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1460 struct lmv_tgt_desc
*tgt
;
1463 rc
= lmv_check_connect(obd
);
1467 tgt
= lmv_find_target(lmv
, fid
);
1469 return PTR_ERR(tgt
);
1471 return md_setxattr(tgt
->ltd_exp
, fid
, valid
, name
, input
,
1472 input_size
, output_size
, flags
, suppgid
,
1476 static int lmv_getattr(struct obd_export
*exp
, struct md_op_data
*op_data
,
1477 struct ptlrpc_request
**request
)
1479 struct obd_device
*obd
= exp
->exp_obd
;
1480 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1481 struct lmv_tgt_desc
*tgt
;
1484 rc
= lmv_check_connect(obd
);
1488 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1490 return PTR_ERR(tgt
);
1492 if (op_data
->op_flags
& MF_GET_MDT_IDX
) {
1493 op_data
->op_mds
= tgt
->ltd_idx
;
1497 return md_getattr(tgt
->ltd_exp
, op_data
, request
);
1500 static int lmv_null_inode(struct obd_export
*exp
, const struct lu_fid
*fid
)
1502 struct obd_device
*obd
= exp
->exp_obd
;
1503 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1507 rc
= lmv_check_connect(obd
);
1511 CDEBUG(D_INODE
, "CBDATA for "DFID
"\n", PFID(fid
));
1514 * With DNE every object can have two locks in different namespaces:
1515 * lookup lock in space of MDT storing direntry and update/open lock in
1516 * space of MDT storing inode.
1518 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
1519 if (!lmv
->tgts
[i
] || !lmv
->tgts
[i
]->ltd_exp
)
1521 md_null_inode(lmv
->tgts
[i
]->ltd_exp
, fid
);
1527 static int lmv_close(struct obd_export
*exp
, struct md_op_data
*op_data
,
1528 struct md_open_data
*mod
, struct ptlrpc_request
**request
)
1530 struct obd_device
*obd
= exp
->exp_obd
;
1531 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1532 struct lmv_tgt_desc
*tgt
;
1535 rc
= lmv_check_connect(obd
);
1539 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1541 return PTR_ERR(tgt
);
1543 CDEBUG(D_INODE
, "CLOSE "DFID
"\n", PFID(&op_data
->op_fid1
));
1544 return md_close(tgt
->ltd_exp
, op_data
, mod
, request
);
1548 * Choosing the MDT by name or FID in @op_data.
1549 * For non-striped directory, it will locate MDT by fid.
1550 * For striped-directory, it will locate MDT by name. And also
1551 * it will reset op_fid1 with the FID of the chosen stripe.
1553 static struct lmv_tgt_desc
*
1554 lmv_locate_target_for_name(struct lmv_obd
*lmv
, struct lmv_stripe_md
*lsm
,
1555 const char *name
, int namelen
, struct lu_fid
*fid
,
1558 const struct lmv_oinfo
*oinfo
;
1559 struct lmv_tgt_desc
*tgt
;
1561 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH
)) {
1562 if (cfs_fail_val
>= lsm
->lsm_md_stripe_count
)
1563 return ERR_PTR(-EBADF
);
1564 oinfo
= &lsm
->lsm_md_oinfo
[cfs_fail_val
];
1566 oinfo
= lsm_name_to_stripe_info(lsm
, name
, namelen
);
1568 return ERR_CAST(oinfo
);
1572 *fid
= oinfo
->lmo_fid
;
1574 *mds
= oinfo
->lmo_mds
;
1576 tgt
= lmv_get_target(lmv
, oinfo
->lmo_mds
, NULL
);
1578 CDEBUG(D_INFO
, "locate on mds %u " DFID
"\n", oinfo
->lmo_mds
,
1579 PFID(&oinfo
->lmo_fid
));
1584 * Locate mds by fid or name
1586 * For striped directory (lsm != NULL), it will locate the stripe
1587 * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
1588 * is unknown, it will return -EBADFD, and lmv_intent_lookup might need
1589 * walk through all of stripes to locate the entry.
1591 * For normal direcotry, it will locate MDS by FID directly.
1592 * \param[in] lmv LMV device
1593 * \param[in] op_data client MD stack parameters, name, namelen
1595 * \param[in] fid object FID used to locate MDS.
1597 * retval pointer to the lmv_tgt_desc if succeed.
1598 * ERR_PTR(errno) if failed.
1600 struct lmv_tgt_desc
*
1601 lmv_locate_mds(struct lmv_obd
*lmv
, struct md_op_data
*op_data
,
1604 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
1605 struct lmv_tgt_desc
*tgt
;
1608 * During creating VOLATILE file, it should honor the mdt
1609 * index if the file under striped dir is being restored, see
1612 if (op_data
->op_bias
& MDS_CREATE_VOLATILE
&&
1613 (int)op_data
->op_mds
!= -1) {
1616 tgt
= lmv_get_target(lmv
, op_data
->op_mds
, NULL
);
1621 /* refill the right parent fid */
1622 for (i
= 0; i
< lsm
->lsm_md_stripe_count
; i
++) {
1623 struct lmv_oinfo
*oinfo
;
1625 oinfo
= &lsm
->lsm_md_oinfo
[i
];
1626 if (oinfo
->lmo_mds
== op_data
->op_mds
) {
1627 *fid
= oinfo
->lmo_fid
;
1632 if (i
== lsm
->lsm_md_stripe_count
)
1633 *fid
= lsm
->lsm_md_oinfo
[0].lmo_fid
;
1639 if (!lsm
|| !op_data
->op_namelen
) {
1640 tgt
= lmv_find_target(lmv
, fid
);
1644 op_data
->op_mds
= tgt
->ltd_idx
;
1649 return lmv_locate_target_for_name(lmv
, lsm
, op_data
->op_name
,
1650 op_data
->op_namelen
, fid
,
1654 static int lmv_create(struct obd_export
*exp
, struct md_op_data
*op_data
,
1655 const void *data
, size_t datalen
, umode_t mode
,
1656 uid_t uid
, gid_t gid
, cfs_cap_t cap_effective
,
1657 __u64 rdev
, struct ptlrpc_request
**request
)
1659 struct obd_device
*obd
= exp
->exp_obd
;
1660 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1661 struct lmv_tgt_desc
*tgt
;
1664 rc
= lmv_check_connect(obd
);
1668 if (!lmv
->desc
.ld_active_tgt_count
)
1671 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1673 return PTR_ERR(tgt
);
1675 CDEBUG(D_INODE
, "CREATE name '%.*s' on "DFID
" -> mds #%x\n",
1676 (int)op_data
->op_namelen
, op_data
->op_name
,
1677 PFID(&op_data
->op_fid1
), op_data
->op_mds
);
1679 rc
= lmv_fid_alloc(NULL
, exp
, &op_data
->op_fid2
, op_data
);
1683 if (exp_connect_flags(exp
) & OBD_CONNECT_DIR_STRIPE
) {
1685 * Send the create request to the MDT where the object
1688 tgt
= lmv_find_target(lmv
, &op_data
->op_fid2
);
1690 return PTR_ERR(tgt
);
1692 op_data
->op_mds
= tgt
->ltd_idx
;
1694 CDEBUG(D_CONFIG
, "Server doesn't support striped dirs\n");
1697 CDEBUG(D_INODE
, "CREATE obj "DFID
" -> mds #%x\n",
1698 PFID(&op_data
->op_fid1
), op_data
->op_mds
);
1700 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
;
1701 rc
= md_create(tgt
->ltd_exp
, op_data
, data
, datalen
, mode
, uid
, gid
,
1702 cap_effective
, rdev
, request
);
1707 CDEBUG(D_INODE
, "Created - "DFID
"\n", PFID(&op_data
->op_fid2
));
1713 lmv_enqueue(struct obd_export
*exp
, struct ldlm_enqueue_info
*einfo
,
1714 const union ldlm_policy_data
*policy
,
1715 struct lookup_intent
*it
, struct md_op_data
*op_data
,
1716 struct lustre_handle
*lockh
, __u64 extra_lock_flags
)
1718 struct obd_device
*obd
= exp
->exp_obd
;
1719 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1720 struct lmv_tgt_desc
*tgt
;
1723 rc
= lmv_check_connect(obd
);
1727 CDEBUG(D_INODE
, "ENQUEUE '%s' on "DFID
"\n",
1728 LL_IT2STR(it
), PFID(&op_data
->op_fid1
));
1730 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1732 return PTR_ERR(tgt
);
1734 CDEBUG(D_INODE
, "ENQUEUE '%s' on " DFID
" -> mds #%u\n",
1735 LL_IT2STR(it
), PFID(&op_data
->op_fid1
), tgt
->ltd_idx
);
1737 return md_enqueue(tgt
->ltd_exp
, einfo
, policy
, it
, op_data
, lockh
,
1742 lmv_getattr_name(struct obd_export
*exp
, struct md_op_data
*op_data
,
1743 struct ptlrpc_request
**preq
)
1745 struct ptlrpc_request
*req
= NULL
;
1746 struct obd_device
*obd
= exp
->exp_obd
;
1747 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1748 struct lmv_tgt_desc
*tgt
;
1749 struct mdt_body
*body
;
1752 rc
= lmv_check_connect(obd
);
1756 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
1758 return PTR_ERR(tgt
);
1760 CDEBUG(D_INODE
, "GETATTR_NAME for %*s on " DFID
" -> mds #%u\n",
1761 (int)op_data
->op_namelen
, op_data
->op_name
,
1762 PFID(&op_data
->op_fid1
), tgt
->ltd_idx
);
1764 rc
= md_getattr_name(tgt
->ltd_exp
, op_data
, preq
);
1768 body
= req_capsule_server_get(&(*preq
)->rq_pill
, &RMF_MDT_BODY
);
1769 if (body
->mbo_valid
& OBD_MD_MDS
) {
1770 struct lu_fid rid
= body
->mbo_fid1
;
1772 CDEBUG(D_INODE
, "Request attrs for "DFID
"\n",
1775 tgt
= lmv_find_target(lmv
, &rid
);
1777 ptlrpc_req_finished(*preq
);
1779 return PTR_ERR(tgt
);
1782 op_data
->op_fid1
= rid
;
1783 op_data
->op_valid
|= OBD_MD_FLCROSSREF
;
1784 op_data
->op_namelen
= 0;
1785 op_data
->op_name
= NULL
;
1786 rc
= md_getattr_name(tgt
->ltd_exp
, op_data
, &req
);
1787 ptlrpc_req_finished(*preq
);
/* Map a MF_MDC_CANCEL_FID* flag to the corresponding FID in op_data. */
#define md_op_data_fid(op_data, fl)                     \
	(fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 : \
	 fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 : \
	 fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 : \
	 fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \
	 NULL)
1801 static int lmv_early_cancel(struct obd_export
*exp
, struct lmv_tgt_desc
*tgt
,
1802 struct md_op_data
*op_data
, int op_tgt
,
1803 enum ldlm_mode mode
, int bits
, int flag
)
1805 struct lu_fid
*fid
= md_op_data_fid(op_data
, flag
);
1806 struct obd_device
*obd
= exp
->exp_obd
;
1807 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1808 union ldlm_policy_data policy
= { { 0 } };
1811 if (!fid_is_sane(fid
))
1815 tgt
= lmv_find_target(lmv
, fid
);
1817 return PTR_ERR(tgt
);
1820 if (tgt
->ltd_idx
!= op_tgt
) {
1821 CDEBUG(D_INODE
, "EARLY_CANCEL on "DFID
"\n", PFID(fid
));
1822 policy
.l_inodebits
.bits
= bits
;
1823 rc
= md_cancel_unused(tgt
->ltd_exp
, fid
, &policy
,
1824 mode
, LCF_ASYNC
, NULL
);
1827 "EARLY_CANCEL skip operation target %d on "DFID
"\n",
1829 op_data
->op_flags
|= flag
;
1837 * llite passes fid of an target inode in op_data->op_fid1 and id of directory in
1840 static int lmv_link(struct obd_export
*exp
, struct md_op_data
*op_data
,
1841 struct ptlrpc_request
**request
)
1843 struct obd_device
*obd
= exp
->exp_obd
;
1844 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1845 struct lmv_tgt_desc
*tgt
;
1848 rc
= lmv_check_connect(obd
);
1852 LASSERT(op_data
->op_namelen
!= 0);
1854 CDEBUG(D_INODE
, "LINK "DFID
":%*s to "DFID
"\n",
1855 PFID(&op_data
->op_fid2
), (int)op_data
->op_namelen
,
1856 op_data
->op_name
, PFID(&op_data
->op_fid1
));
1858 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
1859 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
1860 op_data
->op_cap
= cfs_curproc_cap_pack();
1861 if (op_data
->op_mea2
) {
1862 struct lmv_stripe_md
*lsm
= op_data
->op_mea2
;
1863 const struct lmv_oinfo
*oinfo
;
1865 oinfo
= lsm_name_to_stripe_info(lsm
, op_data
->op_name
,
1866 op_data
->op_namelen
);
1868 return PTR_ERR(oinfo
);
1870 op_data
->op_fid2
= oinfo
->lmo_fid
;
1873 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid2
);
1875 return PTR_ERR(tgt
);
1878 * Cancel UPDATE lock on child (fid1).
1880 op_data
->op_flags
|= MF_MDC_CANCEL_FID2
;
1881 rc
= lmv_early_cancel(exp
, NULL
, op_data
, tgt
->ltd_idx
, LCK_EX
,
1882 MDS_INODELOCK_UPDATE
, MF_MDC_CANCEL_FID1
);
1886 return md_link(tgt
->ltd_exp
, op_data
, request
);
1889 static int lmv_rename(struct obd_export
*exp
, struct md_op_data
*op_data
,
1890 const char *old
, size_t oldlen
,
1891 const char *new, size_t newlen
,
1892 struct ptlrpc_request
**request
)
1894 struct obd_device
*obd
= exp
->exp_obd
;
1895 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
1896 struct obd_export
*target_exp
;
1897 struct lmv_tgt_desc
*src_tgt
;
1898 struct lmv_tgt_desc
*tgt_tgt
;
1899 struct mdt_body
*body
;
1902 LASSERT(oldlen
!= 0);
1904 CDEBUG(D_INODE
, "RENAME %.*s in "DFID
":%d to %.*s in "DFID
":%d\n",
1905 (int)oldlen
, old
, PFID(&op_data
->op_fid1
),
1906 op_data
->op_mea1
? op_data
->op_mea1
->lsm_md_stripe_count
: 0,
1907 (int)newlen
, new, PFID(&op_data
->op_fid2
),
1908 op_data
->op_mea2
? op_data
->op_mea2
->lsm_md_stripe_count
: 0);
1910 rc
= lmv_check_connect(obd
);
1914 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
1915 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
1916 op_data
->op_cap
= cfs_curproc_cap_pack();
1918 if (op_data
->op_cli_flags
& CLI_MIGRATE
) {
1919 LASSERTF(fid_is_sane(&op_data
->op_fid3
), "invalid FID "DFID
"\n",
1920 PFID(&op_data
->op_fid3
));
1922 if (op_data
->op_mea1
) {
1923 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
1924 struct lmv_tgt_desc
*tmp
;
1926 /* Fix the parent fid for striped dir */
1927 tmp
= lmv_locate_target_for_name(lmv
, lsm
, old
,
1932 return PTR_ERR(tmp
);
1935 rc
= lmv_fid_alloc(NULL
, exp
, &op_data
->op_fid2
, op_data
);
1938 src_tgt
= lmv_find_target(lmv
, &op_data
->op_fid3
);
1939 if (IS_ERR(src_tgt
))
1940 return PTR_ERR(src_tgt
);
1942 target_exp
= src_tgt
->ltd_exp
;
1944 if (op_data
->op_mea1
) {
1945 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
1947 src_tgt
= lmv_locate_target_for_name(lmv
, lsm
, old
,
1952 src_tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1954 if (IS_ERR(src_tgt
))
1955 return PTR_ERR(src_tgt
);
1957 if (op_data
->op_mea2
) {
1958 struct lmv_stripe_md
*lsm
= op_data
->op_mea2
;
1960 tgt_tgt
= lmv_locate_target_for_name(lmv
, lsm
, new,
1965 tgt_tgt
= lmv_find_target(lmv
, &op_data
->op_fid2
);
1967 if (IS_ERR(tgt_tgt
))
1968 return PTR_ERR(tgt_tgt
);
1970 target_exp
= tgt_tgt
->ltd_exp
;
1974 * LOOKUP lock on src child (fid3) should also be cancelled for
1975 * src_tgt in mdc_rename.
1977 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
| MF_MDC_CANCEL_FID3
;
1980 * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
1983 rc
= lmv_early_cancel(exp
, NULL
, op_data
, src_tgt
->ltd_idx
,
1984 LCK_EX
, MDS_INODELOCK_UPDATE
,
1985 MF_MDC_CANCEL_FID2
);
1989 * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt.
1991 if (fid_is_sane(&op_data
->op_fid3
)) {
1992 struct lmv_tgt_desc
*tgt
;
1994 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
1996 return PTR_ERR(tgt
);
1998 /* Cancel LOOKUP lock on its parent */
1999 rc
= lmv_early_cancel(exp
, tgt
, op_data
, src_tgt
->ltd_idx
,
2000 LCK_EX
, MDS_INODELOCK_LOOKUP
,
2001 MF_MDC_CANCEL_FID3
);
2005 rc
= lmv_early_cancel(exp
, NULL
, op_data
, src_tgt
->ltd_idx
,
2006 LCK_EX
, MDS_INODELOCK_FULL
,
2007 MF_MDC_CANCEL_FID3
);
2014 * Cancel all the locks on tgt child (fid4).
2016 if (fid_is_sane(&op_data
->op_fid4
)) {
2017 struct lmv_tgt_desc
*tgt
;
2019 rc
= lmv_early_cancel(exp
, NULL
, op_data
, src_tgt
->ltd_idx
,
2020 LCK_EX
, MDS_INODELOCK_FULL
,
2021 MF_MDC_CANCEL_FID4
);
2025 tgt
= lmv_find_target(lmv
, &op_data
->op_fid4
);
2027 return PTR_ERR(tgt
);
2030 * Since the target child might be destroyed, and it might
2031 * become orphan, and we can only check orphan on the local
2032 * MDT right now, so we send rename request to the MDT where
2033 * target child is located. If target child does not exist,
2034 * then it will send the request to the target parent
2036 target_exp
= tgt
->ltd_exp
;
2039 rc
= md_rename(target_exp
, op_data
, old
, oldlen
, new, newlen
, request
);
2040 if (rc
&& rc
!= -EREMOTE
)
2043 body
= req_capsule_server_get(&(*request
)->rq_pill
, &RMF_MDT_BODY
);
2047 /* Not cross-ref case, just get out of here. */
2048 if (likely(!(body
->mbo_valid
& OBD_MD_MDS
)))
2051 CDEBUG(D_INODE
, "%s: try rename to another MDT for " DFID
"\n",
2052 exp
->exp_obd
->obd_name
, PFID(&body
->mbo_fid1
));
2054 op_data
->op_fid4
= body
->mbo_fid1
;
2055 ptlrpc_req_finished(*request
);
2060 static int lmv_setattr(struct obd_export
*exp
, struct md_op_data
*op_data
,
2061 void *ea
, size_t ealen
, struct ptlrpc_request
**request
)
2063 struct obd_device
*obd
= exp
->exp_obd
;
2064 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2065 struct lmv_tgt_desc
*tgt
;
2068 rc
= lmv_check_connect(obd
);
2072 CDEBUG(D_INODE
, "SETATTR for "DFID
", valid 0x%x\n",
2073 PFID(&op_data
->op_fid1
), op_data
->op_attr
.ia_valid
);
2075 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
;
2076 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
2078 return PTR_ERR(tgt
);
2080 return md_setattr(tgt
->ltd_exp
, op_data
, ea
, ealen
, request
);
2083 static int lmv_sync(struct obd_export
*exp
, const struct lu_fid
*fid
,
2084 struct ptlrpc_request
**request
)
2086 struct obd_device
*obd
= exp
->exp_obd
;
2087 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2088 struct lmv_tgt_desc
*tgt
;
2091 rc
= lmv_check_connect(obd
);
2095 tgt
= lmv_find_target(lmv
, fid
);
2097 return PTR_ERR(tgt
);
2099 return md_sync(tgt
->ltd_exp
, fid
, request
);
2103 * Get current minimum entry from striped directory
2105 * This function will search the dir entry, whose hash value is the
2106 * closest(>=) to @hash_offset, from all of sub-stripes, and it is
2107 * only being called for striped directory.
2109 * \param[in] exp export of LMV
2110 * \param[in] op_data parameters transferred beween client MD stack
2111 * stripe_information will be included in this
2113 * \param[in] cb_op ldlm callback being used in enqueue in
2115 * \param[in] hash_offset the hash value, which is used to locate
2116 * minum(closet) dir entry
2117 * \param[in|out] stripe_offset the caller use this to indicate the stripe
2118 * index of last entry, so to avoid hash conflict
2119 * between stripes. It will also be used to
2120 * return the stripe index of current dir entry.
2121 * \param[in|out] entp the minum entry and it also is being used
2122 * to input the last dir entry to resolve the
2125 * \param[out] ppage the page which holds the minum entry
2127 * \retval = 0 get the entry successfully
2128 * negative errno (< 0) does not get the entry
2130 static int lmv_get_min_striped_entry(struct obd_export
*exp
,
2131 struct md_op_data
*op_data
,
2132 struct md_callback
*cb_op
,
2133 __u64 hash_offset
, int *stripe_offset
,
2134 struct lu_dirent
**entp
,
2135 struct page
**ppage
)
2137 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
2138 struct obd_device
*obd
= exp
->exp_obd
;
2139 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2140 struct lu_dirent
*min_ent
= NULL
;
2141 struct page
*min_page
= NULL
;
2142 struct lmv_tgt_desc
*tgt
;
2148 stripe_count
= lsm
->lsm_md_stripe_count
;
2149 for (i
= 0; i
< stripe_count
; i
++) {
2150 __u64 stripe_hash
= hash_offset
;
2151 struct lu_dirent
*ent
= NULL
;
2152 struct page
*page
= NULL
;
2153 struct lu_dirpage
*dp
;
2155 tgt
= lmv_get_target(lmv
, lsm
->lsm_md_oinfo
[i
].lmo_mds
, NULL
);
2162 * op_data will be shared by each stripe, so we need
2163 * reset these value for each stripe
2165 op_data
->op_fid1
= lsm
->lsm_md_oinfo
[i
].lmo_fid
;
2166 op_data
->op_fid2
= lsm
->lsm_md_oinfo
[i
].lmo_fid
;
2167 op_data
->op_data
= lsm
->lsm_md_oinfo
[i
].lmo_root
;
2169 rc
= md_read_page(tgt
->ltd_exp
, op_data
, cb_op
, stripe_hash
,
2174 dp
= page_address(page
);
2175 for (ent
= lu_dirent_start(dp
); ent
;
2176 ent
= lu_dirent_next(ent
)) {
2177 /* Skip dummy entry */
2178 if (!le16_to_cpu(ent
->lde_namelen
))
2181 if (le64_to_cpu(ent
->lde_hash
) < hash_offset
)
2184 if (le64_to_cpu(ent
->lde_hash
) == hash_offset
&&
2185 (*entp
== ent
|| i
< *stripe_offset
))
2188 /* skip . and .. for other stripes */
2189 if (i
&& (!strncmp(ent
->lde_name
, ".",
2190 le16_to_cpu(ent
->lde_namelen
)) ||
2191 !strncmp(ent
->lde_name
, "..",
2192 le16_to_cpu(ent
->lde_namelen
))))
2198 stripe_hash
= le64_to_cpu(dp
->ldp_hash_end
);
2205 * reach the end of current stripe, go to next stripe
2207 if (stripe_hash
== MDS_DIR_END_OFF
)
2214 if (le64_to_cpu(min_ent
->lde_hash
) >
2215 le64_to_cpu(ent
->lde_hash
)) {
2238 *stripe_offset
= min_idx
;
2245 * Build dir entry page from a striped directory
2247 * This function gets one entry by @offset from a striped directory. It will
2248 * read entries from all of stripes, and choose one closest to the required
2249 * offset(&offset). A few notes
2250 * 1. skip . and .. for non-zero stripes, because there can only have one .
2251 * and .. in a directory.
2252 * 2. op_data will be shared by all of stripes, instead of allocating new
2253 * one, so need to restore before reusing.
2254 * 3. release the entry page if that is not being chosen.
2256 * \param[in] exp obd export refer to LMV
2257 * \param[in] op_data hold those MD parameters of read_entry
2258 * \param[in] cb_op ldlm callback being used in enqueue in mdc_read_entry
2259 * \param[out] ldp the entry being read
2260 * \param[out] ppage the page holding the entry. Note: because the entry
2261 * will be accessed in upper layer, so we need hold the
2262 * page until the usages of entry is finished, see
2263 * ll_dir_entry_next.
2265 * retval =0 if get entry successfully
2266 * <0 cannot get entry
2268 static int lmv_read_striped_page(struct obd_export
*exp
,
2269 struct md_op_data
*op_data
,
2270 struct md_callback
*cb_op
,
2271 __u64 offset
, struct page
**ppage
)
2273 struct inode
*master_inode
= op_data
->op_data
;
2274 struct lu_fid master_fid
= op_data
->op_fid1
;
2275 struct obd_device
*obd
= exp
->exp_obd
;
2276 __u64 hash_offset
= offset
;
2277 struct page
*min_ent_page
= NULL
;
2278 struct page
*ent_page
= NULL
;
2279 struct lu_dirent
*min_ent
= NULL
;
2280 struct lu_dirent
*last_ent
;
2281 struct lu_dirent
*ent
;
2282 struct lu_dirpage
*dp
;
2288 rc
= lmv_check_connect(obd
);
2293 * Allocate a page and read entries from all of stripes and fill
2294 * the page by hash order
2296 ent_page
= alloc_page(GFP_KERNEL
);
2300 /* Initialize the entry page */
2301 dp
= kmap(ent_page
);
2302 memset(dp
, 0, sizeof(*dp
));
2303 dp
->ldp_hash_start
= cpu_to_le64(offset
);
2304 dp
->ldp_flags
|= LDF_COLLIDE
;
2307 left_bytes
= PAGE_SIZE
- sizeof(*dp
);
2313 /* Find the minum entry from all sub-stripes */
2314 rc
= lmv_get_min_striped_entry(exp
, op_data
, cb_op
, hash_offset
,
2321 * If it can not get minum entry, it means it already reaches
2322 * the end of this directory
2325 last_ent
->lde_reclen
= 0;
2326 hash_offset
= MDS_DIR_END_OFF
;
2330 ent_size
= le16_to_cpu(min_ent
->lde_reclen
);
2333 * the last entry lde_reclen is 0, but it might not
2334 * the end of this entry of this temporay entry
2337 ent_size
= lu_dirent_calc_size(
2338 le16_to_cpu(min_ent
->lde_namelen
),
2339 le32_to_cpu(min_ent
->lde_attrs
));
2340 if (ent_size
> left_bytes
) {
2341 last_ent
->lde_reclen
= cpu_to_le16(0);
2342 hash_offset
= le64_to_cpu(min_ent
->lde_hash
);
2346 memcpy(ent
, min_ent
, ent_size
);
2349 * Replace . with master FID and Replace .. with the parent FID
2352 if (!strncmp(ent
->lde_name
, ".",
2353 le16_to_cpu(ent
->lde_namelen
)) &&
2354 le16_to_cpu(ent
->lde_namelen
) == 1)
2355 fid_cpu_to_le(&ent
->lde_fid
, &master_fid
);
2356 else if (!strncmp(ent
->lde_name
, "..",
2357 le16_to_cpu(ent
->lde_namelen
)) &&
2358 le16_to_cpu(ent
->lde_namelen
) == 2)
2359 fid_cpu_to_le(&ent
->lde_fid
, &op_data
->op_fid3
);
2361 left_bytes
-= ent_size
;
2362 ent
->lde_reclen
= cpu_to_le16(ent_size
);
2364 ent
= (void *)ent
+ ent_size
;
2365 hash_offset
= le64_to_cpu(min_ent
->lde_hash
);
2366 if (hash_offset
== MDS_DIR_END_OFF
) {
2367 last_ent
->lde_reclen
= 0;
2373 kunmap(min_ent_page
);
2374 put_page(min_ent_page
);
2378 __free_page(ent_page
);
2382 dp
->ldp_flags
|= LDF_EMPTY
;
2383 dp
->ldp_flags
= cpu_to_le32(dp
->ldp_flags
);
2384 dp
->ldp_hash_end
= cpu_to_le64(hash_offset
);
2388 * We do not want to allocate md_op_data during each
2389 * dir entry reading, so op_data will be shared by every stripe,
2390 * then we need to restore it back to original value before
2391 * return to the upper layer
2393 op_data
->op_fid1
= master_fid
;
2394 op_data
->op_fid2
= master_fid
;
2395 op_data
->op_data
= master_inode
;
2402 static int lmv_read_page(struct obd_export
*exp
, struct md_op_data
*op_data
,
2403 struct md_callback
*cb_op
, __u64 offset
,
2404 struct page
**ppage
)
2406 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
2407 struct obd_device
*obd
= exp
->exp_obd
;
2408 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2409 struct lmv_tgt_desc
*tgt
;
2412 rc
= lmv_check_connect(obd
);
2417 return lmv_read_striped_page(exp
, op_data
, cb_op
, offset
, ppage
);
2419 tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
2421 return PTR_ERR(tgt
);
2423 return md_read_page(tgt
->ltd_exp
, op_data
, cb_op
, offset
, ppage
);
2427 * Unlink a file/directory
2429 * Unlink a file or directory under the parent dir. The unlink request
2430 * usually will be sent to the MDT where the child is located, but if
2431 * the client does not have the child FID then request will be sent to the
2432 * MDT where the parent is located.
2434 * If the parent is a striped directory then it also needs to locate which
2435 * stripe the name of the child is located, and replace the parent FID
2436 * (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown,
2437 * it will walk through all of sub-stripes until the child is being
2440 * \param[in] exp export refer to LMV
2441 * \param[in] op_data different parameters transferred beween client
2442 * MD stacks, name, namelen, FIDs etc.
2443 * op_fid1 is the parent FID, op_fid2 is the child
2445 * \param[out] request point to the request of unlink.
2447 * retval 0 if succeed
2448 * negative errno if failed.
2450 static int lmv_unlink(struct obd_export
*exp
, struct md_op_data
*op_data
,
2451 struct ptlrpc_request
**request
)
2453 struct lmv_stripe_md
*lsm
= op_data
->op_mea1
;
2454 struct obd_device
*obd
= exp
->exp_obd
;
2455 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2456 struct lmv_tgt_desc
*parent_tgt
= NULL
;
2457 struct lmv_tgt_desc
*tgt
= NULL
;
2458 struct mdt_body
*body
;
2459 int stripe_index
= 0;
2462 rc
= lmv_check_connect(obd
);
2466 /* For striped dir, we need to locate the parent as well */
2468 struct lmv_tgt_desc
*tmp
;
2470 LASSERT(op_data
->op_name
&& op_data
->op_namelen
);
2472 tmp
= lmv_locate_target_for_name(lmv
, lsm
,
2474 op_data
->op_namelen
,
2479 * return -EBADFD means unknown hash type, might
2480 * need try all sub-stripe here
2482 if (IS_ERR(tmp
) && PTR_ERR(tmp
) != -EBADFD
)
2483 return PTR_ERR(tmp
);
2486 * Note: both migrating dir and unknown hash dir need to
2487 * try all of sub-stripes, so we need start search the
2488 * name from stripe 0, but migrating dir is already handled
2489 * inside lmv_locate_target_for_name(), so we only check
2490 * unknown hash type directory here
2492 if (!lmv_is_known_hash_type(lsm
->lsm_md_hash_type
)) {
2493 struct lmv_oinfo
*oinfo
;
2495 oinfo
= &lsm
->lsm_md_oinfo
[stripe_index
];
2497 op_data
->op_fid1
= oinfo
->lmo_fid
;
2498 op_data
->op_mds
= oinfo
->lmo_mds
;
2503 /* Send unlink requests to the MDT where the child is located */
2504 if (likely(!fid_is_zero(&op_data
->op_fid2
)))
2505 tgt
= lmv_find_target(lmv
, &op_data
->op_fid2
);
2507 tgt
= lmv_get_target(lmv
, op_data
->op_mds
, NULL
);
2509 tgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
2512 return PTR_ERR(tgt
);
2514 op_data
->op_fsuid
= from_kuid(&init_user_ns
, current_fsuid());
2515 op_data
->op_fsgid
= from_kgid(&init_user_ns
, current_fsgid());
2516 op_data
->op_cap
= cfs_curproc_cap_pack();
2519 * If child's fid is given, cancel unused locks for it if it is from
2520 * another export than parent.
2522 * LOOKUP lock for child (fid3) should also be cancelled on parent
2523 * tgt_tgt in mdc_unlink().
2525 op_data
->op_flags
|= MF_MDC_CANCEL_FID1
| MF_MDC_CANCEL_FID3
;
2528 * Cancel FULL locks on child (fid3).
2530 parent_tgt
= lmv_find_target(lmv
, &op_data
->op_fid1
);
2531 if (IS_ERR(parent_tgt
))
2532 return PTR_ERR(parent_tgt
);
2534 if (parent_tgt
!= tgt
) {
2535 rc
= lmv_early_cancel(exp
, parent_tgt
, op_data
, tgt
->ltd_idx
,
2536 LCK_EX
, MDS_INODELOCK_LOOKUP
,
2537 MF_MDC_CANCEL_FID3
);
2540 rc
= lmv_early_cancel(exp
, NULL
, op_data
, tgt
->ltd_idx
, LCK_EX
,
2541 MDS_INODELOCK_FULL
, MF_MDC_CANCEL_FID3
);
2545 CDEBUG(D_INODE
, "unlink with fid=" DFID
"/" DFID
" -> mds #%u\n",
2546 PFID(&op_data
->op_fid1
), PFID(&op_data
->op_fid2
), tgt
->ltd_idx
);
2548 rc
= md_unlink(tgt
->ltd_exp
, op_data
, request
);
2549 if (rc
!= 0 && rc
!= -EREMOTE
&& rc
!= -ENOENT
)
2552 /* Try next stripe if it is needed. */
2553 if (rc
== -ENOENT
&& lsm
&& lmv_need_try_all_stripes(lsm
)) {
2554 struct lmv_oinfo
*oinfo
;
2557 if (stripe_index
>= lsm
->lsm_md_stripe_count
)
2560 oinfo
= &lsm
->lsm_md_oinfo
[stripe_index
];
2562 op_data
->op_fid1
= oinfo
->lmo_fid
;
2563 op_data
->op_mds
= oinfo
->lmo_mds
;
2565 ptlrpc_req_finished(*request
);
2568 goto try_next_stripe
;
2571 body
= req_capsule_server_get(&(*request
)->rq_pill
, &RMF_MDT_BODY
);
2575 /* Not cross-ref case, just get out of here. */
2576 if (likely(!(body
->mbo_valid
& OBD_MD_MDS
)))
2579 CDEBUG(D_INODE
, "%s: try unlink to another MDT for "DFID
"\n",
2580 exp
->exp_obd
->obd_name
, PFID(&body
->mbo_fid1
));
2582 /* This is a remote object, try remote MDT, Note: it may
2583 * try more than 1 time here, Considering following case
2584 * /mnt/lustre is root on MDT0, remote1 is on MDT1
2585 * 1. Initially A does not know where remote1 is, it send
2586 * unlink RPC to MDT0, MDT0 return -EREMOTE, it will
2587 * resend unlink RPC to MDT1 (retry 1st time).
2589 * 2. During the unlink RPC in flight,
2590 * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2
2591 * and create new remote1, but on MDT0
2593 * 3. MDT1 get unlink RPC(from A), then do remote lock on
2594 * /mnt/lustre, then lookup get fid of remote1, and find
2595 * it is remote dir again, and replay -EREMOTE again.
2597 * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times).
2599 * In theory, it might try unlimited time here, but it should
2600 * be very rare case.
2602 op_data
->op_fid2
= body
->mbo_fid1
;
2603 ptlrpc_req_finished(*request
);
2609 static int lmv_precleanup(struct obd_device
*obd
)
2611 fld_client_debugfs_fini(&obd
->u
.lmv
.lmv_fld
);
2612 lprocfs_obd_cleanup(obd
);
2617 * Get by key a value associated with a LMV device.
2619 * Dispatch request to lower-layer devices as needed.
2621 * \param[in] env execution environment for this thread
2622 * \param[in] exp export for the LMV device
2623 * \param[in] keylen length of key identifier
2624 * \param[in] key identifier of key to get value for
2625 * \param[in] vallen size of \a val
2626 * \param[out] val pointer to storage location for value
2628 * \retval 0 on success
2629 * \retval negative negated errno on failure
2631 static int lmv_get_info(const struct lu_env
*env
, struct obd_export
*exp
,
2632 __u32 keylen
, void *key
, __u32
*vallen
, void *val
)
2634 struct obd_device
*obd
;
2635 struct lmv_obd
*lmv
;
2638 obd
= class_exp2obd(exp
);
2640 CDEBUG(D_IOCTL
, "Invalid client cookie %#llx\n",
2641 exp
->exp_handle
.h_cookie
);
2646 if (keylen
>= strlen("remote_flag") && !strcmp(key
, "remote_flag")) {
2649 rc
= lmv_check_connect(obd
);
2653 LASSERT(*vallen
== sizeof(__u32
));
2654 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2655 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[i
];
2658 * All tgts should be connected when this gets called.
2660 if (!tgt
|| !tgt
->ltd_exp
)
2663 if (!obd_get_info(env
, tgt
->ltd_exp
, keylen
, key
,
2668 } else if (KEY_IS(KEY_MAX_EASIZE
) ||
2669 KEY_IS(KEY_DEFAULT_EASIZE
) ||
2670 KEY_IS(KEY_CONN_DATA
)) {
2671 rc
= lmv_check_connect(obd
);
2676 * Forwarding this request to first MDS, it should know LOV
2679 rc
= obd_get_info(env
, lmv
->tgts
[0]->ltd_exp
, keylen
, key
,
2681 if (!rc
&& KEY_IS(KEY_CONN_DATA
))
2682 exp
->exp_connect_data
= *(struct obd_connect_data
*)val
;
2684 } else if (KEY_IS(KEY_TGT_COUNT
)) {
2685 *((int *)val
) = lmv
->desc
.ld_tgt_count
;
2689 CDEBUG(D_IOCTL
, "Invalid key\n");
2694 * Asynchronously set by key a value associated with a LMV device.
2696 * Dispatch request to lower-layer devices as needed.
2698 * \param[in] env execution environment for this thread
2699 * \param[in] exp export for the LMV device
2700 * \param[in] keylen length of key identifier
2701 * \param[in] key identifier of key to store value for
2702 * \param[in] vallen size of value to store
2703 * \param[in] val pointer to data to be stored
2704 * \param[in] set optional list of related ptlrpc requests
2706 * \retval 0 on success
2707 * \retval negative negated errno on failure
2709 static int lmv_set_info_async(const struct lu_env
*env
, struct obd_export
*exp
,
2710 u32 keylen
, void *key
, u32 vallen
,
2711 void *val
, struct ptlrpc_request_set
*set
)
2713 struct lmv_tgt_desc
*tgt
;
2714 struct obd_device
*obd
;
2715 struct lmv_obd
*lmv
;
2718 obd
= class_exp2obd(exp
);
2720 CDEBUG(D_IOCTL
, "Invalid client cookie %#llx\n",
2721 exp
->exp_handle
.h_cookie
);
2726 if (KEY_IS(KEY_READ_ONLY
) || KEY_IS(KEY_FLUSH_CTX
) ||
2727 KEY_IS(KEY_DEFAULT_EASIZE
)) {
2730 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2733 if (!tgt
|| !tgt
->ltd_exp
)
2736 err
= obd_set_info_async(env
, tgt
->ltd_exp
,
2737 keylen
, key
, vallen
, val
, set
);
2748 static int lmv_unpack_md_v1(struct obd_export
*exp
, struct lmv_stripe_md
*lsm
,
2749 const struct lmv_mds_md_v1
*lmm1
)
2751 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
2757 lsm
->lsm_md_magic
= le32_to_cpu(lmm1
->lmv_magic
);
2758 lsm
->lsm_md_stripe_count
= le32_to_cpu(lmm1
->lmv_stripe_count
);
2759 lsm
->lsm_md_master_mdt_index
= le32_to_cpu(lmm1
->lmv_master_mdt_index
);
2760 if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE
))
2761 lsm
->lsm_md_hash_type
= LMV_HASH_TYPE_UNKNOWN
;
2763 lsm
->lsm_md_hash_type
= le32_to_cpu(lmm1
->lmv_hash_type
);
2764 lsm
->lsm_md_layout_version
= le32_to_cpu(lmm1
->lmv_layout_version
);
2765 cplen
= strlcpy(lsm
->lsm_md_pool_name
, lmm1
->lmv_pool_name
,
2766 sizeof(lsm
->lsm_md_pool_name
));
2768 if (cplen
>= sizeof(lsm
->lsm_md_pool_name
))
2771 CDEBUG(D_INFO
, "unpack lsm count %d, master %d hash_type %d layout_version %d\n",
2772 lsm
->lsm_md_stripe_count
, lsm
->lsm_md_master_mdt_index
,
2773 lsm
->lsm_md_hash_type
, lsm
->lsm_md_layout_version
);
2775 stripe_count
= le32_to_cpu(lmm1
->lmv_stripe_count
);
2776 for (i
= 0; i
< stripe_count
; i
++) {
2777 fid_le_to_cpu(&lsm
->lsm_md_oinfo
[i
].lmo_fid
,
2778 &lmm1
->lmv_stripe_fids
[i
]);
2779 rc
= lmv_fld_lookup(lmv
, &lsm
->lsm_md_oinfo
[i
].lmo_fid
,
2780 &lsm
->lsm_md_oinfo
[i
].lmo_mds
);
2783 CDEBUG(D_INFO
, "unpack fid #%d "DFID
"\n", i
,
2784 PFID(&lsm
->lsm_md_oinfo
[i
].lmo_fid
));
2790 static int lmv_unpackmd(struct obd_export
*exp
, struct lmv_stripe_md
**lsmp
,
2791 const union lmv_mds_md
*lmm
, size_t lmm_size
)
2793 struct lmv_stripe_md
*lsm
;
2794 bool allocated
= false;
2804 for (i
= 1; i
< lsm
->lsm_md_stripe_count
; i
++) {
2806 * For migrating inode, the master stripe and master
2807 * object will be the same, so do not need iput, see
2810 if (!(lsm
->lsm_md_hash_type
& LMV_HASH_FLAG_MIGRATION
&&
2811 !i
) && lsm
->lsm_md_oinfo
[i
].lmo_root
)
2812 iput(lsm
->lsm_md_oinfo
[i
].lmo_root
);
2820 if (le32_to_cpu(lmm
->lmv_magic
) == LMV_MAGIC_STRIPE
)
2824 if (le32_to_cpu(lmm
->lmv_magic
) != LMV_MAGIC_V1
&&
2825 le32_to_cpu(lmm
->lmv_magic
) != LMV_USER_MAGIC
) {
2826 CERROR("%s: invalid lmv magic %x: rc = %d\n",
2827 exp
->exp_obd
->obd_name
, le32_to_cpu(lmm
->lmv_magic
),
2832 if (le32_to_cpu(lmm
->lmv_magic
) == LMV_MAGIC_V1
)
2833 lsm_size
= lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm
));
2836 * Unpack default dirstripe(lmv_user_md) to lmv_stripe_md,
2837 * stripecount should be 0 then.
2839 lsm_size
= lmv_stripe_md_size(0);
2842 lsm
= libcfs_kvzalloc(lsm_size
, GFP_NOFS
);
2849 switch (le32_to_cpu(lmm
->lmv_magic
)) {
2851 rc
= lmv_unpack_md_v1(exp
, lsm
, &lmm
->lmv_md_v1
);
2854 CERROR("%s: unrecognized magic %x\n", exp
->exp_obd
->obd_name
,
2855 le32_to_cpu(lmm
->lmv_magic
));
2860 if (rc
&& allocated
) {
2868 void lmv_free_memmd(struct lmv_stripe_md
*lsm
)
2870 lmv_unpackmd(NULL
, &lsm
, NULL
, 0);
2872 EXPORT_SYMBOL(lmv_free_memmd
);
2874 static int lmv_cancel_unused(struct obd_export
*exp
, const struct lu_fid
*fid
,
2875 union ldlm_policy_data
*policy
,
2876 enum ldlm_mode mode
, enum ldlm_cancel_flags flags
,
2879 struct obd_device
*obd
= exp
->exp_obd
;
2880 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2887 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
2888 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[i
];
2890 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
)
2893 err
= md_cancel_unused(tgt
->ltd_exp
, fid
, policy
, mode
, flags
,
2901 static int lmv_set_lock_data(struct obd_export
*exp
,
2902 const struct lustre_handle
*lockh
,
2903 void *data
, __u64
*bits
)
2905 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
2906 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
2908 if (!tgt
|| !tgt
->ltd_exp
)
2911 return md_set_lock_data(tgt
->ltd_exp
, lockh
, data
, bits
);
2914 static enum ldlm_mode
lmv_lock_match(struct obd_export
*exp
, __u64 flags
,
2915 const struct lu_fid
*fid
,
2916 enum ldlm_type type
,
2917 union ldlm_policy_data
*policy
,
2918 enum ldlm_mode mode
,
2919 struct lustre_handle
*lockh
)
2921 struct obd_device
*obd
= exp
->exp_obd
;
2922 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2927 CDEBUG(D_INODE
, "Lock match for "DFID
"\n", PFID(fid
));
2930 * With DNE every object can have two locks in different namespaces:
2931 * lookup lock in space of MDT storing direntry and update/open lock in
2932 * space of MDT storing inode. Try the MDT that the FID maps to first,
2933 * since this can be easily found, and only try others if that fails.
2935 for (i
= 0, tgt
= lmv_find_target_index(lmv
, fid
);
2936 i
< lmv
->desc
.ld_tgt_count
;
2937 i
++, tgt
= (tgt
+ 1) % lmv
->desc
.ld_tgt_count
) {
2939 CDEBUG(D_HA
, "%s: "DFID
" is inaccessible: rc = %d\n",
2940 obd
->obd_name
, PFID(fid
), tgt
);
2944 if (!lmv
->tgts
[tgt
] || !lmv
->tgts
[tgt
]->ltd_exp
||
2945 !lmv
->tgts
[tgt
]->ltd_active
)
2948 rc
= md_lock_match(lmv
->tgts
[tgt
]->ltd_exp
, flags
, fid
,
2949 type
, policy
, mode
, lockh
);
2957 static int lmv_get_lustre_md(struct obd_export
*exp
,
2958 struct ptlrpc_request
*req
,
2959 struct obd_export
*dt_exp
,
2960 struct obd_export
*md_exp
,
2961 struct lustre_md
*md
)
2963 struct lmv_obd
*lmv
= &exp
->exp_obd
->u
.lmv
;
2964 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
2966 if (!tgt
|| !tgt
->ltd_exp
)
2968 return md_get_lustre_md(tgt
->ltd_exp
, req
, dt_exp
, md_exp
, md
);
2971 static int lmv_free_lustre_md(struct obd_export
*exp
, struct lustre_md
*md
)
2973 struct obd_device
*obd
= exp
->exp_obd
;
2974 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2975 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
2978 lmv_free_memmd(md
->lmv
);
2981 if (!tgt
|| !tgt
->ltd_exp
)
2983 return md_free_lustre_md(tgt
->ltd_exp
, md
);
2986 static int lmv_set_open_replay_data(struct obd_export
*exp
,
2987 struct obd_client_handle
*och
,
2988 struct lookup_intent
*it
)
2990 struct obd_device
*obd
= exp
->exp_obd
;
2991 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
2992 struct lmv_tgt_desc
*tgt
;
2994 tgt
= lmv_find_target(lmv
, &och
->och_fid
);
2996 return PTR_ERR(tgt
);
2998 return md_set_open_replay_data(tgt
->ltd_exp
, och
, it
);
3001 static int lmv_clear_open_replay_data(struct obd_export
*exp
,
3002 struct obd_client_handle
*och
)
3004 struct obd_device
*obd
= exp
->exp_obd
;
3005 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
3006 struct lmv_tgt_desc
*tgt
;
3008 tgt
= lmv_find_target(lmv
, &och
->och_fid
);
3010 return PTR_ERR(tgt
);
3012 return md_clear_open_replay_data(tgt
->ltd_exp
, och
);
3015 static int lmv_intent_getattr_async(struct obd_export
*exp
,
3016 struct md_enqueue_info
*minfo
)
3018 struct md_op_data
*op_data
= &minfo
->mi_data
;
3019 struct obd_device
*obd
= exp
->exp_obd
;
3020 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
3021 struct lmv_tgt_desc
*ptgt
= NULL
;
3022 struct lmv_tgt_desc
*ctgt
= NULL
;
3025 if (!fid_is_sane(&op_data
->op_fid2
))
3028 rc
= lmv_check_connect(obd
);
3032 ptgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid1
);
3034 return PTR_ERR(ptgt
);
3036 ctgt
= lmv_locate_mds(lmv
, op_data
, &op_data
->op_fid2
);
3038 return PTR_ERR(ctgt
);
3041 * if child is on remote MDT, we need 2 async RPCs to fetch both LOOKUP
3042 * lock on parent, and UPDATE lock on child MDT, which makes all
3043 * complicated. Considering remote dir is rare case, and not supporting
3044 * it in statahead won't cause any issue, drop its support for now.
3049 return md_intent_getattr_async(ptgt
->ltd_exp
, minfo
);
3052 static int lmv_revalidate_lock(struct obd_export
*exp
, struct lookup_intent
*it
,
3053 struct lu_fid
*fid
, __u64
*bits
)
3055 struct obd_device
*obd
= exp
->exp_obd
;
3056 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
3057 struct lmv_tgt_desc
*tgt
;
3060 rc
= lmv_check_connect(obd
);
3064 tgt
= lmv_find_target(lmv
, fid
);
3066 return PTR_ERR(tgt
);
3068 return md_revalidate_lock(tgt
->ltd_exp
, it
, fid
, bits
);
3072 lmv_get_fid_from_lsm(struct obd_export
*exp
,
3073 const struct lmv_stripe_md
*lsm
,
3074 const char *name
, int namelen
, struct lu_fid
*fid
)
3076 const struct lmv_oinfo
*oinfo
;
3079 oinfo
= lsm_name_to_stripe_info(lsm
, name
, namelen
);
3081 return PTR_ERR(oinfo
);
3083 *fid
= oinfo
->lmo_fid
;
3089 * For lmv, only need to send request to master MDT, and the master MDT will
3090 * process with other slave MDTs. The only exception is Q_GETOQUOTA for which
3091 * we directly fetch data from the slave MDTs.
3093 static int lmv_quotactl(struct obd_device
*unused
, struct obd_export
*exp
,
3094 struct obd_quotactl
*oqctl
)
3096 struct obd_device
*obd
= class_exp2obd(exp
);
3097 struct lmv_obd
*lmv
= &obd
->u
.lmv
;
3098 struct lmv_tgt_desc
*tgt
= lmv
->tgts
[0];
3100 __u64 curspace
= 0, curinodes
= 0;
3103 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
||
3104 !lmv
->desc
.ld_tgt_count
) {
3105 CERROR("master lmv inactive\n");
3109 if (oqctl
->qc_cmd
!= Q_GETOQUOTA
)
3110 return obd_quotactl(tgt
->ltd_exp
, oqctl
);
3112 for (i
= 0; i
< lmv
->desc
.ld_tgt_count
; i
++) {
3117 if (!tgt
|| !tgt
->ltd_exp
|| !tgt
->ltd_active
)
3120 err
= obd_quotactl(tgt
->ltd_exp
, oqctl
);
3122 CERROR("getquota on mdt %d failed. %d\n", i
, err
);
3126 curspace
+= oqctl
->qc_dqblk
.dqb_curspace
;
3127 curinodes
+= oqctl
->qc_dqblk
.dqb_curinodes
;
3130 oqctl
->qc_dqblk
.dqb_curspace
= curspace
;
3131 oqctl
->qc_dqblk
.dqb_curinodes
= curinodes
;
3136 static int lmv_merge_attr(struct obd_export
*exp
,
3137 const struct lmv_stripe_md
*lsm
,
3138 struct cl_attr
*attr
,
3139 ldlm_blocking_callback cb_blocking
)
3143 rc
= lmv_revalidate_slaves(exp
, lsm
, cb_blocking
, 0);
3147 for (i
= 0; i
< lsm
->lsm_md_stripe_count
; i
++) {
3148 struct inode
*inode
= lsm
->lsm_md_oinfo
[i
].lmo_root
;
3150 CDEBUG(D_INFO
, ""DFID
" size %llu, blocks %llu nlink %u, atime %lu ctime %lu, mtime %lu.\n",
3151 PFID(&lsm
->lsm_md_oinfo
[i
].lmo_fid
),
3152 i_size_read(inode
), (unsigned long long)inode
->i_blocks
,
3153 inode
->i_nlink
, LTIME_S(inode
->i_atime
),
3154 LTIME_S(inode
->i_ctime
), LTIME_S(inode
->i_mtime
));
3156 /* for slave stripe, it needs to subtract nlink for . and .. */
3158 attr
->cat_nlink
+= inode
->i_nlink
- 2;
3160 attr
->cat_nlink
= inode
->i_nlink
;
3162 attr
->cat_size
+= i_size_read(inode
);
3163 attr
->cat_blocks
+= inode
->i_blocks
;
3165 if (attr
->cat_atime
< LTIME_S(inode
->i_atime
))
3166 attr
->cat_atime
= LTIME_S(inode
->i_atime
);
3168 if (attr
->cat_ctime
< LTIME_S(inode
->i_ctime
))
3169 attr
->cat_ctime
= LTIME_S(inode
->i_ctime
);
3171 if (attr
->cat_mtime
< LTIME_S(inode
->i_mtime
))
3172 attr
->cat_mtime
= LTIME_S(inode
->i_mtime
);
3177 static struct obd_ops lmv_obd_ops
= {
3178 .owner
= THIS_MODULE
,
3180 .cleanup
= lmv_cleanup
,
3181 .precleanup
= lmv_precleanup
,
3182 .process_config
= lmv_process_config
,
3183 .connect
= lmv_connect
,
3184 .disconnect
= lmv_disconnect
,
3185 .statfs
= lmv_statfs
,
3186 .get_info
= lmv_get_info
,
3187 .set_info_async
= lmv_set_info_async
,
3188 .notify
= lmv_notify
,
3189 .get_uuid
= lmv_get_uuid
,
3190 .iocontrol
= lmv_iocontrol
,
3191 .quotactl
= lmv_quotactl
3194 static struct md_ops lmv_md_ops
= {
3195 .getstatus
= lmv_getstatus
,
3196 .null_inode
= lmv_null_inode
,
3198 .create
= lmv_create
,
3199 .enqueue
= lmv_enqueue
,
3200 .getattr
= lmv_getattr
,
3201 .getxattr
= lmv_getxattr
,
3202 .getattr_name
= lmv_getattr_name
,
3203 .intent_lock
= lmv_intent_lock
,
3205 .rename
= lmv_rename
,
3206 .setattr
= lmv_setattr
,
3207 .setxattr
= lmv_setxattr
,
3209 .read_page
= lmv_read_page
,
3210 .unlink
= lmv_unlink
,
3211 .init_ea_size
= lmv_init_ea_size
,
3212 .cancel_unused
= lmv_cancel_unused
,
3213 .set_lock_data
= lmv_set_lock_data
,
3214 .lock_match
= lmv_lock_match
,
3215 .get_lustre_md
= lmv_get_lustre_md
,
3216 .free_lustre_md
= lmv_free_lustre_md
,
3217 .merge_attr
= lmv_merge_attr
,
3218 .set_open_replay_data
= lmv_set_open_replay_data
,
3219 .clear_open_replay_data
= lmv_clear_open_replay_data
,
3220 .intent_getattr_async
= lmv_intent_getattr_async
,
3221 .revalidate_lock
= lmv_revalidate_lock
,
3222 .get_fid_from_lsm
= lmv_get_fid_from_lsm
,
3223 .unpackmd
= lmv_unpackmd
,
3226 static int __init
lmv_init(void)
3228 struct lprocfs_static_vars lvars
;
3230 lprocfs_lmv_init_vars(&lvars
);
3232 return class_register_type(&lmv_obd_ops
, &lmv_md_ops
,
3233 LUSTRE_LMV_NAME
, NULL
);
3236 static void lmv_exit(void)
3238 class_unregister_type(LUSTRE_LMV_NAME
);
3241 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
3242 MODULE_DESCRIPTION("Lustre Logical Metadata Volume");
3243 MODULE_VERSION(LUSTRE_VERSION_STRING
);
3244 MODULE_LICENSE("GPL");
3246 module_init(lmv_init
);
3247 module_exit(lmv_exit
);