]>
Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | * | |
32 | * lustre/include/lustre_fid.h | |
33 | * | |
34 | * Author: Yury Umanets <umka@clusterfs.com> | |
35 | */ | |
36 | ||
56f4c5a8 LX |
37 | #ifndef __LUSTRE_FID_H |
38 | #define __LUSTRE_FID_H | |
d7e09d03 PT |
39 | |
40 | /** \defgroup fid fid | |
41 | * | |
42 | * @{ | |
43 | * | |
25ed6a5e | 44 | * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs |
d7e09d03 PT |
45 | * describes the FID namespace and interoperability requirements for FIDs. |
46 | * The important parts of that document are included here for reference. | |
47 | * | |
48 | * FID | |
49 | * File IDentifier generated by client from range allocated by the SEQuence | |
50 | * service and stored in struct lu_fid. The FID is composed of three parts: | |
51 | * SEQuence, ObjectID, and VERsion. The SEQ component is a filesystem | |
52 | * unique 64-bit integer, and only one client is ever assigned any SEQ value. | |
53 | * The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved | |
54 | * for system use. The OID component is a 32-bit value generated by the | |
55 | * client on a per-SEQ basis to allow creating many unique FIDs without | |
56 | * communication with the server. The VER component is a 32-bit value that | |
57 | * distinguishes between different FID instantiations, such as snapshots or | |
58 | * separate subtrees within the filesystem. FIDs with the same VER field | |
59 | * are considered part of the same namespace. | |
60 | * | |
61 | * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and | |
62 | * MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while | |
63 | * OSTs use 64-bit Lustre object IDs and generation numbers. | |
64 | * | |
65 | * NEW filesystems are those formatted since the introduction of FIDs. | |
66 | * | |
67 | * IGIF | |
68 | * Inode and Generation In FID, a surrogate FID used to globally identify | |
69 | * an existing object on OLD formatted MDT file system. This would only be | |
70 | * used on MDT0 in a DNE filesystem, because there cannot be more than one | |
71 | * MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1] | |
72 | * range, where inode number is stored in SEQ, and inode generation is in OID. | |
73 | * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem, | |
74 | * which is the maximum possible for an ldiskfs backend. It also assumes | |
75 | * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible | |
76 | * to clients, which has always been true. | |
77 | * | |
78 | * IDIF | |
79 | * object ID In FID, a surrogate FID used to globally identify an existing | |
80 | * OST object on OLD formatted OST file system. Belongs to a sequence in | |
81 | * [2^32, 2^33 - 1]. Sequence number is calculated as: | |
82 | * | |
83 | * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff) | |
84 | * | |
85 | * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object | |
86 | * ID. The generation of unique SEQ values per OST allows the IDIF FIDs to | |
87 | * be identified in the FLD correctly. The OID field is calculated as: | |
88 | * | |
89 | * objid & 0xffffffff | |
90 | * | |
91 | * that is, it consists of lower 32 bits of object ID. For objects within | |
92 | * the IDIF range, object ID extraction will be: | |
93 | * | |
94 | * o_id = (fid->f_seq & 0xffff) << 32 | fid->f_oid; | |
95 | * o_seq = 0; // formerly group number | |
96 | * | |
97 | * NOTE: This assumes that no more than 2^48-1 objects have ever been created | |
98 | * on any OST, and that no more than 65535 OSTs are in use. Both are very | |
99 | * reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming | |
100 | * a maximum creation rate of 1M objects per second for a maximum of 9 years, | |
101 | * or combinations thereof. | |
102 | * | |
103 | * OST_MDT0 | |
104 | * Surrogate FID used to identify an existing object on OLD formatted OST | |
105 | * filesystem. Belongs to the reserved SEQuence 0, and is used prior to | |
106 | * the introduction of FID-on-OST, at which point IDIF will be used to | |
107 | * identify objects as residing on a specific OST. | |
108 | * | |
109 | * LLOG | |
110 | * For Lustre Log objects the object sequence 1 is used. This is compatible | |
111 | * with both OLD and NEW namespaces, as this SEQ number is in the | |
112 | * ext3/ldiskfs reserved inode range and does not conflict with IGIF | |
113 | * sequence numbers. | |
114 | * | |
115 | * ECHO | |
116 | * For testing OST IO performance the object sequence 2 is used. This is | |
117 | * compatible with both OLD and NEW namespaces, as this SEQ number is in | |
118 | * the ext3/ldiskfs reserved inode range and does not conflict with IGIF | |
119 | * sequence numbers. | |
120 | * | |
121 | * OST_MDT1 .. OST_MAX | |
122 | * For testing with multiple MDTs the object sequence 3 through 9 is used, | |
123 | * allowing direct mapping of MDTs 1 through 7 respectively, for a total | |
124 | * of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group" | |
125 | * mappings. However, this SEQ range is only for testing prior to any | |
126 | * production DNE release, as the objects in this range conflict across all | |
127 | * OSTs, as the OST index is not part of the FID. For production DNE usage, | |
128 | * OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs. | |
129 | * | |
130 | * DLM OST objid to IDIF mapping | |
131 | * For compatibility with existing OLD OST network protocol structures, the | |
132 | * FID must map onto the o_id and o_seq in a manner that ensures existing | |
133 | * objects are identified consistently for IO, as well as onto the LDLM | |
134 | * namespace to ensure that there is only a single resource name for any | |
135 | * object in the DLM. The OLD OST object DLM resource mapping is: | |
136 | * | |
137 | * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases | |
138 | * | |
139 | * The NEW OST object DLM resource mapping is the same for both MDT and OST: | |
140 | * | |
141 | * resource[] = {SEQ, OID, VER, HASH}; | |
142 | * | |
143 | * NOTE: for mapping IDIF values to DLM resource names the o_id may be | |
144 | * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible | |
145 | * for the o_id numbers to overlap FID SEQ numbers in the resource. However, | |
146 | * in all production releases the OLD o_seq field is always zero, and all | |
147 | * valid FID OID values are non-zero, so the lock resources will not collide. | |
148 | * Even so, the MDT and OST resources are also in different LDLM namespaces. | |
149 | */ | |
150 | ||
9fdaf8c0 | 151 | #include "../../include/linux/libcfs/libcfs.h" |
1accaadf | 152 | #include "lustre/lustre_idl.h" |
5d01897e | 153 | #include "seq_range.h" |
d7e09d03 | 154 | |
56f4c5a8 | 155 | struct lu_env; |
d7e09d03 PT |
156 | struct lu_site; |
157 | struct lu_context; | |
56f4c5a8 LX |
158 | struct obd_device; |
159 | struct obd_export; | |
d7e09d03 PT |
160 | |
161 | /* Whole sequences space range and zero range definitions */ | |
162 | extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE; | |
163 | extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE; | |
164 | extern const struct lu_fid LUSTRE_BFL_FID; | |
165 | extern const struct lu_fid LU_OBF_FID; | |
166 | extern const struct lu_fid LU_DOT_LUSTRE_FID; | |
167 | ||
/* Sequence width and allocation-size constants. */
enum {
	/* How many metadata FIDs may be allocated in one sequence (128k). */
	LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL,

	/* How many data FIDs may be allocated in one sequence (4B - 1). */
	LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL,

	/* How many sequences to allocate to a client at once. */
	LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL,

	/* Size of the sequence allocation pool. */
	LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000,

	/*
	 * How many sequences may be in one super-sequence allocated to
	 * MDTs.
	 */
	LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH)
};
195 | ||
/* Number of OID bits used for OI container FIDs. */
enum {
	/** 2^6 FIDs for OI containers */
	OSD_OI_FID_OID_BITS	= 6,
	/** upper bound, reserving room for more FIDs in the future */
	OSD_OI_FID_OID_BITS_MAX	= 10,
};
202 | ||
/**
 * Special OIDs for local objects.
 *
 * The numeric values are fixed; gaps in the numbering are intentional and
 * must not be reused.
 */
enum local_oid {
	/** \see fld_mod_init */
	FLD_INDEX_OID			= 3UL,
	/** \see fid_mod_init */
	FID_SEQ_CTL_OID			= 4UL,
	FID_SEQ_SRV_OID			= 5UL,
	/** \see mdd_mod_init */
	MDD_ROOT_INDEX_OID		= 6UL, /* deprecated in 2.4 */
	MDD_ORPHAN_OID			= 7UL, /* deprecated in 2.4 */
	MDD_LOV_OBJ_OID			= 8UL,
	MDD_CAPA_KEYS_OID		= 9UL,
	/** \see mdt_mod_init */
	LAST_RECV_OID			= 11UL,
	OSD_FS_ROOT_OID			= 13UL,
	ACCT_USER_OID			= 15UL,
	ACCT_GROUP_OID			= 16UL,
	LFSCK_BOOKMARK_OID		= 17UL,
	OTABLE_IT_OID			= 18UL,
	/* These two definitions are obsolete
	 * OFD_GROUP0_LAST_OID     = 20UL,
	 * OFD_GROUP4K_LAST_OID    = 20UL+4096,
	 */
	OFD_LAST_GROUP_OID		= 4117UL,
	LLOG_CATALOGS_OID		= 4118UL,
	MGS_CONFIGS_OID			= 4119UL,
	OFD_HEALTH_CHECK_OID		= 4120UL,
	MDD_LOV_OBJ_OSEQ		= 4121UL,
	LFSCK_NAMESPACE_OID		= 4122UL,
	REMOTE_PARENT_DIR_OID		= 4123UL,
	SLAVE_LLOG_CATALOGS_OID		= 4124UL,
};
235 | ||
236 | static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid) | |
237 | { | |
238 | fid->f_seq = FID_SEQ_LOCAL_FILE; | |
239 | fid->f_oid = oid; | |
240 | fid->f_ver = 0; | |
241 | } | |
242 | ||
243 | static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid) | |
244 | { | |
245 | fid->f_seq = FID_SEQ_LOCAL_NAME; | |
246 | fid->f_oid = oid; | |
247 | fid->f_ver = 0; | |
248 | } | |
249 | ||
250 | /* For new FS (>= 2.4), the root FID will be changed to | |
251 | * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4), | |
c56e256d OD |
252 | * the root FID will still be IGIF |
253 | */ | |
d7e09d03 PT |
254 | static inline int fid_is_root(const struct lu_fid *fid) |
255 | { | |
256 | return unlikely((fid_seq(fid) == FID_SEQ_ROOT && | |
257 | fid_oid(fid) == 1)); | |
258 | } | |
259 | ||
260 | static inline int fid_is_dot_lustre(const struct lu_fid *fid) | |
261 | { | |
262 | return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && | |
263 | fid_oid(fid) == FID_OID_DOT_LUSTRE); | |
264 | } | |
265 | ||
266 | static inline int fid_is_obf(const struct lu_fid *fid) | |
267 | { | |
268 | return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && | |
269 | fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF); | |
270 | } | |
271 | ||
272 | static inline int fid_is_otable_it(const struct lu_fid *fid) | |
273 | { | |
274 | return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE && | |
275 | fid_oid(fid) == OTABLE_IT_OID); | |
276 | } | |
277 | ||
278 | static inline int fid_is_acct(const struct lu_fid *fid) | |
279 | { | |
280 | return fid_seq(fid) == FID_SEQ_LOCAL_FILE && | |
281 | (fid_oid(fid) == ACCT_USER_OID || | |
282 | fid_oid(fid) == ACCT_GROUP_OID); | |
283 | } | |
284 | ||
285 | static inline int fid_is_quota(const struct lu_fid *fid) | |
286 | { | |
287 | return fid_seq(fid) == FID_SEQ_QUOTA || | |
288 | fid_seq(fid) == FID_SEQ_QUOTA_GLB; | |
289 | } | |
290 | ||
291 | static inline int fid_is_namespace_visible(const struct lu_fid *fid) | |
292 | { | |
293 | const __u64 seq = fid_seq(fid); | |
294 | ||
295 | /* Here, we cannot distinguish whether the normal FID is for OST | |
c56e256d OD |
296 | * object or not. It is caller's duty to check more if needed. |
297 | */ | |
d7e09d03 PT |
298 | return (!fid_is_last_id(fid) && |
299 | (fid_seq_is_norm(seq) || fid_seq_is_igif(seq))) || | |
300 | fid_is_root(fid) || fid_is_dot_lustre(fid); | |
301 | } | |
302 | ||
303 | static inline int fid_seq_in_fldb(__u64 seq) | |
304 | { | |
305 | return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) || | |
306 | fid_seq_is_root(seq) || fid_seq_is_dot(seq); | |
307 | } | |
308 | ||
22144626 | 309 | static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) |
d7e09d03 PT |
310 | { |
311 | if (fid_seq_is_mdt0(seq)) { | |
22144626 | 312 | fid->f_seq = fid_idif_seq(0, ost_idx); |
d7e09d03 PT |
313 | } else { |
314 | LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || | |
55f5a824 | 315 | fid_seq_is_idif(seq), "%#llx\n", seq); |
d7e09d03 PT |
316 | fid->f_seq = seq; |
317 | } | |
318 | fid->f_oid = 0; | |
319 | fid->f_ver = 0; | |
320 | } | |
321 | ||
/* Sequence client type. */
enum lu_cli_type {
	LUSTRE_SEQ_METADATA	= 1,
	LUSTRE_SEQ_DATA		= 2
};
327 | ||
/* Sequence manager type. */
enum lu_mgr_type {
	LUSTRE_SEQ_SERVER	= 0,
	LUSTRE_SEQ_CONTROLLER	= 1
};
332 | ||
d7e09d03 PT |
333 | /* Client sequence manager interface. */ |
334 | struct lu_client_seq { | |
335 | /* Sequence-controller export. */ | |
336 | struct obd_export *lcs_exp; | |
337 | struct mutex lcs_mutex; | |
338 | ||
339 | /* | |
17891183 | 340 | * Range of allowed for allocation sequences. When using lu_client_seq on |
d7e09d03 PT |
341 | * clients, this contains meta-sequence range. And for servers this |
342 | * contains super-sequence range. | |
343 | */ | |
344 | struct lu_seq_range lcs_space; | |
345 | ||
346 | /* Seq related proc */ | |
f3aa79fb | 347 | struct dentry *lcs_debugfs_entry; |
d7e09d03 PT |
348 | |
349 | /* This holds last allocated fid in last obtained seq */ | |
350 | struct lu_fid lcs_fid; | |
351 | ||
352 | /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */ | |
353 | enum lu_cli_type lcs_type; | |
354 | ||
355 | /* | |
356 | * Service uuid, passed from MDT + seq name to form unique seq name to | |
357 | * use it with procfs. | |
358 | */ | |
37604896 | 359 | char lcs_name[LUSTRE_MDT_MAXNAMELEN]; |
d7e09d03 PT |
360 | |
361 | /* | |
362 | * Sequence width, that is how many objects may be allocated in one | |
363 | * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH. | |
364 | */ | |
365 | __u64 lcs_width; | |
366 | ||
d7e09d03 PT |
367 | /* wait queue for fid allocation and update indicator */ |
368 | wait_queue_head_t lcs_waitq; | |
369 | int lcs_update; | |
370 | }; | |
371 | ||
/*
 * Client methods (implemented outside this header).
 */
void seq_client_flush(struct lu_client_seq *seq);

int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq,
			 struct lu_fid *fid);

/*
 * Fids common stuff.
 */
int fid_is_local(const struct lu_env *env,
		 struct lu_site *site,
		 const struct lu_fid *fid);

enum lu_cli_type;
int client_fid_init(struct obd_device *obd,
		    struct obd_export *exp,
		    enum lu_cli_type type);
int client_fid_fini(struct obd_device *obd);
385 | ||
386 | /* fid locking */ | |
387 | ||
388 | struct ldlm_namespace; | |
389 | ||
390 | /* | |
391 | * Build (DLM) resource name from FID. | |
392 | * | |
393 | * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2], | |
394 | * but was moved into name[1] along with the OID to avoid consuming the | |
395 | * renaming name[2,3] fields that need to be used for the quota identifier. | |
396 | */ | |
5ec35d45 | 397 | static inline void |
c5b60ba7 | 398 | fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res) |
d7e09d03 | 399 | { |
c5b60ba7 AD |
400 | memset(res, 0, sizeof(*res)); |
401 | res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid); | |
402 | res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid); | |
c5b60ba7 AD |
403 | } |
404 | ||
405 | /* | |
406 | * Return true if resource is for object identified by FID. | |
407 | */ | |
d8f183b3 JH |
408 | static inline bool fid_res_name_eq(const struct lu_fid *fid, |
409 | const struct ldlm_res_id *res) | |
c5b60ba7 AD |
410 | { |
411 | return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) && | |
412 | res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid); | |
413 | } | |
414 | ||
415 | /* | |
416 | * Extract FID from LDLM resource. Reverse of fid_build_reg_res_name(). | |
417 | */ | |
5ec35d45 | 418 | static inline void |
c5b60ba7 AD |
419 | fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res) |
420 | { | |
421 | fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF]; | |
422 | fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]); | |
423 | fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32); | |
424 | LASSERT(fid_res_name_eq(fid, res)); | |
d7e09d03 PT |
425 | } |
426 | ||
427 | /* | |
428 | * Build (DLM) resource identifier from global quota FID and quota ID. | |
429 | */ | |
5ec35d45 | 430 | static inline void |
c5b60ba7 | 431 | fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid, |
10457d4b | 432 | struct ldlm_res_id *res) |
d7e09d03 PT |
433 | { |
434 | fid_build_reg_res_name(glb_fid, res); | |
435 | res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid); | |
436 | res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid); | |
d7e09d03 PT |
437 | } |
438 | ||
439 | /* | |
440 | * Extract global FID and quota ID from resource name | |
441 | */ | |
c5b60ba7 AD |
442 | static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid, |
443 | union lquota_id *qid, | |
444 | const struct ldlm_res_id *res) | |
d7e09d03 | 445 | { |
c5b60ba7 | 446 | fid_extract_from_res_name(glb_fid, res); |
d7e09d03 PT |
447 | qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF]; |
448 | qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF]; | |
449 | qid->qid_fid.f_ver = | |
450 | (__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32); | |
451 | } | |
452 | ||
5ec35d45 | 453 | static inline void |
c5b60ba7 AD |
454 | fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash, |
455 | struct ldlm_res_id *res) | |
d7e09d03 | 456 | { |
c5b60ba7 AD |
457 | fid_build_reg_res_name(fid, res); |
458 | res->name[LUSTRE_RES_ID_HSH_OFF] = hash; | |
d7e09d03 PT |
459 | } |
460 | ||
461 | /** | |
462 | * Build DLM resource name from object id & seq, which will be removed | |
463 | * finally, when we replace ost_id with FID in data stack. | |
464 | * | |
465 | * Currently, resid from the old client, whose res[0] = object_id, | |
17891183 | 466 | * res[1] = object_seq, is just opposite with Metatdata |
d7e09d03 | 467 | * resid, where, res[0] = fid->f_seq, res[1] = fid->f_oid. |
bd9070cb | 468 | * To unify the resid identification, we will reverse the data |
d7e09d03 PT |
469 | * resid to keep it same with Metadata resid, i.e. |
470 | * | |
471 | * For resid from the old client, | |
472 | * res[0] = objid, res[1] = 0, still keep the original order, | |
17891183 | 473 | * for compatibility. |
d7e09d03 PT |
474 | * |
475 | * For new resid | |
476 | * res will be built from normal FID directly, i.e. res[0] = f_seq, | |
477 | * res[1] = f_oid + f_ver. | |
478 | */ | |
ac8f0a5c | 479 | static inline void ostid_build_res_name(const struct ost_id *oi, |
d7e09d03 PT |
480 | struct ldlm_res_id *name) |
481 | { | |
ec83e611 | 482 | memset(name, 0, sizeof(*name)); |
d7e09d03 PT |
483 | if (fid_seq_is_mdt0(ostid_seq(oi))) { |
484 | name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi); | |
485 | name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi); | |
486 | } else { | |
c5b60ba7 | 487 | fid_build_reg_res_name(&oi->oi_fid, name); |
d7e09d03 PT |
488 | } |
489 | } | |
490 | ||
d7e09d03 PT |
491 | /** |
492 | * Return true if the resource is for the object identified by this id & group. | |
493 | */ | |
ac8f0a5c JH |
494 | static inline int ostid_res_name_eq(const struct ost_id *oi, |
495 | const struct ldlm_res_id *name) | |
d7e09d03 PT |
496 | { |
497 | /* Note: it is just a trick here to save some effort, probably the | |
c56e256d OD |
498 | * correct way would be turn them into the FID and compare |
499 | */ | |
d7e09d03 PT |
500 | if (fid_seq_is_mdt0(ostid_seq(oi))) { |
501 | return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) && | |
502 | name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi); | |
503 | } else { | |
504 | return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) && | |
505 | name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi); | |
506 | } | |
507 | } | |
508 | ||
509 | /* The same as osc_build_res_name() */ | |
510 | static inline void ost_fid_build_resid(const struct lu_fid *fid, | |
511 | struct ldlm_res_id *resname) | |
512 | { | |
513 | if (fid_is_mdt0(fid) || fid_is_idif(fid)) { | |
514 | struct ost_id oi; | |
50ffcb7e | 515 | |
bfba872a | 516 | oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */ |
d7e09d03 PT |
517 | if (fid_to_ostid(fid, &oi) != 0) |
518 | return; | |
519 | ostid_build_res_name(&oi, resname); | |
520 | } else { | |
521 | fid_build_reg_res_name(fid, resname); | |
522 | } | |
523 | } | |
524 | ||
525 | static inline void ost_fid_from_resid(struct lu_fid *fid, | |
22144626 FY |
526 | const struct ldlm_res_id *name, |
527 | int ost_idx) | |
d7e09d03 PT |
528 | { |
529 | if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) { | |
530 | /* old resid */ | |
531 | struct ost_id oi; | |
50ffcb7e | 532 | |
d7e09d03 PT |
533 | ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); |
534 | ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); | |
22144626 | 535 | ostid_to_fid(fid, &oi, ost_idx); |
d7e09d03 PT |
536 | } else { |
537 | /* new resid */ | |
c5b60ba7 | 538 | fid_extract_from_res_name(fid, name); |
d7e09d03 PT |
539 | } |
540 | } | |
541 | ||
542 | /** | |
543 | * Flatten 128-bit FID values into a 64-bit value for use as an inode number. | |
544 | * For non-IGIF FIDs this starts just over 2^32, and continues without | |
545 | * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ | |
546 | * into the range where there may not be many OID values in use, to minimize | |
547 | * the risk of conflict. | |
548 | * | |
549 | * Suppose LUSTRE_SEQ_MAX_WIDTH less than (1 << 24) which is currently true, | |
550 | * the time between re-used inode numbers is very long - 2^40 SEQ numbers, | |
551 | * or about 2^40 client mounts, if clients create less than 2^24 files/mount. | |
552 | */ | |
553 | static inline __u64 fid_flatten(const struct lu_fid *fid) | |
554 | { | |
555 | __u64 ino; | |
556 | __u64 seq; | |
557 | ||
558 | if (fid_is_igif(fid)) { | |
559 | ino = lu_igif_ino(fid); | |
0a3bdb00 | 560 | return ino; |
d7e09d03 PT |
561 | } |
562 | ||
563 | seq = fid_seq(fid); | |
564 | ||
565 | ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid); | |
566 | ||
0a3bdb00 | 567 | return ino ? ino : fid_oid(fid); |
d7e09d03 PT |
568 | } |
569 | ||
570 | static inline __u32 fid_hash(const struct lu_fid *f, int bits) | |
571 | { | |
572 | /* all objects with same id and different versions will belong to same | |
c56e256d OD |
573 | * collisions list. |
574 | */ | |
72c0824a | 575 | return hash_long(fid_flatten(f), bits); |
d7e09d03 PT |
576 | } |
577 | ||
578 | /** | |
c56e256d OD |
579 | * map fid to 32 bit value for ino on 32bit systems. |
580 | */ | |
d7e09d03 PT |
581 | static inline __u32 fid_flatten32(const struct lu_fid *fid) |
582 | { | |
583 | __u32 ino; | |
584 | __u64 seq; | |
585 | ||
586 | if (fid_is_igif(fid)) { | |
587 | ino = lu_igif_ino(fid); | |
0a3bdb00 | 588 | return ino; |
d7e09d03 PT |
589 | } |
590 | ||
591 | seq = fid_seq(fid) - FID_SEQ_START; | |
592 | ||
593 | /* Map the high bits of the OID into higher bits of the inode number so | |
594 | * that inodes generated at about the same time have a reduced chance | |
595 | * of collisions. This will give a period of 2^12 = 1024 unique clients | |
596 | * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects | |
c56e256d OD |
597 | * (from OID), or up to 128M inodes without collisions for new files. |
598 | */ | |
d7e09d03 | 599 | ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) + |
cd94f231 | 600 | (seq >> (64 - (40 - 8)) & 0xffffff00) + |
d7e09d03 PT |
601 | (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8); |
602 | ||
0a3bdb00 | 603 | return ino ? ino : fid_oid(fid); |
d7e09d03 PT |
604 | } |
605 | ||
ac8f0a5c JH |
606 | static inline int lu_fid_diff(const struct lu_fid *fid1, |
607 | const struct lu_fid *fid2) | |
d7e09d03 | 608 | { |
1ada25dc | 609 | LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:" DFID ", fid2:" DFID "\n", |
d7e09d03 PT |
610 | PFID(fid1), PFID(fid2)); |
611 | ||
612 | if (fid_is_idif(fid1) && fid_is_idif(fid2)) | |
613 | return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) - | |
614 | fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver); | |
615 | ||
616 | return fid_oid(fid1) - fid_oid(fid2); | |
617 | } | |
618 | ||
/* Names of the sequence server and sequence controller. */
#define LUSTRE_SEQ_SRV_NAME	"seq_srv"
#define LUSTRE_SEQ_CTL_NAME	"seq_ctl"
621 | ||
622 | /* Range common stuff */ | |
623 | static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src) | |
624 | { | |
625 | dst->lsr_start = cpu_to_le64(src->lsr_start); | |
626 | dst->lsr_end = cpu_to_le64(src->lsr_end); | |
627 | dst->lsr_index = cpu_to_le32(src->lsr_index); | |
628 | dst->lsr_flags = cpu_to_le32(src->lsr_flags); | |
629 | } | |
630 | ||
631 | static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) | |
632 | { | |
633 | dst->lsr_start = le64_to_cpu(src->lsr_start); | |
634 | dst->lsr_end = le64_to_cpu(src->lsr_end); | |
635 | dst->lsr_index = le32_to_cpu(src->lsr_index); | |
636 | dst->lsr_flags = le32_to_cpu(src->lsr_flags); | |
637 | } | |
638 | ||
639 | static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src) | |
640 | { | |
641 | dst->lsr_start = cpu_to_be64(src->lsr_start); | |
642 | dst->lsr_end = cpu_to_be64(src->lsr_end); | |
643 | dst->lsr_index = cpu_to_be32(src->lsr_index); | |
644 | dst->lsr_flags = cpu_to_be32(src->lsr_flags); | |
645 | } | |
646 | ||
647 | static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) | |
648 | { | |
649 | dst->lsr_start = be64_to_cpu(src->lsr_start); | |
650 | dst->lsr_end = be64_to_cpu(src->lsr_end); | |
651 | dst->lsr_index = be32_to_cpu(src->lsr_index); | |
652 | dst->lsr_flags = be32_to_cpu(src->lsr_flags); | |
653 | } | |
654 | ||
655 | /** @} fid */ | |
656 | ||
56f4c5a8 | 657 | #endif /* __LUSTRE_FID_H */ |