]>
Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | */ | |
32 | ||
33 | #include <linux/fs.h> | |
34 | #include <linux/sched.h> | |
35 | #include <linux/mm.h> | |
36 | #include <linux/quotaops.h> | |
37 | #include <linux/highmem.h> | |
38 | #include <linux/pagemap.h> | |
39 | #include <linux/security.h> | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_LLITE | |
42 | ||
67a235f5 GKH |
43 | #include "../include/obd_support.h" |
44 | #include "../include/lustre_fid.h" | |
67a235f5 GKH |
45 | #include "../include/lustre_dlm.h" |
46 | #include "../include/lustre_ver.h" | |
d7e09d03 PT |
47 | #include "llite_internal.h" |
48 | ||
9c1c204f JH |
49 | static int ll_create_it(struct inode *dir, struct dentry *dentry, |
50 | struct lookup_intent *it); | |
d7e09d03 | 51 | |
5a3cd992 | 52 | /* called from iget5_locked->find_inode() under inode_hash_lock spinlock */ |
d7e09d03 PT |
53 | static int ll_test_inode(struct inode *inode, void *opaque) |
54 | { | |
55 | struct ll_inode_info *lli = ll_i2info(inode); | |
56 | struct lustre_md *md = opaque; | |
57 | ||
2e1b5b8b | 58 | if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) { |
d7e09d03 PT |
59 | CERROR("MDS body missing FID\n"); |
60 | return 0; | |
61 | } | |
62 | ||
2e1b5b8b | 63 | if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1)) |
d7e09d03 PT |
64 | return 0; |
65 | ||
66 | return 1; | |
67 | } | |
68 | ||
69 | static int ll_set_inode(struct inode *inode, void *opaque) | |
70 | { | |
71 | struct ll_inode_info *lli = ll_i2info(inode); | |
72 | struct mdt_body *body = ((struct lustre_md *)opaque)->body; | |
73 | ||
2e1b5b8b | 74 | if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) { |
d7e09d03 PT |
75 | CERROR("MDS body missing FID\n"); |
76 | return -EINVAL; | |
77 | } | |
78 | ||
2e1b5b8b JH |
79 | lli->lli_fid = body->mbo_fid1; |
80 | if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) { | |
267d9c8a LM |
81 | CERROR("Can not initialize inode " DFID |
82 | " without object type: valid = %#llx\n", | |
2e1b5b8b | 83 | PFID(&lli->lli_fid), body->mbo_valid); |
d7e09d03 PT |
84 | return -EINVAL; |
85 | } | |
86 | ||
2e1b5b8b | 87 | inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT); |
d7e09d03 | 88 | if (unlikely(inode->i_mode == 0)) { |
1ada25dc | 89 | CERROR("Invalid inode " DFID " type\n", PFID(&lli->lli_fid)); |
d7e09d03 PT |
90 | return -EINVAL; |
91 | } | |
92 | ||
93 | ll_lli_init(lli); | |
94 | ||
95 | return 0; | |
96 | } | |
97 | ||
c3397e7e | 98 | /** |
99 | * Get an inode by inode number(@hash), which is already instantiated by | |
100 | * the intent lookup). | |
d7e09d03 PT |
101 | */ |
102 | struct inode *ll_iget(struct super_block *sb, ino_t hash, | |
103 | struct lustre_md *md) | |
104 | { | |
105 | struct inode *inode; | |
c3397e7e | 106 | int rc = 0; |
d7e09d03 PT |
107 | |
108 | LASSERT(hash != 0); | |
109 | inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md); | |
c3397e7e | 110 | if (!inode) |
111 | return ERR_PTR(-ENOMEM); | |
112 | ||
113 | if (inode->i_state & I_NEW) { | |
114 | rc = ll_read_inode2(inode, md); | |
115 | if (!rc && S_ISREG(inode->i_mode) && | |
55051039 | 116 | !ll_i2info(inode)->lli_clob) |
c3397e7e | 117 | rc = cl_file_inode_init(inode, md); |
55051039 | 118 | |
c3397e7e | 119 | if (rc) { |
a80ba5fe | 120 | /* |
121 | * Let's clear directory lsm here, otherwise | |
122 | * make_bad_inode() will reset the inode mode | |
123 | * to regular, then ll_clear_inode will not | |
124 | * be able to clear lsm_md | |
125 | */ | |
126 | if (S_ISDIR(inode->i_mode)) | |
127 | ll_dir_clear_lsm_md(inode); | |
c3397e7e | 128 | make_bad_inode(inode); |
129 | unlock_new_inode(inode); | |
130 | iput(inode); | |
131 | inode = ERR_PTR(rc); | |
132 | } else { | |
133 | unlock_new_inode(inode); | |
134 | } | |
135 | } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { | |
136 | rc = ll_update_inode(inode, md); | |
1ada25dc | 137 | CDEBUG(D_VFSTRACE, "got inode: " DFID "(%p): rc = %d\n", |
2e1b5b8b | 138 | PFID(&md->body->mbo_fid1), inode, rc); |
c3397e7e | 139 | if (rc) { |
a80ba5fe | 140 | if (S_ISDIR(inode->i_mode)) |
141 | ll_dir_clear_lsm_md(inode); | |
c3397e7e | 142 | iput(inode); |
143 | inode = ERR_PTR(rc); | |
da5ecb4d | 144 | } |
d7e09d03 | 145 | } |
0a3bdb00 | 146 | return inode; |
d7e09d03 PT |
147 | } |
148 | ||
149 | static void ll_invalidate_negative_children(struct inode *dir) | |
150 | { | |
151 | struct dentry *dentry, *tmp_subdir; | |
d7e09d03 | 152 | |
2bbec0ed | 153 | spin_lock(&dir->i_lock); |
9d5be52f | 154 | hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) { |
d7e09d03 PT |
155 | spin_lock(&dentry->d_lock); |
156 | if (!list_empty(&dentry->d_subdirs)) { | |
157 | struct dentry *child; | |
158 | ||
159 | list_for_each_entry_safe(child, tmp_subdir, | |
160 | &dentry->d_subdirs, | |
946e51f2 | 161 | d_child) { |
2b0143b5 | 162 | if (d_really_is_negative(child)) |
b1d2a127 | 163 | d_lustre_invalidate(child, 1); |
d7e09d03 PT |
164 | } |
165 | } | |
166 | spin_unlock(&dentry->d_lock); | |
167 | } | |
2bbec0ed | 168 | spin_unlock(&dir->i_lock); |
d7e09d03 PT |
169 | } |
170 | ||
2de35386 | 171 | int ll_test_inode_by_fid(struct inode *inode, void *opaque) |
172 | { | |
173 | return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque); | |
174 | } | |
175 | ||
d7e09d03 PT |
176 | int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, |
177 | void *data, int flag) | |
178 | { | |
d7e09d03 | 179 | struct lustre_handle lockh; |
45b2a010 | 180 | int rc; |
d7e09d03 PT |
181 | |
182 | switch (flag) { | |
183 | case LDLM_CB_BLOCKING: | |
184 | ldlm_lock2handle(lock, &lockh); | |
185 | rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); | |
186 | if (rc < 0) { | |
45b2a010 | 187 | CDEBUG(D_INODE, "ldlm_cli_cancel: rc = %d\n", rc); |
0a3bdb00 | 188 | return rc; |
d7e09d03 PT |
189 | } |
190 | break; | |
191 | case LDLM_CB_CANCELING: { | |
192 | struct inode *inode = ll_inode_from_resource_lock(lock); | |
d7e09d03 | 193 | __u64 bits = lock->l_policy_data.l_inodebits.bits; |
d7e09d03 PT |
194 | |
195 | /* Inode is set to lock->l_resource->lr_lvb_inode | |
c0894c6c OD |
196 | * for mdc - bug 24555 |
197 | */ | |
6e16818b | 198 | LASSERT(!lock->l_ast_data); |
d7e09d03 | 199 | |
6e16818b | 200 | if (!inode) |
d7e09d03 PT |
201 | break; |
202 | ||
45b2a010 | 203 | /* Invalidate all dentries associated with this inode */ |
5a9a80ba | 204 | LASSERT(ldlm_is_canceling(lock)); |
7fc1f831 | 205 | |
45b2a010 JH |
206 | if (!fid_res_name_eq(ll_inode2fid(inode), |
207 | &lock->l_resource->lr_name)) { | |
1ada25dc | 208 | LDLM_ERROR(lock, "data mismatch with object " DFID "(%p)", |
45b2a010 JH |
209 | PFID(ll_inode2fid(inode)), inode); |
210 | LBUG(); | |
211 | } | |
212 | ||
213 | if (bits & MDS_INODELOCK_XATTR) { | |
d81e9009 | 214 | if (S_ISDIR(inode->i_mode)) |
215 | ll_i2info(inode)->lli_def_stripe_offset = -1; | |
7fc1f831 | 216 | ll_xattr_cache_destroy(inode); |
45b2a010 JH |
217 | bits &= ~MDS_INODELOCK_XATTR; |
218 | } | |
7fc1f831 | 219 | |
d7e09d03 | 220 | /* For OPEN locks we differentiate between lock modes |
c0894c6c OD |
221 | * LCK_CR, LCK_CW, LCK_PR - bug 22891 |
222 | */ | |
d7e09d03 | 223 | if (bits & MDS_INODELOCK_OPEN) |
45b2a010 | 224 | ll_have_md_lock(inode, &bits, lock->l_req_mode); |
d7e09d03 PT |
225 | |
226 | if (bits & MDS_INODELOCK_OPEN) { | |
45b2a010 JH |
227 | fmode_t fmode; |
228 | ||
d7e09d03 PT |
229 | switch (lock->l_req_mode) { |
230 | case LCK_CW: | |
45b2a010 | 231 | fmode = FMODE_WRITE; |
d7e09d03 PT |
232 | break; |
233 | case LCK_PR: | |
45b2a010 | 234 | fmode = FMODE_EXEC; |
d7e09d03 PT |
235 | break; |
236 | case LCK_CR: | |
45b2a010 | 237 | fmode = FMODE_READ; |
d7e09d03 PT |
238 | break; |
239 | default: | |
45b2a010 JH |
240 | LDLM_ERROR(lock, "bad lock mode for OPEN lock"); |
241 | LBUG(); | |
d7e09d03 | 242 | } |
45b2a010 JH |
243 | |
244 | ll_md_real_close(inode, fmode); | |
d7e09d03 PT |
245 | } |
246 | ||
45b2a010 JH |
247 | if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE | |
248 | MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM)) | |
249 | ll_have_md_lock(inode, &bits, LCK_MINMODE); | |
250 | ||
d7e09d03 | 251 | if (bits & MDS_INODELOCK_LAYOUT) { |
45b2a010 JH |
252 | struct cl_object_conf conf = { |
253 | .coc_opc = OBJECT_CONF_INVALIDATE, | |
254 | .coc_inode = inode, | |
255 | }; | |
d7e09d03 | 256 | |
d7e09d03 | 257 | rc = ll_layout_conf(inode, &conf); |
45b2a010 JH |
258 | if (rc < 0) |
259 | CDEBUG(D_INODE, "cannot invalidate layout of " | |
1ada25dc | 260 | DFID ": rc = %d\n", |
45b2a010 | 261 | PFID(ll_inode2fid(inode)), rc); |
d7e09d03 PT |
262 | } |
263 | ||
2d8c919e NY |
264 | if (bits & MDS_INODELOCK_UPDATE) { |
265 | struct ll_inode_info *lli = ll_i2info(inode); | |
266 | ||
267 | spin_lock(&lli->lli_lock); | |
268 | LTIME_S(inode->i_mtime) = 0; | |
269 | LTIME_S(inode->i_atime) = 0; | |
270 | LTIME_S(inode->i_ctime) = 0; | |
271 | spin_unlock(&lli->lli_lock); | |
272 | } | |
273 | ||
45b2a010 | 274 | if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { |
2de35386 | 275 | struct ll_inode_info *lli = ll_i2info(inode); |
276 | ||
1ada25dc | 277 | CDEBUG(D_INODE, "invalidating inode " DFID " lli = %p, pfid = " DFID "\n", |
2de35386 | 278 | PFID(ll_inode2fid(inode)), lli, |
279 | PFID(&lli->lli_pfid)); | |
280 | ||
d7e09d03 | 281 | truncate_inode_pages(inode->i_mapping, 0); |
2de35386 | 282 | |
283 | if (unlikely(!fid_is_zero(&lli->lli_pfid))) { | |
284 | struct inode *master_inode = NULL; | |
285 | unsigned long hash; | |
286 | ||
287 | /* | |
288 | * This is slave inode, since all of the child | |
289 | * dentry is connected on the master inode, so | |
290 | * we have to invalidate the negative children | |
291 | * on master inode | |
292 | */ | |
1ada25dc | 293 | CDEBUG(D_INODE, "Invalidate s" DFID " m" DFID "\n", |
2de35386 | 294 | PFID(ll_inode2fid(inode)), |
295 | PFID(&lli->lli_pfid)); | |
296 | ||
297 | hash = cl_fid_build_ino(&lli->lli_pfid, | |
298 | ll_need_32bit_api(ll_i2sbi(inode))); | |
5ce86eac | 299 | /* |
300 | * Do not lookup the inode with ilookup5, | |
301 | * otherwise it will cause dead lock, | |
302 | * | |
303 | * 1. Client1 send chmod req to the MDT0, then | |
304 | * on MDT0, it enqueues master and all of its | |
305 | * slaves lock, (mdt_attr_set() -> | |
306 | * mdt_lock_slaves()), after gets master and | |
307 | * stripe0 lock, it will send the enqueue req | |
308 | * (for stripe1) to MDT1, then MDT1 finds the | |
309 | * lock has been granted to client2. Then MDT1 | |
310 | * sends blocking ast to client2. | |
311 | * | |
312 | * 2. At the same time, client2 tries to unlink | |
313 | * the striped dir (rm -rf striped_dir), and | |
314 | * during lookup, it will hold the master inode | |
315 | * of the striped directory, whose inode state | |
316 | * is NEW, then tries to revalidate all of its | |
317 | * slaves, (ll_prep_inode()->ll_iget()-> | |
318 | * ll_read_inode2()-> ll_update_inode().). And | |
319 | * it will be blocked on the server side because | |
320 | * of 1. | |
321 | * | |
322 | * 3. Then the client get the blocking_ast req, | |
323 | * cancel the lock, but being blocked if using | |
324 | * ->ilookup5()), because master inode state is | |
325 | * NEW. | |
326 | */ | |
327 | master_inode = ilookup5_nowait(inode->i_sb, | |
328 | hash, | |
329 | ll_test_inode_by_fid, | |
330 | (void *)&lli->lli_pfid); | |
330e2a0f | 331 | if (master_inode) { |
2de35386 | 332 | ll_invalidate_negative_children(master_inode); |
333 | iput(master_inode); | |
334 | } | |
335 | } else { | |
336 | ll_invalidate_negative_children(inode); | |
337 | } | |
d7e09d03 PT |
338 | } |
339 | ||
45b2a010 | 340 | if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) && |
6e16818b | 341 | inode->i_sb->s_root && |
5242d422 | 342 | !is_root_inode(inode)) |
d7e09d03 | 343 | ll_invalidate_aliases(inode); |
45b2a010 | 344 | |
d7e09d03 PT |
345 | iput(inode); |
346 | break; | |
347 | } | |
348 | default: | |
349 | LBUG(); | |
350 | } | |
351 | ||
0a3bdb00 | 352 | return 0; |
d7e09d03 PT |
353 | } |
354 | ||
355 | __u32 ll_i2suppgid(struct inode *i) | |
356 | { | |
4b1a25f0 PT |
357 | if (in_group_p(i->i_gid)) |
358 | return (__u32)from_kgid(&init_user_ns, i->i_gid); | |
d7e09d03 PT |
359 | else |
360 | return (__u32)(-1); | |
361 | } | |
362 | ||
363 | /* Pack the required supplementary groups into the supplied groups array. | |
364 | * If we don't need to use the groups from the target inode(s) then we | |
365 | * instead pack one or more groups from the user's supplementary group | |
c0894c6c OD |
366 | * array in case it might be useful. Not needed if doing an MDS-side upcall. |
367 | */ | |
d7e09d03 PT |
368 | void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) |
369 | { | |
6e16818b | 370 | LASSERT(i1); |
d7e09d03 PT |
371 | |
372 | suppgids[0] = ll_i2suppgid(i1); | |
373 | ||
374 | if (i2) | |
375 | suppgids[1] = ll_i2suppgid(i2); | |
376 | else | |
377 | suppgids[1] = -1; | |
d7e09d03 PT |
378 | } |
379 | ||
380 | /* | |
381 | * try to reuse three types of dentry: | |
382 | * 1. unhashed alias, this one is unhashed by d_invalidate (but it may be valid | |
383 | * by concurrent .revalidate). | |
384 | * 2. INVALID alias (common case for no valid ldlm lock held, but this flag may | |
385 | * be cleared by others calling d_lustre_revalidate). | |
386 | * 3. DISCONNECTED alias. | |
387 | */ | |
388 | static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) | |
389 | { | |
390 | struct dentry *alias, *discon_alias, *invalid_alias; | |
d7e09d03 | 391 | |
9d5be52f | 392 | if (hlist_empty(&inode->i_dentry)) |
d7e09d03 PT |
393 | return NULL; |
394 | ||
7551b8b5 NC |
395 | discon_alias = NULL; |
396 | invalid_alias = NULL; | |
d7e09d03 | 397 | |
2bbec0ed | 398 | spin_lock(&inode->i_lock); |
9d5be52f | 399 | hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { |
d7e09d03 PT |
400 | LASSERT(alias != dentry); |
401 | ||
402 | spin_lock(&alias->d_lock); | |
f7368144 LNJ |
403 | if ((alias->d_flags & DCACHE_DISCONNECTED) && |
404 | S_ISDIR(inode->i_mode)) | |
d7e09d03 PT |
405 | /* LASSERT(last_discon == NULL); LU-405, bz 20055 */ |
406 | discon_alias = alias; | |
407 | else if (alias->d_parent == dentry->d_parent && | |
408 | alias->d_name.hash == dentry->d_name.hash && | |
409 | alias->d_name.len == dentry->d_name.len && | |
410 | memcmp(alias->d_name.name, dentry->d_name.name, | |
411 | dentry->d_name.len) == 0) | |
412 | invalid_alias = alias; | |
413 | spin_unlock(&alias->d_lock); | |
414 | ||
415 | if (invalid_alias) | |
416 | break; | |
417 | } | |
418 | alias = invalid_alias ?: discon_alias ?: NULL; | |
419 | if (alias) { | |
420 | spin_lock(&alias->d_lock); | |
421 | dget_dlock(alias); | |
422 | spin_unlock(&alias->d_lock); | |
423 | } | |
2bbec0ed | 424 | spin_unlock(&inode->i_lock); |
d7e09d03 PT |
425 | |
426 | return alias; | |
427 | } | |
428 | ||
429 | /* | |
430 | * Similar to d_splice_alias(), but lustre treats invalid alias | |
431 | * similar to DCACHE_DISCONNECTED, and tries to use it anyway. | |
432 | */ | |
433 | struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) | |
434 | { | |
d7e09d03 | 435 | if (inode) { |
7126bc2e | 436 | struct dentry *new = ll_find_alias(inode, de); |
d9f5af4a | 437 | |
d7e09d03 | 438 | if (new) { |
d7e09d03 PT |
439 | d_move(new, de); |
440 | iput(inode); | |
441 | CDEBUG(D_DENTRY, | |
442 | "Reuse dentry %p inode %p refc %d flags %#x\n", | |
2b0143b5 | 443 | new, d_inode(new), d_count(new), new->d_flags); |
d7e09d03 PT |
444 | return new; |
445 | } | |
446 | } | |
d7e09d03 PT |
447 | d_add(de, inode); |
448 | CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n", | |
2b0143b5 | 449 | de, d_inode(de), d_count(de), de->d_flags); |
d7e09d03 PT |
450 | return de; |
451 | } | |
452 | ||
2d95f10e JH |
453 | static int ll_lookup_it_finish(struct ptlrpc_request *request, |
454 | struct lookup_intent *it, | |
455 | struct inode *parent, struct dentry **de) | |
d7e09d03 | 456 | { |
d7e09d03 PT |
457 | struct inode *inode = NULL; |
458 | __u64 bits = 0; | |
44ecac68 | 459 | int rc = 0; |
99f1c013 | 460 | struct dentry *alias; |
d7e09d03 PT |
461 | |
462 | /* NB 1 request reference will be taken away by ll_intent_lock() | |
c0894c6c OD |
463 | * when I return |
464 | */ | |
d7e09d03 | 465 | CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it, |
e476f2e5 | 466 | it->it_disposition); |
d7e09d03 PT |
467 | if (!it_disposition(it, DISP_LOOKUP_NEG)) { |
468 | rc = ll_prep_inode(&inode, request, (*de)->d_sb, it); | |
469 | if (rc) | |
0a3bdb00 | 470 | return rc; |
d7e09d03 PT |
471 | |
472 | ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits); | |
473 | ||
474 | /* We used to query real size from OSTs here, but actually | |
c0894c6c OD |
475 | * this is not needed. For stat() calls size would be updated |
476 | * from subsequent do_revalidate()->ll_inode_revalidate_it() in | |
477 | * 2.4 and | |
478 | * vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6 | |
479 | * Everybody else who needs correct file size would call | |
480 | * ll_glimpse_size or some equivalent themselves anyway. | |
481 | * Also see bug 7198. | |
482 | */ | |
d7e09d03 PT |
483 | } |
484 | ||
99f1c013 OD |
485 | alias = ll_splice_alias(inode, *de); |
486 | if (IS_ERR(alias)) { | |
487 | rc = PTR_ERR(alias); | |
488 | goto out; | |
3ea8f3bc | 489 | } |
99f1c013 | 490 | *de = alias; |
d7e09d03 PT |
491 | |
492 | if (!it_disposition(it, DISP_LOOKUP_NEG)) { | |
493 | /* we have lookup look - unhide dentry */ | |
494 | if (bits & MDS_INODELOCK_LOOKUP) | |
495 | d_lustre_revalidate(*de); | |
496 | } else if (!it_disposition(it, DISP_OPEN_CREATE)) { | |
497 | /* If file created on server, don't depend on parent UPDATE | |
498 | * lock to unhide it. It is left hidden and next lookup can | |
499 | * find it in ll_splice_alias. | |
500 | */ | |
501 | /* Check that parent has UPDATE lock. */ | |
502 | struct lookup_intent parent_it = { | |
503 | .it_op = IT_GETATTR, | |
e476f2e5 | 504 | .it_lock_handle = 0 }; |
a609c393 | 505 | struct lu_fid fid = ll_i2info(parent)->lli_fid; |
506 | ||
507 | /* If it is striped directory, get the real stripe parent */ | |
508 | if (unlikely(ll_i2info(parent)->lli_lsm_md)) { | |
509 | rc = md_get_fid_from_lsm(ll_i2mdexp(parent), | |
510 | ll_i2info(parent)->lli_lsm_md, | |
511 | (*de)->d_name.name, | |
512 | (*de)->d_name.len, &fid); | |
513 | if (rc) | |
514 | return rc; | |
515 | } | |
d7e09d03 | 516 | |
a609c393 | 517 | if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid, |
518 | NULL)) { | |
d7e09d03 PT |
519 | d_lustre_revalidate(*de); |
520 | ll_intent_release(&parent_it); | |
521 | } | |
522 | } | |
523 | ||
44ecac68 FY |
524 | out: |
525 | if (rc != 0 && it->it_op & IT_OPEN) | |
526 | ll_open_cleanup((*de)->d_sb, request); | |
527 | ||
528 | return rc; | |
d7e09d03 PT |
529 | } |
530 | ||
531 | static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, | |
9b496412 | 532 | struct lookup_intent *it) |
d7e09d03 PT |
533 | { |
534 | struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; | |
535 | struct dentry *save = dentry, *retval; | |
536 | struct ptlrpc_request *req = NULL; | |
864d6a25 | 537 | struct md_op_data *op_data = NULL; |
dbca51dd | 538 | struct inode *inode; |
d7e09d03 PT |
539 | __u32 opc; |
540 | int rc; | |
d7e09d03 PT |
541 | |
542 | if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) | |
0a3bdb00 | 543 | return ERR_PTR(-ENAMETOOLONG); |
d7e09d03 | 544 | |
1ada25dc | 545 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),intent=%s\n", |
97a075cd | 546 | dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it)); |
d7e09d03 PT |
547 | |
548 | if (d_mountpoint(dentry)) | |
549 | CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); | |
550 | ||
6e16818b | 551 | if (!it || it->it_op == IT_GETXATTR) |
2d95f10e | 552 | it = &lookup_it; |
d7e09d03 | 553 | |
5231f765 LS |
554 | if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) { |
555 | rc = ll_statahead(parent, &dentry, 0); | |
d7e09d03 PT |
556 | if (rc == 1) { |
557 | if (dentry == save) | |
34e1f2bb JL |
558 | retval = NULL; |
559 | else | |
560 | retval = dentry; | |
561 | goto out; | |
d7e09d03 PT |
562 | } |
563 | } | |
564 | ||
b4f840c1 LD |
565 | if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && |
566 | dentry->d_sb->s_flags & MS_RDONLY) | |
567 | return ERR_PTR(-EROFS); | |
568 | ||
1e8a576e | 569 | if (it->it_op & IT_CREAT) |
d7e09d03 PT |
570 | opc = LUSTRE_OPC_CREATE; |
571 | else | |
572 | opc = LUSTRE_OPC_ANY; | |
573 | ||
574 | op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name, | |
9b496412 | 575 | dentry->d_name.len, 0, opc, NULL); |
d7e09d03 | 576 | if (IS_ERR(op_data)) |
0a3bdb00 | 577 | return (void *)op_data; |
d7e09d03 PT |
578 | |
579 | /* enforce umask if acl disabled or MDS doesn't support umask */ | |
580 | if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent))) | |
581 | it->it_create_mode &= ~current_umask(); | |
582 | ||
70a251f6 JH |
583 | rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req, |
584 | &ll_md_blocking_ast, 0); | |
864d6a25 FY |
585 | /* |
586 | * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the | |
587 | * client does not know which suppgid should be sent to the MDS, or | |
588 | * some other(s) changed the target file's GID after this RPC sent | |
589 | * to the MDS with the suppgid as the original GID, then we should | |
590 | * try again with right suppgid. | |
591 | */ | |
592 | if (rc == -EACCES && it->it_op & IT_OPEN && | |
593 | it_disposition(it, DISP_OPEN_DENY)) { | |
594 | struct mdt_body *body; | |
595 | ||
596 | LASSERT(req); | |
597 | ||
598 | body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); | |
599 | if (op_data->op_suppgids[0] == body->mbo_gid || | |
600 | op_data->op_suppgids[1] == body->mbo_gid || | |
601 | !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) { | |
602 | retval = ERR_PTR(-EACCES); | |
603 | goto out; | |
604 | } | |
605 | ||
606 | fid_zero(&op_data->op_fid2); | |
607 | op_data->op_suppgids[1] = body->mbo_gid; | |
608 | ptlrpc_req_finished(req); | |
609 | req = NULL; | |
610 | ll_intent_release(it); | |
611 | rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req, | |
612 | ll_md_blocking_ast, 0); | |
613 | } | |
614 | ||
34e1f2bb JL |
615 | if (rc < 0) { |
616 | retval = ERR_PTR(rc); | |
617 | goto out; | |
618 | } | |
d7e09d03 | 619 | |
2d95f10e | 620 | rc = ll_lookup_it_finish(req, it, parent, &dentry); |
d7e09d03 PT |
621 | if (rc != 0) { |
622 | ll_intent_release(it); | |
34e1f2bb JL |
623 | retval = ERR_PTR(rc); |
624 | goto out; | |
d7e09d03 PT |
625 | } |
626 | ||
2b0143b5 | 627 | inode = d_inode(dentry); |
dbca51dd AV |
628 | if ((it->it_op & IT_OPEN) && inode && |
629 | !S_ISREG(inode->i_mode) && | |
630 | !S_ISDIR(inode->i_mode)) { | |
631 | ll_release_openhandle(inode, it); | |
d7e09d03 | 632 | } |
dbca51dd | 633 | ll_lookup_finish_locks(it, inode); |
d7e09d03 PT |
634 | |
635 | if (dentry == save) | |
34e1f2bb | 636 | retval = NULL; |
d7e09d03 | 637 | else |
34e1f2bb | 638 | retval = dentry; |
5231f765 | 639 | out: |
864d6a25 FY |
640 | if (op_data && !IS_ERR(op_data)) |
641 | ll_finish_md_op_data(op_data); | |
642 | ||
5231f765 | 643 | ptlrpc_req_finished(req); |
d7e09d03 PT |
644 | return retval; |
645 | } | |
646 | ||
647 | static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, | |
648 | unsigned int flags) | |
649 | { | |
650 | struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; | |
651 | struct dentry *de; | |
652 | ||
1ada25dc | 653 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),flags=%u\n", |
97a075cd | 654 | dentry, PFID(ll_inode2fid(parent)), parent, flags); |
d7e09d03 | 655 | |
3a77df11 OD |
656 | /* Optimize away (CREATE && !OPEN). Let .create handle the race. |
657 | * but only if we have write permissions there, otherwise we need | |
658 | * to proceed with lookup. LU-4185 | |
659 | */ | |
660 | if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) && | |
661 | (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0)) | |
d7e09d03 | 662 | return NULL; |
d7e09d03 | 663 | |
cd94f231 | 664 | if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE)) |
d7e09d03 PT |
665 | itp = NULL; |
666 | else | |
667 | itp = ⁢ | |
9b496412 | 668 | de = ll_lookup_it(parent, dentry, itp); |
d7e09d03 | 669 | |
6e16818b | 670 | if (itp) |
d7e09d03 PT |
671 | ll_intent_release(itp); |
672 | ||
673 | return de; | |
674 | } | |
675 | ||
676 | /* | |
677 | * For cached negative dentry and new dentry, handle lookup/create/open | |
678 | * together. | |
679 | */ | |
680 | static int ll_atomic_open(struct inode *dir, struct dentry *dentry, | |
e7738506 | 681 | struct file *file, unsigned int open_flags, |
d7e09d03 PT |
682 | umode_t mode, int *opened) |
683 | { | |
684 | struct lookup_intent *it; | |
685 | struct dentry *de; | |
d7e09d03 | 686 | int rc = 0; |
d7e09d03 | 687 | |
1ada25dc | 688 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),file %p,open_flags %x,mode %x opened %d\n", |
97a075cd JN |
689 | dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, |
690 | *opened); | |
d7e09d03 | 691 | |
99f1c013 OD |
692 | /* Only negative dentries enter here */ |
693 | LASSERT(!d_inode(dentry)); | |
694 | ||
695 | if (!d_in_lookup(dentry)) { | |
696 | /* A valid negative dentry that just passed revalidation, | |
697 | * there's little point to try and open it server-side, | |
698 | * even though there's a minuscle chance it might succeed. | |
699 | * Either way it's a valid race to just return -ENOENT here. | |
700 | */ | |
701 | if (!(open_flags & O_CREAT)) | |
702 | return -ENOENT; | |
703 | ||
704 | /* Otherwise we just unhash it to be rehashed afresh via | |
705 | * lookup if necessary | |
706 | */ | |
707 | d_drop(dentry); | |
708 | } | |
709 | ||
496a51bd | 710 | it = kzalloc(sizeof(*it), GFP_NOFS); |
d7e09d03 | 711 | if (!it) |
0a3bdb00 | 712 | return -ENOMEM; |
d7e09d03 PT |
713 | |
714 | it->it_op = IT_OPEN; | |
9b496412 | 715 | if (open_flags & O_CREAT) |
d7e09d03 | 716 | it->it_op |= IT_CREAT; |
d7e09d03 PT |
717 | it->it_create_mode = (mode & S_IALLUGO) | S_IFREG; |
718 | it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags); | |
c1b66fcc | 719 | it->it_flags &= ~MDS_OPEN_FL_INTERNAL; |
d7e09d03 PT |
720 | |
721 | /* Dentry added to dcache tree in ll_lookup_it */ | |
9b496412 | 722 | de = ll_lookup_it(dir, dentry, it); |
d7e09d03 PT |
723 | if (IS_ERR(de)) |
724 | rc = PTR_ERR(de); | |
6e16818b | 725 | else if (de) |
d7e09d03 PT |
726 | dentry = de; |
727 | ||
728 | if (!rc) { | |
729 | if (it_disposition(it, DISP_OPEN_CREATE)) { | |
730 | /* Dentry instantiated in ll_create_it. */ | |
9c1c204f | 731 | rc = ll_create_it(dir, dentry, it); |
d7e09d03 PT |
732 | if (rc) { |
733 | /* We dget in ll_splice_alias. */ | |
6e16818b | 734 | if (de) |
d7e09d03 PT |
735 | dput(de); |
736 | goto out_release; | |
737 | } | |
738 | ||
739 | *opened |= FILE_CREATED; | |
740 | } | |
2b0143b5 | 741 | if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) { |
d7e09d03 | 742 | /* Open dentry. */ |
2b0143b5 | 743 | if (S_ISFIFO(d_inode(dentry)->i_mode)) { |
411c9699 JH |
744 | /* We cannot call open here as it might |
745 | * deadlock. This case is unreachable in | |
746 | * practice because of OBD_CONNECT_NODEVOH. | |
d7e09d03 | 747 | */ |
d7e09d03 PT |
748 | rc = finish_no_open(file, de); |
749 | } else { | |
750 | file->private_data = it; | |
751 | rc = finish_open(file, dentry, NULL, opened); | |
752 | /* We dget in ll_splice_alias. finish_open takes | |
753 | * care of dget for fd open. | |
754 | */ | |
6e16818b | 755 | if (de) |
d7e09d03 PT |
756 | dput(de); |
757 | } | |
758 | } else { | |
759 | rc = finish_no_open(file, de); | |
760 | } | |
761 | } | |
762 | ||
763 | out_release: | |
764 | ll_intent_release(it); | |
97903a26 | 765 | kfree(it); |
d7e09d03 | 766 | |
0a3bdb00 | 767 | return rc; |
d7e09d03 PT |
768 | } |
769 | ||
d7e09d03 | 770 | /* We depend on "mode" being set with the proper file type/umask by now */ |
2d95f10e | 771 | static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) |
d7e09d03 PT |
772 | { |
773 | struct inode *inode = NULL; | |
774 | struct ptlrpc_request *request = NULL; | |
775 | struct ll_sb_info *sbi = ll_i2sbi(dir); | |
776 | int rc; | |
d7e09d03 | 777 | |
e476f2e5 | 778 | LASSERT(it && it->it_disposition); |
d7e09d03 PT |
779 | |
780 | LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); | |
8bf86fd9 | 781 | request = it->it_request; |
d7e09d03 PT |
782 | it_clear_disposition(it, DISP_ENQ_CREATE_REF); |
783 | rc = ll_prep_inode(&inode, request, dir->i_sb, it); | |
34e1f2bb JL |
784 | if (rc) { |
785 | inode = ERR_PTR(rc); | |
786 | goto out; | |
787 | } | |
d7e09d03 | 788 | |
9d5be52f | 789 | LASSERT(hlist_empty(&inode->i_dentry)); |
d7e09d03 PT |
790 | |
791 | /* We asked for a lock on the directory, but were granted a | |
792 | * lock on the inode. Since we finally have an inode pointer, | |
c0894c6c OD |
793 | * stuff it in the lock. |
794 | */ | |
1ada25dc | 795 | CDEBUG(D_DLMTRACE, "setting l_ast_data to inode " DFID "(%p)\n", |
97a075cd | 796 | PFID(ll_inode2fid(dir)), inode); |
d7e09d03 | 797 | ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); |
d7e09d03 PT |
798 | out: |
799 | ptlrpc_req_finished(request); | |
800 | return inode; | |
801 | } | |
802 | ||
803 | /* | |
804 | * By the time this is called, we already have created the directory cache | |
805 | * entry for the new file, but it is so far negative - it has no inode. | |
806 | * | |
807 | * We defer creating the OBD object(s) until open, to keep the intent and | |
808 | * non-intent code paths similar, and also because we do not have the MDS | |
809 | * inode number before calling ll_create_node() (which is needed for LOV), | |
810 | * so we would need to do yet another RPC to the MDS to store the LOV EA | |
811 | * data on the MDS. If needed, we would pass the PACKED lmm as data and | |
812 | * lmm_size in datalen (the MDS still has code which will handle that). | |
813 | * | |
814 | * If the create succeeds, we fill in the inode information | |
815 | * with d_instantiate(). | |
816 | */ | |
9c1c204f | 817 | static int ll_create_it(struct inode *dir, struct dentry *dentry, |
d7e09d03 PT |
818 | struct lookup_intent *it) |
819 | { | |
820 | struct inode *inode; | |
821 | int rc = 0; | |
d7e09d03 | 822 | |
1ada25dc | 823 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), intent=%s\n", |
97a075cd | 824 | dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it)); |
d7e09d03 PT |
825 | |
826 | rc = it_open_error(DISP_OPEN_CREATE, it); | |
827 | if (rc) | |
0a3bdb00 | 828 | return rc; |
d7e09d03 | 829 | |
2d95f10e | 830 | inode = ll_create_node(dir, it); |
d7e09d03 | 831 | if (IS_ERR(inode)) |
0a3bdb00 | 832 | return PTR_ERR(inode); |
d7e09d03 | 833 | |
d7e09d03 | 834 | d_instantiate(dentry, inode); |
51cfb8c4 SB |
835 | |
836 | return ll_init_security(dentry, inode, dir); | |
d7e09d03 PT |
837 | } |
838 | ||
79496845 | 839 | void ll_update_times(struct ptlrpc_request *request, struct inode *inode) |
d7e09d03 PT |
840 | { |
841 | struct mdt_body *body = req_capsule_server_get(&request->rq_pill, | |
842 | &RMF_MDT_BODY); | |
843 | ||
844 | LASSERT(body); | |
2e1b5b8b JH |
845 | if (body->mbo_valid & OBD_MD_FLMTIME && |
846 | body->mbo_mtime > LTIME_S(inode->i_mtime)) { | |
1ada25dc | 847 | CDEBUG(D_INODE, "setting fid " DFID " mtime from %lu to %llu\n", |
97a075cd | 848 | PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime), |
2e1b5b8b JH |
849 | body->mbo_mtime); |
850 | LTIME_S(inode->i_mtime) = body->mbo_mtime; | |
d7e09d03 | 851 | } |
2e1b5b8b JH |
852 | if (body->mbo_valid & OBD_MD_FLCTIME && |
853 | body->mbo_ctime > LTIME_S(inode->i_ctime)) | |
854 | LTIME_S(inode->i_ctime) = body->mbo_ctime; | |
d7e09d03 PT |
855 | } |
856 | ||
/*
 * Create a new child of @dir, named after @dentry, through md_create() and
 * attach the resulting inode to @dentry.
 *
 * @dir:    parent directory inode
 * @dentry: dentry supplying the new name; instantiated on success
 * @tgt:    optional string handed to md_create() (ll_symlink() passes the
 *          link target here); may be NULL
 * @mode:   mode forwarded to md_create()
 * @rdev:   device number forwarded to md_create()
 * @opc:    operation code forwarded to ll_prep_md_op_data()
 *
 * Returns the result of ll_init_security() when every earlier step
 * succeeded, otherwise the negative error of the step that failed.
 */
static int ll_new_node(struct inode *dir, struct dentry *dentry,
		       const char *tgt, umode_t mode, int rdev,
		       __u32 opc)
{
	struct ptlrpc_request *request = NULL;
	struct md_op_data *op_data;
	struct inode *inode = NULL;
	struct ll_sb_info *sbi = ll_i2sbi(dir);
	int tgt_len = 0;
	int err;

	/* The length passed to md_create() includes the terminating NUL. */
	if (unlikely(tgt))
		tgt_len = strlen(tgt) + 1;
again:
	/* op_data is rebuilt on every pass and released right after md_create(). */
	op_data = ll_prep_md_op_data(NULL, dir, NULL,
				     dentry->d_name.name,
				     dentry->d_name.len,
				     0, opc, NULL);
	if (IS_ERR(op_data)) {
		err = PTR_ERR(op_data);
		goto err_exit;
	}

	err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
			from_kuid(&init_user_ns, current_fsuid()),
			from_kgid(&init_user_ns, current_fsgid()),
			cfs_curproc_cap_pack(), rdev, &request);
	ll_finish_md_op_data(op_data);
	/* -EREMOTE is not fatal: it is handled by the retry block below. */
	if (err < 0 && err != -EREMOTE)
		goto err_exit;

	/*
	 * If the client doesn't know where to create a subdirectory (or
	 * in case of a race that sends the RPC to the wrong MDS), the
	 * MDS will return -EREMOTE and the client will fetch the layout
	 * of the directory, then create the directory on the right MDT.
	 */
	if (unlikely(err == -EREMOTE)) {
		struct ll_inode_info *lli = ll_i2info(dir);
		struct lmv_user_md *lum;
		int lumsize, err2;

		/* Drop the failed create request before reusing the pointer. */
		ptlrpc_req_finished(request);
		request = NULL;

		err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
					OBD_MD_DEFAULT_MEA);
		if (!err2) {
			/* Update stripe_offset and retry */
			lli->lli_def_stripe_offset = lum->lum_stripe_offset;
		} else if (err2 == -ENODATA &&
			   lli->lli_def_stripe_offset != -1) {
			/*
			 * If there are no default stripe EA on the MDT, but the
			 * client has default stripe, then it probably means
			 * default stripe EA has just been deleted.
			 */
			lli->lli_def_stripe_offset = -1;
		} else {
			/*
			 * NOTE(review): err2 is not propagated; the function
			 * returns the -EREMOTE still held in err.
			 */
			goto err_exit;
		}

		/* Release the getstripe request, then redo the create. */
		ptlrpc_req_finished(request);
		request = NULL;
		goto again;
	}

	/* Refresh the parent's cached timestamps from the create reply. */
	ll_update_times(request, dir);

	err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
	if (err)
		goto err_exit;

	d_instantiate(dentry, inode);

	err = ll_init_security(dentry, inode, dir);
err_exit:
	/* request is still NULL if we got here before md_create() ran. */
	if (request)
		ptlrpc_req_finished(request);

	return err;
}
939 | ||
d6689e5f AV |
940 | static int ll_mknod(struct inode *dir, struct dentry *dchild, |
941 | umode_t mode, dev_t rdev) | |
d7e09d03 PT |
942 | { |
943 | int err; | |
d7e09d03 | 944 | |
1ada25dc | 945 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p) mode %o dev %x\n", |
97a075cd JN |
946 | dchild, PFID(ll_inode2fid(dir)), dir, mode, |
947 | old_encode_dev(rdev)); | |
d7e09d03 PT |
948 | |
949 | if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) | |
950 | mode &= ~current_umask(); | |
951 | ||
952 | switch (mode & S_IFMT) { | |
953 | case 0: | |
954 | mode |= S_IFREG; /* for mode = 0 case, fallthrough */ | |
955 | case S_IFREG: | |
956 | case S_IFCHR: | |
957 | case S_IFBLK: | |
958 | case S_IFIFO: | |
959 | case S_IFSOCK: | |
b2037bb6 AV |
960 | err = ll_new_node(dir, dchild, NULL, mode, |
961 | old_encode_dev(rdev), | |
d7e09d03 PT |
962 | LUSTRE_OPC_MKNOD); |
963 | break; | |
964 | case S_IFDIR: | |
965 | err = -EPERM; | |
966 | break; | |
967 | default: | |
968 | err = -EINVAL; | |
969 | } | |
970 | ||
971 | if (!err) | |
972 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1); | |
973 | ||
0a3bdb00 | 974 | return err; |
d7e09d03 PT |
975 | } |
976 | ||
977 | /* | |
978 | * Plain create. Intent create is handled in atomic_open. | |
979 | */ | |
980 | static int ll_create_nd(struct inode *dir, struct dentry *dentry, | |
981 | umode_t mode, bool want_excl) | |
982 | { | |
983 | int rc; | |
984 | ||
1ada25dc | 985 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), flags=%u, excl=%d\n", |
97a075cd | 986 | dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl); |
d7e09d03 | 987 | |
d6689e5f | 988 | rc = ll_mknod(dir, dentry, mode, 0); |
d7e09d03 PT |
989 | |
990 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1); | |
991 | ||
09561a53 AV |
992 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n", |
993 | dentry, d_unhashed(dentry)); | |
d7e09d03 PT |
994 | |
995 | return rc; | |
996 | } | |
997 | ||
96d61c24 | 998 | static int ll_unlink(struct inode *dir, struct dentry *dchild) |
d7e09d03 | 999 | { |
521f2ad7 AV |
1000 | struct ptlrpc_request *request = NULL; |
1001 | struct md_op_data *op_data; | |
1002 | int rc; | |
43550121 | 1003 | |
521f2ad7 | 1004 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", |
96d61c24 | 1005 | dchild, dir->i_ino, dir->i_generation, dir); |
521f2ad7 | 1006 | |
521f2ad7 | 1007 | op_data = ll_prep_md_op_data(NULL, dir, NULL, |
96d61c24 AP |
1008 | dchild->d_name.name, |
1009 | dchild->d_name.len, | |
521f2ad7 AV |
1010 | 0, LUSTRE_OPC_ANY, NULL); |
1011 | if (IS_ERR(op_data)) | |
1012 | return PTR_ERR(op_data); | |
1013 | ||
5d9d0cb7 | 1014 | if (dchild->d_inode) |
3c4b9d09 AP |
1015 | op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); |
1016 | ||
521f2ad7 AV |
1017 | op_data->op_fid2 = op_data->op_fid3; |
1018 | rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); | |
1019 | ll_finish_md_op_data(op_data); | |
1020 | if (rc) | |
1021 | goto out; | |
1022 | ||
1023 | ll_update_times(request, dir); | |
1024 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1); | |
1025 | ||
521f2ad7 AV |
1026 | out: |
1027 | ptlrpc_req_finished(request); | |
1028 | return rc; | |
d7e09d03 PT |
1029 | } |
1030 | ||
e3befdc7 | 1031 | static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
d7e09d03 | 1032 | { |
7c2f9094 AV |
1033 | int err; |
1034 | ||
1ada25dc | 1035 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir" DFID "(%p)\n", |
97a075cd | 1036 | dentry, PFID(ll_inode2fid(dir)), dir); |
7c2f9094 AV |
1037 | |
1038 | if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) | |
1039 | mode &= ~current_umask(); | |
52048862 | 1040 | mode = (mode & (0777 | S_ISVTX)) | S_IFDIR; |
7c2f9094 | 1041 | |
9329b697 | 1042 | err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR); |
7c2f9094 AV |
1043 | if (!err) |
1044 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1); | |
1045 | ||
1046 | return err; | |
d7e09d03 PT |
1047 | } |
1048 | ||
96d61c24 | 1049 | static int ll_rmdir(struct inode *dir, struct dentry *dchild) |
d7e09d03 | 1050 | { |
55dec617 AV |
1051 | struct ptlrpc_request *request = NULL; |
1052 | struct md_op_data *op_data; | |
1053 | int rc; | |
1054 | ||
1ada25dc | 1055 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p)\n", |
97a075cd | 1056 | dchild, PFID(ll_inode2fid(dir)), dir); |
55dec617 | 1057 | |
55dec617 | 1058 | op_data = ll_prep_md_op_data(NULL, dir, NULL, |
96d61c24 AP |
1059 | dchild->d_name.name, |
1060 | dchild->d_name.len, | |
55dec617 AV |
1061 | S_IFDIR, LUSTRE_OPC_ANY, NULL); |
1062 | if (IS_ERR(op_data)) | |
1063 | return PTR_ERR(op_data); | |
1064 | ||
5d9d0cb7 | 1065 | if (dchild->d_inode) |
3c4b9d09 AP |
1066 | op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); |
1067 | ||
55dec617 AV |
1068 | op_data->op_fid2 = op_data->op_fid3; |
1069 | rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); | |
1070 | ll_finish_md_op_data(op_data); | |
1071 | if (rc == 0) { | |
1072 | ll_update_times(request, dir); | |
1073 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1); | |
1074 | } | |
1075 | ||
1076 | ptlrpc_req_finished(request); | |
1077 | return rc; | |
d7e09d03 PT |
1078 | } |
1079 | ||
1080 | static int ll_symlink(struct inode *dir, struct dentry *dentry, | |
1081 | const char *oldname) | |
1082 | { | |
60dd654e AV |
1083 | int err; |
1084 | ||
1ada25dc | 1085 | CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),target=%.*s\n", |
97a075cd | 1086 | dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); |
60dd654e | 1087 | |
52048862 | 1088 | err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777, |
e15ba45d | 1089 | 0, LUSTRE_OPC_SYMLINK); |
60dd654e AV |
1090 | |
1091 | if (!err) | |
1092 | ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1); | |
1093 | ||
1094 | return err; | |
d7e09d03 PT |
1095 | } |
1096 | ||
1097 | static int ll_link(struct dentry *old_dentry, struct inode *dir, | |
1098 | struct dentry *new_dentry) | |
1099 | { | |
2b0143b5 | 1100 | struct inode *src = d_inode(old_dentry); |
59cc93ef AV |
1101 | struct ll_sb_info *sbi = ll_i2sbi(dir); |
1102 | struct ptlrpc_request *request = NULL; | |
1103 | struct md_op_data *op_data; | |
1104 | int err; | |
1105 | ||
1ada25dc | 1106 | CDEBUG(D_VFSTRACE, "VFS Op: inode=" DFID "(%p), dir=" DFID "(%p), target=%pd\n", |
97a075cd JN |
1107 | PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir, |
1108 | new_dentry); | |
59cc93ef AV |
1109 | |
1110 | op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name, | |
1111 | new_dentry->d_name.len, | |
1112 | 0, LUSTRE_OPC_ANY, NULL); | |
1113 | if (IS_ERR(op_data)) | |
1114 | return PTR_ERR(op_data); | |
1115 | ||
1116 | err = md_link(sbi->ll_md_exp, op_data, &request); | |
1117 | ll_finish_md_op_data(op_data); | |
1118 | if (err) | |
1119 | goto out; | |
1120 | ||
1121 | ll_update_times(request, dir); | |
1122 | ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1); | |
1123 | out: | |
1124 | ptlrpc_req_finished(request); | |
1125 | return err; | |
d7e09d03 PT |
1126 | } |
1127 | ||
96d61c24 | 1128 | static int ll_rename(struct inode *src, struct dentry *src_dchild, |
1cd66c93 MS |
1129 | struct inode *tgt, struct dentry *tgt_dchild, |
1130 | unsigned int flags) | |
d7e09d03 | 1131 | { |
78851093 | 1132 | struct ptlrpc_request *request = NULL; |
96d61c24 | 1133 | struct ll_sb_info *sbi = ll_i2sbi(src); |
78851093 | 1134 | struct md_op_data *op_data; |
d7e09d03 | 1135 | int err; |
78851093 | 1136 | |
1cd66c93 MS |
1137 | if (flags) |
1138 | return -EINVAL; | |
1139 | ||
78851093 | 1140 | CDEBUG(D_VFSTRACE, |
1ada25dc | 1141 | "VFS Op:oldname=%pd, src_dir=" DFID "(%p), newname=%pd, tgt_dir=" DFID "(%p)\n", |
97a075cd JN |
1142 | src_dchild, PFID(ll_inode2fid(src)), src, |
1143 | tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); | |
78851093 | 1144 | |
96d61c24 | 1145 | op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, |
78851093 AV |
1146 | LUSTRE_OPC_ANY, NULL); |
1147 | if (IS_ERR(op_data)) | |
1148 | return PTR_ERR(op_data); | |
1149 | ||
5d9d0cb7 | 1150 | if (src_dchild->d_inode) |
3c4b9d09 | 1151 | op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode); |
5d9d0cb7 | 1152 | if (tgt_dchild->d_inode) |
3c4b9d09 AP |
1153 | op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode); |
1154 | ||
78851093 | 1155 | err = md_rename(sbi->ll_md_exp, op_data, |
96d61c24 AP |
1156 | src_dchild->d_name.name, |
1157 | src_dchild->d_name.len, | |
1158 | tgt_dchild->d_name.name, | |
1159 | tgt_dchild->d_name.len, &request); | |
78851093 | 1160 | ll_finish_md_op_data(op_data); |
d7e09d03 | 1161 | if (!err) { |
96d61c24 AP |
1162 | ll_update_times(request, src); |
1163 | ll_update_times(request, tgt); | |
78851093 | 1164 | ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1); |
d7e09d03 | 1165 | } |
78851093 AV |
1166 | |
1167 | ptlrpc_req_finished(request); | |
1168 | if (!err) | |
96d61c24 | 1169 | d_move(src_dchild, tgt_dchild); |
d7e09d03 PT |
1170 | return err; |
1171 | } | |
1172 | ||
2d95f10e | 1173 | const struct inode_operations ll_dir_inode_operations = { |
d7e09d03 PT |
1174 | .mknod = ll_mknod, |
1175 | .atomic_open = ll_atomic_open, | |
1176 | .lookup = ll_lookup_nd, | |
1177 | .create = ll_create_nd, | |
1178 | /* We need all these non-raw things for NFSD, to not patch it. */ | |
1179 | .unlink = ll_unlink, | |
1180 | .mkdir = ll_mkdir, | |
1181 | .rmdir = ll_rmdir, | |
1182 | .symlink = ll_symlink, | |
1183 | .link = ll_link, | |
2773bf00 | 1184 | .rename = ll_rename, |
d7e09d03 PT |
1185 | .setattr = ll_setattr, |
1186 | .getattr = ll_getattr, | |
1187 | .permission = ll_inode_permission, | |
d7e09d03 | 1188 | .listxattr = ll_listxattr, |
d7e09d03 PT |
1189 | .get_acl = ll_get_acl, |
1190 | }; | |
1191 | ||
2d95f10e | 1192 | const struct inode_operations ll_special_inode_operations = { |
d7e09d03 PT |
1193 | .setattr = ll_setattr, |
1194 | .getattr = ll_getattr, | |
1195 | .permission = ll_inode_permission, | |
d7e09d03 | 1196 | .listxattr = ll_listxattr, |
d7e09d03 PT |
1197 | .get_acl = ll_get_acl, |
1198 | }; |