]>
Commit | Line | Data |
---|---|---|
65294c1f JL |
1 | /* |
2 | * Open file cache. | |
3 | * | |
4 | * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> | |
5 | */ | |
6 | ||
7 | #include <linux/hash.h> | |
8 | #include <linux/slab.h> | |
9 | #include <linux/hash.h> | |
10 | #include <linux/file.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/list_lru.h> | |
13 | #include <linux/fsnotify_backend.h> | |
14 | #include <linux/fsnotify.h> | |
15 | #include <linux/seq_file.h> | |
16 | ||
17 | #include "vfs.h" | |
18 | #include "nfsd.h" | |
19 | #include "nfsfh.h" | |
20 | #include "filecache.h" | |
21 | #include "trace.h" | |
22 | ||
#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS		12
#define NFSD_FILE_HASH_SIZE		(1 << NFSD_FILE_HASH_BITS)
/* how long an idle entry may linger before the laundrette closes it */
#define NFSD_LAUNDRETTE_DELAY		(2 * HZ)

/* bit numbers in nfsd_file_lru_flags */
#define NFSD_FILE_LRU_RESCAN		(0)
#define NFSD_FILE_SHUTDOWN		(1)
/* above THRESHOLD: scan immediately; at/above LIMIT: flush synchronously */
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT		(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

/* One hash bucket: RCU-traversed chain of nfsd_files plus lock and stats */
struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;	/* chain of struct nfsd_file */
	spinlock_t		nfb_lock;	/* protects chain and counters */
	unsigned int		nfb_count;	/* current entries in bucket */
	unsigned int		nfb_maxcount;	/* high-water mark, for stats */
};

/* per-cpu cache-hit counter, summed for the stats procfile */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

/* whether nfsd_file_schedule_laundrette() may block to flush the work */
enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,	/* never flush */
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH		/* flush if over the limit */
};
60 | ||
/*
 * Schedule (or reschedule) the background laundrette that closes unused
 * cached files. Does nothing when the cache is empty or shutting down.
 * @ctl says whether the caller is willing to block flushing the work when
 * the cache has grown past NFSD_FILE_LRU_LIMIT.
 */
static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	/* Be more aggressive about scanning if over the threshold */
	if (count > NFSD_FILE_LRU_THRESHOLD)
		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
	else
		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);

	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
		return;

	/* ...and don't delay flushing if we're out of control */
	if (count >= NFSD_FILE_LRU_LIMIT)
		flush_delayed_work(&nfsd_filecache_laundrette);
}
82 | ||
/* RCU callback: final teardown of an nfsd_file after the grace period */
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}
91 | ||
/* fsnotify free_mark callback: release the containing nfsd_file_mark */
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}
100 | ||
101 | static struct nfsd_file_mark * | |
102 | nfsd_file_mark_get(struct nfsd_file_mark *nfm) | |
103 | { | |
104 | if (!atomic_inc_not_zero(&nfm->nfm_ref)) | |
105 | return NULL; | |
106 | return nfm; | |
107 | } | |
108 | ||
/*
 * Drop a reference to @nfm. On the last put, detach the fsnotify mark
 * from its group and drop the mark's own reference; the mark is then
 * freed via nfsd_file_mark_free.
 */
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (atomic_dec_and_test(&nfm->nfm_ref)) {

		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
118 | ||
/*
 * Find the nfsd_file_mark attached to nf->nf_inode, or create and attach
 * a new one. Returns a referenced mark on success, NULL on allocation
 * failure. Loops on -EEXIST to handle the race where another task
 * attaches a mark between our failed lookup and our add.
 */
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			/* may fail if the mark's refcount already hit zero */
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			fsnotify_put_mark(mark);
			if (likely(nfm))
				break;
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		atomic_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}
169 | ||
/*
 * Allocate and initialize a new nfsd_file for @inode with the access in
 * @may. The actual struct file is opened later by the caller; nf_file
 * starts out NULL. Takes a reference to the current credentials so later
 * lookups can match on them.
 */
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		/* one reference for the caller */
		atomic_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			/* record that leases may still need breaking later */
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}
197 | ||
/*
 * Release the resources attached to @nf and queue it for RCU freeing.
 * Returns true if an open file was closed, in which case the caller may
 * want to flush_delayed_fput() to ensure the final __fput runs.
 */
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		/*
		 * Hold an extra reference across filp_close() so the final
		 * fput happens via our explicit fput() call here (possibly
		 * deferred) rather than inside filp_close().
		 */
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	/* hash chains are RCU-traversed: free only after a grace period */
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}
215 | ||
/*
 * Remove @nf from its hash bucket and from the LRU, and drop the global
 * count. Caller must hold the bucket lock; the NFSD_FILE_HASHED bit is
 * cleared separately by the caller (see nfsd_file_unhash and
 * nfsd_file_lru_cb).
 */
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	if (!list_empty(&nf->nf_lru))
		list_lru_del(&nfsd_file_lru, &nf->nf_lru);
	atomic_long_dec(&nfsd_filecache_count);
}
229 | ||
230 | static bool | |
231 | nfsd_file_unhash(struct nfsd_file *nf) | |
232 | { | |
233 | if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { | |
234 | nfsd_file_do_unhash(nf); | |
235 | return true; | |
236 | } | |
237 | return false; | |
238 | } | |
239 | ||
/*
 * Return true if the file was unhashed. Drops the hash's reference to
 * @nf; if that reference was the last one, it is kept and @nf is placed
 * on @dispose instead, for the caller to release via a dispose-list
 * helper. Caller holds the bucket lock.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (atomic_add_unless(&nf->nf_ref, -1, 1))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}
258 | ||
/*
 * Drop a reference to @nf without scheduling the laundrette. Frees the
 * file when the count reaches zero. Returns the post-decrement count.
 */
static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
	int count;
	trace_nfsd_file_put(nf);

	count = atomic_dec_return(&nf->nf_ref);
	if (!count) {
		/* a file with no references must already be unhashed */
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
	return count;
}
272 | ||
/*
 * Drop a reference to @nf, first marking it recently used so the LRU
 * scanner gives it another pass. If only the hashtable's reference
 * remains and the file is still hashed, kick the laundrette.
 *
 * Ordering matters: REFERENCED is set before the count is dropped;
 * nfsd_file_lru_cb checks the count and then the flag, which is what
 * allows it to run locklessly (see the comment there).
 */
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (nfsd_file_put_noref(nf) == 1 && is_hashed)
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}
282 | ||
283 | struct nfsd_file * | |
284 | nfsd_file_get(struct nfsd_file *nf) | |
285 | { | |
286 | if (likely(atomic_inc_not_zero(&nf->nf_ref))) | |
287 | return nf; | |
288 | return NULL; | |
289 | } | |
290 | ||
291 | static void | |
292 | nfsd_file_dispose_list(struct list_head *dispose) | |
293 | { | |
294 | struct nfsd_file *nf; | |
295 | ||
296 | while(!list_empty(dispose)) { | |
297 | nf = list_first_entry(dispose, struct nfsd_file, nf_lru); | |
298 | list_del(&nf->nf_lru); | |
299 | nfsd_file_put_noref(nf); | |
300 | } | |
301 | } | |
302 | ||
/*
 * Release the dispose-list (final) reference on each entry and free it.
 * Synchronous variant: if any open files were closed, flush delayed
 * fputs so the final __fput completes before we return.
 */
static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!atomic_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}
320 | ||
/*
 * LRU isolate callback: returns LRU_REMOVED after unhashing an idle,
 * otherwise-unreferenced entry and moving it to the dispose list in
 * @arg, or LRU_SKIP to leave the entry in place.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;
	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	/* already unhashed by someone else: let them finish the release */
	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	/* recently used: spare it this round and request another scan */
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}
358 | ||
/*
 * Finish releasing the entries isolated by nfsd_file_lru_cb. The LRU
 * walk already cleared NFSD_FILE_HASHED; here we remove each entry from
 * its bucket under the bucket lock and drop the hash's reference.
 */
static void
nfsd_file_lru_dispose(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nfsd_file *nf = list_first_entry(head,
				struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_put_noref(nf);
	}
}
372 | ||
/* Shrinker count_objects: report how many entries sit on the LRU */
static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}
378 | ||
/* Shrinker scan_objects: isolate idle entries and dispose of them */
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(head);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
	nfsd_file_lru_dispose(&head);
	return ret;
}
389 | ||
/* Memory-pressure hook: lets the VM reclaim unused cached files */
static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
395 | ||
/*
 * Unhash every cached file for @inode in bucket @hashval; entries whose
 * final reference we now hold are collected on @dispose for the caller
 * to release.
 */
static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file *nf;
	struct hlist_node *tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
410 | ||
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
				NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}
431 | ||
/**
 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
				NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list(&dispose);
}
451 | ||
/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);

	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);

	/* some entry was recently used: schedule another pass */
	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);

	if (!list_empty(&head)) {
		nfsd_file_lru_dispose(&head);
		/* ensure any closed files get their final __fput */
		flush_delayed_fput();
	}
}
476 | ||
477 | static int | |
478 | nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, | |
479 | void *data) | |
480 | { | |
481 | struct file_lock *fl = data; | |
482 | ||
483 | /* Only close files for F_SETLEASE leases */ | |
484 | if (fl->fl_flags & FL_LEASE) | |
485 | nfsd_file_close_inode_sync(file_inode(fl->fl_file)); | |
486 | return 0; | |
487 | } | |
488 | ||
/* hooked up via lease_register_notifier() in nfsd_file_cache_init() */
static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};
492 | ||
/*
 * fsnotify callback: close any cached files for an inode that was
 * deleted (FS_DELETE_SELF), or whose attribute change shows the last
 * link is gone (FS_ATTRIB with i_nlink == 0).
 */
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}
517 | ||
518 | ||
/* fsnotify group ops for the marks created in nfsd_file_mark_find_or_create */
static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
523 | ||
/*
 * Set up the global file cache: hashtable, slab caches, LRU list,
 * shrinker, lease notifier and fsnotify group. Returns 0 immediately if
 * the cache is already initialized. On failure, unwinds whatever was
 * set up and returns a negative errno.
 */
int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	/* already initialized? */
	if (nfsd_file_hashtbl)
		return 0;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}


	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	/* kmem_cache_destroy() and kfree() tolerate NULL arguments */
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	goto out;
}
606 | ||
/*
 * Forcibly unhash and release every entry in the cache.
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(void)
{
	unsigned int i;
	struct nfsd_file *nf;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		spin_lock(&nfsd_file_hashtbl[i].nfb_lock);
		while (!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) {
			nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first,
					 struct nfsd_file, nf_node);
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfsd_file_hashtbl[i].nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}
638 | ||
639 | void | |
640 | nfsd_file_cache_shutdown(void) | |
641 | { | |
642 | LIST_HEAD(dispose); | |
643 | ||
644 | set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); | |
645 | ||
646 | lease_unregister_notifier(&nfsd_file_lease_notifier); | |
647 | unregister_shrinker(&nfsd_file_shrinker); | |
648 | /* | |
649 | * make sure all callers of nfsd_file_lru_cb are done before | |
650 | * calling nfsd_file_cache_purge | |
651 | */ | |
652 | cancel_delayed_work_sync(&nfsd_filecache_laundrette); | |
653 | nfsd_file_cache_purge(); | |
654 | list_lru_destroy(&nfsd_file_lru); | |
655 | rcu_barrier(); | |
656 | fsnotify_put_group(nfsd_file_fsnotify_group); | |
657 | nfsd_file_fsnotify_group = NULL; | |
658 | kmem_cache_destroy(nfsd_file_slab); | |
659 | nfsd_file_slab = NULL; | |
660 | fsnotify_wait_marks_destroyed(); | |
661 | kmem_cache_destroy(nfsd_file_mark_slab); | |
662 | nfsd_file_mark_slab = NULL; | |
663 | kfree(nfsd_file_hashtbl); | |
664 | nfsd_file_hashtbl = NULL; | |
665 | } | |
666 | ||
667 | static bool | |
668 | nfsd_match_cred(const struct cred *c1, const struct cred *c2) | |
669 | { | |
670 | int i; | |
671 | ||
672 | if (!uid_eq(c1->fsuid, c2->fsuid)) | |
673 | return false; | |
674 | if (!gid_eq(c1->fsgid, c2->fsgid)) | |
675 | return false; | |
676 | if (c1->group_info == NULL || c2->group_info == NULL) | |
677 | return c1->group_info == c2->group_info; | |
678 | if (c1->group_info->ngroups != c2->group_info->ngroups) | |
679 | return false; | |
680 | for (i = 0; i < c1->group_info->ngroups; i++) { | |
681 | if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) | |
682 | return false; | |
683 | } | |
684 | return true; | |
685 | } | |
686 | ||
/*
 * Search bucket @hashval for a cached file for @inode that was opened
 * with at least the access bits in @may_flags and under credentials
 * matching the current ones. Takes a reference on success; returns NULL
 * when nothing suitable was found (or the only matches were already on
 * their way out). Caller must hold rcu_read_lock() or the bucket lock.
 */
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if ((need & nf->nf_may) != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		/* fails if the entry's refcount already reached zero */
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}
707 | ||
/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hash bucket for open files that match this inode. Returns true
 * if there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool ret = false;
	struct nfsd_file *nf;
	unsigned int hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
736 | ||
/**
 * nfsd_file_acquire - open or reuse a cached file for a filehandle
 * @rqstp: the request
 * @fhp: filehandle of the file to open
 * @may_flags: NFSD_MAY_* bits describing the access needed
 * @pnf: out parameter: the referenced nfsd_file on success
 *
 * Look for a cached open matching the inode, requested access bits and
 * current credentials. If none exists, insert a new entry (flagged
 * NFSD_FILE_PENDING while construction is in progress) and perform the
 * actual open. A task that finds a PENDING entry waits for construction
 * to finish and retries if it failed. Returns nfs_ok and sets *pnf, or
 * an nfserr status.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32 status;
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
			may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	/* first try a lockless lookup */
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	/* recheck under the bucket lock in case we raced with an insert */
	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		/*
		 * The cached open may have skipped breaking leases (see the
		 * BREAK_* flags set in nfsd_file_alloc); break them now
		 * before handing the file out.
		 */
		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	/* wake anyone blocked in wait_for_construction above */
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}
848 | ||
/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	/* sum the per-cpu hit counters */
	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}
881 | ||
/* open hook for the filecache stats procfile */
int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}