fs/nfsd/filecache.c

/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

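/*
 * NFSD_FILE_LRU_RESCAN and NFSD_FILE_SHUTDOWN are bit numbers in
 * nfsd_file_lru_flags. The two thresholds drive the laundrette: above
 * NFSD_FILE_LRU_THRESHOLD the next scan is scheduled immediately rather
 * than after NFSD_LAUNDRETTE_DELAY, and once NFSD_FILE_LRU_LIMIT is
 * reached callers may flush the scan synchronously. See
 * nfsd_file_schedule_laundrette().
 */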
#define NFSD_FILE_LRU_RESCAN	(0)
#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};

static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	/* Be more aggressive about scanning if over the threshold */
	if (count > NFSD_FILE_LRU_THRESHOLD)
		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
	else
		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);

	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
		return;

	/* ...and don't delay flushing if we're out of control */
	if (count >= NFSD_FILE_LRU_LIMIT)
		flush_delayed_work(&nfsd_filecache_laundrette);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!atomic_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (atomic_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

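/*
 * Find this inode's fsnotify mark for our group, or create one. The loop
 * handles a race with nfsd_file_mark_put(): a mark found on the inode's
 * list may already have dropped its last reference, in which case it is
 * destroyed here and the add is retried until fsnotify_add_inode_mark()
 * no longer returns -EEXIST.
 */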
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		atomic_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

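/*
 * Allocate a new cache entry. When the caller was told not to break leases
 * (NFSD_MAY_NOT_BREAK_LEASE), record which kinds of lease break are still
 * owed in nf_flags so that a later cache hit in nfsd_file_acquire() can
 * perform the break before handing the file out.
 */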
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		atomic_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

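/*
 * Free a cache entry. filp_close() is used rather than a bare fput() so
 * that outstanding POSIX locks are released and buffered writes are
 * flushed. Since nfsd runs in kernel threads, the final __fput() is
 * deferred; we return true so the caller knows a flush_delayed_fput()
 * is needed before the file is really closed.
 */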
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static bool
nfsd_file_in_use(struct nfsd_file *nf)
{
	return nfsd_file_check_writeback(nf) ||
		nfsd_file_check_write_error(nf);
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (atomic_add_unless(&nf->nf_ref, -1, 1))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
	int count;

	trace_nfsd_file_put(nf);

	count = atomic_dec_return(&nf->nf_ref);
	if (!count) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
	return count;
}

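/*
 * Drop a reference. If that leaves only the hash table's reference and the
 * file has no writeback or unseen write errors pending, kick the
 * laundrette so the entry can be garbage-collected.
 */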
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	bool unused = !nfsd_file_in_use(nf);

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(atomic_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

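/*
 * Like nfsd_file_dispose_list(), but also wait for any deferred __fput()s
 * to complete, so that the files are fully closed by the time we return.
 */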
static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!atomic_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}

static void
nfsd_file_lru_dispose(struct list_head *head)
{
	struct nfsd_file *nf;

	list_for_each_entry(nf, head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list(head);
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(head);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
	nfsd_file_lru_dispose(&head);
	return ret;
}

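/*
 * Hook the cache into the VM's shrinker machinery so that memory pressure
 * also prunes it. A seeks value below DEFAULT_SEEKS biases the VM toward
 * reclaiming these objects more aggressively.
 */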
static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to
 * "inode". If any are found, unhash them, drop the hash table's reference
 * to them, and destroy any that had their last reference put. Also ensure
 * that the resulting fputs have had their final __fput done before
 * returning.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to
 * "inode". If any are found, unhash them, drop the hash table's reference
 * to them, and destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);

	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);

	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);

	if (!list_empty(&head)) {
		nfsd_file_lru_dispose(&head);
		flush_delayed_fput();
	}
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

void
nfsd_file_cache_shutdown(void)
{
	LIST_HEAD(dispose);

	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
}

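/*
 * Consider two credentials equivalent for cache-sharing purposes if the
 * fsuid, fsgid, and full supplementary group list all match.
 */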
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

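/*
 * Search a hash bucket for an entry that matches the inode, net namespace,
 * credentials, and requested access bits. The caller must hold either
 * rcu_read_lock() or the bucket lock. On a match, take a reference and
 * return the entry; entries whose refcount has already dropped to zero
 * are skipped.
 */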
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if ((need & nf->nf_may) != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

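/**
 * nfsd_file_acquire - find a cached open file or open a new one
 * @rqstp: the RPC request being processed
 * @fhp: filehandle of the file to open
 * @may_flags: NFSD_MAY_ settings for the access requested
 * @pnf: where to return the acquired nfsd_file
 *
 * Look up a matching entry in the cache, or allocate, hash, and open a new
 * one, waiting for any concurrent construction of the same entry to finish.
 * On success, set *pnf to a file with a reference held and return nfs_ok;
 * otherwise return an appropriate nfsstat.
 */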
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
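/*
 * A sample of the output (the values shown are illustrative only):
 *
 *	total entries: 5136
 *	longest chain: 3
 *	cache hits: 9201654
 */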
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}