fs/xfs/xfs_iget.c

   1 /*
   2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms of version 2 of the GNU General Public License as
   6  * published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it would be useful, but
   9  * WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11  *
  12  * Further, this software is distributed without any warranty that it is
  13  * free of the rightful claim of any third person regarding infringement
  14  * or the like.  Any license provided herein, whether implied or
  15  * otherwise, applies only to this software file.  Patent licenses, if
  16  * any, provided herein do not apply to combinations of this program with
  17  * other software, or any other product whatsoever.
  18  *
  19  * You should have received a copy of the GNU General Public License along
  20  * with this program; if not, write the Free Software Foundation, Inc., 59
  21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
  22  *
  23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
  24  * Mountain View, CA  94043, or:
  25  *
  26  * http://www.sgi.com
  27  *
  28  * For further information regarding this notice, see:
  29  *
  30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  31  */
  32
  33 #include "xfs.h"
  34
  35 #include "xfs_macros.h"
  36 #include "xfs_types.h"
  37 #include "xfs_inum.h"
  38 #include "xfs_log.h"
  39 #include "xfs_trans.h"
  40 #include "xfs_sb.h"
  41 #include "xfs_ag.h"
  42 #include "xfs_dir.h"
  43 #include "xfs_dir2.h"
  44 #include "xfs_dmapi.h"
  45 #include "xfs_mount.h"
  46 #include "xfs_alloc_btree.h"
  47 #include "xfs_bmap_btree.h"
  48 #include "xfs_ialloc_btree.h"
  49 #include "xfs_btree.h"
  50 #include "xfs_ialloc.h"
  51 #include "xfs_attr_sf.h"
  52 #include "xfs_dir_sf.h"
  53 #include "xfs_dir2_sf.h"
  54 #include "xfs_dinode.h"
  55 #include "xfs_inode.h"
  56 #include "xfs_quota.h"
  57 #include "xfs_utils.h"
  58 #include "xfs_bit.h"
  59
  60 /*
  61  * Initialize the inode hash table for the newly mounted file system.
  62  * Choose an initial table size based on user specified value, else
  63  * use a simple algorithm using the maximum number of inodes as an
  64  * indicator for table size, and clamp it between one and some large
  65  * number of pages.
  66  */
  67 void
  68 xfs_ihash_init(xfs_mount_t *mp)
  69 {
  70         __uint64_t      icount;
  71         uint            i, flags = KM_SLEEP | KM_MAYFAIL;
  72
  73         if (!mp->m_ihsize) {
  74                 icount = mp->m_maxicount ? mp->m_maxicount :
  75                          (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
  76                 mp->m_ihsize = 1 << max_t(uint, 8,
  77                                         (xfs_highbit64(icount) + 1) / 2);
  78                 mp->m_ihsize = min_t(uint, mp->m_ihsize,
  79                                         (64 * NBPP) / sizeof(xfs_ihash_t));
  80         }
  81
  82         while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
  83                                                 sizeof(xfs_ihash_t), flags))) {
  84                 if ((mp->m_ihsize >>= 1) <= NBPP)
  85                         flags = KM_SLEEP;
  86         }
  87         for (i = 0; i < mp->m_ihsize; i++) {
  88                 rwlock_init(&(mp->m_ihash[i].ih_lock));
  89         }
  90 }
  91
  92 /*
  93  * Free up structures allocated by xfs_ihash_init, at unmount time.
  94  */
  95 void
  96 xfs_ihash_free(xfs_mount_t *mp)
  97 {
  98         kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t));
  99         mp->m_ihash = NULL;
 100 }
 101
 102 /*
 103  * Initialize the inode cluster hash table for the newly mounted file system.
 104  * Its size is derived from the ihash table size.
 105  */
 106 void
 107 xfs_chash_init(xfs_mount_t *mp)
 108 {
 109         uint    i;
 110
 111         mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
 112                          (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
 113         mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
 114         mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
 115                                                  * sizeof(xfs_chash_t),
 116                                                  KM_SLEEP);
 117         for (i = 0; i < mp->m_chsize; i++) {
 118                 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
 119         }
 120 }
 121
 122 /*
 123  * Free up structures allocated by xfs_chash_init, at unmount time.
 124  */
 125 void
 126 xfs_chash_free(xfs_mount_t *mp)
 127 {
 128         int     i;
 129
 130         for (i = 0; i < mp->m_chsize; i++) {
 131                 spinlock_destroy(&mp->m_chash[i].ch_lock);
 132         }
 133
 134         kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
 135         mp->m_chash = NULL;
 136 }
 137
 138 /*
 139  * Look up an inode by number in the given file system.
 140  * The inode is looked up in the hash table for the file system
 141  * represented by the mount point parameter mp.  Each bucket of
 142  * the hash table is guarded by an individual semaphore.
 143  *
 144  * If the inode is found in the hash table, its corresponding vnode
 145  * is obtained with a call to vn_get().  This call takes care of
 146  * coordination with the reclamation of the inode and vnode.  Note
 147  * that the vmap structure is filled in while holding the hash lock.
 148  * This gives us the state of the inode/vnode when we found it and
 149  * is used for coordination in vn_get().
 150  *
 151  * If it is not in core, read it in from the file system's device and
 152  * add the inode into the hash table.
 153  *
 154  * The inode is locked according to the value of the lock_flags parameter.
 155  * This flag parameter indicates how and if the inode's IO lock and inode lock
 156  * should be taken.
 157  *
 158  * mp -- the mount point structure for the current file system.  It points
 159  *       to the inode hash table.
 160  * tp -- a pointer to the current transaction if there is one.  This is
 161  *       simply passed through to the xfs_iread() call.
 162  * ino -- the number of the inode desired.  This is the unique identifier
 163  *        within the file system for the inode being requested.
 164  * lock_flags -- flags indicating how to lock the inode.  See the comment
 165  *               for xfs_ilock() for a list of valid values.
 166  * bno -- the block number starting the buffer containing the inode,
 167  *        if known (as by bulkstat), else 0.
 168  */
 169 STATIC int
 170 xfs_iget_core(
 171         vnode_t         *vp,
 172         xfs_mount_t     *mp,
 173         xfs_trans_t     *tp,
 174         xfs_ino_t       ino,
 175         uint            flags,
 176         uint            lock_flags,
 177         xfs_inode_t     **ipp,
 178         xfs_daddr_t     bno)
 179 {
 180         xfs_ihash_t     *ih;
 181         xfs_inode_t     *ip;
 182         xfs_inode_t     *iq;
 183         vnode_t         *inode_vp;
 184         ulong           version;
 185         int             error;
 186         /* REFERENCED */
 187         xfs_chash_t     *ch;
 188         xfs_chashlist_t *chl, *chlnew;
 189         SPLDECL(s);
 190
 191
 192         ih = XFS_IHASH(mp, ino);
 193
 194 again:
 195         read_lock(&ih->ih_lock);
 196
 197         for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
 198                 if (ip->i_ino == ino) {
 199                         /*
 200                          * If INEW is set this inode is being set up
 201                          * we need to pause and try again.
 202                          */
 203                         if (ip->i_flags & XFS_INEW) {
 204                                 read_unlock(&ih->ih_lock);
 205                                 delay(1);
 206                                 XFS_STATS_INC(xs_ig_frecycle);
 207
 208                                 goto again;
 209                         }
 210
 211                         inode_vp = XFS_ITOV_NULL(ip);
 212                         if (inode_vp == NULL) {
 213                                 /*
 214                                  * If IRECLAIM is set this inode is
 215                                  * on its way out of the system,
 216                                  * we need to pause and try again.
 217                                  */
 218                                 if (ip->i_flags & XFS_IRECLAIM) {
 219                                         read_unlock(&ih->ih_lock);
 220                                         delay(1);
 221                                         XFS_STATS_INC(xs_ig_frecycle);
 222
 223                                         goto again;
 224                                 }
 225
 226                                 vn_trace_exit(vp, "xfs_iget.alloc",
 227                                         (inst_t *)__return_address);
 228
 229                                 XFS_STATS_INC(xs_ig_found);
 230
 231                                 ip->i_flags &= ~XFS_IRECLAIMABLE;
 232                                 read_unlock(&ih->ih_lock);
 233
 234                                 XFS_MOUNT_ILOCK(mp);
 235                                 list_del_init(&ip->i_reclaim);
 236                                 XFS_MOUNT_IUNLOCK(mp);
 237
 238                                 goto finish_inode;
 239
 240                         } else if (vp != inode_vp) {
 241                                 struct inode *inode = LINVFS_GET_IP(inode_vp);
 242
 243                                 /* The inode is being torn down, pause and
 244                                  * try again.
 245                                  */
 246                                 if (inode->i_state & (I_FREEING | I_CLEAR)) {
 247                                         read_unlock(&ih->ih_lock);
 248                                         delay(1);
 249                                         XFS_STATS_INC(xs_ig_frecycle);
 250
 251                                         goto again;
 252                                 }
 253 /* Chances are the other vnode (the one in the inode) is being torn
 254  * down right now, and we landed on top of it. Question is, what do
 255  * we do? Unhook the old inode and hook up the new one?
 256  */
 257                                 cmn_err(CE_PANIC,
 258                         "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
 259                                                 inode_vp, vp);
 260                         }
 261
 262                         read_unlock(&ih->ih_lock);
 263
 264                         XFS_STATS_INC(xs_ig_found);
 265
 266 finish_inode:
 267                         if (ip->i_d.di_mode == 0) {
 268                                 if (!(flags & IGET_CREATE))
 269                                         return ENOENT;
 270                                 xfs_iocore_inode_reinit(ip);
 271                         }
 272
 273                         if (lock_flags != 0)
 274                                 xfs_ilock(ip, lock_flags);
 275
 276                         ip->i_flags &= ~XFS_ISTALE;
 277
 278                         vn_trace_exit(vp, "xfs_iget.found",
 279                                                 (inst_t *)__return_address);
 280                         goto return_ip;
 281                 }
 282         }
 283
 284         /*
 285          * Inode cache miss: save the hash chain version stamp and unlock
 286          * the chain, so we don't deadlock in vn_alloc.
 287          */
 288         XFS_STATS_INC(xs_ig_missed);
 289
 290         version = ih->ih_version;
 291
 292         read_unlock(&ih->ih_lock);
 293
 294         /*
 295          * Read the disk inode attributes into a new inode structure and get
 296          * a new vnode for it. This should also initialize i_ino and i_mount.
 297          */
 298         error = xfs_iread(mp, tp, ino, &ip, bno);
 299         if (error) {
 300                 return error;
 301         }
 302
 303         vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
 304
 305         xfs_inode_lock_init(ip, vp);
 306         xfs_iocore_inode_init(ip);
 307
 308         if (lock_flags != 0) {
 309                 xfs_ilock(ip, lock_flags);
 310         }
 311
 312         if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) {
 313                 xfs_idestroy(ip);
 314                 return ENOENT;
 315         }
 316
 317         /*
 318          * Put ip on its hash chain, unless someone else hashed a duplicate
 319          * after we released the hash lock.
 320          */
 321         write_lock(&ih->ih_lock);
 322
 323         if (ih->ih_version != version) {
 324                 for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
 325                         if (iq->i_ino == ino) {
 326                                 write_unlock(&ih->ih_lock);
 327                                 xfs_idestroy(ip);
 328
 329                                 XFS_STATS_INC(xs_ig_dup);
 330                                 goto again;
 331                         }
 332                 }
 333         }
 334
 335         /*
 336          * These values _must_ be set before releasing ihlock!
 337          */
 338         ip->i_hash = ih;
 339         if ((iq = ih->ih_next)) {
 340                 iq->i_prevp = &ip->i_next;
 341         }
 342         ip->i_next = iq;
 343         ip->i_prevp = &ih->ih_next;
 344         ih->ih_next = ip;
 345         ip->i_udquot = ip->i_gdquot = NULL;
 346         ih->ih_version++;
 347         ip->i_flags |= XFS_INEW;
 348
 349         write_unlock(&ih->ih_lock);
 350
 351         /*
 352          * put ip on its cluster's hash chain
 353          */
 354         ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
 355                ip->i_cnext == NULL);
 356
 357         chlnew = NULL;
 358         ch = XFS_CHASH(mp, ip->i_blkno);
 359  chlredo:
 360         s = mutex_spinlock(&ch->ch_lock);
 361         for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
 362                 if (chl->chl_blkno == ip->i_blkno) {
 363
 364                         /* insert this inode into the doubly-linked list
 365                          * where chl points */
 366                         if ((iq = chl->chl_ip)) {
 367                                 ip->i_cprev = iq->i_cprev;
 368                                 iq->i_cprev->i_cnext = ip;
 369                                 iq->i_cprev = ip;
 370                                 ip->i_cnext = iq;
 371                         } else {
 372                                 ip->i_cnext = ip;
 373                                 ip->i_cprev = ip;
 374                         }
 375                         chl->chl_ip = ip;
 376                         ip->i_chash = chl;
 377                         break;
 378                 }
 379         }
 380
 381         /* no hash list found for this block; add a new hash list */
 382         if (chl == NULL)  {
 383                 if (chlnew == NULL) {
 384                         mutex_spinunlock(&ch->ch_lock, s);
 385                         ASSERT(xfs_chashlist_zone != NULL);
 386                         chlnew = (xfs_chashlist_t *)
 387                                         kmem_zone_alloc(xfs_chashlist_zone,
 388                                                 KM_SLEEP);
 389                         ASSERT(chlnew != NULL);
 390                         goto chlredo;
 391                 } else {
 392                         ip->i_cnext = ip;
 393                         ip->i_cprev = ip;
 394                         ip->i_chash = chlnew;
 395                         chlnew->chl_ip = ip;
 396                         chlnew->chl_blkno = ip->i_blkno;
 397                         chlnew->chl_next = ch->ch_list;
 398                         ch->ch_list = chlnew;
 399                         chlnew = NULL;
 400                 }
 401         } else {
 402                 if (chlnew != NULL) {
 403                         kmem_zone_free(xfs_chashlist_zone, chlnew);
 404                 }
 405         }
 406
 407         mutex_spinunlock(&ch->ch_lock, s);
 408
 409
 410         /*
 411          * Link ip to its mount and thread it on the mount's inode list.
 412          */
 413         XFS_MOUNT_ILOCK(mp);
 414         if ((iq = mp->m_inodes)) {
 415                 ASSERT(iq->i_mprev->i_mnext == iq);
 416                 ip->i_mprev = iq->i_mprev;
 417                 iq->i_mprev->i_mnext = ip;
 418                 iq->i_mprev = ip;
 419                 ip->i_mnext = iq;
 420         } else {
 421                 ip->i_mnext = ip;
 422                 ip->i_mprev = ip;
 423         }
 424         mp->m_inodes = ip;
 425
 426         XFS_MOUNT_IUNLOCK(mp);
 427
 428  return_ip:
 429         ASSERT(ip->i_df.if_ext_max ==
 430                XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
 431
 432         ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
 433                ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
 434
 435         *ipp = ip;
 436
 437         /*
 438          * If we have a real type for an on-disk inode, we can set ops(&unlock)
 439          * now.  If it's a new inode being created, xfs_ialloc will handle it.
 440          */
 441         VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);
 442
 443         return 0;
 444 }
 445
 446
 447 /*
 448  * The 'normal' internal xfs_iget, if needed it will
 449  * 'allocate', or 'get', the vnode.
 450  */
 451 int
 452 xfs_iget(
 453         xfs_mount_t     *mp,
 454         xfs_trans_t     *tp,
 455         xfs_ino_t       ino,
 456         uint            flags,
 457         uint            lock_flags,
 458         xfs_inode_t     **ipp,
 459         xfs_daddr_t     bno)
 460 {
 461         struct inode    *inode;
 462         vnode_t         *vp = NULL;
 463         int             error;
 464
 465 retry:
 466         XFS_STATS_INC(xs_ig_attempts);
 467
 468         if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
 469                 bhv_desc_t      *bdp;
 470                 xfs_inode_t     *ip;
 471                 int             newnode;
 472
 473                 vp = LINVFS_GET_VP(inode);
 474                 if (inode->i_state & I_NEW) {
 475 inode_allocate:
 476                         vn_initialize(inode);
 477                         error = xfs_iget_core(vp, mp, tp, ino, flags,
 478                                         lock_flags, ipp, bno);
 479                         if (error) {
 480                                 vn_mark_bad(vp);
 481                                 if (inode->i_state & I_NEW)
 482                                         unlock_new_inode(inode);
 483                                 iput(inode);
 484                         }
 485                 } else {
 486                         /* These are true if the inode is in inactive or
 487                          * reclaim. The linux inode is about to go away,
 488                          * wait for that path to finish, and try again.
 489                          */
 490                         if (vp->v_flag & (VINACT | VRECLM)) {
 491                                 vn_wait(vp);
 492                                 iput(inode);
 493                                 goto retry;
 494                         }
 495
 496                         if (is_bad_inode(inode)) {
 497                                 iput(inode);
 498                                 return EIO;
 499                         }
 500
 501                         bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
 502                         if (bdp == NULL) {
 503                                 XFS_STATS_INC(xs_ig_dup);
 504                                 goto inode_allocate;
 505                         }
 506                         ip = XFS_BHVTOI(bdp);
 507                         if (lock_flags != 0)
 508                                 xfs_ilock(ip, lock_flags);
 509                         newnode = (ip->i_d.di_mode == 0);
 510                         if (newnode)
 511                                 xfs_iocore_inode_reinit(ip);
 512                         XFS_STATS_INC(xs_ig_found);
 513                         *ipp = ip;
 514                         error = 0;
 515                 }
 516         } else
 517                 error = ENOMEM; /* If we got no inode we are out of memory */
 518
 519         return error;
 520 }
 521
 522 /*
 523  * Do the setup for the various locks within the incore inode.
 524  */
 525 void
 526 xfs_inode_lock_init(
 527         xfs_inode_t     *ip,
 528         vnode_t         *vp)
 529 {
 530         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 531                      "xfsino", (long)vp->v_number);
 532         mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
 533         init_waitqueue_head(&ip->i_ipin_wait);
 534         atomic_set(&ip->i_pincount, 0);
 535         init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number);
 536 }
 537
 538 /*
 539  * Look for the inode corresponding to the given ino in the hash table.
 540  * If it is there and its i_transp pointer matches tp, return it.
 541  * Otherwise, return NULL.
 542  */
 543 xfs_inode_t *
 544 xfs_inode_incore(xfs_mount_t    *mp,
 545                  xfs_ino_t      ino,
 546                  xfs_trans_t    *tp)
 547 {
 548         xfs_ihash_t     *ih;
 549         xfs_inode_t     *ip;
 550
 551         ih = XFS_IHASH(mp, ino);
 552         read_lock(&ih->ih_lock);
 553         for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
 554                 if (ip->i_ino == ino) {
 555                         /*
 556                          * If we find it and tp matches, return it.
 557                          * Otherwise break from the loop and return
 558                          * NULL.
 559                          */
 560                         if (ip->i_transp == tp) {
 561                                 read_unlock(&ih->ih_lock);
 562                                 return (ip);
 563                         }
 564                         break;
 565                 }
 566         }
 567         read_unlock(&ih->ih_lock);
 568         return (NULL);
 569 }
 570
 571 /*
 572  * Decrement reference count of an inode structure and unlock it.
 573  *
 574  * ip -- the inode being released
 575  * lock_flags -- this parameter indicates the inode's locks to be
 576  *       to be released.  See the comment on xfs_iunlock() for a list
 577  *       of valid values.
 578  */
 579 void
 580 xfs_iput(xfs_inode_t    *ip,
 581          uint           lock_flags)
 582 {
 583         vnode_t *vp = XFS_ITOV(ip);
 584
 585         vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);
 586
 587         xfs_iunlock(ip, lock_flags);
 588
 589         VN_RELE(vp);
 590 }
 591
 592 /*
 593  * Special iput for brand-new inodes that are still locked
 594  */
 595 void
 596 xfs_iput_new(xfs_inode_t        *ip,
 597              uint               lock_flags)
 598 {
 599         vnode_t         *vp = XFS_ITOV(ip);
 600         struct inode    *inode = LINVFS_GET_IP(vp);
 601
 602         vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
 603
 604         if ((ip->i_d.di_mode == 0)) {
 605                 ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
 606                 vn_mark_bad(vp);
 607         }
 608         if (inode->i_state & I_NEW)
 609                 unlock_new_inode(inode);
 610         if (lock_flags)
 611                 xfs_iunlock(ip, lock_flags);
 612         VN_RELE(vp);
 613 }
 614
 615
 616 /*
 617  * This routine embodies the part of the reclaim code that pulls
 618  * the inode from the inode hash table and the mount structure's
 619  * inode list.
 620  * This should only be called from xfs_reclaim().
 621  */
 622 void
 623 xfs_ireclaim(xfs_inode_t *ip)
 624 {
 625         vnode_t         *vp;
 626
 627         /*
 628          * Remove from old hash list and mount list.
 629          */
 630         XFS_STATS_INC(xs_ig_reclaims);
 631
 632         xfs_iextract(ip);
 633
 634         /*
 635          * Here we do a spurious inode lock in order to coordinate with
 636          * xfs_sync().  This is because xfs_sync() references the inodes
 637          * in the mount list without taking references on the corresponding
 638          * vnodes.  We make that OK here by ensuring that we wait until
 639          * the inode is unlocked in xfs_sync() before we go ahead and
 640          * free it.  We get both the regular lock and the io lock because
 641          * the xfs_sync() code may need to drop the regular one but will
 642          * still hold the io lock.
 643          */
 644         xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 645
 646         /*
 647          * Release dquots (and their references) if any. An inode may escape
 648          * xfs_inactive and get here via vn_alloc->vn_reclaim path.
 649          */
 650         XFS_QM_DQDETACH(ip->i_mount, ip);
 651
 652         /*
 653          * Pull our behavior descriptor from the vnode chain.
 654          */
 655         vp = XFS_ITOV_NULL(ip);
 656         if (vp) {
 657                 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
 658         }
 659
 660         /*
 661          * Free all memory associated with the inode.
 662          */
 663         xfs_idestroy(ip);
 664 }
 665
 666 /*
 667  * This routine removes an about-to-be-destroyed inode from
 668  * all of the lists in which it is located with the exception
 669  * of the behavior chain.
 670  */
 671 void
 672 xfs_iextract(
 673         xfs_inode_t     *ip)
 674 {
 675         xfs_ihash_t     *ih;
 676         xfs_inode_t     *iq;
 677         xfs_mount_t     *mp;
 678         xfs_chash_t     *ch;
 679         xfs_chashlist_t *chl, *chm;
 680         SPLDECL(s);
 681
 682         ih = ip->i_hash;
 683         write_lock(&ih->ih_lock);
 684         if ((iq = ip->i_next)) {
 685                 iq->i_prevp = ip->i_prevp;
 686         }
 687         *ip->i_prevp = iq;
 688         write_unlock(&ih->ih_lock);
 689
 690         /*
 691          * Remove from cluster hash list
 692          *   1) delete the chashlist if this is the last inode on the chashlist
 693          *   2) unchain from list of inodes
 694          *   3) point chashlist->chl_ip to 'chl_next' if to this inode.
 695          */
 696         mp = ip->i_mount;
 697         ch = XFS_CHASH(mp, ip->i_blkno);
 698         s = mutex_spinlock(&ch->ch_lock);
 699
 700         if (ip->i_cnext == ip) {
 701                 /* Last inode on chashlist */
 702                 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
 703                 ASSERT(ip->i_chash != NULL);
 704                 chm=NULL;
 705                 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
 706                         if (chl->chl_blkno == ip->i_blkno) {
 707                                 if (chm == NULL) {
 708                                         /* first item on the list */
 709                                         ch->ch_list = chl->chl_next;
 710                                 } else {
 711                                         chm->chl_next = chl->chl_next;
 712                                 }
 713                                 kmem_zone_free(xfs_chashlist_zone, chl);
 714                                 break;
 715                         } else {
 716                                 ASSERT(chl->chl_ip != ip);
 717                                 chm = chl;
 718                         }
 719                 }
 720                 ASSERT_ALWAYS(chl != NULL);
 721        } else {
 722                 /* delete one inode from a non-empty list */
 723                 iq = ip->i_cnext;
 724                 iq->i_cprev = ip->i_cprev;
 725                 ip->i_cprev->i_cnext = iq;
 726                 if (ip->i_chash->chl_ip == ip) {
 727                         ip->i_chash->chl_ip = iq;
 728                 }
 729                 ip->i_chash = __return_address;
 730                 ip->i_cprev = __return_address;
 731                 ip->i_cnext = __return_address;
 732         }
 733         mutex_spinunlock(&ch->ch_lock, s);
 734
 735         /*
 736          * Remove from mount's inode list.
 737          */
 738         XFS_MOUNT_ILOCK(mp);
 739         ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
 740         iq = ip->i_mnext;
 741         iq->i_mprev = ip->i_mprev;
 742         ip->i_mprev->i_mnext = iq;
 743
 744         /*
 745          * Fix up the head pointer if it points to the inode being deleted.
 746          */
 747         if (mp->m_inodes == ip) {
 748                 if (ip == iq) {
 749                         mp->m_inodes = NULL;
 750                 } else {
 751                         mp->m_inodes = iq;
 752                 }
 753         }
 754
 755         /* Deal with the deleted inodes list */
 756         list_del_init(&ip->i_reclaim);
 757
 758         mp->m_ireclaims++;
 759         XFS_MOUNT_IUNLOCK(mp);
 760 }
 761
 762 /*
 763  * This is a wrapper routine around the xfs_ilock() routine
 764  * used to centralize some grungy code.  It is used in places
 765  * that wish to lock the inode solely for reading the extents.
 766  * The reason these places can't just call xfs_ilock(SHARED)
 767  * is that the inode lock also guards to bringing in of the
 768  * extents from disk for a file in b-tree format.  If the inode
 769  * is in b-tree format, then we need to lock the inode exclusively
 770  * until the extents are read in.  Locking it exclusively all
 771  * the time would limit our parallelism unnecessarily, though.
 772  * What we do instead is check to see if the extents have been
 773  * read in yet, and only lock the inode exclusively if they
 774  * have not.
 775  *
 776  * The function returns a value which should be given to the
 777  * corresponding xfs_iunlock_map_shared().  This value is
 778  * the mode in which the lock was actually taken.
 779  */
 780 uint
 781 xfs_ilock_map_shared(
 782         xfs_inode_t     *ip)
 783 {
 784         uint    lock_mode;
 785
 786         if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
 787             ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
 788                 lock_mode = XFS_ILOCK_EXCL;
 789         } else {
 790                 lock_mode = XFS_ILOCK_SHARED;
 791         }
 792
 793         xfs_ilock(ip, lock_mode);
 794
 795         return lock_mode;
 796 }
 797
 798 /*
 799  * This is simply the unlock routine to go with xfs_ilock_map_shared().
 800  * All it does is call xfs_iunlock() with the given lock_mode.
 801  */
 802 void
 803 xfs_iunlock_map_shared(
 804         xfs_inode_t     *ip,
 805         unsigned int    lock_mode)
 806 {
 807         xfs_iunlock(ip, lock_mode);
 808 }
 809
 810 /*
 811  * The xfs inode contains 2 locks: a multi-reader lock called the
 812  * i_iolock and a multi-reader lock called the i_lock.  This routine
 813  * allows either or both of the locks to be obtained.
 814  *
 815  * The 2 locks should always be ordered so that the IO lock is
 816  * obtained first in order to prevent deadlock.
 817  *
 818  * ip -- the inode being locked
 819  * lock_flags -- this parameter indicates the inode's locks
 820  *       to be locked.  It can be:
 821  *              XFS_IOLOCK_SHARED,
 822  *              XFS_IOLOCK_EXCL,
 823  *              XFS_ILOCK_SHARED,
 824  *              XFS_ILOCK_EXCL,
 825  *              XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 826  *              XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 827  *              XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 828  *              XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 829  */
 830 void
 831 xfs_ilock(xfs_inode_t   *ip,
 832           uint          lock_flags)
 833 {
 834         /*
 835          * You can't set both SHARED and EXCL for the same lock,
 836          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 837          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 838          */
 839         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 840                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 841         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 842                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 843         ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
 844
 845         if (lock_flags & XFS_IOLOCK_EXCL) {
 846                 mrupdate(&ip->i_iolock);
 847         } else if (lock_flags & XFS_IOLOCK_SHARED) {
 848                 mraccess(&ip->i_iolock);
 849         }
 850         if (lock_flags & XFS_ILOCK_EXCL) {
 851                 mrupdate(&ip->i_lock);
 852         } else if (lock_flags & XFS_ILOCK_SHARED) {
 853                 mraccess(&ip->i_lock);
 854         }
 855         xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
 856 }
 857
 858 /*
 859  * This is just like xfs_ilock(), except that the caller
 860  * is guaranteed not to sleep.  It returns 1 if it gets
 861  * the requested locks and 0 otherwise.  If the IO lock is
 862  * obtained but the inode lock cannot be, then the IO lock
 863  * is dropped before returning.
 864  *
 865  * ip -- the inode being locked
 866  * lock_flags -- this parameter indicates the inode's locks to be
 867  *       to be locked.  See the comment for xfs_ilock() for a list
 868  *       of valid values.
 869  *
 870  */
 871 int
 872 xfs_ilock_nowait(xfs_inode_t    *ip,
 873                  uint           lock_flags)
 874 {
 875         int     iolocked;
 876         int     ilocked;
 877
 878         /*
 879          * You can't set both SHARED and EXCL for the same lock,
 880          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 881          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 882          */
 883         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 884                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 885         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 886                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 887         ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
 888
 889         iolocked = 0;
 890         if (lock_flags & XFS_IOLOCK_EXCL) {
 891                 iolocked = mrtryupdate(&ip->i_iolock);
 892                 if (!iolocked) {
 893                         return 0;
 894                 }
 895         } else if (lock_flags & XFS_IOLOCK_SHARED) {
 896                 iolocked = mrtryaccess(&ip->i_iolock);
 897                 if (!iolocked) {
 898                         return 0;
 899                 }
 900         }
 901         if (lock_flags & XFS_ILOCK_EXCL) {
 902                 ilocked = mrtryupdate(&ip->i_lock);
 903                 if (!ilocked) {
 904                         if (iolocked) {
 905                                 mrunlock(&ip->i_iolock);
 906                         }
 907                         return 0;
 908                 }
 909         } else if (lock_flags & XFS_ILOCK_SHARED) {
 910                 ilocked = mrtryaccess(&ip->i_lock);
 911                 if (!ilocked) {
 912                         if (iolocked) {
 913                                 mrunlock(&ip->i_iolock);
 914                         }
 915                         return 0;
 916                 }
 917         }
 918         xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
 919         return 1;
 920 }
 921
 922 /*
 923  * xfs_iunlock() is used to drop the inode locks acquired with
 924  * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 925  * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 926  * that we know which locks to drop.
 927  *
 928  * ip -- the inode being unlocked
 929  * lock_flags -- this parameter indicates the inode's locks to be
 930  *       to be unlocked.  See the comment for xfs_ilock() for a list
 931  *       of valid values for this parameter.
 932  *
 933  */
 934 void
 935 xfs_iunlock(xfs_inode_t *ip,
 936             uint        lock_flags)
 937 {
 938         /*
 939          * You can't set both SHARED and EXCL for the same lock,
 940          * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 941          * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
 942          */
 943         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 944                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 945         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 946                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 947         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0);
 948         ASSERT(lock_flags != 0);
 949
 950         if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
 951                 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
 952                        (ismrlocked(&ip->i_iolock, MR_ACCESS)));
 953                 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
 954                        (ismrlocked(&ip->i_iolock, MR_UPDATE)));
 955                 mrunlock(&ip->i_iolock);
 956         }
 957
 958         if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
 959                 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
 960                        (ismrlocked(&ip->i_lock, MR_ACCESS)));
 961                 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
 962                        (ismrlocked(&ip->i_lock, MR_UPDATE)));
 963                 mrunlock(&ip->i_lock);
 964
 965                 /*
 966                  * Let the AIL know that this item has been unlocked in case
 967                  * it is in the AIL and anyone is waiting on it.  Don't do
 968                  * this if the caller has asked us not to.
 969                  */
 970                 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
 971                      ip->i_itemp != NULL) {
 972                         xfs_trans_unlocked_item(ip->i_mount,
 973                                                 (xfs_log_item_t*)(ip->i_itemp));
 974                 }
 975         }
 976         xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
 977 }
 978
 979 /*
 980  * give up write locks.  the i/o lock cannot be held nested
 981  * if it is being demoted.
 982  */
 983 void
 984 xfs_ilock_demote(xfs_inode_t    *ip,
 985                  uint           lock_flags)
 986 {
 987         ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
 988         ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
 989
 990         if (lock_flags & XFS_ILOCK_EXCL) {
 991                 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
 992                 mrdemote(&ip->i_lock);
 993         }
 994         if (lock_flags & XFS_IOLOCK_EXCL) {
 995                 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
 996                 mrdemote(&ip->i_iolock);
 997         }
 998 }
 999
1000 /*
1001  * The following three routines simply manage the i_flock
1002  * semaphore embedded in the inode.  This semaphore synchronizes
1003  * processes attempting to flush the in-core inode back to disk.
1004  */
1005 void
1006 xfs_iflock(xfs_inode_t *ip)
1007 {
1008         psema(&(ip->i_flock), PINOD|PLTWAIT);
1009 }
1010
1011 int
1012 xfs_iflock_nowait(xfs_inode_t *ip)
1013 {
1014         return (cpsema(&(ip->i_flock)));
1015 }
1016
1017 void
1018 xfs_ifunlock(xfs_inode_t *ip)
1019 {
1020         ASSERT(valusema(&(ip->i_flock)) <= 0);
1021         vsema(&(ip->i_flock));
1022 }