module/zfs/zfs_vfsops.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  24  */
  25
  26 /* Portions Copyright 2010 Robert Milkowski */
  27
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/kmem.h>
  32 #include <sys/pathname.h>
  33 #include <sys/vnode.h>
  34 #include <sys/vfs.h>
  35 #include <sys/mntent.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/zfs_znode.h>
  38 #include <sys/zfs_vnops.h>
  39 #include <sys/zfs_dir.h>
  40 #include <sys/zil.h>
  41 #include <sys/fs/zfs.h>
  42 #include <sys/dmu.h>
  43 #include <sys/dsl_prop.h>
  44 #include <sys/dsl_dataset.h>
  45 #include <sys/dsl_deleg.h>
  46 #include <sys/spa.h>
  47 #include <sys/zap.h>
  48 #include <sys/sa.h>
  49 #include <sys/sa_impl.h>
  50 #include <sys/policy.h>
  51 #include <sys/atomic.h>
  52 #include <sys/zfs_ioctl.h>
  53 #include <sys/zfs_ctldir.h>
  54 #include <sys/zfs_fuid.h>
  55 #include <sys/sunddi.h>
  56 #include <sys/dmu_objset.h>
  57 #include <sys/spa_boot.h>
  58 #include <sys/zpl.h>
  59 #include <linux/vfs_compat.h>
  60 #include "zfs_comutil.h"
  61
  62 enum {
  63         TOKEN_RO,
  64         TOKEN_RW,
  65         TOKEN_SETUID,
  66         TOKEN_NOSETUID,
  67         TOKEN_EXEC,
  68         TOKEN_NOEXEC,
  69         TOKEN_DEVICES,
  70         TOKEN_NODEVICES,
  71         TOKEN_DIRXATTR,
  72         TOKEN_SAXATTR,
  73         TOKEN_XATTR,
  74         TOKEN_NOXATTR,
  75         TOKEN_ATIME,
  76         TOKEN_NOATIME,
  77         TOKEN_RELATIME,
  78         TOKEN_NORELATIME,
  79         TOKEN_NBMAND,
  80         TOKEN_NONBMAND,
  81         TOKEN_MNTPOINT,
  82         TOKEN_LAST,
  83 };
  84
  85 static const match_table_t zpl_tokens = {
  86         { TOKEN_RO,             MNTOPT_RO },
  87         { TOKEN_RW,             MNTOPT_RW },
  88         { TOKEN_SETUID,         MNTOPT_SETUID },
  89         { TOKEN_NOSETUID,       MNTOPT_NOSETUID },
  90         { TOKEN_EXEC,           MNTOPT_EXEC },
  91         { TOKEN_NOEXEC,         MNTOPT_NOEXEC },
  92         { TOKEN_DEVICES,        MNTOPT_DEVICES },
  93         { TOKEN_NODEVICES,      MNTOPT_NODEVICES },
  94         { TOKEN_DIRXATTR,       MNTOPT_DIRXATTR },
  95         { TOKEN_SAXATTR,        MNTOPT_SAXATTR },
  96         { TOKEN_XATTR,          MNTOPT_XATTR },
  97         { TOKEN_NOXATTR,        MNTOPT_NOXATTR },
  98         { TOKEN_ATIME,          MNTOPT_ATIME },
  99         { TOKEN_NOATIME,        MNTOPT_NOATIME },
 100         { TOKEN_RELATIME,       MNTOPT_RELATIME },
 101         { TOKEN_NORELATIME,     MNTOPT_NORELATIME },
 102         { TOKEN_NBMAND,         MNTOPT_NBMAND },
 103         { TOKEN_NONBMAND,       MNTOPT_NONBMAND },
 104         { TOKEN_MNTPOINT,       MNTOPT_MNTPOINT "=%s" },
 105         { TOKEN_LAST,           NULL },
 106 };
 107
 108 static void
 109 zfsvfs_vfs_free(vfs_t *vfsp)
 110 {
 111         if (vfsp != NULL) {
 112                 if (vfsp->vfs_mntpoint != NULL)
 113                         strfree(vfsp->vfs_mntpoint);
 114
 115                 kmem_free(vfsp, sizeof (vfs_t));
 116         }
 117 }
 118
 119 static int
 120 zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
 121 {
 122         switch (token) {
 123         case TOKEN_RO:
 124                 vfsp->vfs_readonly = B_TRUE;
 125                 vfsp->vfs_do_readonly = B_TRUE;
 126                 break;
 127         case TOKEN_RW:
 128                 vfsp->vfs_readonly = B_FALSE;
 129                 vfsp->vfs_do_readonly = B_TRUE;
 130                 break;
 131         case TOKEN_SETUID:
 132                 vfsp->vfs_setuid = B_TRUE;
 133                 vfsp->vfs_do_setuid = B_TRUE;
 134                 break;
 135         case TOKEN_NOSETUID:
 136                 vfsp->vfs_setuid = B_FALSE;
 137                 vfsp->vfs_do_setuid = B_TRUE;
 138                 break;
 139         case TOKEN_EXEC:
 140                 vfsp->vfs_exec = B_TRUE;
 141                 vfsp->vfs_do_exec = B_TRUE;
 142                 break;
 143         case TOKEN_NOEXEC:
 144                 vfsp->vfs_exec = B_FALSE;
 145                 vfsp->vfs_do_exec = B_TRUE;
 146                 break;
 147         case TOKEN_DEVICES:
 148                 vfsp->vfs_devices = B_TRUE;
 149                 vfsp->vfs_do_devices = B_TRUE;
 150                 break;
 151         case TOKEN_NODEVICES:
 152                 vfsp->vfs_devices = B_FALSE;
 153                 vfsp->vfs_do_devices = B_TRUE;
 154                 break;
 155         case TOKEN_DIRXATTR:
 156                 vfsp->vfs_xattr = ZFS_XATTR_DIR;
 157                 vfsp->vfs_do_xattr = B_TRUE;
 158                 break;
 159         case TOKEN_SAXATTR:
 160                 vfsp->vfs_xattr = ZFS_XATTR_SA;
 161                 vfsp->vfs_do_xattr = B_TRUE;
 162                 break;
 163         case TOKEN_XATTR:
 164                 vfsp->vfs_xattr = ZFS_XATTR_DIR;
 165                 vfsp->vfs_do_xattr = B_TRUE;
 166                 break;
 167         case TOKEN_NOXATTR:
 168                 vfsp->vfs_xattr = ZFS_XATTR_OFF;
 169                 vfsp->vfs_do_xattr = B_TRUE;
 170                 break;
 171         case TOKEN_ATIME:
 172                 vfsp->vfs_atime = B_TRUE;
 173                 vfsp->vfs_do_atime = B_TRUE;
 174                 break;
 175         case TOKEN_NOATIME:
 176                 vfsp->vfs_atime = B_FALSE;
 177                 vfsp->vfs_do_atime = B_TRUE;
 178                 break;
 179         case TOKEN_RELATIME:
 180                 vfsp->vfs_relatime = B_TRUE;
 181                 vfsp->vfs_do_relatime = B_TRUE;
 182                 break;
 183         case TOKEN_NORELATIME:
 184                 vfsp->vfs_relatime = B_FALSE;
 185                 vfsp->vfs_do_relatime = B_TRUE;
 186                 break;
 187         case TOKEN_NBMAND:
 188                 vfsp->vfs_nbmand = B_TRUE;
 189                 vfsp->vfs_do_nbmand = B_TRUE;
 190                 break;
 191         case TOKEN_NONBMAND:
 192                 vfsp->vfs_nbmand = B_FALSE;
 193                 vfsp->vfs_do_nbmand = B_TRUE;
 194                 break;
 195         case TOKEN_MNTPOINT:
 196                 vfsp->vfs_mntpoint = match_strdup(&args[0]);
 197                 if (vfsp->vfs_mntpoint == NULL)
 198                         return (SET_ERROR(ENOMEM));
 199
 200                 break;
 201         default:
 202                 break;
 203         }
 204
 205         return (0);
 206 }
 207
 208 /*
 209  * Parse the raw mntopts and return a vfs_t describing the options.
 210  */
 211 static int
 212 zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
 213 {
 214         vfs_t *tmp_vfsp;
 215         int error;
 216
 217         tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
 218
 219         if (mntopts != NULL) {
 220                 substring_t args[MAX_OPT_ARGS];
 221                 char *tmp_mntopts, *p, *t;
 222                 int token;
 223
 224                 tmp_mntopts = t = strdup(mntopts);
 225                 if (tmp_mntopts == NULL)
 226                         return (SET_ERROR(ENOMEM));
 227
 228                 while ((p = strsep(&t, ",")) != NULL) {
 229                         if (!*p)
 230                                 continue;
 231
 232                         args[0].to = args[0].from = NULL;
 233                         token = match_token(p, zpl_tokens, args);
 234                         error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
 235                         if (error) {
 236                                 strfree(tmp_mntopts);
 237                                 zfsvfs_vfs_free(tmp_vfsp);
 238                                 return (error);
 239                         }
 240                 }
 241
 242                 strfree(tmp_mntopts);
 243         }
 244
 245         *vfsp = tmp_vfsp;
 246
 247         return (0);
 248 }
 249
 250 boolean_t
 251 zfs_is_readonly(zfsvfs_t *zfsvfs)
 252 {
 253         return (!!(zfsvfs->z_sb->s_flags & SB_RDONLY));
 254 }
 255
 256 /*ARGSUSED*/
 257 int
 258 zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 259 {
 260         zfsvfs_t *zfsvfs = sb->s_fs_info;
 261
 262         /*
 263          * Semantically, the only requirement is that the sync be initiated.
 264          * The DMU syncs out txgs frequently, so there's nothing to do.
 265          */
 266         if (!wait)
 267                 return (0);
 268
 269         if (zfsvfs != NULL) {
 270                 /*
 271                  * Sync a specific filesystem.
 272                  */
 273                 dsl_pool_t *dp;
 274
 275                 ZFS_ENTER(zfsvfs);
 276                 dp = dmu_objset_pool(zfsvfs->z_os);
 277
 278                 /*
 279                  * If the system is shutting down, then skip any
 280                  * filesystems which may exist on a suspended pool.
 281                  */
 282                 if (spa_suspended(dp->dp_spa)) {
 283                         ZFS_EXIT(zfsvfs);
 284                         return (0);
 285                 }
 286
 287                 if (zfsvfs->z_log != NULL)
 288                         zil_commit(zfsvfs->z_log, 0);
 289
 290                 ZFS_EXIT(zfsvfs);
 291         } else {
 292                 /*
 293                  * Sync all ZFS filesystems.  This is what happens when you
 294                  * run sync(1M).  Unlike other filesystems, ZFS honors the
 295                  * request by waiting for all pools to commit all dirty data.
 296                  */
 297                 spa_sync_allpools();
 298         }
 299
 300         return (0);
 301 }
 302
 303 static void
 304 atime_changed_cb(void *arg, uint64_t newval)
 305 {
 306         zfsvfs_t *zfsvfs = arg;
 307         struct super_block *sb = zfsvfs->z_sb;
 308
 309         if (sb == NULL)
 310                 return;
 311         /*
 312          * Update SB_NOATIME bit in VFS super block.  Since atime update is
 313          * determined by atime_needs_update(), atime_needs_update() needs to
 314          * return false if atime is turned off, and not unconditionally return
 315          * false if atime is turned on.
 316          */
 317         if (newval)
 318                 sb->s_flags &= ~SB_NOATIME;
 319         else
 320                 sb->s_flags |= SB_NOATIME;
 321 }
 322
 323 static void
 324 relatime_changed_cb(void *arg, uint64_t newval)
 325 {
 326         ((zfsvfs_t *)arg)->z_relatime = newval;
 327 }
 328
 329 static void
 330 xattr_changed_cb(void *arg, uint64_t newval)
 331 {
 332         zfsvfs_t *zfsvfs = arg;
 333
 334         if (newval == ZFS_XATTR_OFF) {
 335                 zfsvfs->z_flags &= ~ZSB_XATTR;
 336         } else {
 337                 zfsvfs->z_flags |= ZSB_XATTR;
 338
 339                 if (newval == ZFS_XATTR_SA)
 340                         zfsvfs->z_xattr_sa = B_TRUE;
 341                 else
 342                         zfsvfs->z_xattr_sa = B_FALSE;
 343         }
 344 }
 345
 346 static void
 347 acltype_changed_cb(void *arg, uint64_t newval)
 348 {
 349         zfsvfs_t *zfsvfs = arg;
 350
 351         switch (newval) {
 352         case ZFS_ACLTYPE_OFF:
 353                 zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
 354                 zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
 355                 break;
 356         case ZFS_ACLTYPE_POSIXACL:
 357 #ifdef CONFIG_FS_POSIX_ACL
 358                 zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIXACL;
 359                 zfsvfs->z_sb->s_flags |= SB_POSIXACL;
 360 #else
 361                 zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
 362                 zfsvfs->z_sb->s_flags &= ~SB_POSIXACL;
 363 #endif /* CONFIG_FS_POSIX_ACL */
 364                 break;
 365         default:
 366                 break;
 367         }
 368 }
 369
 370 static void
 371 blksz_changed_cb(void *arg, uint64_t newval)
 372 {
 373         zfsvfs_t *zfsvfs = arg;
 374         ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
 375         ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
 376         ASSERT(ISP2(newval));
 377
 378         zfsvfs->z_max_blksz = newval;
 379 }
 380
 381 static void
 382 readonly_changed_cb(void *arg, uint64_t newval)
 383 {
 384         zfsvfs_t *zfsvfs = arg;
 385         struct super_block *sb = zfsvfs->z_sb;
 386
 387         if (sb == NULL)
 388                 return;
 389
 390         if (newval)
 391                 sb->s_flags |= SB_RDONLY;
 392         else
 393                 sb->s_flags &= ~SB_RDONLY;
 394 }
 395
 396 static void
 397 devices_changed_cb(void *arg, uint64_t newval)
 398 {
 399 }
 400
 401 static void
 402 setuid_changed_cb(void *arg, uint64_t newval)
 403 {
 404 }
 405
 406 static void
 407 exec_changed_cb(void *arg, uint64_t newval)
 408 {
 409 }
 410
 411 static void
 412 nbmand_changed_cb(void *arg, uint64_t newval)
 413 {
 414         zfsvfs_t *zfsvfs = arg;
 415         struct super_block *sb = zfsvfs->z_sb;
 416
 417         if (sb == NULL)
 418                 return;
 419
 420         if (newval == TRUE)
 421                 sb->s_flags |= SB_MANDLOCK;
 422         else
 423                 sb->s_flags &= ~SB_MANDLOCK;
 424 }
 425
 426 static void
 427 snapdir_changed_cb(void *arg, uint64_t newval)
 428 {
 429         ((zfsvfs_t *)arg)->z_show_ctldir = newval;
 430 }
 431
 432 static void
 433 vscan_changed_cb(void *arg, uint64_t newval)
 434 {
 435         ((zfsvfs_t *)arg)->z_vscan = newval;
 436 }
 437
 438 static void
 439 acl_inherit_changed_cb(void *arg, uint64_t newval)
 440 {
 441         ((zfsvfs_t *)arg)->z_acl_inherit = newval;
 442 }
 443
 444 static int
 445 zfs_register_callbacks(vfs_t *vfsp)
 446 {
 447         struct dsl_dataset *ds = NULL;
 448         objset_t *os = NULL;
 449         zfsvfs_t *zfsvfs = NULL;
 450         int error = 0;
 451
 452         ASSERT(vfsp);
 453         zfsvfs = vfsp->vfs_data;
 454         ASSERT(zfsvfs);
 455         os = zfsvfs->z_os;
 456
 457         /*
 458          * The act of registering our callbacks will destroy any mount
 459          * options we may have.  In order to enable temporary overrides
 460          * of mount options, we stash away the current values and
 461          * restore them after we register the callbacks.
 462          */
 463         if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) {
 464                 vfsp->vfs_do_readonly = B_TRUE;
 465                 vfsp->vfs_readonly = B_TRUE;
 466         }
 467
 468         /*
 469          * Register property callbacks.
 470          *
 471          * It would probably be fine to just check for i/o error from
 472          * the first prop_register(), but I guess I like to go
 473          * overboard...
 474          */
 475         ds = dmu_objset_ds(os);
 476         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 477         error = dsl_prop_register(ds,
 478             zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
 479         error = error ? error : dsl_prop_register(ds,
 480             zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);
 481         error = error ? error : dsl_prop_register(ds,
 482             zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
 483         error = error ? error : dsl_prop_register(ds,
 484             zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
 485         error = error ? error : dsl_prop_register(ds,
 486             zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
 487         error = error ? error : dsl_prop_register(ds,
 488             zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
 489         error = error ? error : dsl_prop_register(ds,
 490             zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
 491         error = error ? error : dsl_prop_register(ds,
 492             zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
 493         error = error ? error : dsl_prop_register(ds,
 494             zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
 495         error = error ? error : dsl_prop_register(ds,
 496             zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs);
 497         error = error ? error : dsl_prop_register(ds,
 498             zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
 499             zfsvfs);
 500         error = error ? error : dsl_prop_register(ds,
 501             zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
 502         error = error ? error : dsl_prop_register(ds,
 503             zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);
 504         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 505         if (error)
 506                 goto unregister;
 507
 508         /*
 509          * Invoke our callbacks to restore temporary mount options.
 510          */
 511         if (vfsp->vfs_do_readonly)
 512                 readonly_changed_cb(zfsvfs, vfsp->vfs_readonly);
 513         if (vfsp->vfs_do_setuid)
 514                 setuid_changed_cb(zfsvfs, vfsp->vfs_setuid);
 515         if (vfsp->vfs_do_exec)
 516                 exec_changed_cb(zfsvfs, vfsp->vfs_exec);
 517         if (vfsp->vfs_do_devices)
 518                 devices_changed_cb(zfsvfs, vfsp->vfs_devices);
 519         if (vfsp->vfs_do_xattr)
 520                 xattr_changed_cb(zfsvfs, vfsp->vfs_xattr);
 521         if (vfsp->vfs_do_atime)
 522                 atime_changed_cb(zfsvfs, vfsp->vfs_atime);
 523         if (vfsp->vfs_do_relatime)
 524                 relatime_changed_cb(zfsvfs, vfsp->vfs_relatime);
 525         if (vfsp->vfs_do_nbmand)
 526                 nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand);
 527
 528         return (0);
 529
 530 unregister:
 531         dsl_prop_unregister_all(ds, zfsvfs);
 532         return (error);
 533 }
 534
 535 static int
 536 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
 537     uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
 538 {
 539         sa_hdr_phys_t sa;
 540         sa_hdr_phys_t *sap = data;
 541         uint64_t flags;
 542         int hdrsize;
 543         boolean_t swap = B_FALSE;
 544
 545         /*
 546          * Is it a valid type of object to track?
 547          */
 548         if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
 549                 return (SET_ERROR(ENOENT));
 550
 551         /*
 552          * If we have a NULL data pointer
 553          * then assume the id's aren't changing and
 554          * return EEXIST to the dmu to let it know to
 555          * use the same ids
 556          */
 557         if (data == NULL)
 558                 return (SET_ERROR(EEXIST));
 559
 560         if (bonustype == DMU_OT_ZNODE) {
 561                 znode_phys_t *znp = data;
 562                 *userp = znp->zp_uid;
 563                 *groupp = znp->zp_gid;
 564                 *projectp = ZFS_DEFAULT_PROJID;
 565                 return (0);
 566         }
 567
 568         if (sap->sa_magic == 0) {
 569                 /*
 570                  * This should only happen for newly created files
 571                  * that haven't had the znode data filled in yet.
 572                  */
 573                 *userp = 0;
 574                 *groupp = 0;
 575                 *projectp = ZFS_DEFAULT_PROJID;
 576                 return (0);
 577         }
 578
 579         sa = *sap;
 580         if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
 581                 sa.sa_magic = SA_MAGIC;
 582                 sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
 583                 swap = B_TRUE;
 584         } else {
 585                 VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
 586         }
 587
 588         hdrsize = sa_hdrsize(&sa);
 589         VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
 590
 591         *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET));
 592         *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET));
 593         flags = *((uint64_t *)((uintptr_t)data + hdrsize + SA_FLAGS_OFFSET));
 594         if (swap)
 595                 flags = BSWAP_64(flags);
 596
 597         if (flags & ZFS_PROJID)
 598                 *projectp = *((uint64_t *)((uintptr_t)data + hdrsize +
 599                     SA_PROJID_OFFSET));
 600         else
 601                 *projectp = ZFS_DEFAULT_PROJID;
 602
 603         if (swap) {
 604                 *userp = BSWAP_64(*userp);
 605                 *groupp = BSWAP_64(*groupp);
 606                 *projectp = BSWAP_64(*projectp);
 607         }
 608         return (0);
 609 }
 610
 611 static void
 612 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
 613     char *domainbuf, int buflen, uid_t *ridp)
 614 {
 615         uint64_t fuid;
 616         const char *domain;
 617
 618         fuid = zfs_strtonum(fuidstr, NULL);
 619
 620         domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
 621         if (domain)
 622                 (void) strlcpy(domainbuf, domain, buflen);
 623         else
 624                 domainbuf[0] = '\0';
 625         *ridp = FUID_RID(fuid);
 626 }
 627
 628 static uint64_t
 629 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
 630 {
 631         switch (type) {
 632         case ZFS_PROP_USERUSED:
 633         case ZFS_PROP_USEROBJUSED:
 634                 return (DMU_USERUSED_OBJECT);
 635         case ZFS_PROP_GROUPUSED:
 636         case ZFS_PROP_GROUPOBJUSED:
 637                 return (DMU_GROUPUSED_OBJECT);
 638         case ZFS_PROP_PROJECTUSED:
 639         case ZFS_PROP_PROJECTOBJUSED:
 640                 return (DMU_PROJECTUSED_OBJECT);
 641         case ZFS_PROP_USERQUOTA:
 642                 return (zfsvfs->z_userquota_obj);
 643         case ZFS_PROP_GROUPQUOTA:
 644                 return (zfsvfs->z_groupquota_obj);
 645         case ZFS_PROP_USEROBJQUOTA:
 646                 return (zfsvfs->z_userobjquota_obj);
 647         case ZFS_PROP_GROUPOBJQUOTA:
 648                 return (zfsvfs->z_groupobjquota_obj);
 649         case ZFS_PROP_PROJECTQUOTA:
 650                 return (zfsvfs->z_projectquota_obj);
 651         case ZFS_PROP_PROJECTOBJQUOTA:
 652                 return (zfsvfs->z_projectobjquota_obj);
 653         default:
 654                 return (ZFS_NO_OBJECT);
 655         }
 656 }
 657
 658 int
 659 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 660     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
 661 {
 662         int error;
 663         zap_cursor_t zc;
 664         zap_attribute_t za;
 665         zfs_useracct_t *buf = vbuf;
 666         uint64_t obj;
 667         int offset = 0;
 668
 669         if (!dmu_objset_userspace_present(zfsvfs->z_os))
 670                 return (SET_ERROR(ENOTSUP));
 671
 672         if ((type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED ||
 673             type == ZFS_PROP_PROJECTOBJQUOTA ||
 674             type == ZFS_PROP_PROJECTOBJUSED) &&
 675             !dmu_objset_projectquota_present(zfsvfs->z_os))
 676                 return (SET_ERROR(ENOTSUP));
 677
 678         if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
 679             type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA ||
 680             type == ZFS_PROP_PROJECTOBJUSED ||
 681             type == ZFS_PROP_PROJECTOBJQUOTA) &&
 682             !dmu_objset_userobjspace_present(zfsvfs->z_os))
 683                 return (SET_ERROR(ENOTSUP));
 684
 685         obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 686         if (obj == ZFS_NO_OBJECT) {
 687                 *bufsizep = 0;
 688                 return (0);
 689         }
 690
 691         if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
 692             type == ZFS_PROP_PROJECTOBJUSED)
 693                 offset = DMU_OBJACCT_PREFIX_LEN;
 694
 695         for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
 696             (error = zap_cursor_retrieve(&zc, &za)) == 0;
 697             zap_cursor_advance(&zc)) {
 698                 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
 699                     *bufsizep)
 700                         break;
 701
 702                 /*
 703                  * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX)
 704                  * when dealing with block quota and vice versa.
 705                  */
 706                 if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX,
 707                     DMU_OBJACCT_PREFIX_LEN) == 0))
 708                         continue;
 709
 710                 fuidstr_to_sid(zfsvfs, za.za_name + offset,
 711                     buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
 712
 713                 buf->zu_space = za.za_first_integer;
 714                 buf++;
 715         }
 716         if (error == ENOENT)
 717                 error = 0;
 718
 719         ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
 720         *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
 721         *cookiep = zap_cursor_serialize(&zc);
 722         zap_cursor_fini(&zc);
 723         return (error);
 724 }
 725
 726 /*
 727  * buf must be big enough (eg, 32 bytes)
 728  */
 729 static int
 730 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
 731     char *buf, boolean_t addok)
 732 {
 733         uint64_t fuid;
 734         int domainid = 0;
 735
 736         if (domain && domain[0]) {
 737                 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
 738                 if (domainid == -1)
 739                         return (SET_ERROR(ENOENT));
 740         }
 741         fuid = FUID_ENCODE(domainid, rid);
 742         (void) sprintf(buf, "%llx", (longlong_t)fuid);
 743         return (0);
 744 }
 745
 746 int
 747 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 748     const char *domain, uint64_t rid, uint64_t *valp)
 749 {
 750         char buf[20 + DMU_OBJACCT_PREFIX_LEN];
 751         int offset = 0;
 752         int err;
 753         uint64_t obj;
 754
 755         *valp = 0;
 756
 757         if (!dmu_objset_userspace_present(zfsvfs->z_os))
 758                 return (SET_ERROR(ENOTSUP));
 759
 760         if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
 761             type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA ||
 762             type == ZFS_PROP_PROJECTOBJUSED ||
 763             type == ZFS_PROP_PROJECTOBJQUOTA) &&
 764             !dmu_objset_userobjspace_present(zfsvfs->z_os))
 765                 return (SET_ERROR(ENOTSUP));
 766
 767         if (type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED ||
 768             type == ZFS_PROP_PROJECTOBJQUOTA ||
 769             type == ZFS_PROP_PROJECTOBJUSED) {
 770                 if (!dmu_objset_projectquota_present(zfsvfs->z_os))
 771                         return (SET_ERROR(ENOTSUP));
 772                 if (!zpl_is_valid_projid(rid))
 773                         return (SET_ERROR(EINVAL));
 774         }
 775
 776         obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 777         if (obj == ZFS_NO_OBJECT)
 778                 return (0);
 779
 780         if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
 781             type == ZFS_PROP_PROJECTOBJUSED) {
 782                 strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
 783                 offset = DMU_OBJACCT_PREFIX_LEN;
 784         }
 785
 786         err = id_to_fuidstr(zfsvfs, domain, rid, buf + offset, B_FALSE);
 787         if (err)
 788                 return (err);
 789
 790         err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
 791         if (err == ENOENT)
 792                 err = 0;
 793         return (err);
 794 }
 795
 796 int
 797 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
 798     const char *domain, uint64_t rid, uint64_t quota)
 799 {
 800         char buf[32];
 801         int err;
 802         dmu_tx_t *tx;
 803         uint64_t *objp;
 804         boolean_t fuid_dirtied;
 805
 806         if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
 807                 return (SET_ERROR(ENOTSUP));
 808
 809         switch (type) {
 810         case ZFS_PROP_USERQUOTA:
 811                 objp = &zfsvfs->z_userquota_obj;
 812                 break;
 813         case ZFS_PROP_GROUPQUOTA:
 814                 objp = &zfsvfs->z_groupquota_obj;
 815                 break;
 816         case ZFS_PROP_USEROBJQUOTA:
 817                 objp = &zfsvfs->z_userobjquota_obj;
 818                 break;
 819         case ZFS_PROP_GROUPOBJQUOTA:
 820                 objp = &zfsvfs->z_groupobjquota_obj;
 821                 break;
 822         case ZFS_PROP_PROJECTQUOTA:
 823                 if (!dmu_objset_projectquota_enabled(zfsvfs->z_os))
 824                         return (SET_ERROR(ENOTSUP));
 825                 if (!zpl_is_valid_projid(rid))
 826                         return (SET_ERROR(EINVAL));
 827
 828                 objp = &zfsvfs->z_projectquota_obj;
 829                 break;
 830         case ZFS_PROP_PROJECTOBJQUOTA:
 831                 if (!dmu_objset_projectquota_enabled(zfsvfs->z_os))
 832                         return (SET_ERROR(ENOTSUP));
 833                 if (!zpl_is_valid_projid(rid))
 834                         return (SET_ERROR(EINVAL));
 835
 836                 objp = &zfsvfs->z_projectobjquota_obj;
 837                 break;
 838         default:
 839                 return (SET_ERROR(EINVAL));
 840         }
 841
 842         err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
 843         if (err)
 844                 return (err);
 845         fuid_dirtied = zfsvfs->z_fuid_dirty;
 846
 847         tx = dmu_tx_create(zfsvfs->z_os);
 848         dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
 849         if (*objp == 0) {
 850                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 851                     zfs_userquota_prop_prefixes[type]);
 852         }
 853         if (fuid_dirtied)
 854                 zfs_fuid_txhold(zfsvfs, tx);
 855         err = dmu_tx_assign(tx, TXG_WAIT);
 856         if (err) {
 857                 dmu_tx_abort(tx);
 858                 return (err);
 859         }
 860
 861         mutex_enter(&zfsvfs->z_lock);
 862         if (*objp == 0) {
 863                 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
 864                     DMU_OT_NONE, 0, tx);
 865                 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
 866                     zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
 867         }
 868         mutex_exit(&zfsvfs->z_lock);
 869
 870         if (quota == 0) {
 871                 err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
 872                 if (err == ENOENT)
 873                         err = 0;
 874         } else {
 875                 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
 876         }
 877         ASSERT(err == 0);
 878         if (fuid_dirtied)
 879                 zfs_fuid_sync(zfsvfs, tx);
 880         dmu_tx_commit(tx);
 881         return (err);
 882 }
 883
 884 boolean_t
 885 zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
 886 {
 887         char buf[20 + DMU_OBJACCT_PREFIX_LEN];
 888         uint64_t used, quota, quotaobj;
 889         int err;
 890
 891         if (!dmu_objset_userobjspace_present(zfsvfs->z_os)) {
 892                 if (dmu_objset_userobjspace_upgradable(zfsvfs->z_os)) {
 893                         dsl_pool_config_enter(
 894                             dmu_objset_pool(zfsvfs->z_os), FTAG);
 895                         dmu_objset_id_quota_upgrade(zfsvfs->z_os);
 896                         dsl_pool_config_exit(
 897                             dmu_objset_pool(zfsvfs->z_os), FTAG);
 898                 }
 899                 return (B_FALSE);
 900         }
 901
 902         if (usedobj == DMU_PROJECTUSED_OBJECT) {
 903                 if (!dmu_objset_projectquota_present(zfsvfs->z_os)) {
 904                         if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
 905                                 dsl_pool_config_enter(
 906                                     dmu_objset_pool(zfsvfs->z_os), FTAG);
 907                                 dmu_objset_id_quota_upgrade(zfsvfs->z_os);
 908                                 dsl_pool_config_exit(
 909                                     dmu_objset_pool(zfsvfs->z_os), FTAG);
 910                         }
 911                         return (B_FALSE);
 912                 }
 913                 quotaobj = zfsvfs->z_projectobjquota_obj;
 914         } else if (usedobj == DMU_USERUSED_OBJECT) {
 915                 quotaobj = zfsvfs->z_userobjquota_obj;
 916         } else if (usedobj == DMU_GROUPUSED_OBJECT) {
 917                 quotaobj = zfsvfs->z_groupobjquota_obj;
 918         } else {
 919                 return (B_FALSE);
 920         }
 921         if (quotaobj == 0 || zfsvfs->z_replay)
 922                 return (B_FALSE);
 923
 924         (void) sprintf(buf, "%llx", (longlong_t)id);
 925         err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
 926         if (err != 0)
 927                 return (B_FALSE);
 928
 929         (void) sprintf(buf, DMU_OBJACCT_PREFIX "%llx", (longlong_t)id);
 930         err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
 931         if (err != 0)
 932                 return (B_FALSE);
 933         return (used >= quota);
 934 }
 935
 936 boolean_t
 937 zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
 938 {
 939         char buf[20];
 940         uint64_t used, quota, quotaobj;
 941         int err;
 942
 943         if (usedobj == DMU_PROJECTUSED_OBJECT) {
 944                 if (!dmu_objset_projectquota_present(zfsvfs->z_os)) {
 945                         if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) {
 946                                 dsl_pool_config_enter(
 947                                     dmu_objset_pool(zfsvfs->z_os), FTAG);
 948                                 dmu_objset_id_quota_upgrade(zfsvfs->z_os);
 949                                 dsl_pool_config_exit(
 950                                     dmu_objset_pool(zfsvfs->z_os), FTAG);
 951                         }
 952                         return (B_FALSE);
 953                 }
 954                 quotaobj = zfsvfs->z_projectquota_obj;
 955         } else if (usedobj == DMU_USERUSED_OBJECT) {
 956                 quotaobj = zfsvfs->z_userquota_obj;
 957         } else if (usedobj == DMU_GROUPUSED_OBJECT) {
 958                 quotaobj = zfsvfs->z_groupquota_obj;
 959         } else {
 960                 return (B_FALSE);
 961         }
 962         if (quotaobj == 0 || zfsvfs->z_replay)
 963                 return (B_FALSE);
 964
 965         (void) sprintf(buf, "%llx", (longlong_t)id);
 966         err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
 967         if (err != 0)
 968                 return (B_FALSE);
 969
 970         err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
 971         if (err != 0)
 972                 return (B_FALSE);
 973         return (used >= quota);
 974 }
 975
 976 boolean_t
 977 zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
 978 {
 979         return (zfs_id_overblockquota(zfsvfs, usedobj, id) ||
 980             zfs_id_overobjquota(zfsvfs, usedobj, id));
 981 }
 982
 983 /*
 984  * Associate this zfsvfs with the given objset, which must be owned.
 985  * This will cache a bunch of on-disk state from the objset in the
 986  * zfsvfs.
 987  */
 988 static int
 989 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
 990 {
 991         int error;
 992         uint64_t val;
 993
 994         zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
 995         zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
 996         zfsvfs->z_os = os;
 997
 998         error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
 999         if (error != 0)
1000                 return (error);
1001         if (zfsvfs->z_version >
1002             zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
1003                 (void) printk("Can't mount a version %lld file system "
1004                     "on a version %lld pool\n. Pool must be upgraded to mount "
1005                     "this file system.", (u_longlong_t)zfsvfs->z_version,
1006                     (u_longlong_t)spa_version(dmu_objset_spa(os)));
1007                 return (SET_ERROR(ENOTSUP));
1008         }
1009         error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
1010         if (error != 0)
1011                 return (error);
1012         zfsvfs->z_norm = (int)val;
1013
1014         error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
1015         if (error != 0)
1016                 return (error);
1017         zfsvfs->z_utf8 = (val != 0);
1018
1019         error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
1020         if (error != 0)
1021                 return (error);
1022         zfsvfs->z_case = (uint_t)val;
1023
1024         if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0)
1025                 return (error);
1026         zfsvfs->z_acl_type = (uint_t)val;
1027
1028         /*
1029          * Fold case on file systems that are always or sometimes case
1030          * insensitive.
1031          */
1032         if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
1033             zfsvfs->z_case == ZFS_CASE_MIXED)
1034                 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
1035
1036         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1037         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1038
1039         uint64_t sa_obj = 0;
1040         if (zfsvfs->z_use_sa) {
1041                 /* should either have both of these objects or none */
1042                 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
1043                     &sa_obj);
1044                 if (error != 0)
1045                         return (error);
1046
1047                 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
1048                 if ((error == 0) && (val == ZFS_XATTR_SA))
1049                         zfsvfs->z_xattr_sa = B_TRUE;
1050         }
1051
1052         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
1053             &zfsvfs->z_root);
1054         if (error != 0)
1055                 return (error);
1056         ASSERT(zfsvfs->z_root != 0);
1057
1058         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
1059             &zfsvfs->z_unlinkedobj);
1060         if (error != 0)
1061                 return (error);
1062
1063         error = zap_lookup(os, MASTER_NODE_OBJ,
1064             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
1065             8, 1, &zfsvfs->z_userquota_obj);
1066         if (error == ENOENT)
1067                 zfsvfs->z_userquota_obj = 0;
1068         else if (error != 0)
1069                 return (error);
1070
1071         error = zap_lookup(os, MASTER_NODE_OBJ,
1072             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
1073             8, 1, &zfsvfs->z_groupquota_obj);
1074         if (error == ENOENT)
1075                 zfsvfs->z_groupquota_obj = 0;
1076         else if (error != 0)
1077                 return (error);
1078
1079         error = zap_lookup(os, MASTER_NODE_OBJ,
1080             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
1081             8, 1, &zfsvfs->z_projectquota_obj);
1082         if (error == ENOENT)
1083                 zfsvfs->z_projectquota_obj = 0;
1084         else if (error != 0)
1085                 return (error);
1086
1087         error = zap_lookup(os, MASTER_NODE_OBJ,
1088             zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
1089             8, 1, &zfsvfs->z_userobjquota_obj);
1090         if (error == ENOENT)
1091                 zfsvfs->z_userobjquota_obj = 0;
1092         else if (error != 0)
1093                 return (error);
1094
1095         error = zap_lookup(os, MASTER_NODE_OBJ,
1096             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
1097             8, 1, &zfsvfs->z_groupobjquota_obj);
1098         if (error == ENOENT)
1099                 zfsvfs->z_groupobjquota_obj = 0;
1100         else if (error != 0)
1101                 return (error);
1102
1103         error = zap_lookup(os, MASTER_NODE_OBJ,
1104             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
1105             8, 1, &zfsvfs->z_projectobjquota_obj);
1106         if (error == ENOENT)
1107                 zfsvfs->z_projectobjquota_obj = 0;
1108         else if (error != 0)
1109                 return (error);
1110
1111         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
1112             &zfsvfs->z_fuid_obj);
1113         if (error == ENOENT)
1114                 zfsvfs->z_fuid_obj = 0;
1115         else if (error != 0)
1116                 return (error);
1117
1118         error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
1119             &zfsvfs->z_shares_dir);
1120         if (error == ENOENT)
1121                 zfsvfs->z_shares_dir = 0;
1122         else if (error != 0)
1123                 return (error);
1124
1125         error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1126             &zfsvfs->z_attr_table);
1127         if (error != 0)
1128                 return (error);
1129
1130         if (zfsvfs->z_version >= ZPL_VERSION_SA)
1131                 sa_register_update_callback(os, zfs_sa_upgrade);
1132
1133         return (0);
1134 }
1135
1136 int
1137 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
1138 {
1139         objset_t *os;
1140         zfsvfs_t *zfsvfs;
1141         int error;
1142         boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
1143
1144         zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1145
1146         error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os);
1147         if (error != 0) {
1148                 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1149                 return (error);
1150         }
1151
1152         error = zfsvfs_create_impl(zfvp, zfsvfs, os);
1153         if (error != 0) {
1154                 dmu_objset_disown(os, B_TRUE, zfsvfs);
1155         }
1156         return (error);
1157 }
1158
1159
1160 /*
1161  * Note: zfsvfs is assumed to be malloc'd, and will be freed by this function
1162  * on a failure.  Do not pass in a statically allocated zfsvfs.
1163  */
1164 int
1165 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
1166 {
1167         int error;
1168
1169         zfsvfs->z_vfs = NULL;
1170         zfsvfs->z_sb = NULL;
1171         zfsvfs->z_parent = zfsvfs;
1172
1173         mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1174         mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
1175         list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1176             offsetof(znode_t, z_link_node));
1177         rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
1178         rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
1179         rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
1180
1181         int size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
1182             ZFS_OBJ_MTX_MAX);
1183         zfsvfs->z_hold_size = size;
1184         zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
1185             KM_SLEEP);
1186         zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
1187         for (int i = 0; i != size; i++) {
1188                 avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
1189                     sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
1190                 mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
1191         }
1192
1193         error = zfsvfs_init(zfsvfs, os);
1194         if (error != 0) {
1195                 *zfvp = NULL;
1196                 zfsvfs_free(zfsvfs);
1197                 return (error);
1198         }
1199
1200         zfsvfs->z_drain_task = TASKQID_INVALID;
1201         zfsvfs->z_draining = B_FALSE;
1202         zfsvfs->z_drain_cancel = B_TRUE;
1203
1204         *zfvp = zfsvfs;
1205         return (0);
1206 }
1207
1208 static int
1209 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1210 {
1211         int error;
1212         boolean_t readonly = zfs_is_readonly(zfsvfs);
1213
1214         error = zfs_register_callbacks(zfsvfs->z_vfs);
1215         if (error)
1216                 return (error);
1217
1218         zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1219
1220         /*
1221          * If we are not mounting (ie: online recv), then we don't
1222          * have to worry about replaying the log as we blocked all
1223          * operations out since we closed the ZIL.
1224          */
1225         if (mounting) {
1226                 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
1227                 dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
1228
1229                 /*
1230                  * During replay we remove the read only flag to
1231                  * allow replays to succeed.
1232                  */
1233                 if (readonly != 0) {
1234                         readonly_changed_cb(zfsvfs, B_FALSE);
1235                 } else {
1236                         zap_stats_t zs;
1237                         if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
1238                             &zs) == 0) {
1239                                 dataset_kstats_update_nunlinks_kstat(
1240                                     &zfsvfs->z_kstat, zs.zs_num_entries);
1241                         }
1242                         dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
1243                             "num_entries in unlinked set: %llu",
1244                             zs.zs_num_entries);
1245                         zfs_unlinked_drain(zfsvfs);
1246                 }
1247
1248                 /*
1249                  * Parse and replay the intent log.
1250                  *
1251                  * Because of ziltest, this must be done after
1252                  * zfs_unlinked_drain().  (Further note: ziltest
1253                  * doesn't use readonly mounts, where
1254                  * zfs_unlinked_drain() isn't called.)  This is because
1255                  * ziltest causes spa_sync() to think it's committed,
1256                  * but actually it is not, so the intent log contains
1257                  * many txg's worth of changes.
1258                  *
1259                  * In particular, if object N is in the unlinked set in
1260                  * the last txg to actually sync, then it could be
1261                  * actually freed in a later txg and then reallocated
1262                  * in a yet later txg.  This would write a "create
1263                  * object N" record to the intent log.  Normally, this
1264                  * would be fine because the spa_sync() would have
1265                  * written out the fact that object N is free, before
1266                  * we could write the "create object N" intent log
1267                  * record.
1268                  *
1269                  * But when we are in ziltest mode, we advance the "open
1270                  * txg" without actually spa_sync()-ing the changes to
1271                  * disk.  So we would see that object N is still
1272                  * allocated and in the unlinked set, and there is an
1273                  * intent log record saying to allocate it.
1274                  */
1275                 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1276                         if (zil_replay_disable) {
1277                                 zil_destroy(zfsvfs->z_log, B_FALSE);
1278                         } else {
1279                                 zfsvfs->z_replay = B_TRUE;
1280                                 zil_replay(zfsvfs->z_os, zfsvfs,
1281                                     zfs_replay_vector);
1282                                 zfsvfs->z_replay = B_FALSE;
1283                         }
1284                 }
1285
1286                 /* restore readonly bit */
1287                 if (readonly != 0)
1288                         readonly_changed_cb(zfsvfs, B_TRUE);
1289         }
1290
1291         /*
1292          * Set the objset user_ptr to track its zfsvfs.
1293          */
1294         mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1295         dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1296         mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1297
1298         return (0);
1299 }
1300
1301 void
1302 zfsvfs_free(zfsvfs_t *zfsvfs)
1303 {
1304         int i, size = zfsvfs->z_hold_size;
1305
1306         zfs_fuid_destroy(zfsvfs);
1307
1308         mutex_destroy(&zfsvfs->z_znodes_lock);
1309         mutex_destroy(&zfsvfs->z_lock);
1310         list_destroy(&zfsvfs->z_all_znodes);
1311         rrm_destroy(&zfsvfs->z_teardown_lock);
1312         rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1313         rw_destroy(&zfsvfs->z_fuid_lock);
1314         for (i = 0; i != size; i++) {
1315                 avl_destroy(&zfsvfs->z_hold_trees[i]);
1316                 mutex_destroy(&zfsvfs->z_hold_locks[i]);
1317         }
1318         vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
1319         vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
1320         zfsvfs_vfs_free(zfsvfs->z_vfs);
1321         dataset_kstats_destroy(&zfsvfs->z_kstat);
1322         kmem_free(zfsvfs, sizeof (zfsvfs_t));
1323 }
1324
1325 static void
1326 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1327 {
1328         zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1329         zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1330 }
1331
1332 void
1333 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1334 {
1335         objset_t *os = zfsvfs->z_os;
1336
1337         if (!dmu_objset_is_snapshot(os))
1338                 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1339 }
1340
#ifdef HAVE_MLSLABEL
/*
 * Check that the hex label string is appropriate for the dataset being
 * mounted into the global_zone proper.
 *
 * Return an error if the hex label string is not default or
 * admin_low/admin_high.  For admin_low labels, the corresponding
 * dataset must be readonly.
 *
 * Returns 0 when the label is acceptable and EACCES otherwise,
 * including when the readonly property cannot be read.
 */
int
zfs_check_global_label(const char *dsname, const char *hexsl)
{
	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
		return (0);
	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
		return (0);
	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
		/* must be readonly */
		uint64_t rdonly;

		if (dsl_prop_get_integer(dsname,
		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
			return (SET_ERROR(EACCES));
		if (rdonly)
			return (0);
		/* Report through SET_ERROR like every other failure here. */
		return (SET_ERROR(EACCES));
	}
	return (SET_ERROR(EACCES));
}
#endif /* HAVE_MLSLABEL */
1369
1370 static int
1371 zfs_statfs_project(zfsvfs_t *zfsvfs, znode_t *zp, struct kstatfs *statp,
1372     uint32_t bshift)
1373 {
1374         char buf[20 + DMU_OBJACCT_PREFIX_LEN];
1375         uint64_t offset = DMU_OBJACCT_PREFIX_LEN;
1376         uint64_t quota;
1377         uint64_t used;
1378         int err;
1379
1380         strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1);
1381         err = id_to_fuidstr(zfsvfs, NULL, zp->z_projid, buf + offset, B_FALSE);
1382         if (err)
1383                 return (err);
1384
1385         if (zfsvfs->z_projectquota_obj == 0)
1386                 goto objs;
1387
1388         err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectquota_obj,
1389             buf + offset, 8, 1, &quota);
1390         if (err == ENOENT)
1391                 goto objs;
1392         else if (err)
1393                 return (err);
1394
1395         err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
1396             buf + offset, 8, 1, &used);
1397         if (unlikely(err == ENOENT)) {
1398                 uint32_t blksize;
1399                 u_longlong_t nblocks;
1400
1401                 /*
1402                  * Quota accounting is async, so it is possible race case.
1403                  * There is at least one object with the given project ID.
1404                  */
1405                 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
1406                 if (unlikely(zp->z_blksz == 0))
1407                         blksize = zfsvfs->z_max_blksz;
1408
1409                 used = blksize * nblocks;
1410         } else if (err) {
1411                 return (err);
1412         }
1413
1414         statp->f_blocks = quota >> bshift;
1415         statp->f_bfree = (quota > used) ? ((quota - used) >> bshift) : 0;
1416         statp->f_bavail = statp->f_bfree;
1417
1418 objs:
1419         if (zfsvfs->z_projectobjquota_obj == 0)
1420                 return (0);
1421
1422         err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectobjquota_obj,
1423             buf + offset, 8, 1, &quota);
1424         if (err == ENOENT)
1425                 return (0);
1426         else if (err)
1427                 return (err);
1428
1429         err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT,
1430             buf, 8, 1, &used);
1431         if (unlikely(err == ENOENT)) {
1432                 /*
1433                  * Quota accounting is async, so it is possible race case.
1434                  * There is at least one object with the given project ID.
1435                  */
1436                 used = 1;
1437         } else if (err) {
1438                 return (err);
1439         }
1440
1441         statp->f_files = quota;
1442         statp->f_ffree = (quota > used) ? (quota - used) : 0;
1443
1444         return (0);
1445 }
1446
1447 int
1448 zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
1449 {
1450         zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
1451         uint64_t refdbytes, availbytes, usedobjs, availobjs;
1452         int err = 0;
1453
1454         ZFS_ENTER(zfsvfs);
1455
1456         dmu_objset_space(zfsvfs->z_os,
1457             &refdbytes, &availbytes, &usedobjs, &availobjs);
1458
1459         uint64_t fsid = dmu_objset_fsid_guid(zfsvfs->z_os);
1460         /*
1461          * The underlying storage pool actually uses multiple block
1462          * size.  Under Solaris frsize (fragment size) is reported as
1463          * the smallest block size we support, and bsize (block size)
1464          * as the filesystem's maximum block size.  Unfortunately,
1465          * under Linux the fragment size and block size are often used
1466          * interchangeably.  Thus we are forced to report both of them
1467          * as the filesystem's maximum block size.
1468          */
1469         statp->f_frsize = zfsvfs->z_max_blksz;
1470         statp->f_bsize = zfsvfs->z_max_blksz;
1471         uint32_t bshift = fls(statp->f_bsize) - 1;
1472
1473         /*
1474          * The following report "total" blocks of various kinds in
1475          * the file system, but reported in terms of f_bsize - the
1476          * "preferred" size.
1477          */
1478
1479         /* Round up so we never have a filesytem using 0 blocks. */
1480         refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize);
1481         statp->f_blocks = (refdbytes + availbytes) >> bshift;
1482         statp->f_bfree = availbytes >> bshift;
1483         statp->f_bavail = statp->f_bfree; /* no root reservation */
1484
1485         /*
1486          * statvfs() should really be called statufs(), because it assumes
1487          * static metadata.  ZFS doesn't preallocate files, so the best
1488          * we can do is report the max that could possibly fit in f_files,
1489          * and that minus the number actually used in f_ffree.
1490          * For f_ffree, report the smaller of the number of objects available
1491          * and the number of blocks (each object will take at least a block).
1492          */
1493         statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT);
1494         statp->f_files = statp->f_ffree + usedobjs;
1495         statp->f_fsid.val[0] = (uint32_t)fsid;
1496         statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
1497         statp->f_type = ZFS_SUPER_MAGIC;
1498         statp->f_namelen = MAXNAMELEN - 1;
1499
1500         /*
1501          * We have all of 40 characters to stuff a string here.
1502          * Is there anything useful we could/should provide?
1503          */
1504         bzero(statp->f_spare, sizeof (statp->f_spare));
1505
1506         if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
1507             dmu_objset_projectquota_present(zfsvfs->z_os)) {
1508                 znode_t *zp = ITOZ(dentry->d_inode);
1509
1510                 if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid &&
1511                     zpl_is_valid_projid(zp->z_projid))
1512                         err = zfs_statfs_project(zfsvfs, zp, statp, bshift);
1513         }
1514
1515         ZFS_EXIT(zfsvfs);
1516         return (err);
1517 }
1518
1519 int
1520 zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
1521 {
1522         znode_t *rootzp;
1523         int error;
1524
1525         ZFS_ENTER(zfsvfs);
1526
1527         error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1528         if (error == 0)
1529                 *ipp = ZTOI(rootzp);
1530
1531         ZFS_EXIT(zfsvfs);
1532         return (error);
1533 }
1534
#ifdef HAVE_D_PRUNE_ALIASES
/*
 * Linux kernels older than 3.1 do not support a per-filesystem shrinker.
 * To accommodate this we must improvise and manually walk the list of znodes
 * attempting to prune dentries in order to be able to drop the inodes.
 *
 * To avoid scanning the same znodes multiple times they are always rotated
 * to the end of the z_all_znodes list.  New znodes are inserted at the
 * end of the list so we're always scanning the oldest znodes first.
 *
 * At most `nr_to_scan' znodes are visited, and at most one batch
 * (bounded by PAGE_SIZE * 8 bytes of pointers) is collected per call.
 * Returns the number of pruned inodes whose only remaining reference,
 * after d_prune_aliases(), was the one taken here.
 */
static int
zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
{
	znode_t **zp_array, *zp;
	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
	int objects = 0;
	unsigned long scanned = 0;
	int i, j = 0;

	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);

	mutex_enter(&zfsvfs->z_znodes_lock);
	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {

		/* Stop once nr_to_scan znodes were seen or the batch is full */
		if ((scanned++ >= nr_to_scan) || (j >= max_array))
			break;

		/* Rotate to the tail so the next pass starts elsewhere. */
		ASSERT(list_link_active(&zp->z_link_node));
		list_remove(&zfsvfs->z_all_znodes, zp);
		list_insert_tail(&zfsvfs->z_all_znodes, zp);

		/* Skip active znodes and .zfs entries */
		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
			continue;

		/* Take a reference; skip inodes that cannot be grabbed. */
		if (igrab(ZTOI(zp)) == NULL)
			continue;

		zp_array[j] = zp;
		j++;
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/* Prune outside z_znodes_lock, then drop the references. */
	for (i = 0; i < j; i++) {
		zp = zp_array[i];

		ASSERT3P(zp, !=, NULL);
		d_prune_aliases(ZTOI(zp));

		if (atomic_read(&ZTOI(zp)->i_count) == 1)
			objects++;

		iput(ZTOI(zp));
	}

	kmem_free(zp_array, max_array * sizeof (znode_t *));

	return (objects);
}
#endif /* HAVE_D_PRUNE_ALIASES */
1594
1595 /*
1596  * The ARC has requested that the filesystem drop entries from the dentry
1597  * and inode caches.  This can occur when the ARC needs to free meta data
1598  * blocks but can't because they are all pinned by entries in these caches.
1599  */
1600 int
1601 zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
1602 {
1603         zfsvfs_t *zfsvfs = sb->s_fs_info;
1604         int error = 0;
1605 #if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
1606         struct shrinker *shrinker = &sb->s_shrink;
1607         struct shrink_control sc = {
1608                 .nr_to_scan = nr_to_scan,
1609                 .gfp_mask = GFP_KERNEL,
1610         };
1611 #endif
1612
1613         ZFS_ENTER(zfsvfs);
1614
1615 #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
1616         defined(SHRINK_CONTROL_HAS_NID) && \
1617         defined(SHRINKER_NUMA_AWARE)
1618         if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
1619                 *objects = 0;
1620                 for_each_online_node(sc.nid) {
1621                         *objects += (*shrinker->scan_objects)(shrinker, &sc);
1622                 }
1623         } else {
1624                         *objects = (*shrinker->scan_objects)(shrinker, &sc);
1625         }
1626
1627 #elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
1628         *objects = (*shrinker->scan_objects)(shrinker, &sc);
1629 #elif defined(HAVE_SHRINK)
1630         *objects = (*shrinker->shrink)(shrinker, &sc);
1631 #elif defined(HAVE_D_PRUNE_ALIASES)
1632 #define D_PRUNE_ALIASES_IS_DEFAULT
1633         *objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
1634 #else
1635 #error "No available dentry and inode cache pruning mechanism."
1636 #endif
1637
1638 #if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
1639 #undef  D_PRUNE_ALIASES_IS_DEFAULT
1640         /*
1641          * Fall back to zfs_prune_aliases if the kernel's per-superblock
1642          * shrinker couldn't free anything, possibly due to the inodes being
1643          * allocated in a different memcg.
1644          */
1645         if (*objects == 0)
1646                 *objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
1647 #endif
1648
1649         ZFS_EXIT(zfsvfs);
1650
1651         dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
1652             "pruning, nr_to_scan=%lu objects=%d error=%d\n",
1653             nr_to_scan, *objects, error);
1654
1655         return (error);
1656 }
1657
1658 /*
1659  * Teardown the zfsvfs_t.
1660  *
1661  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1662  * and 'z_teardown_inactive_lock' held.
1663  */
1664 static int
1665 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1666 {
1667         znode_t *zp;
1668
1669         zfs_unlinked_drain_stop_wait(zfsvfs);
1670
1671         /*
1672          * If someone has not already unmounted this file system,
1673          * drain the iput_taskq to ensure all active references to the
1674          * zfsvfs_t have been handled only then can it be safely destroyed.
1675          */
1676         if (zfsvfs->z_os) {
1677                 /*
1678                  * If we're unmounting we have to wait for the list to
1679                  * drain completely.
1680                  *
1681                  * If we're not unmounting there's no guarantee the list
1682                  * will drain completely, but iputs run from the taskq
1683                  * may add the parents of dir-based xattrs to the taskq
1684                  * so we want to wait for these.
1685                  *
1686                  * We can safely read z_nr_znodes without locking because the
1687                  * VFS has already blocked operations which add to the
1688                  * z_all_znodes list and thus increment z_nr_znodes.
1689                  */
1690                 int round = 0;
1691                 while (zfsvfs->z_nr_znodes > 0) {
1692                         taskq_wait_outstanding(dsl_pool_iput_taskq(
1693                             dmu_objset_pool(zfsvfs->z_os)), 0);
1694                         if (++round > 1 && !unmounting)
1695                                 break;
1696                 }
1697         }
1698
1699         rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
1700
1701         if (!unmounting) {
1702                 /*
1703                  * We purge the parent filesystem's super block as the
1704                  * parent filesystem and all of its snapshots have their
1705                  * inode's super block set to the parent's filesystem's
1706                  * super block.  Note,  'z_parent' is self referential
1707                  * for non-snapshots.
1708                  */
1709                 shrink_dcache_sb(zfsvfs->z_parent->z_sb);
1710         }
1711
1712         /*
1713          * Close the zil. NB: Can't close the zil while zfs_inactive
1714          * threads are blocked as zil_close can call zfs_inactive.
1715          */
1716         if (zfsvfs->z_log) {
1717                 zil_close(zfsvfs->z_log);
1718                 zfsvfs->z_log = NULL;
1719         }
1720
1721         rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
1722
1723         /*
1724          * If we are not unmounting (ie: online recv) and someone already
1725          * unmounted this file system while we were doing the switcheroo,
1726          * or a reopen of z_os failed then just bail out now.
1727          */
1728         if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1729                 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1730                 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
1731                 return (SET_ERROR(EIO));
1732         }
1733
1734         /*
1735          * At this point there are no VFS ops active, and any new VFS ops
1736          * will fail with EIO since we have z_teardown_lock for writer (only
1737          * relevant for forced unmount).
1738          *
1739          * Release all holds on dbufs.
1740          */
1741         if (!unmounting) {
1742                 mutex_enter(&zfsvfs->z_znodes_lock);
1743                 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1744                     zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1745                         if (zp->z_sa_hdl)
1746                                 zfs_znode_dmu_fini(zp);
1747                 }
1748                 mutex_exit(&zfsvfs->z_znodes_lock);
1749         }
1750
1751         /*
1752          * If we are unmounting, set the unmounted flag and let new VFS ops
1753          * unblock.  zfs_inactive will have the unmounted behavior, and all
1754          * other VFS ops will fail with EIO.
1755          */
1756         if (unmounting) {
1757                 zfsvfs->z_unmounted = B_TRUE;
1758                 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1759                 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
1760         }
1761
1762         /*
1763          * z_os will be NULL if there was an error in attempting to reopen
1764          * zfsvfs, so just return as the properties had already been
1765          *
1766          * unregistered and cached data had been evicted before.
1767          */
1768         if (zfsvfs->z_os == NULL)
1769                 return (0);
1770
1771         /*
1772          * Unregister properties.
1773          */
1774         zfs_unregister_callbacks(zfsvfs);
1775
1776         /*
1777          * Evict cached data. We must write out any dirty data before
1778          * disowning the dataset.
1779          */
1780         if (!zfs_is_readonly(zfsvfs))
1781                 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1782         dmu_objset_evict_dbufs(zfsvfs->z_os);
1783
1784         return (0);
1785 }
1786
1787 #if !defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) && \
1788         !defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER)
1789 atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
1790 #endif
1791
1792 int
1793 zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
1794 {
1795         const char *osname = zm->mnt_osname;
1796         struct inode *root_inode;
1797         uint64_t recordsize;
1798         int error = 0;
1799         zfsvfs_t *zfsvfs = NULL;
1800         vfs_t *vfs = NULL;
1801
1802         ASSERT(zm);
1803         ASSERT(osname);
1804
1805         error = zfsvfs_parse_options(zm->mnt_data, &vfs);
1806         if (error)
1807                 return (error);
1808
1809         error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
1810         if (error) {
1811                 zfsvfs_vfs_free(vfs);
1812                 goto out;
1813         }
1814
1815         if ((error = dsl_prop_get_integer(osname, "recordsize",
1816             &recordsize, NULL))) {
1817                 zfsvfs_vfs_free(vfs);
1818                 goto out;
1819         }
1820
1821         vfs->vfs_data = zfsvfs;
1822         zfsvfs->z_vfs = vfs;
1823         zfsvfs->z_sb = sb;
1824         sb->s_fs_info = zfsvfs;
1825         sb->s_magic = ZFS_SUPER_MAGIC;
1826         sb->s_maxbytes = MAX_LFS_FILESIZE;
1827         sb->s_time_gran = 1;
1828         sb->s_blocksize = recordsize;
1829         sb->s_blocksize_bits = ilog2(recordsize);
1830
1831         error = -zpl_bdi_setup(sb, "zfs");
1832         if (error)
1833                 goto out;
1834
1835         sb->s_bdi->ra_pages = 0;
1836
1837         /* Set callback operations for the file system. */
1838         sb->s_op = &zpl_super_operations;
1839         sb->s_xattr = zpl_xattr_handlers;
1840         sb->s_export_op = &zpl_export_operations;
1841 #ifdef HAVE_S_D_OP
1842         sb->s_d_op = &zpl_dentry_operations;
1843 #endif /* HAVE_S_D_OP */
1844
1845         /* Set features for file system. */
1846         zfs_set_fuid_feature(zfsvfs);
1847
1848         if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1849                 uint64_t pval;
1850
1851                 atime_changed_cb(zfsvfs, B_FALSE);
1852                 readonly_changed_cb(zfsvfs, B_TRUE);
1853                 if ((error = dsl_prop_get_integer(osname,
1854                     "xattr", &pval, NULL)))
1855                         goto out;
1856                 xattr_changed_cb(zfsvfs, pval);
1857                 if ((error = dsl_prop_get_integer(osname,
1858                     "acltype", &pval, NULL)))
1859                         goto out;
1860                 acltype_changed_cb(zfsvfs, pval);
1861                 zfsvfs->z_issnap = B_TRUE;
1862                 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1863                 zfsvfs->z_snap_defer_time = jiffies;
1864
1865                 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1866                 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1867                 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1868         } else {
1869                 if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
1870                         goto out;
1871         }
1872
1873         /* Allocate a root inode for the filesystem. */
1874         error = zfs_root(zfsvfs, &root_inode);
1875         if (error) {
1876                 (void) zfs_umount(sb);
1877                 goto out;
1878         }
1879
1880         /* Allocate a root dentry for the filesystem */
1881         sb->s_root = d_make_root(root_inode);
1882         if (sb->s_root == NULL) {
1883                 (void) zfs_umount(sb);
1884                 error = SET_ERROR(ENOMEM);
1885                 goto out;
1886         }
1887
1888         if (!zfsvfs->z_issnap)
1889                 zfsctl_create(zfsvfs);
1890
1891         zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
1892 out:
1893         if (error) {
1894                 if (zfsvfs != NULL) {
1895                         dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1896                         zfsvfs_free(zfsvfs);
1897                 }
1898                 /*
1899                  * make sure we don't have dangling sb->s_fs_info which
1900                  * zfs_preumount will use.
1901                  */
1902                 sb->s_fs_info = NULL;
1903         }
1904
1905         return (error);
1906 }
1907
1908 /*
1909  * Called when an unmount is requested and certain sanity checks have
1910  * already passed.  At this point no dentries or inodes have been reclaimed
1911  * from their respective caches.  We drop the extra reference on the .zfs
1912  * control directory to allow everything to be reclaimed.  All snapshots
1913  * must already have been unmounted to reach this point.
1914  */
1915 void
1916 zfs_preumount(struct super_block *sb)
1917 {
1918         zfsvfs_t *zfsvfs = sb->s_fs_info;
1919
1920         /* zfsvfs is NULL when zfs_domount fails during mount */
1921         if (zfsvfs) {
1922                 zfs_unlinked_drain_stop_wait(zfsvfs);
1923                 zfsctl_destroy(sb->s_fs_info);
1924                 /*
1925                  * Wait for iput_async before entering evict_inodes in
1926                  * generic_shutdown_super. The reason we must finish before
1927                  * evict_inodes is when lazytime is on, or when zfs_purgedir
1928                  * calls zfs_zget, iput would bump i_count from 0 to 1. This
1929                  * would race with the i_count check in evict_inodes. This means
1930                  * it could destroy the inode while we are still using it.
1931                  *
1932                  * We wait for two passes. xattr directories in the first pass
1933                  * may add xattr entries in zfs_purgedir, so in the second pass
1934                  * we wait for them. We don't use taskq_wait here because it is
1935                  * a pool wide taskq. Other mounted filesystems can constantly
1936                  * do iput_async and there's no guarantee when taskq will be
1937                  * empty.
1938                  */
1939                 taskq_wait_outstanding(dsl_pool_iput_taskq(
1940                     dmu_objset_pool(zfsvfs->z_os)), 0);
1941                 taskq_wait_outstanding(dsl_pool_iput_taskq(
1942                     dmu_objset_pool(zfsvfs->z_os)), 0);
1943         }
1944 }
1945
1946 /*
1947  * Called once all other unmount released tear down has occurred.
1948  * It is our responsibility to release any remaining infrastructure.
1949  */
1950 /*ARGSUSED*/
1951 int
1952 zfs_umount(struct super_block *sb)
1953 {
1954         zfsvfs_t *zfsvfs = sb->s_fs_info;
1955         objset_t *os;
1956
1957         if (zfsvfs->z_arc_prune != NULL)
1958                 arc_remove_prune_callback(zfsvfs->z_arc_prune);
1959         VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1960         os = zfsvfs->z_os;
1961         zpl_bdi_destroy(sb);
1962
1963         /*
1964          * z_os will be NULL if there was an error in
1965          * attempting to reopen zfsvfs.
1966          */
1967         if (os != NULL) {
1968                 /*
1969                  * Unset the objset user_ptr.
1970                  */
1971                 mutex_enter(&os->os_user_ptr_lock);
1972                 dmu_objset_set_user(os, NULL);
1973                 mutex_exit(&os->os_user_ptr_lock);
1974
1975                 /*
1976                  * Finally release the objset
1977                  */
1978                 dmu_objset_disown(os, B_TRUE, zfsvfs);
1979         }
1980
1981         zfsvfs_free(zfsvfs);
1982         return (0);
1983 }
1984
1985 int
1986 zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
1987 {
1988         zfsvfs_t *zfsvfs = sb->s_fs_info;
1989         vfs_t *vfsp;
1990         boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);
1991         int error;
1992
1993         if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&
1994             !(*flags & SB_RDONLY)) {
1995                 *flags |= SB_RDONLY;
1996                 return (EROFS);
1997         }
1998
1999         error = zfsvfs_parse_options(zm->mnt_data, &vfsp);
2000         if (error)
2001                 return (error);
2002
2003         if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY))
2004                 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
2005
2006         zfs_unregister_callbacks(zfsvfs);
2007         zfsvfs_vfs_free(zfsvfs->z_vfs);
2008
2009         vfsp->vfs_data = zfsvfs;
2010         zfsvfs->z_vfs = vfsp;
2011         if (!issnap)
2012                 (void) zfs_register_callbacks(vfsp);
2013
2014         return (error);
2015 }
2016
2017 int
2018 zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
2019 {
2020         zfsvfs_t        *zfsvfs = sb->s_fs_info;
2021         znode_t         *zp;
2022         uint64_t        object = 0;
2023         uint64_t        fid_gen = 0;
2024         uint64_t        gen_mask;
2025         uint64_t        zp_gen;
2026         int             i, err;
2027
2028         *ipp = NULL;
2029
2030         if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
2031                 zfid_short_t    *zfid = (zfid_short_t *)fidp;
2032
2033                 for (i = 0; i < sizeof (zfid->zf_object); i++)
2034                         object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
2035
2036                 for (i = 0; i < sizeof (zfid->zf_gen); i++)
2037                         fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
2038         } else {
2039                 return (SET_ERROR(EINVAL));
2040         }
2041
2042         /* LONG_FID_LEN means snapdirs */
2043         if (fidp->fid_len == LONG_FID_LEN) {
2044                 zfid_long_t     *zlfid = (zfid_long_t *)fidp;
2045                 uint64_t        objsetid = 0;
2046                 uint64_t        setgen = 0;
2047
2048                 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
2049                         objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
2050
2051                 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
2052                         setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
2053
2054                 if (objsetid != ZFSCTL_INO_SNAPDIRS - object) {
2055                         dprintf("snapdir fid: objsetid (%llu) != "
2056                             "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",
2057                             objsetid, ZFSCTL_INO_SNAPDIRS, object);
2058
2059                         return (SET_ERROR(EINVAL));
2060                 }
2061
2062                 if (fid_gen > 1 || setgen != 0) {
2063                         dprintf("snapdir fid: fid_gen (%llu) and setgen "
2064                             "(%llu)\n", fid_gen, setgen);
2065                         return (SET_ERROR(EINVAL));
2066                 }
2067
2068                 return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
2069         }
2070
2071         ZFS_ENTER(zfsvfs);
2072         /* A zero fid_gen means we are in the .zfs control directories */
2073         if (fid_gen == 0 &&
2074             (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
2075                 *ipp = zfsvfs->z_ctldir;
2076                 ASSERT(*ipp != NULL);
2077                 if (object == ZFSCTL_INO_SNAPDIR) {
2078                         VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
2079                             0, kcred, NULL, NULL) == 0);
2080                 } else {
2081                         igrab(*ipp);
2082                 }
2083                 ZFS_EXIT(zfsvfs);
2084                 return (0);
2085         }
2086
2087         gen_mask = -1ULL >> (64 - 8 * i);
2088
2089         dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
2090         if ((err = zfs_zget(zfsvfs, object, &zp))) {
2091                 ZFS_EXIT(zfsvfs);
2092                 return (err);
2093         }
2094
2095         /* Don't export xattr stuff */
2096         if (zp->z_pflags & ZFS_XATTR) {
2097                 iput(ZTOI(zp));
2098                 ZFS_EXIT(zfsvfs);
2099                 return (SET_ERROR(ENOENT));
2100         }
2101
2102         (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
2103             sizeof (uint64_t));
2104         zp_gen = zp_gen & gen_mask;
2105         if (zp_gen == 0)
2106                 zp_gen = 1;
2107         if ((fid_gen == 0) && (zfsvfs->z_root == object))
2108                 fid_gen = zp_gen;
2109         if (zp->z_unlinked || zp_gen != fid_gen) {
2110                 dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
2111                     fid_gen);
2112                 iput(ZTOI(zp));
2113                 ZFS_EXIT(zfsvfs);
2114                 return (SET_ERROR(ENOENT));
2115         }
2116
2117         *ipp = ZTOI(zp);
2118         if (*ipp)
2119                 zfs_inode_update(ITOZ(*ipp));
2120
2121         ZFS_EXIT(zfsvfs);
2122         return (0);
2123 }
2124
2125 /*
2126  * Block out VFS ops and close zfsvfs_t
2127  *
2128  * Note, if successful, then we return with the 'z_teardown_lock' and
2129  * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
2130  * dataset and objset intact so that they can be atomically handed off during
2131  * a subsequent rollback or recv operation and the resume thereafter.
2132  */
2133 int
2134 zfs_suspend_fs(zfsvfs_t *zfsvfs)
2135 {
2136         int error;
2137
2138         if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2139                 return (error);
2140
2141         return (0);
2142 }
2143
2144 /*
2145  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
2146  * is an invariant across any of the operations that can be performed while the
2147  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
2148  * are the same: the relevant objset and associated dataset are owned by
2149  * zfsvfs, held, and long held on entry.
2150  */
2151 int
2152 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
2153 {
2154         int err, err2;
2155         znode_t *zp;
2156
2157         ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
2158         ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
2159
2160         /*
2161          * We already own this, so just update the objset_t, as the one we
2162          * had before may have been evicted.
2163          */
2164         objset_t *os;
2165         VERIFY3P(ds->ds_owner, ==, zfsvfs);
2166         VERIFY(dsl_dataset_long_held(ds));
2167         VERIFY0(dmu_objset_from_ds(ds, &os));
2168
2169         err = zfsvfs_init(zfsvfs, os);
2170         if (err != 0)
2171                 goto bail;
2172
2173         VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
2174
2175         zfs_set_fuid_feature(zfsvfs);
2176         zfsvfs->z_rollback_time = jiffies;
2177
2178         /*
2179          * Attempt to re-establish all the active inodes with their
2180          * dbufs.  If a zfs_rezget() fails, then we unhash the inode
2181          * and mark it stale.  This prevents a collision if a new
2182          * inode/object is created which must use the same inode
2183          * number.  The stale inode will be be released when the
2184          * VFS prunes the dentry holding the remaining references
2185          * on the stale inode.
2186          */
2187         mutex_enter(&zfsvfs->z_znodes_lock);
2188         for (zp = list_head(&zfsvfs->z_all_znodes); zp;
2189             zp = list_next(&zfsvfs->z_all_znodes, zp)) {
2190                 err2 = zfs_rezget(zp);
2191                 if (err2) {
2192                         remove_inode_hash(ZTOI(zp));
2193                         zp->z_is_stale = B_TRUE;
2194                 }
2195         }
2196         mutex_exit(&zfsvfs->z_znodes_lock);
2197
2198         if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
2199                 /*
2200                  * zfs_suspend_fs() could have interrupted freeing
2201                  * of dnodes. We need to restart this freeing so
2202                  * that we don't "leak" the space.
2203                  */
2204                 zfs_unlinked_drain(zfsvfs);
2205         }
2206
2207 bail:
2208         /* release the VFS ops */
2209         rw_exit(&zfsvfs->z_teardown_inactive_lock);
2210         rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2211
2212         if (err) {
2213                 /*
2214                  * Since we couldn't setup the sa framework, try to force
2215                  * unmount this file system.
2216                  */
2217                 if (zfsvfs->z_os)
2218                         (void) zfs_umount(zfsvfs->z_sb);
2219         }
2220         return (err);
2221 }
2222
2223 int
2224 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2225 {
2226         int error;
2227         objset_t *os = zfsvfs->z_os;
2228         dmu_tx_t *tx;
2229
2230         if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2231                 return (SET_ERROR(EINVAL));
2232
2233         if (newvers < zfsvfs->z_version)
2234                 return (SET_ERROR(EINVAL));
2235
2236         if (zfs_spa_version_map(newvers) >
2237             spa_version(dmu_objset_spa(zfsvfs->z_os)))
2238                 return (SET_ERROR(ENOTSUP));
2239
2240         tx = dmu_tx_create(os);
2241         dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2242         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2243                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2244                     ZFS_SA_ATTRS);
2245                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2246         }
2247         error = dmu_tx_assign(tx, TXG_WAIT);
2248         if (error) {
2249                 dmu_tx_abort(tx);
2250                 return (error);
2251         }
2252
2253         error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2254             8, 1, &newvers, tx);
2255
2256         if (error) {
2257                 dmu_tx_commit(tx);
2258                 return (error);
2259         }
2260
2261         if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2262                 uint64_t sa_obj;
2263
2264                 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2265                     SPA_VERSION_SA);
2266                 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2267                     DMU_OT_NONE, 0, tx);
2268
2269                 error = zap_add(os, MASTER_NODE_OBJ,
2270                     ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2271                 ASSERT0(error);
2272
2273                 VERIFY(0 == sa_set_sa_object(os, sa_obj));
2274                 sa_register_update_callback(os, zfs_sa_upgrade);
2275         }
2276
2277         spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2278             "from %llu to %llu", zfsvfs->z_version, newvers);
2279
2280         dmu_tx_commit(tx);
2281
2282         zfsvfs->z_version = newvers;
2283         os->os_version = newvers;
2284
2285         zfs_set_fuid_feature(zfsvfs);
2286
2287         return (0);
2288 }
2289
2290 /*
2291  * Read a property stored within the master node.
2292  */
2293 int
2294 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2295 {
2296         uint64_t *cached_copy = NULL;
2297
2298         /*
2299          * Figure out where in the objset_t the cached copy would live, if it
2300          * is available for the requested property.
2301          */
2302         if (os != NULL) {
2303                 switch (prop) {
2304                 case ZFS_PROP_VERSION:
2305                         cached_copy = &os->os_version;
2306                         break;
2307                 case ZFS_PROP_NORMALIZE:
2308                         cached_copy = &os->os_normalization;
2309                         break;
2310                 case ZFS_PROP_UTF8ONLY:
2311                         cached_copy = &os->os_utf8only;
2312                         break;
2313                 case ZFS_PROP_CASE:
2314                         cached_copy = &os->os_casesensitivity;
2315                         break;
2316                 default:
2317                         break;
2318                 }
2319         }
2320         if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
2321                 *value = *cached_copy;
2322                 return (0);
2323         }
2324
2325         /*
2326          * If the property wasn't cached, look up the file system's value for
2327          * the property. For the version property, we look up a slightly
2328          * different string.
2329          */
2330         const char *pname;
2331         int error = ENOENT;
2332         if (prop == ZFS_PROP_VERSION)
2333                 pname = ZPL_VERSION_STR;
2334         else
2335                 pname = zfs_prop_to_name(prop);
2336
2337         if (os != NULL) {
2338                 ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
2339                 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2340         }
2341
2342         if (error == ENOENT) {
2343                 /* No value set, use the default value */
2344                 switch (prop) {
2345                 case ZFS_PROP_VERSION:
2346                         *value = ZPL_VERSION;
2347                         break;
2348                 case ZFS_PROP_NORMALIZE:
2349                 case ZFS_PROP_UTF8ONLY:
2350                         *value = 0;
2351                         break;
2352                 case ZFS_PROP_CASE:
2353                         *value = ZFS_CASE_SENSITIVE;
2354                         break;
2355                 case ZFS_PROP_ACLTYPE:
2356                         *value = ZFS_ACLTYPE_OFF;
2357                         break;
2358                 default:
2359                         return (error);
2360                 }
2361                 error = 0;
2362         }
2363
2364         /*
2365          * If one of the methods for getting the property value above worked,
2366          * copy it into the objset_t's cache.
2367          */
2368         if (error == 0 && cached_copy != NULL) {
2369                 *cached_copy = *value;
2370         }
2371
2372         return (error);
2373 }
2374
2375 /*
2376  * Return true if the coresponding vfs's unmounted flag is set.
2377  * Otherwise return false.
2378  * If this function returns true we know VFS unmount has been initiated.
2379  */
2380 boolean_t
2381 zfs_get_vfs_flag_unmounted(objset_t *os)
2382 {
2383         zfsvfs_t *zfvp;
2384         boolean_t unmounted = B_FALSE;
2385
2386         ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
2387
2388         mutex_enter(&os->os_user_ptr_lock);
2389         zfvp = dmu_objset_get_user(os);
2390         if (zfvp != NULL && zfvp->z_unmounted)
2391                 unmounted = B_TRUE;
2392         mutex_exit(&os->os_user_ptr_lock);
2393
2394         return (unmounted);
2395 }
2396
2397 void
2398 zfs_init(void)
2399 {
2400         zfsctl_init();
2401         zfs_znode_init();
2402         dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2403         register_filesystem(&zpl_fs_type);
2404 }
2405
2406 void
2407 zfs_fini(void)
2408 {
2409         /*
2410          * we don't use outstanding because zpl_posix_acl_free might add more.
2411          */
2412         taskq_wait(system_delay_taskq);
2413         taskq_wait(system_taskq);
2414         unregister_filesystem(&zpl_fs_type);
2415         zfs_znode_fini();
2416         zfsctl_fini();
2417 }
2418
#if defined(_KERNEL)
/* Mount, remount and unmount entry points. */
EXPORT_SYMBOL(zfs_domount);
EXPORT_SYMBOL(zfs_remount);
EXPORT_SYMBOL(zfs_preumount);
EXPORT_SYMBOL(zfs_umount);

/* Suspend and resume around rollback/recv. */
EXPORT_SYMBOL(zfs_suspend_fs);
EXPORT_SYMBOL(zfs_resume_fs);

/* zfsvfs_t lifetime and state. */
EXPORT_SYMBOL(zfsvfs_create);
EXPORT_SYMBOL(zfsvfs_free);
EXPORT_SYMBOL(zfs_is_readonly);
EXPORT_SYMBOL(zfs_set_version);

/* User/group space accounting and quotas. */
EXPORT_SYMBOL(zfs_userspace_one);
EXPORT_SYMBOL(zfs_userspace_many);
EXPORT_SYMBOL(zfs_set_userquota);
EXPORT_SYMBOL(zfs_id_overblockquota);
EXPORT_SYMBOL(zfs_id_overobjquota);
EXPORT_SYMBOL(zfs_id_overquota);

/* Remaining super block operations. */
EXPORT_SYMBOL(zfs_statvfs);
EXPORT_SYMBOL(zfs_vget);
EXPORT_SYMBOL(zfs_prune);
#endif