UBUNTU: SAUCE: (noup) Update zfs to 0.7.5-1ubuntu15 (LP: #1764690)

author Colin Ian King <colin.king@canonical.com>
Tue, 17 Apr 2018 11:04:00 +0000 (13:04 +0200)
committer Stefan Bader <stefan.bader@canonical.com>
Tue, 22 May 2018 14:17:58 +0000 (16:17 +0200)
diff --git a/zfs/META b/zfs/META

index d624ae400e3c26b067ae526291a20424eac6b6c1..2110eef1b19d6d0902f7ca52422ff06bd0cd444f 100644 (file)
--- a/zfs/META
+++ b/zfs/META
@@ -2,7 +2,7 @@ Meta:         1
  Name:         zfs
  Branch:       1.0
  Version:      0.7.5
-Release:      1ubuntu13
+Release:      1ubuntu15
  Release-Tags: relext
  License:      CDDL
  Author:       OpenZFS on Linux
diff --git a/zfs/include/sys/dmu.h b/zfs/include/sys/dmu.h

index d24615262737912d1e88c18aecd30fc8c9105560..bcdf7d646fbc8073337a412c4ac1772753a2cc39 100644 (file)
--- a/zfs/include/sys/dmu.h
+++ b/zfs/include/sys/dmu.h
@@ -713,11 +713,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
   * to stable storage and will also be called if the dmu_tx is aborted.
   * If there is any error which prevents the transaction from being committed to
   * disk, the callback will be called with a value of error != 0.
+ *
+ * When multiple callbacks are registered to the transaction, the callbacks
+ * will be called in reverse order to let Lustre, the only user of commit
+ * callback currently, take the fast path of its commit callback handling.
   */
  typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
  
  void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
      void *dcb_data);
+void dmu_tx_do_callbacks(list_t *cb_list, int error);
  
  /*
   * Free up the data blocks for a defined range of a file.  If size is
diff --git a/zfs/include/sys/dmu_tx.h b/zfs/include/sys/dmu_tx.h

index f16e1e858041aa48ba51ccd602c3c9a0dbe04f98..d82a79310db69339f03da1ef684671382f7d2b9d 100644 (file)
--- a/zfs/include/sys/dmu_tx.h
+++ b/zfs/include/sys/dmu_tx.h
@@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
  struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
  void dmu_tx_wait(dmu_tx_t *tx);
  
-void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
-    void *dcb_data);
-void dmu_tx_do_callbacks(list_t *cb_list, int error);
-
  /*
   * These routines are defined in dmu_spa.h, and are called by the SPA.
   */
diff --git a/zfs/include/sys/dsl_pool.h b/zfs/include/sys/dsl_pool.h

index d2dabda6df19917dbec5f6f09c6b8b9d3dc5f218..7eb6cb0929a73d249f31d7497460b8c257459b2f 100644 (file)
--- a/zfs/include/sys/dsl_pool.h
+++ b/zfs/include/sys/dsl_pool.h
@@ -126,6 +126,7 @@ typedef struct dsl_pool {
         txg_list_t dp_dirty_dirs;
         txg_list_t dp_sync_tasks;
         taskq_t *dp_sync_taskq;
+       taskq_t *dp_zil_clean_taskq;
  
         /*
          * Protects administrative changes (properties, namespace)
diff --git a/zfs/include/sys/trace_dmu.h b/zfs/include/sys/trace_dmu.h

index 5ae59e563358f667c3db0978e84b322cafdfa7be..24e57f5146a4f11c997dd29734b4ec65e06a98e4 100644 (file)
--- a/zfs/include/sys/trace_dmu.h
+++ b/zfs/include/sys/trace_dmu.h
@@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
             __field(uint64_t,                   tx_lastsnap_txg)
             __field(uint64_t,                   tx_lasttried_txg)
             __field(boolean_t,                  tx_anyobj)
-           __field(boolean_t,                  tx_waited)
+           __field(boolean_t,                  tx_dirty_delayed)
             __field(hrtime_t,                   tx_start)
             __field(boolean_t,                  tx_wait_dirty)
             __field(int,                        tx_err)
@@ -62,7 +62,7 @@ DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
             __entry->tx_lastsnap_txg            = tx->tx_lastsnap_txg;
             __entry->tx_lasttried_txg           = tx->tx_lasttried_txg;
             __entry->tx_anyobj                  = tx->tx_anyobj;
-           __entry->tx_waited                  = tx->tx_waited;
+           __entry->tx_dirty_delayed           = tx->tx_dirty_delayed;
             __entry->tx_start                   = tx->tx_start;
             __entry->tx_wait_dirty              = tx->tx_wait_dirty;
             __entry->tx_err                     = tx->tx_err;
@@ -70,11 +70,12 @@ DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
             __entry->min_tx_time                = min_tx_time;
         ),
         TP_printk("tx { txg %llu lastsnap_txg %llu tx_lasttried_txg %llu "
-           "anyobj %d waited %d start %llu wait_dirty %d err %i "
+           "anyobj %d dirty_delayed %d start %llu wait_dirty %d err %i "
             "} dirty %llu min_tx_time %llu",
             __entry->tx_txg, __entry->tx_lastsnap_txg,
-           __entry->tx_lasttried_txg, __entry->tx_anyobj, __entry->tx_waited,
-           __entry->tx_start, __entry->tx_wait_dirty, __entry->tx_err,
+           __entry->tx_lasttried_txg, __entry->tx_anyobj,
+           __entry->tx_dirty_delayed, __entry->tx_start,
+           __entry->tx_wait_dirty, __entry->tx_err,
             __entry->dirty, __entry->min_tx_time)
  );
  /* END CSTYLED */
diff --git a/zfs/include/sys/vdev.h b/zfs/include/sys/vdev.h

index 7157ef43f64dc6e12319e730450605d9a7477813..473d2691c947fcb7bb20444f735727597b4a726d 100644 (file)
--- a/zfs/include/sys/vdev.h
+++ b/zfs/include/sys/vdev.h
@@ -125,8 +125,7 @@ extern zio_t *vdev_queue_io(zio_t *zio);
  extern void vdev_queue_io_done(zio_t *zio);
  
  extern int vdev_queue_length(vdev_t *vd);
-extern uint64_t vdev_queue_lastoffset(vdev_t *vd);
-extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);
+extern uint64_t vdev_queue_last_offset(vdev_t *vd);
  
  extern void vdev_config_dirty(vdev_t *vd);
  extern void vdev_config_clean(vdev_t *vd);
diff --git a/zfs/include/sys/vdev_impl.h b/zfs/include/sys/vdev_impl.h

index 7c5e54b08e193ec33c8f1598219732d944b2eb73..4c2e3cd2e0af6f62053bbe275d11a4e6948f55d5 100644 (file)
--- a/zfs/include/sys/vdev_impl.h
+++ b/zfs/include/sys/vdev_impl.h
@@ -127,7 +127,6 @@ struct vdev_queue {
         hrtime_t        vq_io_delta_ts;
         zio_t           vq_io_search; /* used as local for stack reduction */
         kmutex_t        vq_lock;
-       uint64_t        vq_lastoffset;
  };
  
  /*
diff --git a/zfs/include/sys/vdev_raidz_impl.h b/zfs/include/sys/vdev_raidz_impl.h

index 4bd15e3d53c2b421b470bcf393fe73c71e50fd08..0799ed19dfc88bf68d67c4e1b087584008571fd2 100644 (file)
--- a/zfs/include/sys/vdev_raidz_impl.h
+++ b/zfs/include/sys/vdev_raidz_impl.h
@@ -102,30 +102,30 @@ typedef struct raidz_impl_ops {
  } raidz_impl_ops_t;
  
  typedef struct raidz_col {
-       size_t rc_devidx;               /* child device index for I/O */
-       size_t rc_offset;               /* device offset */
-       size_t rc_size;                 /* I/O size */
+       uint64_t rc_devidx;             /* child device index for I/O */
+       uint64_t rc_offset;             /* device offset */
+       uint64_t rc_size;               /* I/O size */
         abd_t *rc_abd;                  /* I/O data */
         void *rc_gdata;                 /* used to store the "good" version */
         int rc_error;                   /* I/O error for this device */
-       unsigned int rc_tried;          /* Did we attempt this I/O column? */
-       unsigned int rc_skipped;        /* Did we skip this I/O column? */
+       uint8_t rc_tried;               /* Did we attempt this I/O column? */
+       uint8_t rc_skipped;             /* Did we skip this I/O column? */
  } raidz_col_t;
  
  typedef struct raidz_map {
-       size_t rm_cols;                 /* Regular column count */
-       size_t rm_scols;                /* Count including skipped columns */
-       size_t rm_bigcols;              /* Number of oversized columns */
-       size_t rm_asize;                /* Actual total I/O size */
-       size_t rm_missingdata;          /* Count of missing data devices */
-       size_t rm_missingparity;        /* Count of missing parity devices */
-       size_t rm_firstdatacol;         /* First data column/parity count */
-       size_t rm_nskip;                /* Skipped sectors for padding */
-       size_t rm_skipstart;            /* Column index of padding start */
+       uint64_t rm_cols;               /* Regular column count */
+       uint64_t rm_scols;              /* Count including skipped columns */
+       uint64_t rm_bigcols;            /* Number of oversized columns */
+       uint64_t rm_asize;              /* Actual total I/O size */
+       uint64_t rm_missingdata;        /* Count of missing data devices */
+       uint64_t rm_missingparity;      /* Count of missing parity devices */
+       uint64_t rm_firstdatacol;       /* First data column/parity count */
+       uint64_t rm_nskip;              /* Skipped sectors for padding */
+       uint64_t rm_skipstart;          /* Column index of padding start */
         abd_t *rm_abd_copy;             /* rm_asize-buffer of copied data */
-       size_t rm_reports;              /* # of referencing checksum reports */
-       unsigned int rm_freed;          /* map no longer has referencing ZIO */
-       unsigned int rm_ecksuminjected; /* checksum error was injected */
+       uintptr_t rm_reports;           /* # of referencing checksum reports */
+       uint8_t rm_freed;               /* map no longer has referencing ZIO */
+       uint8_t rm_ecksuminjected;      /* checksum error was injected */
         raidz_impl_ops_t *rm_ops;       /* RAIDZ math operations */
         raidz_col_t rm_col[1];          /* Flexible array of I/O columns */
  } raidz_map_t;
diff --git a/zfs/include/sys/zil_impl.h b/zfs/include/sys/zil_impl.h

index 13ecca3c8b0436818de603fab0dd2c780f3c2eed..dd5304b79a91be698af69413c1b2296e9133256e 100644 (file)
--- a/zfs/include/sys/zil_impl.h
+++ b/zfs/include/sys/zil_impl.h
@@ -124,7 +124,6 @@ struct zilog {
         list_t          zl_lwb_list;    /* in-flight log write list */
         kmutex_t        zl_vdev_lock;   /* protects zl_vdev_tree */
         avl_tree_t      zl_vdev_tree;   /* vdevs to flush in zil_commit() */
-       taskq_t         *zl_clean_taskq; /* runs lwb and itx clean tasks */
         avl_tree_t      zl_bp_tree;     /* track bps during log parse */
         clock_t         zl_replay_time; /* lbolt of when replay started */
         uint64_t        zl_replay_blks; /* number of log blocks replayed */
diff --git a/zfs/module/icp/asm-x86_64/aes/aes_intel.S b/zfs/module/icp/asm-x86_64/aes/aes_intel.S

index ed0df75c5513f2bbfd678fd07e44b7db98071b34..a40e30fbed5f6a90ebbf2a1d15d44970e5eb2983 100644 (file)
--- a/zfs/module/icp/asm-x86_64/aes/aes_intel.S
+++ b/zfs/module/icp/asm-x86_64/aes/aes_intel.S
@@ -207,7 +207,7 @@ _key_expansion_256a_local:
         shufps  $0b10001100, %xmm0, %xmm4
         pxor    %xmm4, %xmm0
         pxor    %xmm1, %xmm0
-       movaps  %xmm0, (%rcx)
+       movups  %xmm0, (%rcx)
         add     $0x10, %rcx
         ret
         nop
@@ -224,18 +224,18 @@ _key_expansion_192a_local:
         pxor    %xmm4, %xmm0
         pxor    %xmm1, %xmm0
  
-       movaps  %xmm2, %xmm5
-       movaps  %xmm2, %xmm6
+       movups  %xmm2, %xmm5
+       movups  %xmm2, %xmm6
         pslldq  $4, %xmm5
         pshufd  $0b11111111, %xmm0, %xmm3
         pxor    %xmm3, %xmm2
         pxor    %xmm5, %xmm2
  
-       movaps  %xmm0, %xmm1
+       movups  %xmm0, %xmm1
         shufps  $0b01000100, %xmm0, %xmm6
-       movaps  %xmm6, (%rcx)
+       movups  %xmm6, (%rcx)
         shufps  $0b01001110, %xmm2, %xmm1
-       movaps  %xmm1, 0x10(%rcx)
+       movups  %xmm1, 0x10(%rcx)
         add     $0x20, %rcx
         ret
  SET_SIZE(_key_expansion_192a)
@@ -250,13 +250,13 @@ _key_expansion_192b_local:
         pxor    %xmm4, %xmm0
         pxor    %xmm1, %xmm0
  
-       movaps  %xmm2, %xmm5
+       movups  %xmm2, %xmm5
         pslldq  $4, %xmm5
         pshufd  $0b11111111, %xmm0, %xmm3
         pxor    %xmm3, %xmm2
         pxor    %xmm5, %xmm2
  
-       movaps  %xmm0, (%rcx)
+       movups  %xmm0, (%rcx)
         add     $0x10, %rcx
         ret
  SET_SIZE(_key_expansion_192b)
@@ -270,7 +270,7 @@ _key_expansion_256b_local:
         shufps  $0b10001100, %xmm2, %xmm4
         pxor    %xmm4, %xmm2
         pxor    %xmm1, %xmm2
-       movaps  %xmm2, (%rcx)
+       movups  %xmm2, (%rcx)
         add     $0x10, %rcx
         ret
  SET_SIZE(_key_expansion_256b)
@@ -327,7 +327,7 @@ rijndael_key_setup_enc_intel_local:
         jz      .Lenc_key_invalid_param
  
         movups  (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
-       movaps  %xmm0, (%AESKEY)
+       movups  %xmm0, (%AESKEY)
         lea     0x10(%AESKEY), %rcx     // key addr
         pxor    %xmm4, %xmm4            // xmm4 is assumed 0 in _key_expansion_x
  
@@ -341,7 +341,7 @@ rijndael_key_setup_enc_intel_local:
  #endif /* OPENSSL_INTERFACE */
  
         movups  0x10(%USERCIPHERKEY), %xmm2     // other user key (2nd 16 bytes)
-       movaps  %xmm2, (%rcx)
+       movups  %xmm2, (%rcx)
         add     $0x10, %rcx
  
         aeskeygenassist $0x1, %xmm2, %xmm1      // expand the key
@@ -525,10 +525,10 @@ FRAME_BEGIN
  
  .align 4
  .Ldec_key_reorder_loop:
-       movaps  (%AESKEY), %xmm0
-       movaps  (%ROUNDS64), %xmm1
-       movaps  %xmm0, (%ROUNDS64)
-       movaps  %xmm1, (%AESKEY)
+       movups  (%AESKEY), %xmm0
+       movups  (%ROUNDS64), %xmm1
+       movups  %xmm0, (%ROUNDS64)
+       movups  %xmm1, (%AESKEY)
         lea     0x10(%AESKEY), %AESKEY
         lea     -0x10(%ROUNDS64), %ROUNDS64
         cmp     %AESKEY, %ROUNDS64
@@ -536,11 +536,11 @@ FRAME_BEGIN
  
  .align 4
  .Ldec_key_inv_loop:
-       movaps  (%rcx), %xmm0
+       movups  (%rcx), %xmm0
         // Convert an encryption round key to a form usable for decryption
         // with the "AES Inverse Mix Columns" instruction
         aesimc  %xmm0, %xmm1
-       movaps  %xmm1, (%rcx)
+       movups  %xmm1, (%rcx)
         lea     0x10(%rcx), %rcx
         cmp     %ENDAESKEY, %rcx
         jnz     .Ldec_key_inv_loop
@@ -602,7 +602,7 @@ FRAME_BEGIN
  ENTRY_NP(aes_encrypt_intel)
  
         movups  (%INP), %STATE                  // input
-       movaps  (%KEYP), %KEY                   // key
+       movups  (%KEYP), %KEY                   // key
  #ifdef OPENSSL_INTERFACE
         mov     240(%KEYP), %NROUNDS32          // round count
  #else  /* OpenSolaris Interface */
@@ -618,41 +618,41 @@ ENTRY_NP(aes_encrypt_intel)
  
         // AES 256
         lea     0x20(%KEYP), %KEYP
-       movaps  -0x60(%KEYP), %KEY
+       movups  -0x60(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  -0x50(%KEYP), %KEY
+       movups  -0x50(%KEYP), %KEY
         aesenc  %KEY, %STATE
  
  .align 4
  .Lenc192:
         // AES 192 and 256
-       movaps  -0x40(%KEYP), %KEY
+       movups  -0x40(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  -0x30(%KEYP), %KEY
+       movups  -0x30(%KEYP), %KEY
         aesenc  %KEY, %STATE
  
  .align 4
  .Lenc128:
         // AES 128, 192, and 256
-       movaps  -0x20(%KEYP), %KEY
+       movups  -0x20(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  -0x10(%KEYP), %KEY
+       movups  -0x10(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  (%KEYP), %KEY
+       movups  (%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x10(%KEYP), %KEY
+       movups  0x10(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x20(%KEYP), %KEY
+       movups  0x20(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x30(%KEYP), %KEY
+       movups  0x30(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x40(%KEYP), %KEY
+       movups  0x40(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x50(%KEYP), %KEY
+       movups  0x50(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x60(%KEYP), %KEY
+       movups  0x60(%KEYP), %KEY
         aesenc  %KEY, %STATE
-       movaps  0x70(%KEYP), %KEY
+       movups  0x70(%KEYP), %KEY
         aesenclast       %KEY, %STATE           // last round
         movups  %STATE, (%OUTP)                 // output
  
@@ -685,7 +685,7 @@ ENTRY_NP(aes_encrypt_intel)
  ENTRY_NP(aes_decrypt_intel)
  
         movups  (%INP), %STATE                  // input
-       movaps  (%KEYP), %KEY                   // key
+       movups  (%KEYP), %KEY                   // key
  #ifdef OPENSSL_INTERFACE
         mov     240(%KEYP), %NROUNDS32          // round count
  #else  /* OpenSolaris Interface */
@@ -701,41 +701,41 @@ ENTRY_NP(aes_decrypt_intel)
  
         // AES 256
         lea     0x20(%KEYP), %KEYP
-       movaps  -0x60(%KEYP), %KEY
+       movups  -0x60(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  -0x50(%KEYP), %KEY
+       movups  -0x50(%KEYP), %KEY
         aesdec  %KEY, %STATE
  
  .align 4
  .Ldec192:
         // AES 192 and 256
-       movaps  -0x40(%KEYP), %KEY
+       movups  -0x40(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  -0x30(%KEYP), %KEY
+       movups  -0x30(%KEYP), %KEY
         aesdec  %KEY, %STATE
  
  .align 4
  .Ldec128:
         // AES 128, 192, and 256
-       movaps  -0x20(%KEYP), %KEY
+       movups  -0x20(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  -0x10(%KEYP), %KEY
+       movups  -0x10(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  (%KEYP), %KEY
+       movups  (%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x10(%KEYP), %KEY
+       movups  0x10(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x20(%KEYP), %KEY
+       movups  0x20(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x30(%KEYP), %KEY
+       movups  0x30(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x40(%KEYP), %KEY
+       movups  0x40(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x50(%KEYP), %KEY
+       movups  0x50(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x60(%KEYP), %KEY
+       movups  0x60(%KEYP), %KEY
         aesdec  %KEY, %STATE
-       movaps  0x70(%KEYP), %KEY
+       movups  0x70(%KEYP), %KEY
         aesdeclast      %KEY, %STATE            // last round
         movups  %STATE, (%OUTP)                 // output
  
diff --git a/zfs/module/icp/asm-x86_64/modes/gcm_intel.S b/zfs/module/icp/asm-x86_64/modes/gcm_intel.S

index a43b5ebcb7e5dcb51be2095e878da6774f835521..3aec0ee1586d52754250b37694a0a9ab151794ab 100644 (file)
--- a/zfs/module/icp/asm-x86_64/modes/gcm_intel.S
+++ b/zfs/module/icp/asm-x86_64/modes/gcm_intel.S
@@ -150,7 +150,7 @@ ENTRY_NP(gcm_mul_pclmulqdq)
         // Byte swap 16-byte input
         //
         lea     .Lbyte_swap16_mask(%rip), %rax
-       movaps  (%rax), %xmm10
+       movups  (%rax), %xmm10
         pshufb  %xmm10, %xmm0
         pshufb  %xmm10, %xmm1
  
diff --git a/zfs/module/icp/spi/kcf_spi.c b/zfs/module/icp/spi/kcf_spi.c

index c2c2b54bc8d853b4ad7281bab9e640b0ade54125..0a6e38df8625643bea1e8e1075ddf6235935114c 100644 (file)
--- a/zfs/module/icp/spi/kcf_spi.c
+++ b/zfs/module/icp/spi/kcf_spi.c
@@ -111,7 +111,7 @@ int
  crypto_register_provider(crypto_provider_info_t *info,
      crypto_kcf_provider_handle_t *handle)
  {
-       char ks_name[KSTAT_STRLEN];
+       char *ks_name;
  
         kcf_provider_desc_t *prov_desc = NULL;
         int ret = CRYPTO_ARGUMENTS_BAD;
@@ -238,12 +238,12 @@ crypto_register_provider(crypto_provider_info_t *info,
                  * This kstat is deleted, when the provider unregisters.
                  */
                 if (prov_desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
-                       (void) snprintf(ks_name, KSTAT_STRLEN, "%s_%s",
+                       ks_name = kmem_asprintf("%s_%s",
                             "NONAME", "provider_stats");
                 } else {
-                       (void) snprintf(ks_name, KSTAT_STRLEN, "%s_%d_%u_%s",
-                           "NONAME", 0,
-                           prov_desc->pd_prov_id, "provider_stats");
+                       ks_name = kmem_asprintf("%s_%d_%u_%s",
+                           "NONAME", 0, prov_desc->pd_prov_id,
+                           "provider_stats");
                 }
  
                 prov_desc->pd_kstat = kstat_create("kcf", 0, ks_name, "crypto",
@@ -261,6 +261,7 @@ crypto_register_provider(crypto_provider_info_t *info,
                         prov_desc->pd_kstat->ks_update = kcf_prov_kstat_update;
                         kstat_install(prov_desc->pd_kstat);
                 }
+               strfree(ks_name);
         }
  
         if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
diff --git a/zfs/module/nvpair/nvpair.c b/zfs/module/nvpair/nvpair.c

index 249b7c94b5822c51787072e0b5b02f515db4b76b..abed33eafef759232a5ee0b74ba3d68f8749e0d9 100644 (file)
--- a/zfs/module/nvpair/nvpair.c
+++ b/zfs/module/nvpair/nvpair.c
@@ -21,7 +21,7 @@
  
  /*
   * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2015, 2017 by Delphix. All rights reserved.
   */
  
  #include <sys/stropts.h>
@@ -916,6 +916,8 @@ nvlist_add_common(nvlist_t *nvl, const char *name,
  
         /* calculate sizes of the nvpair elements and the nvpair itself */
         name_sz = strlen(name) + 1;
+       if (name_sz >= 1ULL << (sizeof (nvp->nvp_name_sz) * NBBY - 1))
+               return (EINVAL);
  
         nvp_sz = NVP_SIZE_CALC(name_sz, value_sz);
  
@@ -1242,6 +1244,7 @@ nvpair_type_is_array(nvpair_t *nvp)
         data_type_t type = NVP_TYPE(nvp);
  
         if ((type == DATA_TYPE_BYTE_ARRAY) ||
+           (type == DATA_TYPE_INT8_ARRAY) ||
             (type == DATA_TYPE_UINT8_ARRAY) ||
             (type == DATA_TYPE_INT16_ARRAY) ||
             (type == DATA_TYPE_UINT16_ARRAY) ||
@@ -2200,8 +2203,10 @@ nvs_embedded(nvstream_t *nvs, nvlist_t *embedded)
  
                 nvlist_init(embedded, embedded->nvl_nvflag, priv);
  
-               if (nvs->nvs_recursion >= nvpair_max_recursion)
+               if (nvs->nvs_recursion >= nvpair_max_recursion) {
+                       nvlist_free(embedded);
                         return (EINVAL);
+               }
                 nvs->nvs_recursion++;
                 if ((err = nvs_operation(nvs, embedded, NULL)) != 0)
                         nvlist_free(embedded);
diff --git a/zfs/module/zfs/abd.c b/zfs/module/zfs/abd.c

index 765ac7fb72e6bea70a46050e2255e0feee9ca8fc..3c7893dcd35e4b518edd21c6131859b99f6bda79 100644 (file)
--- a/zfs/module/zfs/abd.c
+++ b/zfs/module/zfs/abd.c
@@ -571,7 +571,7 @@ static inline void
  abd_free_struct(abd_t *abd)
  {
         kmem_cache_free(abd_cache, abd);
-       ABDSTAT_INCR(abdstat_struct_size, -sizeof (abd_t));
+       ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
  }
  
  /*
@@ -618,7 +618,7 @@ abd_free_scatter(abd_t *abd)
         ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
         ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
         ABDSTAT_INCR(abdstat_scatter_chunk_waste,
-           abd->abd_size - P2ROUNDUP(abd->abd_size, PAGESIZE));
+           (int)abd->abd_size - (int)P2ROUNDUP(abd->abd_size, PAGESIZE));
  
         abd_free_struct(abd);
  }
diff --git a/zfs/module/zfs/bpobj.c b/zfs/module/zfs/bpobj.c

index 82ca94e1d11b105b06e2529acb0bee300c32b701..32459c9a8305a51b4daee90940c4d14ce3754f21 100644 (file)
--- a/zfs/module/zfs/bpobj.c
+++ b/zfs/module/zfs/bpobj.c
@@ -261,7 +261,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
         }
         if (free) {
                 VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
-                   (i + 1) * sizeof (blkptr_t), -1ULL, tx));
+                   (i + 1) * sizeof (blkptr_t), DMU_OBJECT_END, tx));
         }
         if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
                 goto out;
@@ -339,7 +339,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
         if (free) {
                 VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
                     bpo->bpo_phys->bpo_subobjs,
-                   (i + 1) * sizeof (uint64_t), -1ULL, tx));
+                   (i + 1) * sizeof (uint64_t), DMU_OBJECT_END, tx));
         }
  
  out:
diff --git a/zfs/module/zfs/dmu.c b/zfs/module/zfs/dmu.c

index 6f09aa2f7688f791e76a51777c53b6c9a0772c77..05c9fc31fa5fa4d3066a48cc9cb913557f9f6752 100644 (file)
--- a/zfs/module/zfs/dmu.c
+++ b/zfs/module/zfs/dmu.c
@@ -887,7 +887,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
         if (err)
                 return (err);
         ASSERT(offset < UINT64_MAX);
-       ASSERT(size == -1ULL || size <= UINT64_MAX - offset);
+       ASSERT(size == DMU_OBJECT_END || size <= UINT64_MAX - offset);
         dnode_free_range(dn, offset, size, tx);
         dnode_rele(dn, FTAG);
         return (0);
diff --git a/zfs/module/zfs/dmu_objset.c b/zfs/module/zfs/dmu_objset.c

index 9a7a6968d6319163b39ab2e5ea2be6f023ae731c..3425d542f98327bd51796e08d5336e724e889aa6 100644 (file)
--- a/zfs/module/zfs/dmu_objset.c
+++ b/zfs/module/zfs/dmu_objset.c
@@ -1853,6 +1853,7 @@ dmu_objset_space_upgrade(objset_t *os)
                 dmu_tx_hold_bonus(tx, obj);
                 objerr = dmu_tx_assign(tx, TXG_WAIT);
                 if (objerr != 0) {
+                       dmu_buf_rele(db, FTAG);
                         dmu_tx_abort(tx);
                         continue;
                 }
diff --git a/zfs/module/zfs/dmu_send.c b/zfs/module/zfs/dmu_send.c

index 344e42018df1f236c828497e2b74d00980c39ad3..2e3d70629830e215fdc9684e79d0829b5d18c468 100644 (file)
--- a/zfs/module/zfs/dmu_send.c
+++ b/zfs/module/zfs/dmu_send.c
@@ -224,9 +224,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
             (object == dsp->dsa_last_data_object &&
             offset > dsp->dsa_last_data_offset));
  
-       if (length != -1ULL && offset + length < offset)
-               length = -1ULL;
-
         /*
          * If there is a pending op, but it's not PENDING_FREE, push it out,
          * since free block aggregation can only be done for blocks of the
@@ -243,19 +240,22 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
  
         if (dsp->dsa_pending_op == PENDING_FREE) {
                 /*
-                * There should never be a PENDING_FREE if length is -1
-                * (because dump_dnode is the only place where this
-                * function is called with a -1, and only after flushing
-                * any pending record).
+                * There should never be a PENDING_FREE if length is
+                * DMU_OBJECT_END (because dump_dnode is the only place where
+                * this function is called with a DMU_OBJECT_END, and only after
+                * flushing any pending record).
                  */
-               ASSERT(length != -1ULL);
+               ASSERT(length != DMU_OBJECT_END);
                 /*
                  * Check to see whether this free block can be aggregated
                  * with pending one.
                  */
                 if (drrf->drr_object == object && drrf->drr_offset +
                     drrf->drr_length == offset) {
-                       drrf->drr_length += length;
+                       if (offset + length < offset)
+                               drrf->drr_length = DMU_OBJECT_END;
+                       else
+                               drrf->drr_length += length;
                         return (0);
                 } else {
                         /* not a continuation.  Push out pending record */
@@ -269,9 +269,12 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
         dsp->dsa_drr->drr_type = DRR_FREE;
         drrf->drr_object = object;
         drrf->drr_offset = offset;
-       drrf->drr_length = length;
+       if (offset + length < offset)
+               drrf->drr_length = DMU_OBJECT_END;
+       else
+               drrf->drr_length = length;
         drrf->drr_toguid = dsp->dsa_toguid;
-       if (length == -1ULL) {
+       if (length == DMU_OBJECT_END) {
                 if (dump_record(dsp, NULL, 0) != 0)
                         return (SET_ERROR(EINTR));
         } else {
@@ -530,7 +533,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
  
         /* Free anything past the end of the file. */
         if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
-           (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
+           (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
                 return (SET_ERROR(EINTR));
         if (dsp->dsa_err != 0)
                 return (SET_ERROR(EINTR));
@@ -666,7 +669,9 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
         } else if (BP_IS_HOLE(bp)) {
                 uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
                 uint64_t offset = zb->zb_blkid * span;
-               err = dump_free(dsa, zb->zb_object, offset, span);
+               /* Don't dump free records for offsets > DMU_OBJECT_END */
+               if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid)
+                       err = dump_free(dsa, zb->zb_object, offset, span);
         } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
                 return (0);
         } else if (type == DMU_OT_DNODE) {
@@ -2498,7 +2503,7 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
  {
         int err;
  
-       if (drrf->drr_length != -1ULL &&
+       if (drrf->drr_length != DMU_OBJECT_END &&
             drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
                 return (SET_ERROR(EINVAL));
  
diff --git a/zfs/module/zfs/dmu_traverse.c b/zfs/module/zfs/dmu_traverse.c

index c78228d7458876a2bd64f0bb0707225cfbea353f..62f770e9fc22e2aa9dbbdd730dbb7564a11b6850 100644 (file)
--- a/zfs/module/zfs/dmu_traverse.c
+++ b/zfs/module/zfs/dmu_traverse.c
@@ -609,9 +609,17 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
-               if (err != 0)
-                       return (err);
-
-               osp = buf->b_data;
-               traverse_zil(td, &osp->os_zil_header);
-               arc_buf_destroy(buf, &buf);
+               if (err != 0) {
+                       /*
+                        * If both TRAVERSE_HARD and TRAVERSE_PRE are set,
+                        * continue to visitbp so that td_func can be called
+                        * in pre stage, and err will reset to zero.
+                        */
+                       if (!(td->td_flags & TRAVERSE_HARD) ||
+                           !(td->td_flags & TRAVERSE_PRE))
+                               return (err);
+               } else {
+                       osp = buf->b_data;
+                       traverse_zil(td, &osp->os_zil_header);
+                       arc_buf_destroy(buf, &buf);
+               }
         }
  
         if (!(flags & TRAVERSE_PREFETCH_DATA) ||
diff --git a/zfs/module/zfs/dmu_tx.c b/zfs/module/zfs/dmu_tx.c

index 097fa774ad0650aab2597b8e6cb200009600e858..c3cc03a691a7776e0249c63c5798a830905bfae5 100644 (file)
--- a/zfs/module/zfs/dmu_tx.c
+++ b/zfs/module/zfs/dmu_tx.c
@@ -1200,7 +1200,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
  {
         dmu_tx_callback_t *dcb;
  
-       while ((dcb = list_head(cb_list)) != NULL) {
+       while ((dcb = list_tail(cb_list)) != NULL) {
                 list_remove(cb_list, dcb);
                 dcb->dcb_func(dcb->dcb_data, error);
                 kmem_free(dcb, sizeof (dmu_tx_callback_t));
diff --git a/zfs/module/zfs/dmu_zfetch.c b/zfs/module/zfs/dmu_zfetch.c

index 1bf5c4e34d68dcfb819cae57bdd2532bd0f50b60..e72e9ef9cbef62d5f46fccb0ea0c659488741ced 100644 (file)
--- a/zfs/module/zfs/dmu_zfetch.c
+++ b/zfs/module/zfs/dmu_zfetch.c
@@ -228,19 +228,33 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
  
         rw_enter(&zf->zf_rwlock, RW_READER);
  
+       /*
+        * Find matching prefetch stream.  Depending on whether the accesses
+        * are block-aligned, first block of the new access may either follow
+        * the last block of the previous access, or be equal to it.
+        */
         for (zs = list_head(&zf->zf_stream); zs != NULL;
             zs = list_next(&zf->zf_stream, zs)) {
-               if (blkid == zs->zs_blkid) {
+               if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
                         mutex_enter(&zs->zs_lock);
                         /*
                          * zs_blkid could have changed before we
                          * acquired zs_lock; re-check them here.
                          */
-                       if (blkid != zs->zs_blkid) {
-                               mutex_exit(&zs->zs_lock);
-                               continue;
+                       if (blkid == zs->zs_blkid) {
+                               break;
+                       } else if (blkid + 1 == zs->zs_blkid) {
+                               blkid++;
+                               nblks--;
+                               if (nblks == 0) {
+                                       /* Already prefetched this before. */
+                                       mutex_exit(&zs->zs_lock);
+                                       rw_exit(&zf->zf_rwlock);
+                                       return;
+                               }
+                               break;
                         }
-                       break;
+                       mutex_exit(&zs->zs_lock);
                 }
         }
  
diff --git a/zfs/module/zfs/dsl_pool.c b/zfs/module/zfs/dsl_pool.c

index c16708048cc5f8dc9f73c92fe2f895b904a11d15..0320d0e1bc2fc08242384474268623da1570731d 100644 (file)
--- a/zfs/module/zfs/dsl_pool.c
+++ b/zfs/module/zfs/dsl_pool.c
@@ -135,6 +135,36 @@ unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
   */
  int zfs_sync_taskq_batch_pct = 75;
  
+/*
+ * These tunables determine the behavior of how zil_itxg_clean() is
+ * called via zil_clean() in the context of spa_sync(). When an itxg
+ * list needs to be cleaned, TQ_NOSLEEP will be used when dispatching.
+ * If the dispatch fails, the call to zil_itxg_clean() will occur
+ * synchronously in the context of spa_sync(), which can negatively
+ * impact the performance of spa_sync() (e.g. in the case of the itxg
+ * list having a large number of itxs that needs to be cleaned).
+ *
+ * Thus, these tunables can be used to manipulate the behavior of the
+ * taskq used by zil_clean(); they determine the number of taskq entries
+ * that are pre-populated when the taskq is first created (via the
+ * "zfs_zil_clean_taskq_minalloc" tunable) and the maximum number of
+ * taskq entries that are cached after an on-demand allocation (via the
+ * "zfs_zil_clean_taskq_maxalloc").
+ *
+ * The idea being, we want to try reasonably hard to ensure there will
+ * already be a taskq entry pre-allocated by the time that it is needed
+ * by zil_clean(). This way, we can avoid the possibility of an
+ * on-demand allocation of a new taskq entry failing, which would
+ * result in zil_itxg_clean() being called synchronously from zil_clean()
+ * (which can adversely affect performance of spa_sync()).
+ *
+ * Additionally, the number of threads used by the taskq can be
+ * configured via the "zfs_zil_clean_taskq_nthr_pct" tunable.
+ */
+int zfs_zil_clean_taskq_nthr_pct = 100;
+int zfs_zil_clean_taskq_minalloc = 1024;
+int zfs_zil_clean_taskq_maxalloc = 1024 * 1024;
+
  int
  dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
  {
@@ -176,6 +206,12 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
             zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
             TASKQ_THREADS_CPU_PCT);
  
+       dp->dp_zil_clean_taskq = taskq_create("dp_zil_clean_taskq",
+           zfs_zil_clean_taskq_nthr_pct, minclsyspri,
+           zfs_zil_clean_taskq_minalloc,
+           zfs_zil_clean_taskq_maxalloc,
+           TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
+
         mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
  
@@ -334,6 +370,7 @@ dsl_pool_close(dsl_pool_t *dp)
         txg_list_destroy(&dp->dp_sync_tasks);
         txg_list_destroy(&dp->dp_dirty_dirs);
  
+       taskq_destroy(dp->dp_zil_clean_taskq);
         taskq_destroy(dp->dp_sync_taskq);
  
         /*
@@ -1142,5 +1179,18 @@ MODULE_PARM_DESC(zfs_delay_scale, "how quickly delay approaches infinity");
  module_param(zfs_sync_taskq_batch_pct, int, 0644);
  MODULE_PARM_DESC(zfs_sync_taskq_batch_pct,
         "max percent of CPUs that are used to sync dirty data");
+
+module_param(zfs_zil_clean_taskq_nthr_pct, int, 0644);
+MODULE_PARM_DESC(zfs_zil_clean_taskq_nthr_pct,
+       "max percent of CPUs that are used per dp_zil_clean_taskq");
+
+module_param(zfs_zil_clean_taskq_minalloc, int, 0644);
+MODULE_PARM_DESC(zfs_zil_clean_taskq_minalloc,
+       "number of taskq entries that are pre-populated");
+
+module_param(zfs_zil_clean_taskq_maxalloc, int, 0644);
+MODULE_PARM_DESC(zfs_zil_clean_taskq_maxalloc,
+       "max number of taskq entries that are cached");
+
  /* END CSTYLED */
  #endif
diff --git a/zfs/module/zfs/metaslab.c b/zfs/module/zfs/metaslab.c

index 5e413c06518b0dda6b9899822432d655850cd41e..01e5234c7cf526be7c685a45e875da6d00db3518 100644 (file)
--- a/zfs/module/zfs/metaslab.c
+++ b/zfs/module/zfs/metaslab.c
@@ -1937,7 +1937,8 @@ metaslab_passivate(metaslab_t *msp, uint64_t weight)
          * this metaslab again.  In that case, it had better be empty,
          * or we would be leaving space on the table.
          */
-       ASSERT(size >= SPA_MINBLOCKSIZE ||
+       ASSERT(!WEIGHT_IS_SPACEBASED(msp->ms_weight) ||
+           size >= SPA_MINBLOCKSIZE ||
             range_tree_space(msp->ms_tree) == 0);
         ASSERT0(weight & METASLAB_ACTIVE_MASK);
  
diff --git a/zfs/module/zfs/mmp.c b/zfs/module/zfs/mmp.c

index 6f2aa3f5931533d87e6780cd24b9973fd2138aaf..e91ae628ab2e3095cd6302d8bb357668814e7956 100644 (file)
--- a/zfs/module/zfs/mmp.c
+++ b/zfs/module/zfs/mmp.c
@@ -26,6 +26,7 @@
  #include <sys/mmp.h>
  #include <sys/spa.h>
  #include <sys/spa_impl.h>
+#include <sys/time.h>
  #include <sys/vdev.h>
  #include <sys/vdev_impl.h>
  #include <sys/zfs_context.h>
@@ -428,6 +429,10 @@ mmp_thread(spa_t *spa)
                  */
                 if (!suspended && mmp_fail_intervals && multihost &&
                     (start - mmp->mmp_last_write) > max_fail_ns) {
+                       cmn_err(CE_WARN, "MMP writes to pool '%s' have not "
+                           "succeeded in over %llus; suspending pool",
+                           spa_name(spa),
+                           NSEC2SEC(start - mmp->mmp_last_write));
                         zio_suspend(spa, NULL);
                 }
  
diff --git a/zfs/module/zfs/spa.c b/zfs/module/zfs/spa.c

index a7a2f628174bf78543c115d8628f4273d28c418b..00587d8e88d1cee7b94220006bfafe7061d5de7c 100644 (file)
--- a/zfs/module/zfs/spa.c
+++ b/zfs/module/zfs/spa.c
@@ -1561,7 +1561,7 @@ spa_load_spares(spa_t *spa)
  static void
  spa_load_l2cache(spa_t *spa)
  {
-       nvlist_t **l2cache;
+       nvlist_t **l2cache = NULL;
         uint_t nl2cache;
         int i, j, oldnvdevs;
         uint64_t guid;
@@ -1645,7 +1645,9 @@ spa_load_l2cache(spa_t *spa)
         VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
             DATA_TYPE_NVLIST_ARRAY) == 0);
  
-       l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
+       if (sav->sav_count > 0)
+               l2cache = kmem_alloc(sav->sav_count * sizeof (void *),
+                   KM_SLEEP);
         for (i = 0; i < sav->sav_count; i++)
                 l2cache[i] = vdev_config_generate(spa,
                     sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
diff --git a/zfs/module/zfs/spa_config.c b/zfs/module/zfs/spa_config.c

index 5b792b868455b7eda50d99b912c57b4a7f5e7307..5bbfb4ad536268015ab8a3c749d548716cbeb5bb 100644 (file)
--- a/zfs/module/zfs/spa_config.c
+++ b/zfs/module/zfs/spa_config.c
@@ -162,6 +162,11 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
          */
         if (nvl == NULL) {
                 err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+               /*
+                * Don't report an error when the cache file is already removed
+                */
+               if (err == ENOENT)
+                       err = 0;
                 return (err);
         }
  
diff --git a/zfs/module/zfs/spa_stats.c b/zfs/module/zfs/spa_stats.c

index 7ca359806174434e7ad99088fbc6714c1ce22745..8c4dba29e26d23607477546d5f4e921c17471622 100644 (file)
--- a/zfs/module/zfs/spa_stats.c
+++ b/zfs/module/zfs/spa_stats.c
@@ -142,7 +142,7 @@ static void
  spa_read_history_init(spa_t *spa)
  {
         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
-       char name[KSTAT_STRLEN];
+       char *name;
         kstat_t *ksp;
  
         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
@@ -153,7 +153,7 @@ spa_read_history_init(spa_t *spa)
         ssh->size = 0;
         ssh->private = NULL;
  
-       (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+       name = kmem_asprintf("zfs/%s", spa_name(spa));
  
         ksp = kstat_create(name, 0, "reads", "misc",
             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
@@ -168,6 +168,7 @@ spa_read_history_init(spa_t *spa)
                     spa_read_history_data, spa_read_history_addr);
                 kstat_install(ksp);
         }
+       strfree(name);
  }
  
  static void
@@ -365,7 +366,7 @@ static void
  spa_txg_history_init(spa_t *spa)
  {
         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
-       char name[KSTAT_STRLEN];
+       char *name;
         kstat_t *ksp;
  
         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
@@ -376,7 +377,7 @@ spa_txg_history_init(spa_t *spa)
         ssh->size = 0;
         ssh->private = NULL;
  
-       (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+       name = kmem_asprintf("zfs/%s", spa_name(spa));
  
         ksp = kstat_create(name, 0, "txgs", "misc",
             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
@@ -391,6 +392,7 @@ spa_txg_history_init(spa_t *spa)
                     spa_txg_history_data, spa_txg_history_addr);
                 kstat_install(ksp);
         }
+       strfree(name);
  }
  
  static void
@@ -598,7 +600,7 @@ static void
  spa_tx_assign_init(spa_t *spa)
  {
         spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
-       char name[KSTAT_STRLEN];
+       char *name;
         kstat_named_t *ks;
         kstat_t *ksp;
         int i;
@@ -609,7 +611,7 @@ spa_tx_assign_init(spa_t *spa)
         ssh->size = ssh->count * sizeof (kstat_named_t);
         ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
  
-       (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+       name = kmem_asprintf("zfs/%s", spa_name(spa));
  
         for (i = 0; i < ssh->count; i++) {
                 ks = &((kstat_named_t *)ssh->private)[i];
@@ -632,6 +634,7 @@ spa_tx_assign_init(spa_t *spa)
                 ksp->ks_update = spa_tx_assign_update;
                 kstat_install(ksp);
         }
+       strfree(name);
  }
  
  static void
@@ -678,12 +681,12 @@ static void
  spa_io_history_init(spa_t *spa)
  {
         spa_stats_history_t *ssh = &spa->spa_stats.io_history;
-       char name[KSTAT_STRLEN];
+       char *name;
         kstat_t *ksp;
  
         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
  
-       (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+       name = kmem_asprintf("zfs/%s", spa_name(spa));
  
         ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
         ssh->kstat = ksp;
@@ -694,6 +697,7 @@ spa_io_history_init(spa_t *spa)
                 ksp->ks_update = spa_io_history_update;
                 kstat_install(ksp);
         }
+       strfree(name);
  }
  
  static void
@@ -806,7 +810,7 @@ static void
  spa_mmp_history_init(spa_t *spa)
  {
         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
-       char name[KSTAT_STRLEN];
+       char *name;
         kstat_t *ksp;
  
         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
@@ -817,7 +821,7 @@ spa_mmp_history_init(spa_t *spa)
         ssh->size = 0;
         ssh->private = NULL;
  
-       (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+       name = kmem_asprintf("zfs/%s", spa_name(spa));
  
         ksp = kstat_create(name, 0, "multihost", "misc",
             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
@@ -832,6 +836,7 @@ spa_mmp_history_init(spa_t *spa)
                     spa_mmp_history_data, spa_mmp_history_addr);
                 kstat_install(ksp);
         }
+       strfree(name);
  }
  
  static void
diff --git a/zfs/module/zfs/vdev_disk.c b/zfs/module/zfs/vdev_disk.c

index 5ae50a315342d2eb1146bc6ac0a74adec141dccd..aecc3515deab70203f7e5fbd66ec3cc614eee2ab 100644 (file)
--- a/zfs/module/zfs/vdev_disk.c
+++ b/zfs/module/zfs/vdev_disk.c
@@ -98,7 +98,7 @@ static void
  vdev_disk_error(zio_t *zio)
  {
  #ifdef ZFS_DEBUG
-       printk("ZFS: zio error=%d type=%d offset=%llu size=%llu "
+       printk(KERN_WARNING "ZFS: zio error=%d type=%d offset=%llu size=%llu "
             "flags=%x\n", zio->io_error, zio->io_type,
             (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
             zio->io_flags);
diff --git a/zfs/module/zfs/vdev_mirror.c b/zfs/module/zfs/vdev_mirror.c

index 0439e4b46f513363fb09829fa113b078cbdc3b10..d230b4db40ff5375f6bbd00d4f4c0a12ec6b88ff 100644 (file)
--- a/zfs/module/zfs/vdev_mirror.c
+++ b/zfs/module/zfs/vdev_mirror.c
@@ -116,7 +116,8 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
  static int
  vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
  {
-       uint64_t lastoffset;
+       uint64_t last_offset;
+       int64_t offset_diff;
         int load;
  
         /* All DVAs have equal weight at the root. */
@@ -129,13 +130,17 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
          * worse overall when resilvering with compared to without.
          */
  
+       /* Fix zio_offset for leaf vdevs */
+       if (vd->vdev_ops->vdev_op_leaf)
+               zio_offset += VDEV_LABEL_START_SIZE;
+
         /* Standard load based on pending queue length. */
         load = vdev_queue_length(vd);
-       lastoffset = vdev_queue_lastoffset(vd);
+       last_offset = vdev_queue_last_offset(vd);
  
         if (vd->vdev_nonrot) {
                 /* Non-rotating media. */
-               if (lastoffset == zio_offset)
+               if (last_offset == zio_offset)
                         return (load + zfs_vdev_mirror_non_rotating_inc);
  
                 /*
@@ -148,16 +153,16 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
         }
  
         /* Rotating media I/O's which directly follow the last I/O. */
-       if (lastoffset == zio_offset)
+       if (last_offset == zio_offset)
                 return (load + zfs_vdev_mirror_rotating_inc);
  
         /*
          * Apply half the seek increment to I/O's within seek offset
-        * of the last I/O queued to this vdev as they should incur less
+        * of the last I/O issued to this vdev as they should incur less
          * of a seek increment.
          */
-       if (ABS(lastoffset - zio_offset) <
-           zfs_vdev_mirror_rotating_seek_offset)
+       offset_diff = (int64_t)(last_offset - zio_offset);
+       if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset)
                 return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
  
         /* Apply the full seek increment to all other I/O's. */
@@ -382,29 +387,20 @@ vdev_mirror_child_select(zio_t *zio)
                 mm->mm_preferred_cnt++;
         }
  
-       if (mm->mm_preferred_cnt == 1) {
-               vdev_queue_register_lastoffset(
-                   mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
+       if (mm->mm_preferred_cnt == 1)
                 return (mm->mm_preferred[0]);
-       }
  
-       if (mm->mm_preferred_cnt > 1) {
-               int c = vdev_mirror_preferred_child_randomize(zio);
  
-               vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
-               return (c);
-       }
+       if (mm->mm_preferred_cnt > 1)
+               return (vdev_mirror_preferred_child_randomize(zio));
  
         /*
          * Every device is either missing or has this txg in its DTL.
          * Look for any child we haven't already tried before giving up.
          */
         for (c = 0; c < mm->mm_children; c++) {
-               if (!mm->mm_child[c].mc_tried) {
-                       vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
-                           zio);
+               if (!mm->mm_child[c].mc_tried)
                         return (c);
-               }
         }
  
         /*
diff --git a/zfs/module/zfs/vdev_queue.c b/zfs/module/zfs/vdev_queue.c

index 6b3e8729159009749b49c702726a3bc4a0c7d10c..40cba340aafd682d4f066fa7240d36869c3aff88 100644 (file)
--- a/zfs/module/zfs/vdev_queue.c
+++ b/zfs/module/zfs/vdev_queue.c
@@ -393,7 +393,7 @@ vdev_queue_init(vdev_t *vd)
                     sizeof (zio_t), offsetof(struct zio, io_queue_node));
         }
  
-       vq->vq_lastoffset = 0;
+       vq->vq_last_offset = 0;
  }
  
  void
@@ -699,9 +699,8 @@ again:
          */
         tree = vdev_queue_class_tree(vq, p);
         vq->vq_io_search.io_timestamp = 0;
-       vq->vq_io_search.io_offset = vq->vq_last_offset + 1;
-       VERIFY3P(avl_find(tree, &vq->vq_io_search,
-           &idx), ==, NULL);
+       vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
+       VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
         zio = avl_nearest(tree, idx, AVL_AFTER);
         if (zio == NULL)
                 zio = avl_first(tree);
@@ -728,7 +727,7 @@ again:
         }
  
         vdev_queue_pending_add(vq, zio);
-       vq->vq_last_offset = zio->io_offset;
+       vq->vq_last_offset = zio->io_offset + zio->io_size;
  
         return (zio);
  }
@@ -806,7 +805,7 @@ vdev_queue_io_done(zio_t *zio)
  }
  
  /*
- * As these three methods are only used for load calculations we're not
+ * As these two methods are only used for load calculations we're not
   * concerned if we get an incorrect value on 32bit platforms due to lack of
   * vq_lock mutex use here, instead we prefer to keep it lock free for
   * performance.
@@ -818,15 +817,9 @@ vdev_queue_length(vdev_t *vd)
  }
  
  uint64_t
-vdev_queue_lastoffset(vdev_t *vd)
+vdev_queue_last_offset(vdev_t *vd)
  {
-       return (vd->vdev_queue.vq_lastoffset);
-}
-
-void
-vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
-{
-       vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
+       return (vd->vdev_queue.vq_last_offset);
  }
  
  #if defined(_KERNEL) && defined(HAVE_SPL)
diff --git a/zfs/module/zfs/zfs_acl.c b/zfs/module/zfs/zfs_acl.c

index 7ddedeaafb03086227e8d0bbdded5f5b3d2086f9..1fcfca0c726852e2dfcb64d42ee4bd87063c8d2a 100644 (file)
--- a/zfs/module/zfs/zfs_acl.c
+++ b/zfs/module/zfs/zfs_acl.c
@@ -1323,6 +1323,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
         sa_bulk_attr_t          bulk[5];
         uint64_t                ctime[2];
         int                     count = 0;
+       zfs_acl_phys_t          acl_phys;
  
         mode = zp->z_mode;
  
@@ -1369,7 +1370,6 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
         } else { /* Painful legacy way */
                 zfs_acl_node_t *aclnode;
                 uint64_t off = 0;
-               zfs_acl_phys_t acl_phys;
                 uint64_t aoid;
  
                 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
diff --git a/zfs/module/zfs/zfs_dir.c b/zfs/module/zfs/zfs_dir.c

index c6ee30291f7df550e340a8060dfbb74f45bcd373..9a8bbccd92d7a80b20b189fc9bf8bfad3633733f 100644 (file)
--- a/zfs/module/zfs/zfs_dir.c
+++ b/zfs/module/zfs/zfs_dir.c
@@ -977,11 +977,25 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
   * Indicate whether the directory is empty.  Works with or without z_lock
   * held, but can only be consider a hint in the latter case.  Returns true
   * if only "." and ".." remain and there's no work in progress.
+ *
+ * The internal ZAP size, rather than zp->z_size, needs to be checked since
+ * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE.
   */
  boolean_t
  zfs_dirempty(znode_t *dzp)
  {
-       return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
+       zfsvfs_t *zfsvfs = ZTOZSB(dzp);
+       uint64_t count;
+       int error;
+
+       if (dzp->z_dirlocks != NULL)
+               return (B_FALSE);
+
+       error = zap_count(zfsvfs->z_os, dzp->z_id, &count);
+       if (error != 0 || count != 0)
+               return (B_FALSE);
+
+       return (B_TRUE);
  }
  
  int
diff --git a/zfs/module/zfs/zfs_fm.c b/zfs/module/zfs/zfs_fm.c

index 3986b3959dea9221eb7680be04b6cdf255d6fa3b..1c66ed6e0240e51fdc66a9b787a8498827a48306 100644 (file)
--- a/zfs/module/zfs/zfs_fm.c
+++ b/zfs/module/zfs/zfs_fm.c
@@ -455,8 +455,8 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
  
  typedef struct zfs_ecksum_info {
         /* histograms of set and cleared bits by bit number in a 64-bit word */
-       uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY];
-       uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
+       uint32_t zei_histogram_set[sizeof (uint64_t) * NBBY];
+       uint32_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
  
         /* inline arrays of bits set and cleared. */
         uint64_t zei_bits_set[ZFM_MAX_INLINE];
@@ -481,7 +481,7 @@ typedef struct zfs_ecksum_info {
  } zfs_ecksum_info_t;
  
  static void
-update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
+update_histogram(uint64_t value_arg, uint32_t *hist, uint32_t *count)
  {
         size_t i;
         size_t bits = 0;
@@ -490,8 +490,7 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
         /* We store the bits in big-endian (largest-first) order */
         for (i = 0; i < 64; i++) {
                 if (value & (1ull << i)) {
-                       if (hist[63 - i] < UINT16_MAX)
-                               hist[63 - i]++;
+                       hist[63 - i]++;
                         ++bits;
                 }
         }
@@ -649,6 +648,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
         if (badabd == NULL || goodabd == NULL)
                 return (eip);
  
+       ASSERT3U(nui64s, <=, UINT32_MAX);
         ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
         ASSERT3U(size, <=, UINT32_MAX);
@@ -759,10 +759,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
         } else {
                 fm_payload_set(ereport,
                     FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
-                   DATA_TYPE_UINT16_ARRAY,
+                   DATA_TYPE_UINT32_ARRAY,
                     NBBY * sizeof (uint64_t), eip->zei_histogram_set,
                     FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
-                   DATA_TYPE_UINT16_ARRAY,
+                   DATA_TYPE_UINT32_ARRAY,
                     NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
                     NULL);
         }
diff --git a/zfs/module/zfs/zfs_ioctl.c b/zfs/module/zfs/zfs_ioctl.c

index d195eded76dc818f2f5da68677ce53915449ba41..f41e1b9bd2f433fb24f3160931f622b31b1ce63d 100644 (file)
--- a/zfs/module/zfs/zfs_ioctl.c
+++ b/zfs/module/zfs/zfs_ioctl.c
@@ -3738,9 +3738,12 @@ zfs_ioc_rename(zfs_cmd_t *zc)
         boolean_t recursive = zc->zc_cookie & 1;
         char *at;
  
+       /* "zfs rename" from and to ...%recv datasets should both fail */
+       zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
-       if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
-           strchr(zc->zc_value, '%'))
+       if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
+           dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
+           strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
                 return (SET_ERROR(EINVAL));
  
         at = strchr(zc->zc_name, '@');
@@ -5002,6 +5005,11 @@ zfs_ioc_promote(zfs_cmd_t *zc)
         char *cp;
         int error;
  
+       zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
+       if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
+           strchr(zc->zc_name, '%'))
+               return (SET_ERROR(EINVAL));
+
         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
         if (error != 0)
                 return (error);
@@ -5901,20 +5909,26 @@ static int
  zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
  {
         int err;
-       boolean_t force;
+       boolean_t force = B_FALSE;
         spa_t *spa;
  
         if ((err = spa_open(pool, &spa, FTAG)) != 0)
                 return (err);
  
-       force = fnvlist_lookup_boolean_value(innvl, "force");
+       if (innvl) {
+               if (nvlist_lookup_boolean_value(innvl, "force", &force) != 0) {
+                       err = SET_ERROR(EINVAL);
+                       goto out;
+               }
+       }
+
         if (force) {
                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
                 vdev_config_dirty(spa->spa_root_vdev);
                 spa_config_exit(spa, SCL_CONFIG, FTAG);
         }
         txg_wait_synced(spa_get_dsl(spa), 0);
-
+out:
         spa_close(spa, FTAG);
  
         return (err);
diff --git a/zfs/module/zfs/zil.c b/zfs/module/zfs/zil.c

index 4d714cefc758ed75b31c579756f593a37e5907c1..1e3e69d6b28964216d16e1d672407dc6da28e3bd 100644 (file)
--- a/zfs/module/zfs/zil.c
+++ b/zfs/module/zfs/zil.c
@@ -1009,7 +1009,24 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
          * to clean up in the event of allocation failure or I/O failure.
          */
         tx = dmu_tx_create(zilog->zl_os);
-       VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0);
+
+       /*
+        * Since we are not going to create any new dirty data and we can even
+        * help with clearing the existing dirty data, we should not be subject
+        * to the dirty data based delays.
+        * We (ab)use TXG_WAITED to bypass the delay mechanism.
+        * One side effect from using TXG_WAITED is that dmu_tx_assign() can
+        * fail if the pool is suspended.  Those are dramatic circumstances,
+        * so we return NULL to signal that the normal ZIL processing is not
+        * possible and txg_wait_synced() should be used to ensure that the data
+        * is on disk.
+        */
+       error = dmu_tx_assign(tx, TXG_WAITED);
+       if (error != 0) {
+               ASSERT3S(error, ==, EIO);
+               dmu_tx_abort(tx);
+               return (NULL);
+       }
         dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
         txg = dmu_tx_get_txg(tx);
  
@@ -1435,8 +1452,7 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
                 return;
         }
         ASSERT3U(itxg->itxg_txg, <=, synced_txg);
-       ASSERT(itxg->itxg_txg != 0);
-       ASSERT(zilog->zl_clean_taskq != NULL);
+       ASSERT3U(itxg->itxg_txg, !=, 0);
         clean_me = itxg->itxg_itxs;
         itxg->itxg_itxs = NULL;
         itxg->itxg_txg = 0;
@@ -1447,8 +1463,11 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
          * free it in-line. This should be rare. Note, using TQ_SLEEP
          * created a bad performance problem.
          */
-       if (taskq_dispatch(zilog->zl_clean_taskq,
-           (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP) == 0)
+       ASSERT3P(zilog->zl_dmu_pool, !=, NULL);
+       ASSERT3P(zilog->zl_dmu_pool->dp_zil_clean_taskq, !=, NULL);
+       taskqid_t id = taskq_dispatch(zilog->zl_dmu_pool->dp_zil_clean_taskq,
+           (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP);
+       if (id == TASKQID_INVALID)
                 zil_itxg_clean(clean_me);
  }
  
@@ -1921,13 +1940,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
  {
         zilog_t *zilog = dmu_objset_zil(os);
  
-       ASSERT(zilog->zl_clean_taskq == NULL);
         ASSERT(zilog->zl_get_data == NULL);
         ASSERT(list_is_empty(&zilog->zl_lwb_list));
  
         zilog->zl_get_data = get_data;
-       zilog->zl_clean_taskq = taskq_create("zil_clean", 1, defclsyspri,
-           2, 2, TASKQ_PREPOPULATE);
  
         return (zilog);
  }
@@ -1962,8 +1978,6 @@ zil_close(zilog_t *zilog)
         if (txg < spa_freeze_txg(zilog->zl_spa))
                 VERIFY(!zilog_is_dirty(zilog));
  
-       taskq_destroy(zilog->zl_clean_taskq);
-       zilog->zl_clean_taskq = NULL;
         zilog->zl_get_data = NULL;
  
         /*
diff --git a/zfs/module/zfs/zle.c b/zfs/module/zfs/zle.c

index 13c5673fbe2671ab597600e0828a5356aba3a656..613607faaa97e99e187cb9f0f5bb04a320f769b4 100644 (file)
--- a/zfs/module/zfs/zle.c
+++ b/zfs/module/zfs/zle.c
@@ -74,10 +74,14 @@ zle_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
         while (src < s_end && dst < d_end) {
                 int len = 1 + *src++;
                 if (len <= n) {
+                       if (src + len > s_end || dst + len > d_end)
+                               return (-1);
                         while (len-- != 0)
                                 *dst++ = *src++;
                 } else {
                         len -= n;
+                       if (dst + len > d_end)
+                               return (-1);
                         while (len-- != 0)
                                 *dst++ = 0;
                 }
diff --git a/zfs/module/zfs/zvol.c b/zfs/module/zfs/zvol.c

index 5293f95fb02014341a6c59122b89d07948a9c46a..5b62bf94f82f7ea8098a1ba1308366d320f5d3c4 100644 (file)
--- a/zfs/module/zfs/zvol.c
+++ b/zfs/module/zfs/zvol.c
@@ -1347,9 +1347,9 @@ zvol_open(struct block_device *bdev, fmode_t flag)
  {
         zvol_state_t *zv;
         int error = 0;
-       boolean_t drop_suspend = B_FALSE;
+       boolean_t drop_suspend = B_TRUE;
  
-       ASSERT(!mutex_owned(&zvol_state_lock));
+       ASSERT(!MUTEX_HELD(&zvol_state_lock));
  
         mutex_enter(&zvol_state_lock);
         /*
@@ -1364,23 +1364,31 @@ zvol_open(struct block_device *bdev, fmode_t flag)
                 return (SET_ERROR(-ENXIO));
         }
  
-       /* take zv_suspend_lock before zv_state_lock */
-       rw_enter(&zv->zv_suspend_lock, RW_READER);
-
         mutex_enter(&zv->zv_state_lock);
-
         /*
          * make sure zvol is not suspended during first open
-        * (hold zv_suspend_lock), otherwise, drop the lock
+        * (hold zv_suspend_lock) and respect proper lock acquisition
+        * ordering - zv_suspend_lock before zv_state_lock
          */
         if (zv->zv_open_count == 0) {
-               drop_suspend = B_TRUE;
+               if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
+                       mutex_exit(&zv->zv_state_lock);
+                       rw_enter(&zv->zv_suspend_lock, RW_READER);
+                       mutex_enter(&zv->zv_state_lock);
+                       /* check to see if zv_suspend_lock is needed */
+                       if (zv->zv_open_count != 0) {
+                               rw_exit(&zv->zv_suspend_lock);
+                               drop_suspend = B_FALSE;
+                       }
+               }
         } else {
-               rw_exit(&zv->zv_suspend_lock);
+               drop_suspend = B_FALSE;
         }
-
         mutex_exit(&zvol_state_lock);
  
+       ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+       ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
+
         if (zv->zv_open_count == 0) {
                 error = zvol_first_open(zv);
                 if (error)
@@ -1417,28 +1425,38 @@ static int
  zvol_release(struct gendisk *disk, fmode_t mode)
  {
         zvol_state_t *zv;
-       boolean_t drop_suspend = B_FALSE;
+       boolean_t drop_suspend = B_TRUE;
  
-       ASSERT(!mutex_owned(&zvol_state_lock));
+       ASSERT(!MUTEX_HELD(&zvol_state_lock));
  
         mutex_enter(&zvol_state_lock);
         zv = disk->private_data;
-       ASSERT(zv && zv->zv_open_count > 0);
-
-       /* take zv_suspend_lock before zv_state_lock */
-       rw_enter(&zv->zv_suspend_lock, RW_READER);
  
         mutex_enter(&zv->zv_state_lock);
-       mutex_exit(&zvol_state_lock);
-
+       ASSERT(zv->zv_open_count > 0);
         /*
          * make sure zvol is not suspended during last close
-        * (hold zv_suspend_lock), otherwise, drop the lock
+        * (hold zv_suspend_lock) and respect proper lock acquisition
+        * ordering - zv_suspend_lock before zv_state_lock
          */
-       if (zv->zv_open_count == 1)
-               drop_suspend = B_TRUE;
-       else
-               rw_exit(&zv->zv_suspend_lock);
+       if (zv->zv_open_count == 1) {
+               if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
+                       mutex_exit(&zv->zv_state_lock);
+                       rw_enter(&zv->zv_suspend_lock, RW_READER);
+                       mutex_enter(&zv->zv_state_lock);
+                       /* check to see if zv_suspend_lock is needed */
+                       if (zv->zv_open_count != 1) {
+                               rw_exit(&zv->zv_suspend_lock);
+                               drop_suspend = B_FALSE;
+                       }
+               }
+       } else {
+               drop_suspend = B_FALSE;
+       }
+       mutex_exit(&zvol_state_lock);
+
+       ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+       ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
  
         zv->zv_open_count--;
         if (zv->zv_open_count == 0)
author	Colin Ian King <colin.king@canonical.com>
	Tue, 17 Apr 2018 11:04:00 +0000 (13:04 +0200)
committer	Stefan Bader <stefan.bader@canonical.com>
	Tue, 22 May 2018 14:17:58 +0000 (16:17 +0200)
zfs/META		patch \| blob \| blame \| history
zfs/include/sys/dmu.h		patch \| blob \| blame \| history
zfs/include/sys/dmu_tx.h		patch \| blob \| blame \| history
zfs/include/sys/dsl_pool.h		patch \| blob \| blame \| history
zfs/include/sys/trace_dmu.h		patch \| blob \| blame \| history
zfs/include/sys/vdev.h		patch \| blob \| blame \| history
zfs/include/sys/vdev_impl.h		patch \| blob \| blame \| history
zfs/include/sys/vdev_raidz_impl.h		patch \| blob \| blame \| history
zfs/include/sys/zil_impl.h		patch \| blob \| blame \| history
zfs/module/icp/asm-x86_64/aes/aes_intel.S		patch \| blob \| blame \| history
zfs/module/icp/asm-x86_64/modes/gcm_intel.S		patch \| blob \| blame \| history
zfs/module/icp/spi/kcf_spi.c		patch \| blob \| blame \| history
zfs/module/nvpair/nvpair.c		patch \| blob \| blame \| history
zfs/module/zfs/abd.c		patch \| blob \| blame \| history
zfs/module/zfs/bpobj.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu_objset.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu_send.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu_traverse.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu_tx.c		patch \| blob \| blame \| history
zfs/module/zfs/dmu_zfetch.c		patch \| blob \| blame \| history
zfs/module/zfs/dsl_pool.c		patch \| blob \| blame \| history
zfs/module/zfs/metaslab.c		patch \| blob \| blame \| history
zfs/module/zfs/mmp.c		patch \| blob \| blame \| history
zfs/module/zfs/spa.c		patch \| blob \| blame \| history
zfs/module/zfs/spa_config.c		patch \| blob \| blame \| history
zfs/module/zfs/spa_stats.c		patch \| blob \| blame \| history
zfs/module/zfs/vdev_disk.c		patch \| blob \| blame \| history
zfs/module/zfs/vdev_mirror.c		patch \| blob \| blame \| history
zfs/module/zfs/vdev_queue.c		patch \| blob \| blame \| history
zfs/module/zfs/zfs_acl.c		patch \| blob \| blame \| history
zfs/module/zfs/zfs_dir.c		patch \| blob \| blame \| history
zfs/module/zfs/zfs_fm.c		patch \| blob \| blame \| history
zfs/module/zfs/zfs_ioctl.c		patch \| blob \| blame \| history
zfs/module/zfs/zil.c		patch \| blob \| blame \| history
zfs/module/zfs/zle.c		patch \| blob \| blame \| history
zfs/module/zfs/zvol.c		patch \| blob \| blame \| history