]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Illumos #3112, #3113, #3114
authorMatthew Ahrens <mahrens@delphix.com>
Thu, 16 May 2013 21:18:06 +0000 (14:18 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 5 Nov 2013 20:14:48 +0000 (12:14 -0800)
3112 ztest does not honor ZFS_DEBUG
3113 ztest should use watchpoints to protect frozen arc bufs
3114 some leaked nvlists in zfsdev_ioctl

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Matt Amdur <Matt.Amdur@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <chris.siden@delphix.com>
Approved by: Eric Schrock <eric.schrock@delphix.com>

References:
  https://www.illumos.org/issues/3112
  https://www.illumos.org/issues/3113
  https://www.illumos.org/issues/3114
  illumos/illumos-gate@cd1c8b85eb30b568e9816221430c479ace7a559d

The /proc/self/cmd watchpoint interface is specific to Solaris.
Therefore, the #3113 implementation was reworked to use the more
portable mprotect(2) system call.  When the pages are watched they
are marked read-only for protection.  Any write to the protected
address range immediately trigger a SIGSEGV.  The pages are marked
writable again when they are unwatched.

Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1489

cmd/ztest/ztest.c
include/sys/arc.h
include/sys/zfs_context.h
module/zfs/arc.c
module/zfs/spa_misc.c
module/zfs/zio.c

index 7b09532c05908c59eae34d9171e33577e35b41bf..3b26f8d1eb75ab5745fb1ad523e89b5ff03c3085 100644 (file)
@@ -6210,11 +6210,12 @@ main(int argc, char **argv)
 
        (void) setvbuf(stdout, NULL, _IOLBF, 0);
 
+       dprintf_setup(&argc, argv);
+
        ztest_fd_rand = open("/dev/urandom", O_RDONLY);
        ASSERT3S(ztest_fd_rand, >=, 0);
 
        if (!fd_data_str) {
-               dprintf_setup(&argc, argv);
                process_options(argc, argv);
 
                setup_data_fd();
index efafb551acfb601a20b3b4754e4db49f9770646e..221946da30a6330509c09366cf69137b2ddcd5d5 100644 (file)
@@ -136,6 +136,7 @@ int arc_buf_size(arc_buf_t *buf);
 void arc_release(arc_buf_t *buf, void *tag);
 int arc_released(arc_buf_t *buf);
 int arc_has_callback(arc_buf_t *buf);
+void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
 void arc_buf_freeze(arc_buf_t *buf);
 void arc_buf_thaw(arc_buf_t *buf);
 boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
@@ -183,6 +184,10 @@ extern int zfs_write_limit_shift;
 extern unsigned long zfs_write_limit_max;
 extern kmutex_t zfs_write_limit_lock;
 
+#ifndef _KERNEL
+extern boolean_t arc_watch;
+#endif
+
 #ifdef __cplusplus
 }
 #endif
index adb152f58b78fac45a275a58c922323352fa2450..28a1306aa41938a0925b6a01ab603f0dad4af6c0 100644 (file)
@@ -97,6 +97,8 @@
 #include <dirent.h>
 #include <time.h>
 #include <ctype.h>
+#include <signal.h>
+#include <sys/mman.h>
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/cred.h>
index 04f87a810b18c7abf70f26eb68f8bf17163c413b..5bba9a317e7d7b645783153035accafcb95dec3a 100644 (file)
 #include <sys/dmu_tx.h>
 #include <zfs_fletcher.h>
 
+#ifndef _KERNEL
+/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
+boolean_t arc_watch = B_FALSE;
+#endif
+
 static kmutex_t                arc_reclaim_thr_lock;
 static kcondvar_t      arc_reclaim_thr_cv;     /* used to signal reclaim thr */
 static uint8_t         arc_thread_exit;
@@ -569,6 +574,7 @@ static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
 static int arc_evict_needed(arc_buf_contents_t type);
 static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
     arc_buf_contents_t type);
+static void arc_buf_watch(arc_buf_t *buf);
 
 static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
 
@@ -1060,6 +1066,37 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
        fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
            buf->b_hdr->b_freeze_cksum);
        mutex_exit(&buf->b_hdr->b_freeze_lock);
+       arc_buf_watch(buf);
+}
+
+#ifndef _KERNEL
+void
+arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
+{
+       panic("Got SIGSEGV at address: 0x%lx\n", (long) si->si_addr);
+}
+#endif
+
+/* ARGSUSED */
+static void
+arc_buf_unwatch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+       if (arc_watch) {
+               ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size,
+                   PROT_READ | PROT_WRITE));
+       }
+#endif
+}
+
+/* ARGSUSED */
+static void
+arc_buf_watch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+       if (arc_watch)
+               ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size, PROT_READ));
+#endif
 }
 
 void
@@ -1080,6 +1117,8 @@ arc_buf_thaw(arc_buf_t *buf)
        }
 
        mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+       arc_buf_unwatch(buf);
 }
 
 void
@@ -1097,6 +1136,7 @@ arc_buf_freeze(arc_buf_t *buf)
            buf->b_hdr->b_state == arc_anon);
        arc_cksum_compute(buf, B_FALSE);
        mutex_exit(hash_lock);
+
 }
 
 static void
@@ -1504,21 +1544,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
  * the buffer is placed on l2arc_free_on_write to be freed later.
  */
 static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
-    void *data, size_t size)
+arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
 {
+       arc_buf_hdr_t *hdr = buf->b_hdr;
+
        if (HDR_L2_WRITING(hdr)) {
                l2arc_data_free_t *df;
                df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
-               df->l2df_data = data;
-               df->l2df_size = size;
+               df->l2df_data = buf->b_data;
+               df->l2df_size = hdr->b_size;
                df->l2df_func = free_func;
                mutex_enter(&l2arc_free_on_write_mtx);
                list_insert_head(l2arc_free_on_write, df);
                mutex_exit(&l2arc_free_on_write_mtx);
                ARCSTAT_BUMP(arcstat_l2_free_on_write);
        } else {
-               free_func(data, size);
+               free_func(buf->b_data, hdr->b_size);
        }
 }
 
@@ -1534,16 +1575,15 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
                arc_buf_contents_t type = buf->b_hdr->b_type;
 
                arc_cksum_verify(buf);
+               arc_buf_unwatch(buf);
 
                if (!recycle) {
                        if (type == ARC_BUFC_METADATA) {
-                               arc_buf_data_free(buf->b_hdr, zio_buf_free,
-                                   buf->b_data, size);
+                               arc_buf_data_free(buf, zio_buf_free);
                                arc_space_return(size, ARC_SPACE_DATA);
                        } else {
                                ASSERT(type == ARC_BUFC_DATA);
-                               arc_buf_data_free(buf->b_hdr,
-                                   zio_data_buf_free, buf->b_data, size);
+                               arc_buf_data_free(buf, zio_data_buf_free);
                                ARCSTAT_INCR(arcstat_data_size, -size);
                                atomic_add_64(&arc_size, -size);
                        }
@@ -2908,6 +2948,7 @@ arc_read_done(zio_t *zio)
        }
 
        arc_cksum_compute(buf, B_FALSE);
+       arc_buf_watch(buf);
 
        if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
                /*
@@ -3542,6 +3583,7 @@ arc_release(arc_buf_t *buf, void *tag)
                }
                hdr->b_datacnt -= 1;
                arc_cksum_verify(buf);
+               arc_buf_unwatch(buf);
 
                mutex_exit(hash_lock);
 
index 7bdfbf7389fe2b96b25317970eb36cf582668b11..91e7fdf3580b7a57a4a5d8336fddeed4b6183abe 100644 (file)
@@ -1630,6 +1630,23 @@ spa_init(int mode)
 
        spa_mode_global = mode;
 
+#ifndef _KERNEL
+       if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+               struct sigaction sa;
+
+               sa.sa_flags = SA_SIGINFO;
+               sigemptyset(&sa.sa_mask);
+               sa.sa_sigaction = arc_buf_sigsegv;
+
+               if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+                       perror("could not enable watchpoints: "
+                           "sigaction(SIGSEGV, ...) = ");
+               } else {
+                       arc_watch = B_TRUE;
+               }
+       }
+#endif
+
        fm_init();
        refcount_init();
        unique_init();
index 1f803a44b25b1cd4c01ce4a50107e960ef62ac0a..f0c9c0f08d09f147d9e99dd15cc57e470487151c 100644 (file)
@@ -169,11 +169,21 @@ zio_init(void)
                while (p2 & (p2 - 1))
                        p2 &= p2 - 1;
 
+#ifndef _KERNEL
+               /*
+                * If we are using watchpoints, put each buffer on its own page,
+                * to eliminate the performance overhead of trapping to the
+                * kernel when modifying a non-watched buffer that shares the
+                * page with a watched buffer.
+                */
+               if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
+                       continue;
+#endif
                if (size <= 4 * SPA_MINBLOCKSIZE) {
                        align = SPA_MINBLOCKSIZE;
-               } else if (P2PHASE(size, PAGESIZE) == 0) {
+               } else if (IS_P2ALIGNED(size, PAGESIZE)) {
                        align = PAGESIZE;
-               } else if (P2PHASE(size, p2 >> 2) == 0) {
+               } else if (IS_P2ALIGNED(size, p2 >> 2)) {
                        align = p2 >> 2;
                }