]> git.proxmox.com Git - pve-kernel-2.6.32.git/commitdiff
backport dlm fixes from linux 3.y
authorDietmar Maurer <dietmar@proxmox.com>
Thu, 25 Aug 2011 12:41:45 +0000 (14:41 +0200)
committerDietmar Maurer <dietmar@proxmox.com>
Thu, 25 Aug 2011 12:41:45 +0000 (14:41 +0200)
12 files changed:
Makefile
README
changelog.Debian
dlm-Make-DLM-depend-on-CONFIGFS_FS.patch [new file with mode: 0644]
dlm-Remove-superfluous-call-to-recalc_sigpending.patch [new file with mode: 0644]
dlm-Use-cmwq-for-send-and-receive-workqueues.patch [new file with mode: 0644]
dlm-delayed-reply-message-warning.patch [new file with mode: 0644]
dlm-increase-default-hash-table-sizes.patch [new file with mode: 0644]
dlm-make-plock-operation-killable.patch [new file with mode: 0644]
dlm-remove-shared-message-stub-for-recovery.patch [new file with mode: 0644]
dlm-sanitize-work_start-in-lowcomms.c.patch [new file with mode: 0644]
dlm-use-single-thread-workqueues.patch [new file with mode: 0644]

index 06a8ac8798d61cbaa491fac8970b492bfbc7f9c4..48f31328d9d39be7f24207c57a7fb325df0f6e7c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 RELEASE=2.0
 
 KERNEL_VER=2.6.32
-PKGREL=42
+PKGREL=43
 # also include firmware of previous versrion into 
 # the fw package:  fwlist-2.6.32-PREV-pve
 KREL=6
@@ -102,10 +102,10 @@ data: .compile_mark ${KERNEL_CFG} arcmsr.ko aoe.ko e1000e.ko igb.ko ixgbe.ko
        install -m 644 e1000e.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/e1000e/
        # install latest ibg driver
        install -m 644 igb.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/igb/
-       # install bnx2 and tg3 drivers
+       # install bnx2 drivers
        #install -m 644 bnx2.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
-       #install -m 644 bnx2x.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
        #install -m 644 cnic.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
+       #install -m 644 bnx2x.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/bnx2x/
        # install areca driver
        install -m 644 arcmsr.ko tmp/lib/modules/${KVNAME}/kernel/drivers/scsi/arcmsr/
        # remove firmware
@@ -124,7 +124,7 @@ data: .compile_mark ${KERNEL_CFG} arcmsr.ko aoe.ko e1000e.ko igb.ko ixgbe.ko
 
 ${KERNEL_CFG}: ${KERNEL_CFG_ORG} config-${KERNEL_VER}.diff
        cp ${KERNEL_CFG_ORG} ${KERNEL_CFG}.new
-       patch ${KERNEL_CFG}.new config-${KERNEL_VER}.diff
+       patch --no-backup ${KERNEL_CFG}.new config-${KERNEL_VER}.diff
        mv ${KERNEL_CFG}.new ${KERNEL_CFG}
 
 ${KERNEL_SRC}/README: ${KERNEL_SRC}.org/README
@@ -136,6 +136,16 @@ ${KERNEL_SRC}/README: ${KERNEL_SRC}.org/README
        cd ${KERNEL_SRC}; patch -p1 <../SCSI-aacraid-Add-PMC-Sierra-SRC-based-controller.patch
        cd ${KERNEL_SRC}; patch -p1 <../fix-register-corruption-in-pvclock-scale-delta.patch
        cd ${KERNEL_SRC}; patch -p1 <../bridge-patch.diff
+       # backport dlm fixes from linux 3.y (those are included in RHEL 6.2)
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-Make-DLM-depend-on-CONFIGFS_FS.patch        
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-increase-default-hash-table-sizes.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-Use-cmwq-for-send-and-receive-workqueues.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-sanitize-work_start-in-lowcomms.c.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-use-single-thread-workqueues.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-Remove-superfluous-call-to-recalc_sigpending.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-delayed-reply-message-warning.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-remove-shared-message-stub-for-recovery.patch
+       cd ${KERNEL_SRC}; patch -p1 <../dlm-make-plock-operation-killable.patch
        #cd ${KERNEL_SRC}; patch -p1 <../ovz-fix-slow-fsync.patch
        sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
        touch $@
@@ -278,3 +288,5 @@ clean:
        rm -rf *~ .compile_mark ${KERNEL_CFG} ${KERNEL_SRC} tmp data proxmox-ve/data *.deb ${AOEDIR} aoe.ko ${headers_tmp} fwdata fwlist.tmp *.ko ${IXGBEDIR} ${E1000EDIR} e1000e.ko ${IGBDIR} igb.ko fwlist-${KVNAME} ${ARECADIR} arcmsr.ko
 
 
+
+
diff --git a/README b/README
index b31cc237ed1c5c617272e6b1ed66bc44bf8bb67c..90849f2c0658e0575955187f8e902221fa53e263 100644 (file)
--- a/README
+++ b/README
@@ -23,6 +23,12 @@ Additional/Updated Modules:
 
   * ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/DRIVER/SourceCode/arcmsr.1.20.0X.15-110330.zip
 
+#- include latest broadcom bnx2 drivers
+#        
+#  * original file linux-6.2.23.zip contains
+#    netxtreme2-6.2.23.tar.gz (added to repository)
+
+
 FIRMWARE:
 =========
 
index eabd3fa8e0c298bfcbabf94e7ba7c27237fdcd6b..7565ccd52976c75aa0f9a9638ecdd88152841795 100644 (file)
@@ -1,3 +1,9 @@
+pve-kernel-2.6.32 (2.6.32-43) unstable; urgency=low
+
+  * backport patches for dlm
+
+ -- Proxmox Support Team <support@proxmox.com>  Thu, 25 Aug 2011 10:35:05 +0200
+
 pve-kernel-2.6.32 (2.6.32-42) unstable; urgency=low
 
   * update to vzkernel-2.6.32-042stab033.1.src.rpm
diff --git a/dlm-Make-DLM-depend-on-CONFIGFS_FS.patch b/dlm-Make-DLM-depend-on-CONFIGFS_FS.patch
new file mode 100644 (file)
index 0000000..f98cc85
--- /dev/null
@@ -0,0 +1,39 @@
+From 86c747d2a4f028fe2fdf091c3a81d0e187827682 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Sun, 16 Jan 2011 21:14:52 +0000
+Subject: [PATCH 1/1] dlm: Make DLM depend on CONFIGFS_FS
+
+This patch fixes the following kconfig error after changing
+CONFIGFS_FS -> select SYSFS:
+
+fs/sysfs/Kconfig:1:error: recursive dependency detected!
+fs/sysfs/Kconfig:1:    symbol SYSFS is selected by CONFIGFS_FS
+fs/configfs/Kconfig:1: symbol CONFIGFS_FS is selected by DLM
+fs/dlm/Kconfig:1:      symbol DLM depends on SYSFS
+
+Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Randy Dunlap <randy.dunlap@oracle.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: James Bottomley <James.Bottomley@suse.de>
+---
+ fs/dlm/Kconfig |    3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
+index 2dbb422..1897eb1 100644
+--- a/fs/dlm/Kconfig
++++ b/fs/dlm/Kconfig
+@@ -1,8 +1,7 @@
+ menuconfig DLM
+       tristate "Distributed Lock Manager (DLM)"
+       depends on EXPERIMENTAL && INET
+-      depends on SYSFS && (IPV6 || IPV6=n)
+-      select CONFIGFS_FS
++      depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
+       select IP_SCTP
+       help
+       A general purpose distributed lock manager for kernel or userspace
+-- 
+1.7.2.5
+
diff --git a/dlm-Remove-superfluous-call-to-recalc_sigpending.patch b/dlm-Remove-superfluous-call-to-recalc_sigpending.patch
new file mode 100644 (file)
index 0000000..04c54d1
--- /dev/null
@@ -0,0 +1,29 @@
+From 4bcad6c1ef53a9a0224f4654ceb3b9030d0769ec Mon Sep 17 00:00:00 2001
+From: Matt Fleming <matt.fleming@linux.intel.com>
+Date: Thu, 24 Mar 2011 13:56:47 +0000
+Subject: [PATCH 1/1] dlm: Remove superfluous call to recalc_sigpending()
+
+recalc_sigpending() is called within sigprocmask(), so there is no
+need call it again after sigprocmask() has returned.
+
+Signed-off-by: Matt Fleming <matt.fleming@linux.intel.com>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/user.c |    1 -
+ 1 files changed, 0 insertions(+), 1 deletions(-)
+
+diff --git a/fs/dlm/user.c b/fs/dlm/user.c
+index d5ab3fe..e96bf3e 100644
+--- a/fs/dlm/user.c
++++ b/fs/dlm/user.c
+@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
+  out_sig:
+       sigprocmask(SIG_SETMASK, &tmpsig, NULL);
+-      recalc_sigpending();
+  out_free:
+       kfree(kbuf);
+       return error;
+-- 
+1.7.2.5
+
diff --git a/dlm-Use-cmwq-for-send-and-receive-workqueues.patch b/dlm-Use-cmwq-for-send-and-receive-workqueues.patch
new file mode 100644 (file)
index 0000000..c04ef08
--- /dev/null
@@ -0,0 +1,48 @@
+From dcce240ead802d42b1e45ad2fcb2ed4a399cb255 Mon Sep 17 00:00:00 2001
+From: Steven Whitehouse <swhiteho@redhat.com>
+Date: Fri, 12 Nov 2010 12:12:29 +0000
+Subject: [PATCH 1/1] dlm: Use cmwq for send and receive workqueues
+
+So far as I can tell, there is no reason to use a single-threaded
+send workqueue for dlm, since it may need to send to several sockets
+concurrently. Both workqueues are set to WQ_MEM_RECLAIM to avoid
+any possible deadlocks, WQ_HIGHPRI since locking traffic is highly
+latency sensitive (and to avoid a priority inversion wrt GFS2's
+glock_workqueue) and WQ_FREEZABLE just in case someone needs to do
+that (even though with current cluster infrastructure, it doesn't
+make sense as the node will most likely land up ejected from the
+cluster) in the future.
+
+Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c |    6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 77720f8..1d4e644 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1451,14 +1451,16 @@ static void work_stop(void)
+ static int work_start(void)
+ {
+       int error;
+-      recv_workqueue = create_workqueue("dlm_recv");
++      recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
++                                       WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+       error = IS_ERR(recv_workqueue);
+       if (error) {
+               log_print("can't start dlm_recv %d", error);
+               return error;
+       }
+-      send_workqueue = create_singlethread_workqueue("dlm_send");
++      send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
++                                       WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+       error = IS_ERR(send_workqueue);
+       if (error) {
+               log_print("can't start dlm_send %d", error);
+-- 
+1.7.2.5
+
diff --git a/dlm-delayed-reply-message-warning.patch b/dlm-delayed-reply-message-warning.patch
new file mode 100644 (file)
index 0000000..3e5bff5
--- /dev/null
@@ -0,0 +1,304 @@
+From c6ff669bac5c409f4cb74366248f51b73f7d6feb Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Mon, 28 Mar 2011 14:17:26 -0500
+Subject: [PATCH 1/1] dlm: delayed reply message warning
+
+Add an option (disabled by default) to print a warning message
+when a lock has been waiting a configurable amount of time for
+a reply message from another node.  This is mainly for debugging.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/config.c       |    9 ++++-
+ fs/dlm/config.h       |    1 +
+ fs/dlm/dlm_internal.h |    2 +
+ fs/dlm/lock.c         |  100 +++++++++++++++++++++++++++++++++++++++++++++---
+ fs/dlm/lock.h         |    1 +
+ fs/dlm/lockspace.c    |    6 +-
+ 6 files changed, 108 insertions(+), 11 deletions(-)
+
+diff --git a/fs/dlm/config.c b/fs/dlm/config.c
+index 0d329ff..9b026ea 100644
+--- a/fs/dlm/config.c
++++ b/fs/dlm/config.c
+@@ -100,6 +100,7 @@ struct dlm_cluster {
+       unsigned int cl_log_debug;
+       unsigned int cl_protocol;
+       unsigned int cl_timewarn_cs;
++      unsigned int cl_waitwarn_us;
+ };
+ enum {
+@@ -114,6 +115,7 @@ enum {
+       CLUSTER_ATTR_LOG_DEBUG,
+       CLUSTER_ATTR_PROTOCOL,
+       CLUSTER_ATTR_TIMEWARN_CS,
++      CLUSTER_ATTR_WAITWARN_US,
+ };
+ struct cluster_attribute {
+@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
+ CLUSTER_ATTR(log_debug, 0);
+ CLUSTER_ATTR(protocol, 0);
+ CLUSTER_ATTR(timewarn_cs, 1);
++CLUSTER_ATTR(waitwarn_us, 0);
+ static struct configfs_attribute *cluster_attrs[] = {
+       [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
+       [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+       [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+       [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
++      [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
+       NULL,
+ };
+@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
+       cl->cl_log_debug = dlm_config.ci_log_debug;
+       cl->cl_protocol = dlm_config.ci_protocol;
+       cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
++      cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+       space_list = &sps->ss_group;
+       comm_list = &cms->cs_group;
+@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
+ #define DEFAULT_LOG_DEBUG          0
+ #define DEFAULT_PROTOCOL           0
+ #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
++#define DEFAULT_WAITWARN_US      0
+ struct dlm_config_info dlm_config = {
+       .ci_tcp_port = DEFAULT_TCP_PORT,
+@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
+       .ci_scan_secs = DEFAULT_SCAN_SECS,
+       .ci_log_debug = DEFAULT_LOG_DEBUG,
+       .ci_protocol = DEFAULT_PROTOCOL,
+-      .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
++      .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
++      .ci_waitwarn_us = DEFAULT_WAITWARN_US
+ };
+diff --git a/fs/dlm/config.h b/fs/dlm/config.h
+index 4f1d6fc..dd0ce24 100644
+--- a/fs/dlm/config.h
++++ b/fs/dlm/config.h
+@@ -28,6 +28,7 @@ struct dlm_config_info {
+       int ci_log_debug;
+       int ci_protocol;
+       int ci_timewarn_cs;
++      int ci_waitwarn_us;
+ };
+ extern struct dlm_config_info dlm_config;
+diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
+index b942049..6a92478 100644
+--- a/fs/dlm/dlm_internal.h
++++ b/fs/dlm/dlm_internal.h
+@@ -245,6 +245,7 @@ struct dlm_lkb {
+       int8_t                  lkb_wait_type;  /* type of reply waiting for */
+       int8_t                  lkb_wait_count;
++      int                     lkb_wait_nodeid; /* for debugging */
+       struct list_head        lkb_idtbl_list; /* lockspace lkbtbl */
+       struct list_head        lkb_statequeue; /* rsb g/c/w list */
+@@ -254,6 +255,7 @@ struct dlm_lkb {
+       struct list_head        lkb_ownqueue;   /* list of locks for a process */
+       struct list_head        lkb_time_list;
+       ktime_t                 lkb_timestamp;
++      ktime_t                 lkb_wait_time;
+       unsigned long           lkb_timeout_cs;
+       struct dlm_callback     lkb_callbacks[DLM_CALLBACKS_SIZE];
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index 04b8c44..e3c8641 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
+       return -1;
+ }
++/*
++ * Track which nodes have already been reported during one waiters scan.
++ *
++ * @warned must be a zero-filled array of @num_nodes slots.  Slots are
++ * examined in order: the first zero slot reached is claimed for @nodeid
++ * and 0 is returned (not warned yet); if a slot already holding @nodeid
++ * is reached first, 1 is returned (already warned).  0 is also returned
++ * when every slot is occupied by some other nodeid.
++ */
++static int nodeid_warned(int nodeid, int num_nodes, int *warned)
++{
++      int i;
++
++      for (i = 0; i < num_nodes; i++) {
++              if (!warned[i]) {
++                      warned[i] = nodeid;
++                      return 0;
++              }
++              if (warned[i] == nodeid)
++                      return 1;
++      }
++      return 0;
++}
++
++/*
++ * Walk ls->ls_waiters and log an error for lkbs that have been waiting
++ * for a reply for at least dlm_config.ci_waitwarn_us microseconds.
++ *
++ * Does nothing when ci_waitwarn_us is 0.  Within one call at most one
++ * log_error() is emitted per lkb_wait_nodeid (tracked via
++ * nodeid_warned()).  An lkb that has expired gets lkb_wait_time reset to
++ * zero, which makes this function skip it on later scans.  A summary is
++ * written with log_debug() when at least one lkb expired.
++ */
++void dlm_scan_waiters(struct dlm_ls *ls)
++{
++      struct dlm_lkb *lkb;
++      ktime_t zero = ktime_set(0, 0);
++      s64 us;
++      s64 debug_maxus = 0;
++      u32 debug_scanned = 0;
++      u32 debug_expired = 0;
++      int num_nodes = 0;
++      int *warned = NULL;
++
++      if (!dlm_config.ci_waitwarn_us)
++              return;
++
++      mutex_lock(&ls->ls_waiters_mutex);
++
++      list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
++              /* zero timestamp: nothing to measure for this lkb */
++              if (ktime_equal(lkb->lkb_wait_time, zero))
++                      continue;
++
++              debug_scanned++;
++
++              us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
++
++              if (us < dlm_config.ci_waitwarn_us)
++                      continue;
++
++              /* clear the timestamp so this lkb is skipped from now on */
++              lkb->lkb_wait_time = zero;
++
++              debug_expired++;
++              if (us > debug_maxus)
++                      debug_maxus = us;
++
++              /* allocate the per-node table on the first expired lkb */
++              if (!num_nodes) {
++                      num_nodes = ls->ls_num_nodes;
++                      /* size first, gfp flags second; the table must be
++                         zero-filled because nodeid_warned() treats a
++                         zero slot as unused */
++                      warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
++              }
++              /* no table: still counted above, but no per-node warning */
++              if (!warned)
++                      continue;
++              if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
++                      continue;
++
++              log_error(ls, "waitwarn %x %lld %d us check connection to "
++                        "node %d", lkb->lkb_id, (long long)us,
++                        dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
++      }
++      mutex_unlock(&ls->ls_waiters_mutex);
++
++      /* kfree(NULL) is a no-op, so no guard is needed */
++      kfree(warned);
++
++      if (debug_expired)
++              log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
++                        debug_scanned, debug_expired,
++                        dlm_config.ci_waitwarn_us, (long long)debug_maxus);
++}
++
+ /* add/remove lkb from global waiters list of lkb's waiting for
+    a reply from a remote node */
+-static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
++static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
+ {
+       struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+       int error = 0;
+@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
+       lkb->lkb_wait_count++;
+       lkb->lkb_wait_type = mstype;
++      lkb->lkb_wait_time = ktime_get();
++      lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
+       hold_lkb(lkb);
+       list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
+  out:
+@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
+       list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+               lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
+       mutex_unlock(&ls->ls_timeout_mutex);
++
++      if (!dlm_config.ci_waitwarn_us)
++              return;
++
++      mutex_lock(&ls->ls_waiters_mutex);
++      list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
++              if (ktime_to_us(lkb->lkb_wait_time))
++                      lkb->lkb_wait_time = ktime_get();
++      }
++      mutex_unlock(&ls->ls_waiters_mutex);
+ }
+ /* lkb is master or local copy */
+@@ -2844,12 +2930,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
+       struct dlm_mhandle *mh;
+       int to_nodeid, error;
+-      error = add_to_waiters(lkb, mstype);
++      to_nodeid = r->res_nodeid;
++
++      error = add_to_waiters(lkb, mstype, to_nodeid);
+       if (error)
+               return error;
+-      to_nodeid = r->res_nodeid;
+-
+       error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
+       if (error)
+               goto fail;
+@@ -2951,12 +3037,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
+       struct dlm_mhandle *mh;
+       int to_nodeid, error;
+-      error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
++      to_nodeid = dlm_dir_nodeid(r);
++
++      error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
+       if (error)
+               return error;
+-      to_nodeid = dlm_dir_nodeid(r);
+-
+       error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
+       if (error)
+               goto fail;
+diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
+index 88e93c8..265017a 100644
+--- a/fs/dlm/lock.h
++++ b/fs/dlm/lock.h
+@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
+ void dlm_scan_rsbs(struct dlm_ls *ls);
+ int dlm_lock_recovery_try(struct dlm_ls *ls);
+ void dlm_unlock_recovery(struct dlm_ls *ls);
++void dlm_scan_waiters(struct dlm_ls *ls);
+ void dlm_scan_timeout(struct dlm_ls *ls);
+ void dlm_adjust_timeouts(struct dlm_ls *ls);
+diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
+index f994a7d..14cbf40 100644
+--- a/fs/dlm/lockspace.c
++++ b/fs/dlm/lockspace.c
+@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
+ static int dlm_scand(void *data)
+ {
+       struct dlm_ls *ls;
+-      int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
+       while (!kthread_should_stop()) {
+               ls = find_ls_to_scan();
+@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
+                               ls->ls_scan_time = jiffies;
+                               dlm_scan_rsbs(ls);
+                               dlm_scan_timeout(ls);
++                              dlm_scan_waiters(ls);
+                               dlm_unlock_recovery(ls);
+                       } else {
+                               ls->ls_scan_time += HZ;
+                       }
+-              } else {
+-                      schedule_timeout_interruptible(timeout_jiffies);
++                      continue;
+               }
++              schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
+       }
+       return 0;
+ }
+-- 
+1.7.2.5
+
diff --git a/dlm-increase-default-hash-table-sizes.patch b/dlm-increase-default-hash-table-sizes.patch
new file mode 100644 (file)
index 0000000..bf3da39
--- /dev/null
@@ -0,0 +1,34 @@
+From e3853a90e218bcb2e48d3f403d0962bf54444f5f Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Thu, 10 Mar 2011 13:07:17 -0600
+Subject: [PATCH 1/1] dlm: increase default hash table sizes
+
+Make all three hash tables a consistent size of 1024
+rather than 1024, 512, 256.  All three tables, for
+resources, locks, and lock dir entries, will generally
+be filled to the same order of magnitude.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/config.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/config.c b/fs/dlm/config.c
+index b54bca0..0d329ff 100644
+--- a/fs/dlm/config.c
++++ b/fs/dlm/config.c
+@@ -977,9 +977,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
+ /* Config file defaults */
+ #define DEFAULT_TCP_PORT       21064
+ #define DEFAULT_BUFFER_SIZE     4096
+-#define DEFAULT_RSBTBL_SIZE      256
++#define DEFAULT_RSBTBL_SIZE     1024
+ #define DEFAULT_LKBTBL_SIZE     1024
+-#define DEFAULT_DIRTBL_SIZE      512
++#define DEFAULT_DIRTBL_SIZE     1024
+ #define DEFAULT_RECOVER_TIMER      5
+ #define DEFAULT_TOSS_SECS         10
+ #define DEFAULT_SCAN_SECS          5
+-- 
+1.7.2.5
+
diff --git a/dlm-make-plock-operation-killable.patch b/dlm-make-plock-operation-killable.patch
new file mode 100644 (file)
index 0000000..b59973b
--- /dev/null
@@ -0,0 +1,167 @@
+From 901025d2f3194b4868980c8ba80df4cc0aa1282c Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Wed, 2 Mar 2011 14:20:04 -0600
+Subject: [PATCH 1/1] dlm: make plock operation killable
+
+Allow processes blocked on plock requests to be interrupted
+when they are killed.  This leaves the problem of cleaning
+up the lock state in userspace.  This has three parts:
+
+1. Add a flag to unlock operations sent to userspace
+indicating the file is being closed.  Userspace will
+then look for and clear any waiting plock operations that
+were abandoned by an interrupted process.
+
+2. Queue an unlock-close operation (like in 1) to clean up
+userspace from an interrupted plock request.  This is needed
+because the vfs will not send a cleanup-unlock if it sees no
+locks on the file, which it won't if the interrupted operation
+was the only one.
+
+3. Do not use replies from userspace for unlock-close operations
+because they are unnecessary (they are just cleaning up for the
+process which did not make an unlock call).  This also simplifies
+the new unlock-close generated from point 2.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/plock.c            |   65 ++++++++++++++++++++++++++++++++++++++++++---
+ include/linux/dlm_plock.h |    6 +++-
+ 2 files changed, 65 insertions(+), 6 deletions(-)
+
+diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
+index 30d8b85..e2b8780 100644
+--- a/fs/dlm/plock.c
++++ b/fs/dlm/plock.c
+@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
+       wake_up(&send_wq);
+ }
++/* A process killed while waiting may have held the only plock request
++   on the file.  In that case locks_remove_posix sees no lock on the
++   file and never hands us an unlock-close to pass on to userspace, so
++   the abandoned waiter would not be cleaned up.  Insert the
++   unlock-close ourselves when the lock call is interrupted. */
++
++static void do_unlock_close(struct dlm_ls *ls, u64 number,
++                          struct file *file, struct file_lock *fl)
++{
++      struct plock_op *close_op = kzalloc(sizeof(*close_op), GFP_NOFS);
++
++      /* on allocation failure no unlock-close is sent */
++      if (close_op == NULL)
++              return;
++
++      /* an unlock flagged as close, range 0 through OFFSET_MAX */
++      close_op->info.optype   = DLM_PLOCK_OP_UNLOCK;
++      close_op->info.flags    |= DLM_PLOCK_FL_CLOSE;
++      close_op->info.start    = 0;
++      close_op->info.end      = OFFSET_MAX;
++
++      /* lockspace, resource number and lock holder */
++      close_op->info.fsid     = ls->ls_global_id;
++      close_op->info.number   = number;
++      close_op->info.pid      = fl->fl_pid;
++      close_op->info.owner    = (fl->fl_lmops && fl->fl_lmops->fl_grant) ?
++                                      (__u64) fl->fl_pid :
++                                      (__u64)(long) fl->fl_owner;
++
++      send_op(close_op);
++}
++
+ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+                  int cmd, struct file_lock *fl)
+ {
+@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+       send_op(op);
+-      if (xop->callback == NULL)
+-              wait_event(recv_wq, (op->done != 0));
+-      else {
++      if (xop->callback == NULL) {
++              rv = wait_event_killable(recv_wq, (op->done != 0));
++              if (rv == -ERESTARTSYS) {
++                      log_debug(ls, "dlm_posix_lock: wait killed %llx",
++                                (unsigned long long)number);
++                      spin_lock(&ops_lock);
++                      list_del(&op->list);
++                      spin_unlock(&ops_lock);
++                      kfree(xop);
++                      do_unlock_close(ls, number, file, fl);
++                      goto out;
++              }
++      } else {
+               rv = FILE_LOCK_DEFERRED;
+               goto out;
+       }
+@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+       else
+               op->info.owner  = (__u64)(long) fl->fl_owner;
++      if (fl->fl_flags & FL_CLOSE) {
++              op->info.flags |= DLM_PLOCK_FL_CLOSE;
++              send_op(op);
++              rv = 0;
++              goto out;
++      }
++
+       send_op(op);
+       wait_event(recv_wq, (op->done != 0));
+@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+       spin_lock(&ops_lock);
+       if (!list_empty(&send_list)) {
+               op = list_entry(send_list.next, struct plock_op, list);
+-              list_move(&op->list, &recv_list);
++              if (op->info.flags & DLM_PLOCK_FL_CLOSE)
++                      list_del(&op->list);
++              else
++                      list_move(&op->list, &recv_list);
+               memcpy(&info, &op->info, sizeof(info));
+       }
+       spin_unlock(&ops_lock);
+@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+       if (!op)
+               return -EAGAIN;
++      /* there is no need to get a reply from userspace for unlocks
++         that were generated by the vfs cleaning up for a close
++         (the process did not make an unlock call). */
++
++      if (op->info.flags & DLM_PLOCK_FL_CLOSE)
++              kfree(op);
++
+       if (copy_to_user(u, &info, sizeof(info)))
+               return -EFAULT;
+       return sizeof(info);
+diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h
+index 2dd2124..3b1cc1b 100644
+--- a/include/linux/dlm_plock.h
++++ b/include/linux/dlm_plock.h
+@@ -14,7 +14,7 @@
+ #define DLM_PLOCK_MISC_NAME           "dlm_plock"
+ #define DLM_PLOCK_VERSION_MAJOR       1
+-#define DLM_PLOCK_VERSION_MINOR       1
++#define DLM_PLOCK_VERSION_MINOR       2
+ #define DLM_PLOCK_VERSION_PATCH       0
+ enum {
+@@ -23,12 +23,14 @@ enum {
+       DLM_PLOCK_OP_GET,
+ };
++#define DLM_PLOCK_FL_CLOSE 1
++
+ struct dlm_plock_info {
+       __u32 version[3];
+       __u8 optype;
+       __u8 ex;
+       __u8 wait;
+-      __u8 pad;
++      __u8 flags;
+       __u32 pid;
+       __s32 nodeid;
+       __s32 rv;
+-- 
+1.7.2.5
+
diff --git a/dlm-remove-shared-message-stub-for-recovery.patch b/dlm-remove-shared-message-stub-for-recovery.patch
new file mode 100644 (file)
index 0000000..822a929
--- /dev/null
@@ -0,0 +1,218 @@
+From 2a7ce0edd661b3144c7b916ecf1eba0967b6d4a5 Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Mon, 4 Apr 2011 15:19:59 -0500
+Subject: [PATCH 1/1] dlm: remove shared message stub for recovery
+
+kmalloc a stub message struct during recovery instead of sharing the
+struct in the lockspace.  This leaves the lockspace stub_ms only for
+faking downconvert replies, where it is never modified and sharing
+is not a problem.
+
+Also improve the debug messages in the same recovery function.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/dlm_internal.h |    1 +
+ fs/dlm/lock.c         |   82 +++++++++++++++++++++++++++++-------------------
+ 2 files changed, 50 insertions(+), 33 deletions(-)
+
+diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
+index 6a92478..0262451 100644
+--- a/fs/dlm/dlm_internal.h
++++ b/fs/dlm/dlm_internal.h
+@@ -209,6 +209,7 @@ struct dlm_args {
+ #define DLM_IFL_WATCH_TIMEWARN        0x00400000
+ #define DLM_IFL_TIMEOUT_CANCEL        0x00800000
+ #define DLM_IFL_DEADLOCK_CANCEL       0x01000000
++#define DLM_IFL_STUB_MS               0x02000000 /* magic number for m_flags */
+ #define DLM_IFL_USER          0x00000001
+ #define DLM_IFL_ORPHAN                0x00000002
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index e3c8641..8122779 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -1037,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
+       struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+       int error;
+-      if (ms != &ls->ls_stub_ms)
++      if (ms->m_flags != DLM_IFL_STUB_MS)
+               mutex_lock(&ls->ls_waiters_mutex);
+       error = _remove_from_waiters(lkb, ms->m_type, ms);
+-      if (ms != &ls->ls_stub_ms)
++      if (ms->m_flags != DLM_IFL_STUB_MS)
+               mutex_unlock(&ls->ls_waiters_mutex);
+       return error;
+ }
+@@ -1462,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
+    ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
+    compatible with other granted locks */
+-static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
++static void munge_demoted(struct dlm_lkb *lkb)
+ {
+-      if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
+-              log_print("munge_demoted %x invalid reply type %d",
+-                        lkb->lkb_id, ms->m_type);
+-              return;
+-      }
+-
+       if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
+               log_print("munge_demoted %x invalid modes gr %d rq %d",
+                         lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
+@@ -2966,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+       /* down conversions go without a reply from the master */
+       if (!error && down_conversion(lkb)) {
+               remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
++              r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
+               r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+               r->res_ls->ls_stub_ms.m_result = 0;
+-              r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+               __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
+       }
+@@ -3156,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
+ static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
+ {
++      if (ms->m_flags == DLM_IFL_STUB_MS)
++              return;
++
+       lkb->lkb_sbflags = ms->m_sbflags;
+       lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
+                        (ms->m_flags & 0x0000FFFF);
+@@ -3698,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
+               /* convert was queued on remote master */
+               receive_flags_reply(lkb, ms);
+               if (is_demoted(lkb))
+-                      munge_demoted(lkb, ms);
++                      munge_demoted(lkb);
+               del_lkb(r, lkb);
+               add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+               add_timeout(lkb);
+@@ -3708,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
+               /* convert was granted on remote master */
+               receive_flags_reply(lkb, ms);
+               if (is_demoted(lkb))
+-                      munge_demoted(lkb, ms);
++                      munge_demoted(lkb);
+               grant_lock_pc(r, lkb, ms);
+               queue_cast(r, lkb, 0);
+               break;
+@@ -4082,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
+       dlm_put_lockspace(ls);
+ }
+-static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
++static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
++                                 struct dlm_message *ms_stub)
+ {
+       if (middle_conversion(lkb)) {
+               hold_lkb(lkb);
+-              ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+-              ls->ls_stub_ms.m_result = -EINPROGRESS;
+-              ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+-              ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+-              _receive_convert_reply(lkb, &ls->ls_stub_ms);
++              memset(ms_stub, 0, sizeof(struct dlm_message));
++              ms_stub->m_flags = DLM_IFL_STUB_MS;
++              ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
++              ms_stub->m_result = -EINPROGRESS;
++              ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++              _receive_convert_reply(lkb, ms_stub);
+               /* Same special case as in receive_rcom_lock_args() */
+               lkb->lkb_grmode = DLM_LOCK_IV;
+@@ -4131,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
+ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+ {
+       struct dlm_lkb *lkb, *safe;
++      struct dlm_message *ms_stub;
+       int wait_type, stub_unlock_result, stub_cancel_result;
++      ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
++      if (!ms_stub) {
++              log_error(ls, "dlm_recover_waiters_pre no mem");
++              return;
++      }
++
+       mutex_lock(&ls->ls_waiters_mutex);
+       list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
++
++              /* exclude debug messages about unlocks because there can be so
++                 many and they aren't very interesting */
++
++              if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
++                      log_debug(ls, "recover_waiter %x nodeid %d "
++                                "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
++                                lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
++              }
+               /* all outstanding lookups, regardless of destination  will be
+                  resent after recovery is done */
+@@ -4183,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+                       break;
+               case DLM_MSG_CONVERT:
+-                      recover_convert_waiter(ls, lkb);
++                      recover_convert_waiter(ls, lkb, ms_stub);
+                       break;
+               case DLM_MSG_UNLOCK:
+                       hold_lkb(lkb);
+-                      ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
+-                      ls->ls_stub_ms.m_result = stub_unlock_result;
+-                      ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+-                      ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+-                      _receive_unlock_reply(lkb, &ls->ls_stub_ms);
++                      memset(ms_stub, 0, sizeof(struct dlm_message));
++                      ms_stub->m_flags = DLM_IFL_STUB_MS;
++                      ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
++                      ms_stub->m_result = stub_unlock_result;
++                      ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++                      _receive_unlock_reply(lkb, ms_stub);
+                       dlm_put_lkb(lkb);
+                       break;
+               case DLM_MSG_CANCEL:
+                       hold_lkb(lkb);
+-                      ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
+-                      ls->ls_stub_ms.m_result = stub_cancel_result;
+-                      ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+-                      ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+-                      _receive_cancel_reply(lkb, &ls->ls_stub_ms);
++                      memset(ms_stub, 0, sizeof(struct dlm_message));
++                      ms_stub->m_flags = DLM_IFL_STUB_MS;
++                      ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
++                      ms_stub->m_result = stub_cancel_result;
++                      ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++                      _receive_cancel_reply(lkb, ms_stub);
+                       dlm_put_lkb(lkb);
+                       break;
+@@ -4213,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+               schedule();
+       }
+       mutex_unlock(&ls->ls_waiters_mutex);
++      kfree(ms_stub);
+ }
+ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
+@@ -4277,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
+               ou = is_overlap_unlock(lkb);
+               err = 0;
+-              log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
+-                        lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
++              log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
++                        lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
+               /* At this point we assume that we won't get a reply to any
+                  previous op or overlap op on this lock.  First, do a big
+-- 
+1.7.2.5
+
diff --git a/dlm-sanitize-work_start-in-lowcomms.c.patch b/dlm-sanitize-work_start-in-lowcomms.c.patch
new file mode 100644 (file)
index 0000000..9bf0fa8
--- /dev/null
@@ -0,0 +1,51 @@
+From b9d41052794385f9d47ebb7acf4a772f3ad02398 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@gmail.com>
+Date: Mon, 13 Dec 2010 13:42:24 -0600
+Subject: [PATCH 1/1] dlm: sanitize work_start() in lowcomms.c
+
+create_workqueue() returns NULL on failure rather than an ERR_PTR().
+Fix the error checking and remove the unnecessary variable 'error'.
+
+Signed-off-by: Namhyung Kim <namhyung@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c |   15 ++++++---------
+ 1 files changed, 6 insertions(+), 9 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 0e75f15..9c64ae9 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1468,22 +1468,19 @@ static void work_stop(void)
+ static int work_start(void)
+ {
+-      int error;
+       recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+                                        WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+-      error = IS_ERR(recv_workqueue);
+-      if (error) {
+-              log_print("can't start dlm_recv %d", error);
+-              return error;
++      if (!recv_workqueue) {
++              log_print("can't start dlm_recv");
++              return -ENOMEM;
+       }
+       send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+                                        WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+-      error = IS_ERR(send_workqueue);
+-      if (error) {
+-              log_print("can't start dlm_send %d", error);
++      if (!send_workqueue) {
++              log_print("can't start dlm_send");
+               destroy_workqueue(recv_workqueue);
+-              return error;
++              return -ENOMEM;
+       }
+       return 0;
+-- 
+1.7.2.5
+
diff --git a/dlm-use-single-thread-workqueues.patch b/dlm-use-single-thread-workqueues.patch
new file mode 100644 (file)
index 0000000..cd84522
--- /dev/null
@@ -0,0 +1,41 @@
+From 6b155c8fd4d239f7d883d455bbad1be47724bbfc Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Fri, 11 Feb 2011 16:44:31 -0600
+Subject: [PATCH 1/1] dlm: use single thread workqueues
+
+The recent commit to use cmwq for send and recv threads
+(dcce240ead802d42b1e45ad2fcb2ed4a399cb255) introduced problems,
+apparently due to multiple workqueue threads.  Single threads
+make the problems go away, so return to that until we fully
+understand the concurrency issues with multiple threads.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c |    6 ++----
+ 1 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 9c64ae9..2d8c87b 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1468,15 +1468,13 @@ static void work_stop(void)
+ static int work_start(void)
+ {
+-      recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+-                                       WQ_HIGHPRI | WQ_FREEZEABLE, 0);
++      recv_workqueue = create_singlethread_workqueue("dlm_recv");
+       if (!recv_workqueue) {
+               log_print("can't start dlm_recv");
+               return -ENOMEM;
+       }
+-      send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+-                                       WQ_HIGHPRI | WQ_FREEZEABLE, 0);
++      send_workqueue = create_singlethread_workqueue("dlm_send");
+       if (!send_workqueue) {
+               log_print("can't start dlm_send");
+               destroy_workqueue(recv_workqueue);
+-- 
+1.7.2.5
+