RELEASE=2.0
KERNEL_VER=2.6.32
-PKGREL=42
+PKGREL=43
# also include firmware of previous version into
# the fw package: fwlist-2.6.32-PREV-pve
KREL=6
install -m 644 e1000e.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/e1000e/
# install latest igb driver
install -m 644 igb.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/igb/
- # install bnx2 and tg3 drivers
+ # install bnx2 drivers
#install -m 644 bnx2.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
- #install -m 644 bnx2x.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
#install -m 644 cnic.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/
+ #install -m 644 bnx2x.ko tmp/lib/modules/${KVNAME}/kernel/drivers/net/bnx2x/
# install areca driver
install -m 644 arcmsr.ko tmp/lib/modules/${KVNAME}/kernel/drivers/scsi/arcmsr/
# remove firmware
${KERNEL_CFG}: ${KERNEL_CFG_ORG} config-${KERNEL_VER}.diff
cp ${KERNEL_CFG_ORG} ${KERNEL_CFG}.new
- patch ${KERNEL_CFG}.new config-${KERNEL_VER}.diff
+ patch --no-backup ${KERNEL_CFG}.new config-${KERNEL_VER}.diff
mv ${KERNEL_CFG}.new ${KERNEL_CFG}
${KERNEL_SRC}/README: ${KERNEL_SRC}.org/README
cd ${KERNEL_SRC}; patch -p1 <../SCSI-aacraid-Add-PMC-Sierra-SRC-based-controller.patch
cd ${KERNEL_SRC}; patch -p1 <../fix-register-corruption-in-pvclock-scale-delta.patch
cd ${KERNEL_SRC}; patch -p1 <../bridge-patch.diff
+ # backport dlm fixes from linux 3.y (those are included in RHEL 6.2)
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-Make-DLM-depend-on-CONFIGFS_FS.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-increase-default-hash-table-sizes.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-Use-cmwq-for-send-and-receive-workqueues.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-sanitize-work_start-in-lowcomms.c.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-use-single-thread-workqueues.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-Remove-superfluous-call-to-recalc_sigpending.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-delayed-reply-message-warning.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-remove-shared-message-stub-for-recovery.patch
+ cd ${KERNEL_SRC}; patch -p1 <../dlm-make-plock-operation-killable.patch
#cd ${KERNEL_SRC}; patch -p1 <../ovz-fix-slow-fsync.patch
sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
touch $@
rm -rf *~ .compile_mark ${KERNEL_CFG} ${KERNEL_SRC} tmp data proxmox-ve/data *.deb ${AOEDIR} aoe.ko ${headers_tmp} fwdata fwlist.tmp *.ko ${IXGBEDIR} ${E1000EDIR} e1000e.ko ${IGBDIR} igb.ko fwlist-${KVNAME} ${ARECADIR} arcmsr.ko
+
+
* ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/DRIVER/SourceCode/arcmsr.1.20.0X.15-110330.zip
+#- include latest broadcom bnx2 drivers
+#
+# * original file linux-6.2.23.zip contains
+# netxtreme2-6.2.23.tar.gz (added to repository)
+
+
FIRMWARE:
=========
+pve-kernel-2.6.32 (2.6.32-43) unstable; urgency=low
+
+ * backport patches for dlm
+
+ -- Proxmox Support Team <support@proxmox.com> Thu, 25 Aug 2011 10:35:05 +0200
+
pve-kernel-2.6.32 (2.6.32-42) unstable; urgency=low
* update to vzkernel-2.6.32-042stab033.1.src.rpm
--- /dev/null
+From 86c747d2a4f028fe2fdf091c3a81d0e187827682 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Sun, 16 Jan 2011 21:14:52 +0000
+Subject: [PATCH 1/1] dlm: Make DLM depend on CONFIGFS_FS
+
+This patch fixes the following kconfig error after changing
+CONFIGFS_FS -> select SYSFS:
+
+fs/sysfs/Kconfig:1:error: recursive dependency detected!
+fs/sysfs/Kconfig:1: symbol SYSFS is selected by CONFIGFS_FS
+fs/configfs/Kconfig:1: symbol CONFIGFS_FS is selected by DLM
+fs/dlm/Kconfig:1: symbol DLM depends on SYSFS
+
+Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Randy Dunlap <randy.dunlap@oracle.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: James Bottomley <James.Bottomley@suse.de>
+---
+ fs/dlm/Kconfig | 3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
+index 2dbb422..1897eb1 100644
+--- a/fs/dlm/Kconfig
++++ b/fs/dlm/Kconfig
+@@ -1,8 +1,7 @@
+ menuconfig DLM
+ tristate "Distributed Lock Manager (DLM)"
+ depends on EXPERIMENTAL && INET
+- depends on SYSFS && (IPV6 || IPV6=n)
+- select CONFIGFS_FS
++ depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
+ select IP_SCTP
+ help
+ A general purpose distributed lock manager for kernel or userspace
+--
+1.7.2.5
+
--- /dev/null
+From 4bcad6c1ef53a9a0224f4654ceb3b9030d0769ec Mon Sep 17 00:00:00 2001
+From: Matt Fleming <matt.fleming@linux.intel.com>
+Date: Thu, 24 Mar 2011 13:56:47 +0000
+Subject: [PATCH 1/1] dlm: Remove superfluous call to recalc_sigpending()
+
+recalc_sigpending() is called within sigprocmask(), so there is no
+need call it again after sigprocmask() has returned.
+
+Signed-off-by: Matt Fleming <matt.fleming@linux.intel.com>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/user.c | 1 -
+ 1 files changed, 0 insertions(+), 1 deletions(-)
+
+diff --git a/fs/dlm/user.c b/fs/dlm/user.c
+index d5ab3fe..e96bf3e 100644
+--- a/fs/dlm/user.c
++++ b/fs/dlm/user.c
+@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
+
+ out_sig:
+ sigprocmask(SIG_SETMASK, &tmpsig, NULL);
+- recalc_sigpending();
+ out_free:
+ kfree(kbuf);
+ return error;
+--
+1.7.2.5
+
--- /dev/null
+From dcce240ead802d42b1e45ad2fcb2ed4a399cb255 Mon Sep 17 00:00:00 2001
+From: Steven Whitehouse <swhiteho@redhat.com>
+Date: Fri, 12 Nov 2010 12:12:29 +0000
+Subject: [PATCH 1/1] dlm: Use cmwq for send and receive workqueues
+
+So far as I can tell, there is no reason to use a single-threaded
+send workqueue for dlm, since it may need to send to several sockets
+concurrently. Both workqueues are set to WQ_MEM_RECLAIM to avoid
+any possible deadlocks, WQ_HIGHPRI since locking traffic is highly
+latency sensitive (and to avoid a priority inversion wrt GFS2's
+glock_workqueue) and WQ_FREEZABLE just in case someone needs to do
+that (even though with current cluster infrastructure, it doesn't
+make sense as the node will most likely land up ejected from the
+cluster) in the future.
+
+Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c | 6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 77720f8..1d4e644 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1451,14 +1451,16 @@ static void work_stop(void)
+ static int work_start(void)
+ {
+ int error;
+- recv_workqueue = create_workqueue("dlm_recv");
++ recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
++ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ error = IS_ERR(recv_workqueue);
+ if (error) {
+ log_print("can't start dlm_recv %d", error);
+ return error;
+ }
+
+- send_workqueue = create_singlethread_workqueue("dlm_send");
++ send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
++ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ error = IS_ERR(send_workqueue);
+ if (error) {
+ log_print("can't start dlm_send %d", error);
+--
+1.7.2.5
+
--- /dev/null
+From c6ff669bac5c409f4cb74366248f51b73f7d6feb Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Mon, 28 Mar 2011 14:17:26 -0500
+Subject: [PATCH 1/1] dlm: delayed reply message warning
+
+Add an option (disabled by default) to print a warning message
+when a lock has been waiting a configurable amount of time for
+a reply message from another node. This is mainly for debugging.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/config.c | 9 ++++-
+ fs/dlm/config.h | 1 +
+ fs/dlm/dlm_internal.h | 2 +
+ fs/dlm/lock.c | 100 +++++++++++++++++++++++++++++++++++++++++++++---
+ fs/dlm/lock.h | 1 +
+ fs/dlm/lockspace.c | 6 +-
+ 6 files changed, 108 insertions(+), 11 deletions(-)
+
+diff --git a/fs/dlm/config.c b/fs/dlm/config.c
+index 0d329ff..9b026ea 100644
+--- a/fs/dlm/config.c
++++ b/fs/dlm/config.c
+@@ -100,6 +100,7 @@ struct dlm_cluster {
+ unsigned int cl_log_debug;
+ unsigned int cl_protocol;
+ unsigned int cl_timewarn_cs;
++ unsigned int cl_waitwarn_us;
+ };
+
+ enum {
+@@ -114,6 +115,7 @@ enum {
+ CLUSTER_ATTR_LOG_DEBUG,
+ CLUSTER_ATTR_PROTOCOL,
+ CLUSTER_ATTR_TIMEWARN_CS,
++ CLUSTER_ATTR_WAITWARN_US,
+ };
+
+ struct cluster_attribute {
+@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
+ CLUSTER_ATTR(log_debug, 0);
+ CLUSTER_ATTR(protocol, 0);
+ CLUSTER_ATTR(timewarn_cs, 1);
++CLUSTER_ATTR(waitwarn_us, 0);
+
+ static struct configfs_attribute *cluster_attrs[] = {
+ [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
+ [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+ [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+ [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
++ [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
+ NULL,
+ };
+
+@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
+ cl->cl_log_debug = dlm_config.ci_log_debug;
+ cl->cl_protocol = dlm_config.ci_protocol;
+ cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
++ cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+
+ space_list = &sps->ss_group;
+ comm_list = &cms->cs_group;
+@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
+ #define DEFAULT_LOG_DEBUG 0
+ #define DEFAULT_PROTOCOL 0
+ #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
++#define DEFAULT_WAITWARN_US 0
+
+ struct dlm_config_info dlm_config = {
+ .ci_tcp_port = DEFAULT_TCP_PORT,
+@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
+ .ci_scan_secs = DEFAULT_SCAN_SECS,
+ .ci_log_debug = DEFAULT_LOG_DEBUG,
+ .ci_protocol = DEFAULT_PROTOCOL,
+- .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
++ .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
++ .ci_waitwarn_us = DEFAULT_WAITWARN_US
+ };
+
+diff --git a/fs/dlm/config.h b/fs/dlm/config.h
+index 4f1d6fc..dd0ce24 100644
+--- a/fs/dlm/config.h
++++ b/fs/dlm/config.h
+@@ -28,6 +28,7 @@ struct dlm_config_info {
+ int ci_log_debug;
+ int ci_protocol;
+ int ci_timewarn_cs;
++ int ci_waitwarn_us;
+ };
+
+ extern struct dlm_config_info dlm_config;
+diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
+index b942049..6a92478 100644
+--- a/fs/dlm/dlm_internal.h
++++ b/fs/dlm/dlm_internal.h
+@@ -245,6 +245,7 @@ struct dlm_lkb {
+
+ int8_t lkb_wait_type; /* type of reply waiting for */
+ int8_t lkb_wait_count;
++ int lkb_wait_nodeid; /* for debugging */
+
+ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
+ struct list_head lkb_statequeue; /* rsb g/c/w list */
+@@ -254,6 +255,7 @@ struct dlm_lkb {
+ struct list_head lkb_ownqueue; /* list of locks for a process */
+ struct list_head lkb_time_list;
+ ktime_t lkb_timestamp;
++ ktime_t lkb_wait_time;
+ unsigned long lkb_timeout_cs;
+
+ struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index 04b8c44..e3c8641 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
+ return -1;
+ }
+
++static int nodeid_warned(int nodeid, int num_nodes, int *warned)
++{
++ int i;
++
++ for (i = 0; i < num_nodes; i++) {
++ if (!warned[i]) {
++ warned[i] = nodeid;
++ return 0;
++ }
++ if (warned[i] == nodeid)
++ return 1;
++ }
++ return 0;
++}
++
++void dlm_scan_waiters(struct dlm_ls *ls)
++{
++ struct dlm_lkb *lkb;
++ ktime_t zero = ktime_set(0, 0);
++ s64 us;
++ s64 debug_maxus = 0;
++ u32 debug_scanned = 0;
++ u32 debug_expired = 0;
++ int num_nodes = 0;
++ int *warned = NULL;
++
++ if (!dlm_config.ci_waitwarn_us)
++ return;
++
++ mutex_lock(&ls->ls_waiters_mutex);
++
++ list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
++ if (ktime_equal(lkb->lkb_wait_time, zero))
++ continue;
++
++ debug_scanned++;
++
++ us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
++
++ if (us < dlm_config.ci_waitwarn_us)
++ continue;
++
++ lkb->lkb_wait_time = zero;
++
++ debug_expired++;
++ if (us > debug_maxus)
++ debug_maxus = us;
++
++ if (!num_nodes) {
++ num_nodes = ls->ls_num_nodes;
++ warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
++ if (warned)
++ memset(warned, 0, num_nodes * sizeof(int));
++ }
++ if (!warned)
++ continue;
++ if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
++ continue;
++
++ log_error(ls, "waitwarn %x %lld %d us check connection to "
++ "node %d", lkb->lkb_id, (long long)us,
++ dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
++ }
++ mutex_unlock(&ls->ls_waiters_mutex);
++
++ if (warned)
++ kfree(warned);
++
++ if (debug_expired)
++ log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
++ debug_scanned, debug_expired,
++ dlm_config.ci_waitwarn_us, (long long)debug_maxus);
++}
++
+ /* add/remove lkb from global waiters list of lkb's waiting for
+ a reply from a remote node */
+
+-static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
++static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
+ {
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+ int error = 0;
+@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
+
+ lkb->lkb_wait_count++;
+ lkb->lkb_wait_type = mstype;
++ lkb->lkb_wait_time = ktime_get();
++ lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
+ hold_lkb(lkb);
+ list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
+ out:
+@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
+ list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+ lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
+ mutex_unlock(&ls->ls_timeout_mutex);
++
++ if (!dlm_config.ci_waitwarn_us)
++ return;
++
++ mutex_lock(&ls->ls_waiters_mutex);
++ list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
++ if (ktime_to_us(lkb->lkb_wait_time))
++ lkb->lkb_wait_time = ktime_get();
++ }
++ mutex_unlock(&ls->ls_waiters_mutex);
+ }
+
+ /* lkb is master or local copy */
+@@ -2844,12 +2930,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
+ struct dlm_mhandle *mh;
+ int to_nodeid, error;
+
+- error = add_to_waiters(lkb, mstype);
++ to_nodeid = r->res_nodeid;
++
++ error = add_to_waiters(lkb, mstype, to_nodeid);
+ if (error)
+ return error;
+
+- to_nodeid = r->res_nodeid;
+-
+ error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
+ if (error)
+ goto fail;
+@@ -2951,12 +3037,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ struct dlm_mhandle *mh;
+ int to_nodeid, error;
+
+- error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
++ to_nodeid = dlm_dir_nodeid(r);
++
++ error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
+ if (error)
+ return error;
+
+- to_nodeid = dlm_dir_nodeid(r);
+-
+ error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
+ if (error)
+ goto fail;
+diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
+index 88e93c8..265017a 100644
+--- a/fs/dlm/lock.h
++++ b/fs/dlm/lock.h
+@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
+ void dlm_scan_rsbs(struct dlm_ls *ls);
+ int dlm_lock_recovery_try(struct dlm_ls *ls);
+ void dlm_unlock_recovery(struct dlm_ls *ls);
++void dlm_scan_waiters(struct dlm_ls *ls);
+ void dlm_scan_timeout(struct dlm_ls *ls);
+ void dlm_adjust_timeouts(struct dlm_ls *ls);
+
+diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
+index f994a7d..14cbf40 100644
+--- a/fs/dlm/lockspace.c
++++ b/fs/dlm/lockspace.c
+@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
+ static int dlm_scand(void *data)
+ {
+ struct dlm_ls *ls;
+- int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
+
+ while (!kthread_should_stop()) {
+ ls = find_ls_to_scan();
+@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
+ ls->ls_scan_time = jiffies;
+ dlm_scan_rsbs(ls);
+ dlm_scan_timeout(ls);
++ dlm_scan_waiters(ls);
+ dlm_unlock_recovery(ls);
+ } else {
+ ls->ls_scan_time += HZ;
+ }
+- } else {
+- schedule_timeout_interruptible(timeout_jiffies);
++ continue;
+ }
++ schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
+ }
+ return 0;
+ }
+--
+1.7.2.5
+
--- /dev/null
+From e3853a90e218bcb2e48d3f403d0962bf54444f5f Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Thu, 10 Mar 2011 13:07:17 -0600
+Subject: [PATCH 1/1] dlm: increase default hash table sizes
+
+Make all three hash tables a consistent size of 1024
+rather than 1024, 512, 256. All three tables, for
+resources, locks, and lock dir entries, will generally
+be filled to the same order of magnitude.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/config.c | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/dlm/config.c b/fs/dlm/config.c
+index b54bca0..0d329ff 100644
+--- a/fs/dlm/config.c
++++ b/fs/dlm/config.c
+@@ -977,9 +977,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
+ /* Config file defaults */
+ #define DEFAULT_TCP_PORT 21064
+ #define DEFAULT_BUFFER_SIZE 4096
+-#define DEFAULT_RSBTBL_SIZE 256
++#define DEFAULT_RSBTBL_SIZE 1024
+ #define DEFAULT_LKBTBL_SIZE 1024
+-#define DEFAULT_DIRTBL_SIZE 512
++#define DEFAULT_DIRTBL_SIZE 1024
+ #define DEFAULT_RECOVER_TIMER 5
+ #define DEFAULT_TOSS_SECS 10
+ #define DEFAULT_SCAN_SECS 5
+--
+1.7.2.5
+
--- /dev/null
+From 901025d2f3194b4868980c8ba80df4cc0aa1282c Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Wed, 2 Mar 2011 14:20:04 -0600
+Subject: [PATCH 1/1] dlm: make plock operation killable
+
+Allow processes blocked on plock requests to be interrupted
+when they are killed. This leaves the problem of cleaning
+up the lock state in userspace. This has three parts:
+
+1. Add a flag to unlock operations sent to userspace
+indicating the file is being closed. Userspace will
+then look for and clear any waiting plock operations that
+were abandoned by an interrupted process.
+
+2. Queue an unlock-close operation (like in 1) to clean up
+userspace from an interrupted plock request. This is needed
+because the vfs will not send a cleanup-unlock if it sees no
+locks on the file, which it won't if the interrupted operation
+was the only one.
+
+3. Do not use replies from userspace for unlock-close operations
+because they are unnecessary (they are just cleaning up for the
+process which did not make an unlock call). This also simplifies
+the new unlock-close generated from point 2.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/plock.c | 65 ++++++++++++++++++++++++++++++++++++++++++---
+ include/linux/dlm_plock.h | 6 +++-
+ 2 files changed, 65 insertions(+), 6 deletions(-)
+
+diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
+index 30d8b85..e2b8780 100644
+--- a/fs/dlm/plock.c
++++ b/fs/dlm/plock.c
+@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
+ wake_up(&send_wq);
+ }
+
++/* If a process was killed while waiting for the only plock on a file,
++ locks_remove_posix will not see any lock on the file so it won't
++ send an unlock-close to us to pass on to userspace to clean up the
++ abandoned waiter. So, we have to insert the unlock-close when the
++ lock call is interrupted. */
++
++static void do_unlock_close(struct dlm_ls *ls, u64 number,
++ struct file *file, struct file_lock *fl)
++{
++ struct plock_op *op;
++
++ op = kzalloc(sizeof(*op), GFP_NOFS);
++ if (!op)
++ return;
++
++ op->info.optype = DLM_PLOCK_OP_UNLOCK;
++ op->info.pid = fl->fl_pid;
++ op->info.fsid = ls->ls_global_id;
++ op->info.number = number;
++ op->info.start = 0;
++ op->info.end = OFFSET_MAX;
++ if (fl->fl_lmops && fl->fl_lmops->fl_grant)
++ op->info.owner = (__u64) fl->fl_pid;
++ else
++ op->info.owner = (__u64)(long) fl->fl_owner;
++
++ op->info.flags |= DLM_PLOCK_FL_CLOSE;
++ send_op(op);
++}
++
+ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ int cmd, struct file_lock *fl)
+ {
+@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+
+ send_op(op);
+
+- if (xop->callback == NULL)
+- wait_event(recv_wq, (op->done != 0));
+- else {
++ if (xop->callback == NULL) {
++ rv = wait_event_killable(recv_wq, (op->done != 0));
++ if (rv == -ERESTARTSYS) {
++ log_debug(ls, "dlm_posix_lock: wait killed %llx",
++ (unsigned long long)number);
++ spin_lock(&ops_lock);
++ list_del(&op->list);
++ spin_unlock(&ops_lock);
++ kfree(xop);
++ do_unlock_close(ls, number, file, fl);
++ goto out;
++ }
++ } else {
+ rv = FILE_LOCK_DEFERRED;
+ goto out;
+ }
+@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ else
+ op->info.owner = (__u64)(long) fl->fl_owner;
+
++ if (fl->fl_flags & FL_CLOSE) {
++ op->info.flags |= DLM_PLOCK_FL_CLOSE;
++ send_op(op);
++ rv = 0;
++ goto out;
++ }
++
+ send_op(op);
+ wait_event(recv_wq, (op->done != 0));
+
+@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+ spin_lock(&ops_lock);
+ if (!list_empty(&send_list)) {
+ op = list_entry(send_list.next, struct plock_op, list);
+- list_move(&op->list, &recv_list);
++ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
++ list_del(&op->list);
++ else
++ list_move(&op->list, &recv_list);
+ memcpy(&info, &op->info, sizeof(info));
+ }
+ spin_unlock(&ops_lock);
+@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+ if (!op)
+ return -EAGAIN;
+
++ /* there is no need to get a reply from userspace for unlocks
++ that were generated by the vfs cleaning up for a close
++ (the process did not make an unlock call). */
++
++ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
++ kfree(op);
++
+ if (copy_to_user(u, &info, sizeof(info)))
+ return -EFAULT;
+ return sizeof(info);
+diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h
+index 2dd2124..3b1cc1b 100644
+--- a/include/linux/dlm_plock.h
++++ b/include/linux/dlm_plock.h
+@@ -14,7 +14,7 @@
+ #define DLM_PLOCK_MISC_NAME "dlm_plock"
+
+ #define DLM_PLOCK_VERSION_MAJOR 1
+-#define DLM_PLOCK_VERSION_MINOR 1
++#define DLM_PLOCK_VERSION_MINOR 2
+ #define DLM_PLOCK_VERSION_PATCH 0
+
+ enum {
+@@ -23,12 +23,14 @@ enum {
+ DLM_PLOCK_OP_GET,
+ };
+
++#define DLM_PLOCK_FL_CLOSE 1
++
+ struct dlm_plock_info {
+ __u32 version[3];
+ __u8 optype;
+ __u8 ex;
+ __u8 wait;
+- __u8 pad;
++ __u8 flags;
+ __u32 pid;
+ __s32 nodeid;
+ __s32 rv;
+--
+1.7.2.5
+
--- /dev/null
+From 2a7ce0edd661b3144c7b916ecf1eba0967b6d4a5 Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Mon, 4 Apr 2011 15:19:59 -0500
+Subject: [PATCH 1/1] dlm: remove shared message stub for recovery
+
+kmalloc a stub message struct during recovery instead of sharing the
+struct in the lockspace. This leaves the lockspace stub_ms only for
+faking downconvert replies, where it is never modified and sharing
+is not a problem.
+
+Also improve the debug messages in the same recovery function.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/dlm_internal.h | 1 +
+ fs/dlm/lock.c | 82 +++++++++++++++++++++++++++++-------------------
+ 2 files changed, 50 insertions(+), 33 deletions(-)
+
+diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
+index 6a92478..0262451 100644
+--- a/fs/dlm/dlm_internal.h
++++ b/fs/dlm/dlm_internal.h
+@@ -209,6 +209,7 @@ struct dlm_args {
+ #define DLM_IFL_WATCH_TIMEWARN 0x00400000
+ #define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+ #define DLM_IFL_DEADLOCK_CANCEL 0x01000000
++#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
+ #define DLM_IFL_USER 0x00000001
+ #define DLM_IFL_ORPHAN 0x00000002
+
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index e3c8641..8122779 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -1037,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+ int error;
+
+- if (ms != &ls->ls_stub_ms)
++ if (ms->m_flags != DLM_IFL_STUB_MS)
+ mutex_lock(&ls->ls_waiters_mutex);
+ error = _remove_from_waiters(lkb, ms->m_type, ms);
+- if (ms != &ls->ls_stub_ms)
++ if (ms->m_flags != DLM_IFL_STUB_MS)
+ mutex_unlock(&ls->ls_waiters_mutex);
+ return error;
+ }
+@@ -1462,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
+ compatible with other granted locks */
+
+-static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
++static void munge_demoted(struct dlm_lkb *lkb)
+ {
+- if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
+- log_print("munge_demoted %x invalid reply type %d",
+- lkb->lkb_id, ms->m_type);
+- return;
+- }
+-
+ if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
+ log_print("munge_demoted %x invalid modes gr %d rq %d",
+ lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
+@@ -2966,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ /* down conversions go without a reply from the master */
+ if (!error && down_conversion(lkb)) {
+ remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
++ r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
+ r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+ r->res_ls->ls_stub_ms.m_result = 0;
+- r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+ __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
+ }
+
+@@ -3156,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
+
+ static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
+ {
++ if (ms->m_flags == DLM_IFL_STUB_MS)
++ return;
++
+ lkb->lkb_sbflags = ms->m_sbflags;
+ lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
+ (ms->m_flags & 0x0000FFFF);
+@@ -3698,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ /* convert was queued on remote master */
+ receive_flags_reply(lkb, ms);
+ if (is_demoted(lkb))
+- munge_demoted(lkb, ms);
++ munge_demoted(lkb);
+ del_lkb(r, lkb);
+ add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+ add_timeout(lkb);
+@@ -3708,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ /* convert was granted on remote master */
+ receive_flags_reply(lkb, ms);
+ if (is_demoted(lkb))
+- munge_demoted(lkb, ms);
++ munge_demoted(lkb);
+ grant_lock_pc(r, lkb, ms);
+ queue_cast(r, lkb, 0);
+ break;
+@@ -4082,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
+ dlm_put_lockspace(ls);
+ }
+
+-static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
++static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
++ struct dlm_message *ms_stub)
+ {
+ if (middle_conversion(lkb)) {
+ hold_lkb(lkb);
+- ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+- ls->ls_stub_ms.m_result = -EINPROGRESS;
+- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+- _receive_convert_reply(lkb, &ls->ls_stub_ms);
++ memset(ms_stub, 0, sizeof(struct dlm_message));
++ ms_stub->m_flags = DLM_IFL_STUB_MS;
++ ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
++ ms_stub->m_result = -EINPROGRESS;
++ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++ _receive_convert_reply(lkb, ms_stub);
+
+ /* Same special case as in receive_rcom_lock_args() */
+ lkb->lkb_grmode = DLM_LOCK_IV;
+@@ -4131,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
+ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+ {
+ struct dlm_lkb *lkb, *safe;
++ struct dlm_message *ms_stub;
+ int wait_type, stub_unlock_result, stub_cancel_result;
+
++ ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
++ if (!ms_stub) {
++ log_error(ls, "dlm_recover_waiters_pre no mem");
++ return;
++ }
++
+ mutex_lock(&ls->ls_waiters_mutex);
+
+ list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
+- log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
+- lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
++
++ /* exclude debug messages about unlocks because there can be so
++ many and they aren't very interesting */
++
++ if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
++ log_debug(ls, "recover_waiter %x nodeid %d "
++ "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
++ lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
++ }
+
+ /* all outstanding lookups, regardless of destination will be
+ resent after recovery is done */
+@@ -4183,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+ break;
+
+ case DLM_MSG_CONVERT:
+- recover_convert_waiter(ls, lkb);
++ recover_convert_waiter(ls, lkb, ms_stub);
+ break;
+
+ case DLM_MSG_UNLOCK:
+ hold_lkb(lkb);
+- ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
+- ls->ls_stub_ms.m_result = stub_unlock_result;
+- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+- _receive_unlock_reply(lkb, &ls->ls_stub_ms);
++ memset(ms_stub, 0, sizeof(struct dlm_message));
++ ms_stub->m_flags = DLM_IFL_STUB_MS;
++ ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
++ ms_stub->m_result = stub_unlock_result;
++ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++ _receive_unlock_reply(lkb, ms_stub);
+ dlm_put_lkb(lkb);
+ break;
+
+ case DLM_MSG_CANCEL:
+ hold_lkb(lkb);
+- ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
+- ls->ls_stub_ms.m_result = stub_cancel_result;
+- ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+- ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+- _receive_cancel_reply(lkb, &ls->ls_stub_ms);
++ memset(ms_stub, 0, sizeof(struct dlm_message));
++ ms_stub->m_flags = DLM_IFL_STUB_MS;
++ ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
++ ms_stub->m_result = stub_cancel_result;
++ ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
++ _receive_cancel_reply(lkb, ms_stub);
+ dlm_put_lkb(lkb);
+ break;
+
+@@ -4213,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+ schedule();
+ }
+ mutex_unlock(&ls->ls_waiters_mutex);
++ kfree(ms_stub);
+ }
+
+ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
+@@ -4277,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
+ ou = is_overlap_unlock(lkb);
+ err = 0;
+
+- log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
+- lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
++ log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
++ lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
+
+ /* At this point we assume that we won't get a reply to any
+ previous op or overlap op on this lock. First, do a big
+--
+1.7.2.5
+
--- /dev/null
+From b9d41052794385f9d47ebb7acf4a772f3ad02398 Mon Sep 17 00:00:00 2001
+From: Namhyung Kim <namhyung@gmail.com>
+Date: Mon, 13 Dec 2010 13:42:24 -0600
+Subject: [PATCH 1/1] dlm: sanitize work_start() in lowcomms.c
+
+The create_workqueue() returns NULL if failed rather than ERR_PTR().
+Fix error checking and remove unnecessary variable 'error'.
+
+Signed-off-by: Namhyung Kim <namhyung@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c | 15 ++++++---------
+ 1 files changed, 6 insertions(+), 9 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 0e75f15..9c64ae9 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1468,22 +1468,19 @@ static void work_stop(void)
+
+ static int work_start(void)
+ {
+- int error;
+ recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+- error = IS_ERR(recv_workqueue);
+- if (error) {
+- log_print("can't start dlm_recv %d", error);
+- return error;
++ if (!recv_workqueue) {
++ log_print("can't start dlm_recv");
++ return -ENOMEM;
+ }
+
+ send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+- error = IS_ERR(send_workqueue);
+- if (error) {
+- log_print("can't start dlm_send %d", error);
++ if (!send_workqueue) {
++ log_print("can't start dlm_send");
+ destroy_workqueue(recv_workqueue);
+- return error;
++ return -ENOMEM;
+ }
+
+ return 0;
+--
+1.7.2.5
+
--- /dev/null
+From 6b155c8fd4d239f7d883d455bbad1be47724bbfc Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland@redhat.com>
+Date: Fri, 11 Feb 2011 16:44:31 -0600
+Subject: [PATCH 1/1] dlm: use single thread workqueues
+
+The recent commit to use cmwq for send and recv threads
+dcce240ead802d42b1e45ad2fcb2ed4a399cb255 introduced problems,
+apparently due to multiple workqueue threads. Single threads
+make the problems go away, so return to that until we fully
+understand the concurrency issues with multiple threads.
+
+Signed-off-by: David Teigland <teigland@redhat.com>
+---
+ fs/dlm/lowcomms.c | 6 ++----
+ 1 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 9c64ae9..2d8c87b 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1468,15 +1468,13 @@ static void work_stop(void)
+
+ static int work_start(void)
+ {
+- recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
+- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
++ recv_workqueue = create_singlethread_workqueue("dlm_recv");
+ if (!recv_workqueue) {
+ log_print("can't start dlm_recv");
+ return -ENOMEM;
+ }
+
+- send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
+- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
++ send_workqueue = create_singlethread_workqueue("dlm_send");
+ if (!send_workqueue) {
+ log_print("can't start dlm_send");
+ destroy_workqueue(recv_workqueue);
+--
+1.7.2.5
+