git.proxmox.com Git - pve-kernel-2.6.32.git/blob - dlm-delayed-reply-message-warning.patch
backport dlm fixes from linux 3.y
[pve-kernel-2.6.32.git] / dlm-delayed-reply-message-warning.patch
1 From c6ff669bac5c409f4cb74366248f51b73f7d6feb Mon Sep 17 00:00:00 2001
2 From: David Teigland <teigland@redhat.com>
3 Date: Mon, 28 Mar 2011 14:17:26 -0500
4 Subject: [PATCH 1/1] dlm: delayed reply message warning
5
6 Add an option (disabled by default) to print a warning message
7 when a lock has been waiting a configurable amount of time for
8 a reply message from another node. This is mainly for debugging.
9
10 Signed-off-by: David Teigland <teigland@redhat.com>
11 ---
12 fs/dlm/config.c | 9 ++++-
13 fs/dlm/config.h | 1 +
14 fs/dlm/dlm_internal.h | 2 +
15 fs/dlm/lock.c | 100 +++++++++++++++++++++++++++++++++++++++++++++---
16 fs/dlm/lock.h | 1 +
17 fs/dlm/lockspace.c | 6 +-
18 6 files changed, 108 insertions(+), 11 deletions(-)
19
20 diff --git a/fs/dlm/config.c b/fs/dlm/config.c
21 index 0d329ff..9b026ea 100644
22 --- a/fs/dlm/config.c
23 +++ b/fs/dlm/config.c
24 @@ -100,6 +100,7 @@ struct dlm_cluster {
25 unsigned int cl_log_debug;
26 unsigned int cl_protocol;
27 unsigned int cl_timewarn_cs;
28 + unsigned int cl_waitwarn_us;
29 };
30
31 enum {
32 @@ -114,6 +115,7 @@ enum {
33 CLUSTER_ATTR_LOG_DEBUG,
34 CLUSTER_ATTR_PROTOCOL,
35 CLUSTER_ATTR_TIMEWARN_CS,
36 + CLUSTER_ATTR_WAITWARN_US,
37 };
38
39 struct cluster_attribute {
40 @@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
41 CLUSTER_ATTR(log_debug, 0);
42 CLUSTER_ATTR(protocol, 0);
43 CLUSTER_ATTR(timewarn_cs, 1);
44 +CLUSTER_ATTR(waitwarn_us, 0);
45
46 static struct configfs_attribute *cluster_attrs[] = {
47 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
48 @@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
49 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
50 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
51 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
52 + [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
53 NULL,
54 };
55
56 @@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
57 cl->cl_log_debug = dlm_config.ci_log_debug;
58 cl->cl_protocol = dlm_config.ci_protocol;
59 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
60 + cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
61
62 space_list = &sps->ss_group;
63 comm_list = &cms->cs_group;
64 @@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
65 #define DEFAULT_LOG_DEBUG 0
66 #define DEFAULT_PROTOCOL 0
67 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
68 +#define DEFAULT_WAITWARN_US 0
69
70 struct dlm_config_info dlm_config = {
71 .ci_tcp_port = DEFAULT_TCP_PORT,
72 @@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
73 .ci_scan_secs = DEFAULT_SCAN_SECS,
74 .ci_log_debug = DEFAULT_LOG_DEBUG,
75 .ci_protocol = DEFAULT_PROTOCOL,
76 - .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
77 + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
78 + .ci_waitwarn_us = DEFAULT_WAITWARN_US
79 };
80
81 diff --git a/fs/dlm/config.h b/fs/dlm/config.h
82 index 4f1d6fc..dd0ce24 100644
83 --- a/fs/dlm/config.h
84 +++ b/fs/dlm/config.h
85 @@ -28,6 +28,7 @@ struct dlm_config_info {
86 int ci_log_debug;
87 int ci_protocol;
88 int ci_timewarn_cs;
89 + int ci_waitwarn_us;
90 };
91
92 extern struct dlm_config_info dlm_config;
93 diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
94 index b942049..6a92478 100644
95 --- a/fs/dlm/dlm_internal.h
96 +++ b/fs/dlm/dlm_internal.h
97 @@ -245,6 +245,7 @@ struct dlm_lkb {
98
99 int8_t lkb_wait_type; /* type of reply waiting for */
100 int8_t lkb_wait_count;
101 + int lkb_wait_nodeid; /* for debugging */
102
103 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
104 struct list_head lkb_statequeue; /* rsb g/c/w list */
105 @@ -254,6 +255,7 @@ struct dlm_lkb {
106 struct list_head lkb_ownqueue; /* list of locks for a process */
107 struct list_head lkb_time_list;
108 ktime_t lkb_timestamp;
109 + ktime_t lkb_wait_time;
110 unsigned long lkb_timeout_cs;
111
112 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
113 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
114 index 04b8c44..e3c8641 100644
115 --- a/fs/dlm/lock.c
116 +++ b/fs/dlm/lock.c
117 @@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
118 return -1;
119 }
120
121 +static int nodeid_warned(int nodeid, int num_nodes, int *warned) /* returns 1 if nodeid is already recorded in warned[], else 0 */
122 +{
123 + int i;
124 +
125 + for (i = 0; i < num_nodes; i++) {
126 + if (!warned[i]) { /* a zero entry is the first unused slot, so nodeid was not seen before */
127 + warned[i] = nodeid; /* record it so a later call with the same nodeid returns 1 */
128 + return 0;
129 + }
130 + if (warned[i] == nodeid)
131 + return 1;
132 + }
133 + return 0; /* all num_nodes slots hold other ids; nothing is recorded */
134 +}
135 +
136 +void dlm_scan_waiters(struct dlm_ls *ls) /* warn about lkbs on ls->ls_waiters that have waited ci_waitwarn_us or longer */
137 +{
138 + struct dlm_lkb *lkb;
139 + ktime_t zero = ktime_set(0, 0);
140 + s64 us;
141 + s64 debug_maxus = 0;
142 + u32 debug_scanned = 0;
143 + u32 debug_expired = 0;
144 + int num_nodes = 0;
145 + int *warned = NULL; /* nodeids already reported during this scan, zero-terminated */
146 +
147 + if (!dlm_config.ci_waitwarn_us) /* a value of 0 disables the scan */
148 + return;
149 +
150 + mutex_lock(&ls->ls_waiters_mutex);
151 +
152 + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
153 + if (ktime_equal(lkb->lkb_wait_time, zero)) /* zeroed below once counted as expired */
154 + continue;
155 +
156 + debug_scanned++;
157 +
158 + us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
159 +
160 + if (us < dlm_config.ci_waitwarn_us)
161 + continue;
162 +
163 + lkb->lkb_wait_time = zero; /* so this lkb is skipped on later scans */
164 +
165 + debug_expired++;
166 + if (us > debug_maxus)
167 + debug_maxus = us;
168 +
169 + if (!num_nodes) { /* allocate the per-scan table lazily, on the first expired lkb */
170 + num_nodes = ls->ls_num_nodes;
171 + warned = kmalloc(num_nodes * sizeof(int), GFP_KERNEL); /* kmalloc takes (size, flags) */
172 + if (warned)
173 + memset(warned, 0, num_nodes * sizeof(int));
174 + }
175 + if (!warned) /* allocation failed: keep counting but log no per-node warnings */
176 + continue;
177 + if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned)) /* at most one warning per node per scan */
178 + continue;
179 +
180 + log_error(ls, "waitwarn %x %lld %d us check connection to "
181 + "node %d", lkb->lkb_id, (long long)us,
182 + dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
183 + }
184 + mutex_unlock(&ls->ls_waiters_mutex);
185 +
186 + if (warned)
187 + kfree(warned);
188 +
189 + if (debug_expired)
190 + log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
191 + debug_scanned, debug_expired,
192 + dlm_config.ci_waitwarn_us, (long long)debug_maxus);
193 +}
194 +
195 /* add/remove lkb from global waiters list of lkb's waiting for
196 a reply from a remote node */
197
198 -static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
199 +static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
200 {
201 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
202 int error = 0;
203 @@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
204
205 lkb->lkb_wait_count++;
206 lkb->lkb_wait_type = mstype;
207 + lkb->lkb_wait_time = ktime_get();
208 + lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
209 hold_lkb(lkb);
210 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
211 out:
212 @@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
213 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
214 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
215 mutex_unlock(&ls->ls_timeout_mutex);
216 +
217 + if (!dlm_config.ci_waitwarn_us)
218 + return;
219 +
220 + mutex_lock(&ls->ls_waiters_mutex);
221 + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
222 + if (ktime_to_us(lkb->lkb_wait_time))
223 + lkb->lkb_wait_time = ktime_get();
224 + }
225 + mutex_unlock(&ls->ls_waiters_mutex);
226 }
227
228 /* lkb is master or local copy */
229 @@ -2844,12 +2930,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
230 struct dlm_mhandle *mh;
231 int to_nodeid, error;
232
233 - error = add_to_waiters(lkb, mstype);
234 + to_nodeid = r->res_nodeid;
235 +
236 + error = add_to_waiters(lkb, mstype, to_nodeid);
237 if (error)
238 return error;
239
240 - to_nodeid = r->res_nodeid;
241 -
242 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
243 if (error)
244 goto fail;
245 @@ -2951,12 +3037,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
246 struct dlm_mhandle *mh;
247 int to_nodeid, error;
248
249 - error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
250 + to_nodeid = dlm_dir_nodeid(r);
251 +
252 + error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
253 if (error)
254 return error;
255
256 - to_nodeid = dlm_dir_nodeid(r);
257 -
258 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
259 if (error)
260 goto fail;
261 diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
262 index 88e93c8..265017a 100644
263 --- a/fs/dlm/lock.h
264 +++ b/fs/dlm/lock.h
265 @@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
266 void dlm_scan_rsbs(struct dlm_ls *ls);
267 int dlm_lock_recovery_try(struct dlm_ls *ls);
268 void dlm_unlock_recovery(struct dlm_ls *ls);
269 +void dlm_scan_waiters(struct dlm_ls *ls);
270 void dlm_scan_timeout(struct dlm_ls *ls);
271 void dlm_adjust_timeouts(struct dlm_ls *ls);
272
273 diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
274 index f994a7d..14cbf40 100644
275 --- a/fs/dlm/lockspace.c
276 +++ b/fs/dlm/lockspace.c
277 @@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
278 static int dlm_scand(void *data)
279 {
280 struct dlm_ls *ls;
281 - int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
282
283 while (!kthread_should_stop()) {
284 ls = find_ls_to_scan();
285 @@ -252,13 +251,14 @@ static int dlm_scand(void *data)
286 ls->ls_scan_time = jiffies;
287 dlm_scan_rsbs(ls);
288 dlm_scan_timeout(ls);
289 + dlm_scan_waiters(ls);
290 dlm_unlock_recovery(ls);
291 } else {
292 ls->ls_scan_time += HZ;
293 }
294 - } else {
295 - schedule_timeout_interruptible(timeout_jiffies);
296 + continue;
297 }
298 + schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
299 }
300 return 0;
301 }
302 --
303 1.7.2.5
304