git.proxmox.com Git - pve-kernel-2.6.32.git/blob - dlm-remove-shared-message-stub-for-recovery.patch
backport dlm fixes from linux 3.y
[pve-kernel-2.6.32.git] / dlm-remove-shared-message-stub-for-recovery.patch
1 From 2a7ce0edd661b3144c7b916ecf1eba0967b6d4a5 Mon Sep 17 00:00:00 2001
2 From: David Teigland <teigland@redhat.com>
3 Date: Mon, 4 Apr 2011 15:19:59 -0500
4 Subject: [PATCH 1/1] dlm: remove shared message stub for recovery
5
6 kmalloc a stub message struct during recovery instead of sharing the
7 struct in the lockspace. This leaves the lockspace stub_ms only for
8 faking downconvert replies, where it is never modified and sharing
9 is not a problem.
10
11 Also improve the debug messages in the same recovery function.
12
13 Signed-off-by: David Teigland <teigland@redhat.com>
14 ---
15 fs/dlm/dlm_internal.h | 1 +
16 fs/dlm/lock.c | 82 +++++++++++++++++++++++++++++-------------------
17 2 files changed, 50 insertions(+), 33 deletions(-)
18
19 diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
20 index 6a92478..0262451 100644
21 --- a/fs/dlm/dlm_internal.h
22 +++ b/fs/dlm/dlm_internal.h
23 @@ -209,6 +209,7 @@ struct dlm_args {
24 #define DLM_IFL_WATCH_TIMEWARN 0x00400000
25 #define DLM_IFL_TIMEOUT_CANCEL 0x00800000
26 #define DLM_IFL_DEADLOCK_CANCEL 0x01000000
27 +#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
28 #define DLM_IFL_USER 0x00000001
29 #define DLM_IFL_ORPHAN 0x00000002
30
31 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
32 index e3c8641..8122779 100644
33 --- a/fs/dlm/lock.c
34 +++ b/fs/dlm/lock.c
35 @@ -1037,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
36 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
37 int error;
38
39 - if (ms != &ls->ls_stub_ms)
40 + if (ms->m_flags != DLM_IFL_STUB_MS)
41 mutex_lock(&ls->ls_waiters_mutex);
42 error = _remove_from_waiters(lkb, ms->m_type, ms);
43 - if (ms != &ls->ls_stub_ms)
44 + if (ms->m_flags != DLM_IFL_STUB_MS)
45 mutex_unlock(&ls->ls_waiters_mutex);
46 return error;
47 }
48 @@ -1462,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
49 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
50 compatible with other granted locks */
51
52 -static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
53 +static void munge_demoted(struct dlm_lkb *lkb)
54 {
55 - if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
56 - log_print("munge_demoted %x invalid reply type %d",
57 - lkb->lkb_id, ms->m_type);
58 - return;
59 - }
60 -
61 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
62 log_print("munge_demoted %x invalid modes gr %d rq %d",
63 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
64 @@ -2966,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
65 /* down conversions go without a reply from the master */
66 if (!error && down_conversion(lkb)) {
67 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
68 + r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
69 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
70 r->res_ls->ls_stub_ms.m_result = 0;
71 - r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
72 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
73 }
74
75 @@ -3156,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
76
77 static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
78 {
79 + if (ms->m_flags == DLM_IFL_STUB_MS)
80 + return;
81 +
82 lkb->lkb_sbflags = ms->m_sbflags;
83 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
84 (ms->m_flags & 0x0000FFFF);
85 @@ -3698,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
86 /* convert was queued on remote master */
87 receive_flags_reply(lkb, ms);
88 if (is_demoted(lkb))
89 - munge_demoted(lkb, ms);
90 + munge_demoted(lkb);
91 del_lkb(r, lkb);
92 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
93 add_timeout(lkb);
94 @@ -3708,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
95 /* convert was granted on remote master */
96 receive_flags_reply(lkb, ms);
97 if (is_demoted(lkb))
98 - munge_demoted(lkb, ms);
99 + munge_demoted(lkb);
100 grant_lock_pc(r, lkb, ms);
101 queue_cast(r, lkb, 0);
102 break;
103 @@ -4082,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
104 dlm_put_lockspace(ls);
105 }
106
107 -static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
108 +static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
109 + struct dlm_message *ms_stub)
110 {
111 if (middle_conversion(lkb)) {
112 hold_lkb(lkb);
113 - ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
114 - ls->ls_stub_ms.m_result = -EINPROGRESS;
115 - ls->ls_stub_ms.m_flags = lkb->lkb_flags;
116 - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
117 - _receive_convert_reply(lkb, &ls->ls_stub_ms);
118 + memset(ms_stub, 0, sizeof(struct dlm_message));
119 + ms_stub->m_flags = DLM_IFL_STUB_MS;
120 + ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
121 + ms_stub->m_result = -EINPROGRESS;
122 + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
123 + _receive_convert_reply(lkb, ms_stub);
124
125 /* Same special case as in receive_rcom_lock_args() */
126 lkb->lkb_grmode = DLM_LOCK_IV;
127 @@ -4131,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
128 void dlm_recover_waiters_pre(struct dlm_ls *ls)
129 {
130 struct dlm_lkb *lkb, *safe;
131 + struct dlm_message *ms_stub;
132 int wait_type, stub_unlock_result, stub_cancel_result;
133
134 + ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
135 + if (!ms_stub) {
136 + log_error(ls, "dlm_recover_waiters_pre no mem");
137 + return; /* bail out before taking ls_waiters_mutex or walking ls_waiters */
138 + }
139 +
140 mutex_lock(&ls->ls_waiters_mutex);
141
142 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
143 - log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
144 - lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
145 +
146 + /* exclude debug messages about unlocks because there can be so
147 + many and they aren't very interesting */
148 +
149 + if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
150 + log_debug(ls, "recover_waiter %x nodeid %d "
151 + "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
152 + lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
153 + }
154
155 /* all outstanding lookups, regardless of destination will be
156 resent after recovery is done */
157 @@ -4183,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
158 break;
159
160 case DLM_MSG_CONVERT:
161 - recover_convert_waiter(ls, lkb);
162 + recover_convert_waiter(ls, lkb, ms_stub);
163 break;
164
165 case DLM_MSG_UNLOCK:
166 hold_lkb(lkb);
167 - ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
168 - ls->ls_stub_ms.m_result = stub_unlock_result;
169 - ls->ls_stub_ms.m_flags = lkb->lkb_flags;
170 - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
171 - _receive_unlock_reply(lkb, &ls->ls_stub_ms);
172 + memset(ms_stub, 0, sizeof(struct dlm_message));
173 + ms_stub->m_flags = DLM_IFL_STUB_MS;
174 + ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
175 + ms_stub->m_result = stub_unlock_result;
176 + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
177 + _receive_unlock_reply(lkb, ms_stub);
178 dlm_put_lkb(lkb);
179 break;
180
181 case DLM_MSG_CANCEL:
182 hold_lkb(lkb);
183 - ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
184 - ls->ls_stub_ms.m_result = stub_cancel_result;
185 - ls->ls_stub_ms.m_flags = lkb->lkb_flags;
186 - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
187 - _receive_cancel_reply(lkb, &ls->ls_stub_ms);
188 + memset(ms_stub, 0, sizeof(struct dlm_message));
189 + ms_stub->m_flags = DLM_IFL_STUB_MS;
190 + ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
191 + ms_stub->m_result = stub_cancel_result;
192 + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
193 + _receive_cancel_reply(lkb, ms_stub);
194 dlm_put_lkb(lkb);
195 break;
196
197 @@ -4213,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
198 schedule();
199 }
200 mutex_unlock(&ls->ls_waiters_mutex);
201 + kfree(ms_stub);
202 }
203
204 static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
205 @@ -4277,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
206 ou = is_overlap_unlock(lkb);
207 err = 0;
208
209 - log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
210 - lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
211 + log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
212 + lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
213
214 /* At this point we assume that we won't get a reply to any
215 previous op or overlap op on this lock. First, do a big
216 --
217 1.7.2.5
218