]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - fs/dlm/lock.c
[DLM] fix send_args() lvb copying
[mirror_ubuntu-zesty-kernel.git] / fs / dlm / lock.c
CommitLineData
e7fd4179
DT
1/******************************************************************************
2*******************************************************************************
3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
5**
6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions
8** of the GNU General Public License v.2.
9**
10*******************************************************************************
11******************************************************************************/
12
13/* Central locking logic has four stages:
14
15 dlm_lock()
16 dlm_unlock()
17
18 request_lock(ls, lkb)
19 convert_lock(ls, lkb)
20 unlock_lock(ls, lkb)
21 cancel_lock(ls, lkb)
22
23 _request_lock(r, lkb)
24 _convert_lock(r, lkb)
25 _unlock_lock(r, lkb)
26 _cancel_lock(r, lkb)
27
28 do_request(r, lkb)
29 do_convert(r, lkb)
30 do_unlock(r, lkb)
31 do_cancel(r, lkb)
32
33 Stage 1 (lock, unlock) is mainly about checking input args and
34 splitting into one of the four main operations:
35
36 dlm_lock = request_lock
37 dlm_lock+CONVERT = convert_lock
38 dlm_unlock = unlock_lock
39 dlm_unlock+CANCEL = cancel_lock
40
41 Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is
42 provided to the next stage.
43
44 Stage 3, _xxxx_lock(), determines if the operation is local or remote.
45 When remote, it calls send_xxxx(), when local it calls do_xxxx().
46
47 Stage 4, do_xxxx(), is the guts of the operation. It manipulates the
48 given rsb and lkb and queues callbacks.
49
50 For remote operations, send_xxxx() results in the corresponding do_xxxx()
51 function being executed on the remote node. The connecting send/receive
52 calls on local (L) and remote (R) nodes:
53
54 L: send_xxxx() -> R: receive_xxxx()
55 R: do_xxxx()
56 L: receive_xxxx_reply() <- R: send_xxxx_reply()
57*/
597d0cae 58#include <linux/types.h>
e7fd4179 59#include "dlm_internal.h"
597d0cae 60#include <linux/dlm_device.h>
e7fd4179
DT
61#include "memory.h"
62#include "lowcomms.h"
63#include "requestqueue.h"
64#include "util.h"
65#include "dir.h"
66#include "member.h"
67#include "lockspace.h"
68#include "ast.h"
69#include "lock.h"
70#include "rcom.h"
71#include "recover.h"
72#include "lvb_table.h"
597d0cae 73#include "user.h"
e7fd4179
DT
74#include "config.h"
75
/* Forward declarations for helpers defined later in this file. */
static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
				    struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
88
89/*
90 * Lock compatibility matrix - thanks Steve
91 * UN = Unlocked state. Not really a state, used as a flag
92 * PD = Padding. Used to make the matrix a nice power of two in size
93 * Other states are the same as the VMS DLM.
94 * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same)
95 */
96
/* Rows and columns are both indexed with (mode + 1); a 1 entry marks the
   two modes as compatible, a 0 entry marks them as conflicting. */
static const int __dlm_compat_matrix[8][8] = {
	/*         UN NL CR CW PR PW EX PD */
	/* UN */ { 1, 1, 1, 1, 1, 1, 1, 0 },
	/* NL */ { 1, 1, 1, 1, 1, 1, 1, 0 },
	/* CR */ { 1, 1, 1, 1, 1, 1, 0, 0 },
	/* CW */ { 1, 1, 1, 1, 0, 0, 0, 0 },
	/* PR */ { 1, 1, 1, 0, 1, 0, 0, 0 },
	/* PW */ { 1, 1, 1, 0, 0, 0, 0, 0 },
	/* EX */ { 1, 1, 0, 0, 0, 0, 0, 0 },
	/* PD */ { 0, 0, 0, 0, 0, 0, 0, 0 }
};
108
109/*
110 * This defines the direction of transfer of LVB data.
111 * Granted mode is the row; requested mode is the column.
112 * Usage: matrix[grmode+1][rqmode+1]
113 * 1 = LVB is returned to the caller
114 * 0 = LVB is written to the resource
115 * -1 = nothing happens to the LVB
116 */
117
/* LVB transfer direction, looked up as [grmode + 1][rqmode + 1]:
   1 = copy LVB to the caller, 0 = write LVB to the resource,
   -1 = leave the LVB alone. */
const int dlm_lvb_operations[8][8] = {
	/*          UN  NL  CR  CW  PR  PW  EX  PD */
	/* UN */ { -1,  1,  1,  1,  1,  1,  1, -1 },
	/* NL */ { -1,  1,  1,  1,  1,  1,  1,  0 },
	/* CR */ { -1, -1,  1,  1,  1,  1,  1,  0 },
	/* CW */ { -1, -1, -1,  1,  1,  1,  1,  0 },
	/* PR */ { -1, -1, -1, -1,  1,  1,  1,  0 },
	/* PW */ { -1,  0,  0,  0,  0,  0,  1,  0 },
	/* EX */ { -1,  0,  0,  0,  0,  0,  0,  0 },
	/* PD */ { -1,  0,  0,  0,  0,  0,  0,  0 }
};
e7fd4179
DT
129
/* Compatibility of gr's granted mode with rq's requested mode; both
   arguments are lkb pointers. */
#define modes_compat(gr, rq) \
	__dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]
132
133int dlm_modes_compat(int mode1, int mode2)
134{
135 return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
136}
137
138/*
139 * Compatibility matrix for conversions with QUECVT set.
140 * Granted mode is the row; requested mode is the column.
141 * Usage: matrix[grmode+1][rqmode+1]
142 */
143
/* QUECVT conversion table, looked up as [grmode + 1][rqmode + 1]. */
static const int __quecvt_compat_matrix[8][8] = {
	/*         UN NL CR CW PR PW EX PD */
	/* UN */ { 0, 0, 0, 0, 0, 0, 0, 0 },
	/* NL */ { 0, 0, 1, 1, 1, 1, 1, 0 },
	/* CR */ { 0, 0, 0, 1, 1, 1, 1, 0 },
	/* CW */ { 0, 0, 0, 0, 1, 1, 1, 0 },
	/* PR */ { 0, 0, 0, 1, 0, 1, 1, 0 },
	/* PW */ { 0, 0, 0, 0, 0, 0, 1, 0 },
	/* EX */ { 0, 0, 0, 0, 0, 0, 0, 0 },
	/* PD */ { 0, 0, 0, 0, 0, 0, 0, 0 }
};
155
597d0cae 156void dlm_print_lkb(struct dlm_lkb *lkb)
e7fd4179
DT
157{
158 printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n"
159 " status %d rqmode %d grmode %d wait_type %d ast_type %d\n",
160 lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
161 lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
162 lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type);
163}
164
165void dlm_print_rsb(struct dlm_rsb *r)
166{
167 printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n",
168 r->res_nodeid, r->res_flags, r->res_first_lkid,
169 r->res_recover_locks_count, r->res_name);
170}
171
a345da3e
DT
172void dlm_dump_rsb(struct dlm_rsb *r)
173{
174 struct dlm_lkb *lkb;
175
176 dlm_print_rsb(r);
177
178 printk(KERN_ERR "rsb: root_list empty %d recover_list empty %d\n",
179 list_empty(&r->res_root_list), list_empty(&r->res_recover_list));
180 printk(KERN_ERR "rsb lookup list\n");
181 list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup)
182 dlm_print_lkb(lkb);
183 printk(KERN_ERR "rsb grant queue:\n");
184 list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
185 dlm_print_lkb(lkb);
186 printk(KERN_ERR "rsb convert queue:\n");
187 list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
188 dlm_print_lkb(lkb);
189 printk(KERN_ERR "rsb wait queue:\n");
190 list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
191 dlm_print_lkb(lkb);
192}
193
e7fd4179
DT
194/* Threads cannot use the lockspace while it's being recovered */
195
196static inline void lock_recovery(struct dlm_ls *ls)
197{
198 down_read(&ls->ls_in_recovery);
199}
200
201static inline void unlock_recovery(struct dlm_ls *ls)
202{
203 up_read(&ls->ls_in_recovery);
204}
205
206static inline int lock_recovery_try(struct dlm_ls *ls)
207{
208 return down_read_trylock(&ls->ls_in_recovery);
209}
210
211static inline int can_be_queued(struct dlm_lkb *lkb)
212{
213 return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE);
214}
215
216static inline int force_blocking_asts(struct dlm_lkb *lkb)
217{
218 return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST);
219}
220
221static inline int is_demoted(struct dlm_lkb *lkb)
222{
223 return (lkb->lkb_sbflags & DLM_SBF_DEMOTED);
224}
225
226static inline int is_remote(struct dlm_rsb *r)
227{
228 DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r););
229 return !!r->res_nodeid;
230}
231
232static inline int is_process_copy(struct dlm_lkb *lkb)
233{
234 return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY));
235}
236
237static inline int is_master_copy(struct dlm_lkb *lkb)
238{
239 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
240 DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb););
90135925 241 return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0;
e7fd4179
DT
242}
243
244static inline int middle_conversion(struct dlm_lkb *lkb)
245{
246 if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) ||
247 (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW))
90135925
DT
248 return 1;
249 return 0;
e7fd4179
DT
250}
251
252static inline int down_conversion(struct dlm_lkb *lkb)
253{
254 return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
255}
256
257static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
258{
259 if (is_master_copy(lkb))
260 return;
261
262 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
263
264 lkb->lkb_lksb->sb_status = rv;
265 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
266
267 dlm_add_ast(lkb, AST_COMP);
268}
269
270static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
271{
272 if (is_master_copy(lkb))
273 send_bast(r, lkb, rqmode);
274 else {
275 lkb->lkb_bastmode = rqmode;
276 dlm_add_ast(lkb, AST_BAST);
277 }
278}
279
280/*
281 * Basic operations on rsb's and lkb's
282 */
283
284static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
285{
286 struct dlm_rsb *r;
287
288 r = allocate_rsb(ls, len);
289 if (!r)
290 return NULL;
291
292 r->res_ls = ls;
293 r->res_length = len;
294 memcpy(r->res_name, name, len);
90135925 295 mutex_init(&r->res_mutex);
e7fd4179
DT
296
297 INIT_LIST_HEAD(&r->res_lookup);
298 INIT_LIST_HEAD(&r->res_grantqueue);
299 INIT_LIST_HEAD(&r->res_convertqueue);
300 INIT_LIST_HEAD(&r->res_waitqueue);
301 INIT_LIST_HEAD(&r->res_root_list);
302 INIT_LIST_HEAD(&r->res_recover_list);
303
304 return r;
305}
306
307static int search_rsb_list(struct list_head *head, char *name, int len,
308 unsigned int flags, struct dlm_rsb **r_ret)
309{
310 struct dlm_rsb *r;
311 int error = 0;
312
313 list_for_each_entry(r, head, res_hashchain) {
314 if (len == r->res_length && !memcmp(name, r->res_name, len))
315 goto found;
316 }
597d0cae 317 return -EBADR;
e7fd4179
DT
318
319 found:
320 if (r->res_nodeid && (flags & R_MASTER))
321 error = -ENOTBLK;
322 *r_ret = r;
323 return error;
324}
325
326static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
327 unsigned int flags, struct dlm_rsb **r_ret)
328{
329 struct dlm_rsb *r;
330 int error;
331
332 error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r);
333 if (!error) {
334 kref_get(&r->res_ref);
335 goto out;
336 }
337 error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
338 if (error)
339 goto out;
340
341 list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list);
342
343 if (dlm_no_directory(ls))
344 goto out;
345
346 if (r->res_nodeid == -1) {
347 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
348 r->res_first_lkid = 0;
349 } else if (r->res_nodeid > 0) {
350 rsb_set_flag(r, RSB_MASTER_UNCERTAIN);
351 r->res_first_lkid = 0;
352 } else {
353 DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r););
354 DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),);
355 }
356 out:
357 *r_ret = r;
358 return error;
359}
360
361static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
362 unsigned int flags, struct dlm_rsb **r_ret)
363{
364 int error;
365 write_lock(&ls->ls_rsbtbl[b].lock);
366 error = _search_rsb(ls, name, len, b, flags, r_ret);
367 write_unlock(&ls->ls_rsbtbl[b].lock);
368 return error;
369}
370
371/*
372 * Find rsb in rsbtbl and potentially create/add one
373 *
374 * Delaying the release of rsb's has a similar benefit to applications keeping
375 * NL locks on an rsb, but without the guarantee that the cached master value
376 * will still be valid when the rsb is reused. Apps aren't always smart enough
377 * to keep NL locks on an rsb that they may lock again shortly; this can lead
378 * to excessive master lookups and removals if we don't delay the release.
379 *
380 * Searching for an rsb means looking through both the normal list and toss
381 * list. When found on the toss list the rsb is moved to the normal list with
382 * ref count of 1; when found on normal list the ref count is incremented.
383 */
384
385static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
386 unsigned int flags, struct dlm_rsb **r_ret)
387{
388 struct dlm_rsb *r, *tmp;
389 uint32_t hash, bucket;
390 int error = 0;
391
392 if (dlm_no_directory(ls))
393 flags |= R_CREATE;
394
395 hash = jhash(name, namelen, 0);
396 bucket = hash & (ls->ls_rsbtbl_size - 1);
397
398 error = search_rsb(ls, name, namelen, bucket, flags, &r);
399 if (!error)
400 goto out;
401
597d0cae 402 if (error == -EBADR && !(flags & R_CREATE))
e7fd4179
DT
403 goto out;
404
405 /* the rsb was found but wasn't a master copy */
406 if (error == -ENOTBLK)
407 goto out;
408
409 error = -ENOMEM;
410 r = create_rsb(ls, name, namelen);
411 if (!r)
412 goto out;
413
414 r->res_hash = hash;
415 r->res_bucket = bucket;
416 r->res_nodeid = -1;
417 kref_init(&r->res_ref);
418
419 /* With no directory, the master can be set immediately */
420 if (dlm_no_directory(ls)) {
421 int nodeid = dlm_dir_nodeid(r);
422 if (nodeid == dlm_our_nodeid())
423 nodeid = 0;
424 r->res_nodeid = nodeid;
425 }
426
427 write_lock(&ls->ls_rsbtbl[bucket].lock);
428 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
429 if (!error) {
430 write_unlock(&ls->ls_rsbtbl[bucket].lock);
431 free_rsb(r);
432 r = tmp;
433 goto out;
434 }
435 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
436 write_unlock(&ls->ls_rsbtbl[bucket].lock);
437 error = 0;
438 out:
439 *r_ret = r;
440 return error;
441}
442
/* Exported entry point; forwards all arguments to find_rsb(). */
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
		 unsigned int flags, struct dlm_rsb **r_ret)
{
	int rv = find_rsb(ls, name, namelen, flags, r_ret);

	return rv;
}
448
449/* This is only called to add a reference when the code already holds
450 a valid reference to the rsb, so there's no need for locking. */
451
452static inline void hold_rsb(struct dlm_rsb *r)
453{
454 kref_get(&r->res_ref);
455}
456
/* Exported wrapper around hold_rsb(). */
void dlm_hold_rsb(struct dlm_rsb *r)
{
	hold_rsb(r);
}
461
462static void toss_rsb(struct kref *kref)
463{
464 struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
465 struct dlm_ls *ls = r->res_ls;
466
467 DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
468 kref_init(&r->res_ref);
469 list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
470 r->res_toss_time = jiffies;
471 if (r->res_lvbptr) {
472 free_lvb(r->res_lvbptr);
473 r->res_lvbptr = NULL;
474 }
475}
476
477/* When all references to the rsb are gone it's transferred to
478 the tossed list for later disposal. */
479
480static void put_rsb(struct dlm_rsb *r)
481{
482 struct dlm_ls *ls = r->res_ls;
483 uint32_t bucket = r->res_bucket;
484
485 write_lock(&ls->ls_rsbtbl[bucket].lock);
486 kref_put(&r->res_ref, toss_rsb);
487 write_unlock(&ls->ls_rsbtbl[bucket].lock);
488}
489
/* Exported wrapper around put_rsb(). */
void dlm_put_rsb(struct dlm_rsb *r)
{
	put_rsb(r);
}
494
495/* See comment for unhold_lkb */
496
497static void unhold_rsb(struct dlm_rsb *r)
498{
499 int rv;
500 rv = kref_put(&r->res_ref, toss_rsb);
a345da3e 501 DLM_ASSERT(!rv, dlm_dump_rsb(r););
e7fd4179
DT
502}
503
504static void kill_rsb(struct kref *kref)
505{
506 struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
507
508 /* All work is done after the return from kref_put() so we
509 can release the write_lock before the remove and free. */
510
a345da3e
DT
511 DLM_ASSERT(list_empty(&r->res_lookup), dlm_dump_rsb(r););
512 DLM_ASSERT(list_empty(&r->res_grantqueue), dlm_dump_rsb(r););
513 DLM_ASSERT(list_empty(&r->res_convertqueue), dlm_dump_rsb(r););
514 DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r););
515 DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r););
516 DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r););
e7fd4179
DT
517}
518
519/* Attaching/detaching lkb's from rsb's is for rsb reference counting.
520 The rsb must exist as long as any lkb's for it do. */
521
522static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
523{
524 hold_rsb(r);
525 lkb->lkb_resource = r;
526}
527
528static void detach_lkb(struct dlm_lkb *lkb)
529{
530 if (lkb->lkb_resource) {
531 put_rsb(lkb->lkb_resource);
532 lkb->lkb_resource = NULL;
533 }
534}
535
536static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
537{
538 struct dlm_lkb *lkb, *tmp;
539 uint32_t lkid = 0;
540 uint16_t bucket;
541
542 lkb = allocate_lkb(ls);
543 if (!lkb)
544 return -ENOMEM;
545
546 lkb->lkb_nodeid = -1;
547 lkb->lkb_grmode = DLM_LOCK_IV;
548 kref_init(&lkb->lkb_ref);
34e22bed 549 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
e7fd4179
DT
550
551 get_random_bytes(&bucket, sizeof(bucket));
552 bucket &= (ls->ls_lkbtbl_size - 1);
553
554 write_lock(&ls->ls_lkbtbl[bucket].lock);
555
556 /* counter can roll over so we must verify lkid is not in use */
557
558 while (lkid == 0) {
559 lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16);
560
561 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
562 lkb_idtbl_list) {
563 if (tmp->lkb_id != lkid)
564 continue;
565 lkid = 0;
566 break;
567 }
568 }
569
570 lkb->lkb_id = lkid;
571 list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
572 write_unlock(&ls->ls_lkbtbl[bucket].lock);
573
574 *lkb_ret = lkb;
575 return 0;
576}
577
578static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
579{
580 uint16_t bucket = lkid & 0xFFFF;
581 struct dlm_lkb *lkb;
582
583 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
584 if (lkb->lkb_id == lkid)
585 return lkb;
586 }
587 return NULL;
588}
589
590static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
591{
592 struct dlm_lkb *lkb;
593 uint16_t bucket = lkid & 0xFFFF;
594
595 if (bucket >= ls->ls_lkbtbl_size)
596 return -EBADSLT;
597
598 read_lock(&ls->ls_lkbtbl[bucket].lock);
599 lkb = __find_lkb(ls, lkid);
600 if (lkb)
601 kref_get(&lkb->lkb_ref);
602 read_unlock(&ls->ls_lkbtbl[bucket].lock);
603
604 *lkb_ret = lkb;
605 return lkb ? 0 : -ENOENT;
606}
607
608static void kill_lkb(struct kref *kref)
609{
610 struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
611
612 /* All work is done after the return from kref_put() so we
613 can release the write_lock before the detach_lkb */
614
615 DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
616}
617
b3f58d8f
DT
618/* __put_lkb() is used when an lkb may not have an rsb attached to
619 it so we need to provide the lockspace explicitly */
620
621static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
e7fd4179 622{
e7fd4179
DT
623 uint16_t bucket = lkb->lkb_id & 0xFFFF;
624
625 write_lock(&ls->ls_lkbtbl[bucket].lock);
626 if (kref_put(&lkb->lkb_ref, kill_lkb)) {
627 list_del(&lkb->lkb_idtbl_list);
628 write_unlock(&ls->ls_lkbtbl[bucket].lock);
629
630 detach_lkb(lkb);
631
632 /* for local/process lkbs, lvbptr points to caller's lksb */
633 if (lkb->lkb_lvbptr && is_master_copy(lkb))
634 free_lvb(lkb->lkb_lvbptr);
e7fd4179
DT
635 free_lkb(lkb);
636 return 1;
637 } else {
638 write_unlock(&ls->ls_lkbtbl[bucket].lock);
639 return 0;
640 }
641}
642
643int dlm_put_lkb(struct dlm_lkb *lkb)
644{
b3f58d8f
DT
645 struct dlm_ls *ls;
646
647 DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb););
648 DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb););
649
650 ls = lkb->lkb_resource->res_ls;
651 return __put_lkb(ls, lkb);
e7fd4179
DT
652}
653
654/* This is only called to add a reference when the code already holds
655 a valid reference to the lkb, so there's no need for locking. */
656
657static inline void hold_lkb(struct dlm_lkb *lkb)
658{
659 kref_get(&lkb->lkb_ref);
660}
661
662/* This is called when we need to remove a reference and are certain
663 it's not the last ref. e.g. del_lkb is always called between a
664 find_lkb/put_lkb and is always the inverse of a previous add_lkb.
665 put_lkb would work fine, but would involve unnecessary locking */
666
667static inline void unhold_lkb(struct dlm_lkb *lkb)
668{
669 int rv;
670 rv = kref_put(&lkb->lkb_ref, kill_lkb);
671 DLM_ASSERT(!rv, dlm_print_lkb(lkb););
672}
673
674static void lkb_add_ordered(struct list_head *new, struct list_head *head,
675 int mode)
676{
677 struct dlm_lkb *lkb = NULL;
678
679 list_for_each_entry(lkb, head, lkb_statequeue)
680 if (lkb->lkb_rqmode < mode)
681 break;
682
683 if (!lkb)
684 list_add_tail(new, head);
685 else
686 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
687}
688
689/* add/remove lkb to rsb's grant/convert/wait queue */
690
691static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status)
692{
693 kref_get(&lkb->lkb_ref);
694
695 DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
696
697 lkb->lkb_status = status;
698
699 switch (status) {
700 case DLM_LKSTS_WAITING:
701 if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
702 list_add(&lkb->lkb_statequeue, &r->res_waitqueue);
703 else
704 list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue);
705 break;
706 case DLM_LKSTS_GRANTED:
707 /* convention says granted locks kept in order of grmode */
708 lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue,
709 lkb->lkb_grmode);
710 break;
711 case DLM_LKSTS_CONVERT:
712 if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
713 list_add(&lkb->lkb_statequeue, &r->res_convertqueue);
714 else
715 list_add_tail(&lkb->lkb_statequeue,
716 &r->res_convertqueue);
717 break;
718 default:
719 DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status););
720 }
721}
722
723static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
724{
725 lkb->lkb_status = 0;
726 list_del(&lkb->lkb_statequeue);
727 unhold_lkb(lkb);
728}
729
/* Move the lkb from its current queue to the queue for status `sts`. */
static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
{
	/* temporary reference spanning the del/add pair */
	hold_lkb(lkb);

	del_lkb(r, lkb);
	add_lkb(r, lkb, sts);

	unhold_lkb(lkb);
}
737
738/* add/remove lkb from global waiters list of lkb's waiting for
739 a reply from a remote node */
740
741static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
742{
743 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
744
90135925 745 mutex_lock(&ls->ls_waiters_mutex);
e7fd4179
DT
746 if (lkb->lkb_wait_type) {
747 log_print("add_to_waiters error %d", lkb->lkb_wait_type);
748 goto out;
749 }
750 lkb->lkb_wait_type = mstype;
751 kref_get(&lkb->lkb_ref);
752 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
753 out:
90135925 754 mutex_unlock(&ls->ls_waiters_mutex);
e7fd4179
DT
755}
756
757static int _remove_from_waiters(struct dlm_lkb *lkb)
758{
759 int error = 0;
760
761 if (!lkb->lkb_wait_type) {
762 log_print("remove_from_waiters error");
763 error = -EINVAL;
764 goto out;
765 }
766 lkb->lkb_wait_type = 0;
767 list_del(&lkb->lkb_wait_reply);
768 unhold_lkb(lkb);
769 out:
770 return error;
771}
772
773static int remove_from_waiters(struct dlm_lkb *lkb)
774{
775 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
776 int error;
777
90135925 778 mutex_lock(&ls->ls_waiters_mutex);
e7fd4179 779 error = _remove_from_waiters(lkb);
90135925 780 mutex_unlock(&ls->ls_waiters_mutex);
e7fd4179
DT
781 return error;
782}
783
784static void dir_remove(struct dlm_rsb *r)
785{
786 int to_nodeid;
787
788 if (dlm_no_directory(r->res_ls))
789 return;
790
791 to_nodeid = dlm_dir_nodeid(r);
792 if (to_nodeid != dlm_our_nodeid())
793 send_remove(r);
794 else
795 dlm_dir_remove_entry(r->res_ls, to_nodeid,
796 r->res_name, r->res_length);
797}
798
799/* FIXME: shouldn't this be able to exit as soon as one non-due rsb is
800 found since they are in order of newest to oldest? */
801
802static int shrink_bucket(struct dlm_ls *ls, int b)
803{
804 struct dlm_rsb *r;
805 int count = 0, found;
806
807 for (;;) {
90135925 808 found = 0;
e7fd4179
DT
809 write_lock(&ls->ls_rsbtbl[b].lock);
810 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
811 res_hashchain) {
812 if (!time_after_eq(jiffies, r->res_toss_time +
813 dlm_config.toss_secs * HZ))
814 continue;
90135925 815 found = 1;
e7fd4179
DT
816 break;
817 }
818
819 if (!found) {
820 write_unlock(&ls->ls_rsbtbl[b].lock);
821 break;
822 }
823
824 if (kref_put(&r->res_ref, kill_rsb)) {
825 list_del(&r->res_hashchain);
826 write_unlock(&ls->ls_rsbtbl[b].lock);
827
828 if (is_master(r))
829 dir_remove(r);
830 free_rsb(r);
831 count++;
832 } else {
833 write_unlock(&ls->ls_rsbtbl[b].lock);
834 log_error(ls, "tossed rsb in use %s", r->res_name);
835 }
836 }
837
838 return count;
839}
840
841void dlm_scan_rsbs(struct dlm_ls *ls)
842{
843 int i;
844
845 if (dlm_locking_stopped(ls))
846 return;
847
848 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
849 shrink_bucket(ls, i);
850 cond_resched();
851 }
852}
853
854/* lkb is master or local copy */
855
856static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
857{
858 int b, len = r->res_ls->ls_lvblen;
859
860 /* b=1 lvb returned to caller
861 b=0 lvb written to rsb or invalidated
862 b=-1 do nothing */
863
864 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
865
866 if (b == 1) {
867 if (!lkb->lkb_lvbptr)
868 return;
869
870 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
871 return;
872
873 if (!r->res_lvbptr)
874 return;
875
876 memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len);
877 lkb->lkb_lvbseq = r->res_lvbseq;
878
879 } else if (b == 0) {
880 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
881 rsb_set_flag(r, RSB_VALNOTVALID);
882 return;
883 }
884
885 if (!lkb->lkb_lvbptr)
886 return;
887
888 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
889 return;
890
891 if (!r->res_lvbptr)
892 r->res_lvbptr = allocate_lvb(r->res_ls);
893
894 if (!r->res_lvbptr)
895 return;
896
897 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len);
898 r->res_lvbseq++;
899 lkb->lkb_lvbseq = r->res_lvbseq;
900 rsb_clear_flag(r, RSB_VALNOTVALID);
901 }
902
903 if (rsb_flag(r, RSB_VALNOTVALID))
904 lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID;
905}
906
907static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
908{
909 if (lkb->lkb_grmode < DLM_LOCK_PW)
910 return;
911
912 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
913 rsb_set_flag(r, RSB_VALNOTVALID);
914 return;
915 }
916
917 if (!lkb->lkb_lvbptr)
918 return;
919
920 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
921 return;
922
923 if (!r->res_lvbptr)
924 r->res_lvbptr = allocate_lvb(r->res_ls);
925
926 if (!r->res_lvbptr)
927 return;
928
929 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
930 r->res_lvbseq++;
931 rsb_clear_flag(r, RSB_VALNOTVALID);
932}
933
934/* lkb is process copy (pc) */
935
936static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
937 struct dlm_message *ms)
938{
939 int b;
940
941 if (!lkb->lkb_lvbptr)
942 return;
943
944 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
945 return;
946
597d0cae 947 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
e7fd4179
DT
948 if (b == 1) {
949 int len = receive_extralen(ms);
950 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
951 lkb->lkb_lvbseq = ms->m_lvbseq;
952 }
953}
954
955/* Manipulate lkb's on rsb's convert/granted/waiting queues
956 remove_lock -- used for unlock, removes lkb from granted
957 revert_lock -- used for cancel, moves lkb from convert to granted
958 grant_lock -- used for request and convert, adds lkb to granted or
959 moves lkb from convert or waiting to granted
960
961 Each of these is used for master or local copy lkb's. There is
962 also a _pc() variation used to make the corresponding change on
963 a process copy (pc) lkb. */
964
965static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
966{
967 del_lkb(r, lkb);
968 lkb->lkb_grmode = DLM_LOCK_IV;
969 /* this unhold undoes the original ref from create_lkb()
970 so this leads to the lkb being freed */
971 unhold_lkb(lkb);
972}
973
/* Unlock on a master or local copy: update the rsb's lvb as required,
   then remove the lkb. */
static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	set_lvb_unlock(r, lkb);
	_remove_lock(r, lkb);
}
979
/* Unlock on a process copy: remove the lkb without any lvb handling. */
static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	_remove_lock(r, lkb);
}
984
985static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
986{
987 lkb->lkb_rqmode = DLM_LOCK_IV;
988
989 switch (lkb->lkb_status) {
597d0cae
DT
990 case DLM_LKSTS_GRANTED:
991 break;
e7fd4179
DT
992 case DLM_LKSTS_CONVERT:
993 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
994 break;
995 case DLM_LKSTS_WAITING:
996 del_lkb(r, lkb);
997 lkb->lkb_grmode = DLM_LOCK_IV;
998 /* this unhold undoes the original ref from create_lkb()
999 so this leads to the lkb being freed */
1000 unhold_lkb(lkb);
1001 break;
1002 default:
1003 log_print("invalid status for revert %d", lkb->lkb_status);
1004 }
1005}
1006
/* Process-copy variant of revert_lock(); currently identical to it. */
static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	revert_lock(r, lkb);
}
1011
1012static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1013{
1014 if (lkb->lkb_grmode != lkb->lkb_rqmode) {
1015 lkb->lkb_grmode = lkb->lkb_rqmode;
1016 if (lkb->lkb_status)
1017 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
1018 else
1019 add_lkb(r, lkb, DLM_LKSTS_GRANTED);
1020 }
1021
1022 lkb->lkb_rqmode = DLM_LOCK_IV;
e7fd4179
DT
1023}
1024
1025static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1026{
1027 set_lvb_lock(r, lkb);
1028 _grant_lock(r, lkb);
1029 lkb->lkb_highbast = 0;
1030}
1031
/* Grant on a process copy: take the lvb from the received message when
   applicable, then grant the lock. */
static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
			  struct dlm_message *ms)
{
	set_lvb_lock_pc(r, lkb, ms);
	_grant_lock(r, lkb);
}
1038
1039/* called by grant_pending_locks() which means an async grant message must
1040 be sent to the requesting node in addition to granting the lock if the
1041 lkb belongs to a remote node. */
1042
/* Grant a lock that had been queued.  The owner is then notified: a
   grant message is sent for a master copy, a completion ast with status 0
   is queued for any other lkb. */
static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	grant_lock(r, lkb);

	if (!is_master_copy(lkb)) {
		queue_cast(r, lkb, 0);
		return;
	}

	send_grant(r, lkb);
}
1051
1052static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
1053{
1054 struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb,
1055 lkb_statequeue);
1056 if (lkb->lkb_id == first->lkb_id)
90135925 1057 return 1;
e7fd4179 1058
90135925 1059 return 0;
e7fd4179
DT
1060}
1061
e7fd4179
DT
1062/* Check if the given lkb conflicts with another lkb on the queue. */
1063
1064static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1065{
1066 struct dlm_lkb *this;
1067
1068 list_for_each_entry(this, head, lkb_statequeue) {
1069 if (this == lkb)
1070 continue;
3bcd3687 1071 if (!modes_compat(this, lkb))
90135925 1072 return 1;
e7fd4179 1073 }
90135925 1074 return 0;
e7fd4179
DT
1075}
1076
1077/*
1078 * "A conversion deadlock arises with a pair of lock requests in the converting
1079 * queue for one resource. The granted mode of each lock blocks the requested
1080 * mode of the other lock."
1081 *
1082 * Part 2: if the granted mode of lkb is preventing the first lkb in the
1083 * convert queue from being granted, then demote lkb (set grmode to NL).
1084 * This second form requires that we check for conv-deadlk even when
1085 * now == 0 in _can_be_granted().
1086 *
1087 * Example:
1088 * Granted Queue: empty
1089 * Convert Queue: NL->EX (first lock)
1090 * PR->EX (second lock)
1091 *
1092 * The first lock can't be granted because of the granted mode of the second
1093 * lock and the second lock can't be granted because it's not first in the
1094 * list. We demote the granted mode of the second lock (the lkb passed to this
1095 * function).
1096 *
1097 * After the resolution, the "grant pending" function needs to go back and try
1098 * to grant locks on the convert queue again since the first lock can now be
1099 * granted.
1100 */
1101
1102static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
1103{
1104 struct dlm_lkb *this, *first = NULL, *self = NULL;
1105
1106 list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
1107 if (!first)
1108 first = this;
1109 if (this == lkb) {
1110 self = lkb;
1111 continue;
1112 }
1113
e7fd4179 1114 if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
90135925 1115 return 1;
e7fd4179
DT
1116 }
1117
1118 /* if lkb is on the convert queue and is preventing the first
1119 from being granted, then there's deadlock and we demote lkb.
1120 multiple converting locks may need to do this before the first
1121 converting lock can be granted. */
1122
1123 if (self && self != first) {
1124 if (!modes_compat(lkb, first) &&
1125 !queue_conflict(&rsb->res_grantqueue, first))
90135925 1126 return 1;
e7fd4179
DT
1127 }
1128
90135925 1129 return 0;
e7fd4179
DT
1130}
1131
1132/*
1133 * Return 1 if the lock can be granted, 0 otherwise.
1134 * Also detect and resolve conversion deadlocks.
1135 *
1136 * lkb is the lock to be granted
1137 *
1138 * now is 1 if the function is being called in the context of the
1139 * immediate request, it is 0 if called later, after the lock has been
1140 * queued.
1141 *
1142 * References are from chapter 6 of "VAXcluster Principles" by Roy Davis
1143 */
1144
1145static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1146{
1147 int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);
1148
1149 /*
1150 * 6-10: Version 5.4 introduced an option to address the phenomenon of
1151 * a new request for a NL mode lock being blocked.
1152 *
1153 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
1154 * request, then it would be granted. In essence, the use of this flag
1155 * tells the Lock Manager to expedite theis request by not considering
1156 * what may be in the CONVERTING or WAITING queues... As of this
1157 * writing, the EXPEDITE flag can be used only with new requests for NL
1158 * mode locks. This flag is not valid for conversion requests.
1159 *
1160 * A shortcut. Earlier checks return an error if EXPEDITE is used in a
1161 * conversion or used with a non-NL requested mode. We also know an
1162 * EXPEDITE request is always granted immediately, so now must always
1163 * be 1. The full condition to grant an expedite request: (now &&
1164 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
1165 * therefore be shortened to just checking the flag.
1166 */
1167
1168 if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
90135925 1169 return 1;
e7fd4179
DT
1170
1171 /*
1172 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
1173 * added to the remaining conditions.
1174 */
1175
1176 if (queue_conflict(&r->res_grantqueue, lkb))
1177 goto out;
1178
1179 /*
1180 * 6-3: By default, a conversion request is immediately granted if the
1181 * requested mode is compatible with the modes of all other granted
1182 * locks
1183 */
1184
1185 if (queue_conflict(&r->res_convertqueue, lkb))
1186 goto out;
1187
1188 /*
1189 * 6-5: But the default algorithm for deciding whether to grant or
1190 * queue conversion requests does not by itself guarantee that such
1191 * requests are serviced on a "first come first serve" basis. This, in
1192 * turn, can lead to a phenomenon known as "indefinate postponement".
1193 *
1194 * 6-7: This issue is dealt with by using the optional QUECVT flag with
1195 * the system service employed to request a lock conversion. This flag
1196 * forces certain conversion requests to be queued, even if they are
1197 * compatible with the granted modes of other locks on the same
1198 * resource. Thus, the use of this flag results in conversion requests
1199 * being ordered on a "first come first servce" basis.
1200 *
1201 * DCT: This condition is all about new conversions being able to occur
1202 * "in place" while the lock remains on the granted queue (assuming
1203 * nothing else conflicts.) IOW if QUECVT isn't set, a conversion
1204 * doesn't _have_ to go onto the convert queue where it's processed in
1205 * order. The "now" variable is necessary to distinguish converts
1206 * being received and processed for the first time now, because once a
1207 * convert is moved to the conversion queue the condition below applies
1208 * requiring fifo granting.
1209 */
1210
1211 if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
90135925 1212 return 1;
e7fd4179
DT
1213
1214 /*
3bcd3687
DT
1215 * The NOORDER flag is set to avoid the standard vms rules on grant
1216 * order.
e7fd4179
DT
1217 */
1218
1219 if (lkb->lkb_exflags & DLM_LKF_NOORDER)
90135925 1220 return 1;
e7fd4179
DT
1221
1222 /*
1223 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
1224 * granted until all other conversion requests ahead of it are granted
1225 * and/or canceled.
1226 */
1227
1228 if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
90135925 1229 return 1;
e7fd4179
DT
1230
1231 /*
1232 * 6-4: By default, a new request is immediately granted only if all
1233 * three of the following conditions are satisfied when the request is
1234 * issued:
1235 * - The queue of ungranted conversion requests for the resource is
1236 * empty.
1237 * - The queue of ungranted new requests for the resource is empty.
1238 * - The mode of the new request is compatible with the most
1239 * restrictive mode of all granted locks on the resource.
1240 */
1241
1242 if (now && !conv && list_empty(&r->res_convertqueue) &&
1243 list_empty(&r->res_waitqueue))
90135925 1244 return 1;
e7fd4179
DT
1245
1246 /*
1247 * 6-4: Once a lock request is in the queue of ungranted new requests,
1248 * it cannot be granted until the queue of ungranted conversion
1249 * requests is empty, all ungranted new requests ahead of it are
1250 * granted and/or canceled, and it is compatible with the granted mode
1251 * of the most restrictive lock granted on the resource.
1252 */
1253
1254 if (!now && !conv && list_empty(&r->res_convertqueue) &&
1255 first_in_list(lkb, &r->res_waitqueue))
90135925 1256 return 1;
e7fd4179
DT
1257
1258 out:
1259 /*
1260 * The following, enabled by CONVDEADLK, departs from VMS.
1261 */
1262
1263 if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
1264 conversion_deadlock_detect(r, lkb)) {
1265 lkb->lkb_grmode = DLM_LOCK_NL;
1266 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1267 }
1268
90135925 1269 return 0;
e7fd4179
DT
1270}
1271
1272/*
1273 * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
1274 * simple way to provide a big optimization to applications that can use them.
1275 */
1276
1277static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1278{
1279 uint32_t flags = lkb->lkb_exflags;
1280 int rv;
1281 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
1282
1283 rv = _can_be_granted(r, lkb, now);
1284 if (rv)
1285 goto out;
1286
1287 if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
1288 goto out;
1289
1290 if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
1291 alt = DLM_LOCK_PR;
1292 else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
1293 alt = DLM_LOCK_CW;
1294
1295 if (alt) {
1296 lkb->lkb_rqmode = alt;
1297 rv = _can_be_granted(r, lkb, now);
1298 if (rv)
1299 lkb->lkb_sbflags |= DLM_SBF_ALTMODE;
1300 else
1301 lkb->lkb_rqmode = rqmode;
1302 }
1303 out:
1304 return rv;
1305}
1306
1307static int grant_pending_convert(struct dlm_rsb *r, int high)
1308{
1309 struct dlm_lkb *lkb, *s;
1310 int hi, demoted, quit, grant_restart, demote_restart;
1311
1312 quit = 0;
1313 restart:
1314 grant_restart = 0;
1315 demote_restart = 0;
1316 hi = DLM_LOCK_IV;
1317
1318 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
1319 demoted = is_demoted(lkb);
90135925 1320 if (can_be_granted(r, lkb, 0)) {
e7fd4179
DT
1321 grant_lock_pending(r, lkb);
1322 grant_restart = 1;
1323 } else {
1324 hi = max_t(int, lkb->lkb_rqmode, hi);
1325 if (!demoted && is_demoted(lkb))
1326 demote_restart = 1;
1327 }
1328 }
1329
1330 if (grant_restart)
1331 goto restart;
1332 if (demote_restart && !quit) {
1333 quit = 1;
1334 goto restart;
1335 }
1336
1337 return max_t(int, high, hi);
1338}
1339
1340static int grant_pending_wait(struct dlm_rsb *r, int high)
1341{
1342 struct dlm_lkb *lkb, *s;
1343
1344 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
90135925 1345 if (can_be_granted(r, lkb, 0))
e7fd4179
DT
1346 grant_lock_pending(r, lkb);
1347 else
1348 high = max_t(int, lkb->lkb_rqmode, high);
1349 }
1350
1351 return high;
1352}
1353
1354static void grant_pending_locks(struct dlm_rsb *r)
1355{
1356 struct dlm_lkb *lkb, *s;
1357 int high = DLM_LOCK_IV;
1358
a345da3e 1359 DLM_ASSERT(is_master(r), dlm_dump_rsb(r););
e7fd4179
DT
1360
1361 high = grant_pending_convert(r, high);
1362 high = grant_pending_wait(r, high);
1363
1364 if (high == DLM_LOCK_IV)
1365 return;
1366
1367 /*
1368 * If there are locks left on the wait/convert queue then send blocking
1369 * ASTs to granted locks based on the largest requested mode (high)
3bcd3687 1370 * found above. FIXME: highbast < high comparison not valid for PR/CW.
e7fd4179
DT
1371 */
1372
1373 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
1374 if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
1375 !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
1376 queue_bast(r, lkb, high);
1377 lkb->lkb_highbast = high;
1378 }
1379 }
1380}
1381
1382static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
1383 struct dlm_lkb *lkb)
1384{
1385 struct dlm_lkb *gr;
1386
1387 list_for_each_entry(gr, head, lkb_statequeue) {
1388 if (gr->lkb_bastaddr &&
1389 gr->lkb_highbast < lkb->lkb_rqmode &&
3bcd3687 1390 !modes_compat(gr, lkb)) {
e7fd4179
DT
1391 queue_bast(r, gr, lkb->lkb_rqmode);
1392 gr->lkb_highbast = lkb->lkb_rqmode;
1393 }
1394 }
1395}
1396
1397static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb)
1398{
1399 send_bast_queue(r, &r->res_grantqueue, lkb);
1400}
1401
1402static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb)
1403{
1404 send_bast_queue(r, &r->res_grantqueue, lkb);
1405 send_bast_queue(r, &r->res_convertqueue, lkb);
1406}
1407
1408/* set_master(r, lkb) -- set the master nodeid of a resource
1409
1410 The purpose of this function is to set the nodeid field in the given
1411 lkb using the nodeid field in the given rsb. If the rsb's nodeid is
1412 known, it can just be copied to the lkb and the function will return
1413 0. If the rsb's nodeid is _not_ known, it needs to be looked up
1414 before it can be copied to the lkb.
1415
1416 When the rsb nodeid is being looked up remotely, the initial lkb
1417 causing the lookup is kept on the ls_waiters list waiting for the
1418 lookup reply. Other lkb's waiting for the same rsb lookup are kept
1419 on the rsb's res_lookup list until the master is verified.
1420
1421 Return values:
1422 0: nodeid is set in rsb/lkb and the caller should go ahead and use it
1423 1: the rsb master is not available and the lkb has been placed on
1424 a wait queue
1425*/
1426
1427static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
1428{
1429 struct dlm_ls *ls = r->res_ls;
1430 int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
1431
1432 if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
1433 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
1434 r->res_first_lkid = lkb->lkb_id;
1435 lkb->lkb_nodeid = r->res_nodeid;
1436 return 0;
1437 }
1438
1439 if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) {
1440 list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup);
1441 return 1;
1442 }
1443
1444 if (r->res_nodeid == 0) {
1445 lkb->lkb_nodeid = 0;
1446 return 0;
1447 }
1448
1449 if (r->res_nodeid > 0) {
1450 lkb->lkb_nodeid = r->res_nodeid;
1451 return 0;
1452 }
1453
a345da3e 1454 DLM_ASSERT(r->res_nodeid == -1, dlm_dump_rsb(r););
e7fd4179
DT
1455
1456 dir_nodeid = dlm_dir_nodeid(r);
1457
1458 if (dir_nodeid != our_nodeid) {
1459 r->res_first_lkid = lkb->lkb_id;
1460 send_lookup(r, lkb);
1461 return 1;
1462 }
1463
1464 for (;;) {
1465 /* It's possible for dlm_scand to remove an old rsb for
1466 this same resource from the toss list, us to create
1467 a new one, look up the master locally, and find it
1468 already exists just before dlm_scand does the
1469 dir_remove() on the previous rsb. */
1470
1471 error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
1472 r->res_length, &ret_nodeid);
1473 if (!error)
1474 break;
1475 log_debug(ls, "dir_lookup error %d %s", error, r->res_name);
1476 schedule();
1477 }
1478
1479 if (ret_nodeid == our_nodeid) {
1480 r->res_first_lkid = 0;
1481 r->res_nodeid = 0;
1482 lkb->lkb_nodeid = 0;
1483 } else {
1484 r->res_first_lkid = lkb->lkb_id;
1485 r->res_nodeid = ret_nodeid;
1486 lkb->lkb_nodeid = ret_nodeid;
1487 }
1488 return 0;
1489}
1490
1491static void process_lookup_list(struct dlm_rsb *r)
1492{
1493 struct dlm_lkb *lkb, *safe;
1494
1495 list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) {
1496 list_del(&lkb->lkb_rsb_lookup);
1497 _request_lock(r, lkb);
1498 schedule();
1499 }
1500}
1501
1502/* confirm_master -- confirm (or deny) an rsb's master nodeid */
1503
1504static void confirm_master(struct dlm_rsb *r, int error)
1505{
1506 struct dlm_lkb *lkb;
1507
1508 if (!r->res_first_lkid)
1509 return;
1510
1511 switch (error) {
1512 case 0:
1513 case -EINPROGRESS:
1514 r->res_first_lkid = 0;
1515 process_lookup_list(r);
1516 break;
1517
1518 case -EAGAIN:
1519 /* the remote master didn't queue our NOQUEUE request;
1520 make a waiting lkb the first_lkid */
1521
1522 r->res_first_lkid = 0;
1523
1524 if (!list_empty(&r->res_lookup)) {
1525 lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
1526 lkb_rsb_lookup);
1527 list_del(&lkb->lkb_rsb_lookup);
1528 r->res_first_lkid = lkb->lkb_id;
1529 _request_lock(r, lkb);
1530 } else
1531 r->res_nodeid = -1;
1532 break;
1533
1534 default:
1535 log_error(r->res_ls, "confirm_master unknown error %d", error);
1536 }
1537}
1538
1539static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1540 int namelen, uint32_t parent_lkid, void *ast,
3bcd3687 1541 void *astarg, void *bast, struct dlm_args *args)
e7fd4179
DT
1542{
1543 int rv = -EINVAL;
1544
1545 /* check for invalid arg usage */
1546
1547 if (mode < 0 || mode > DLM_LOCK_EX)
1548 goto out;
1549
1550 if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN))
1551 goto out;
1552
1553 if (flags & DLM_LKF_CANCEL)
1554 goto out;
1555
1556 if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT))
1557 goto out;
1558
1559 if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT))
1560 goto out;
1561
1562 if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE)
1563 goto out;
1564
1565 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT)
1566 goto out;
1567
1568 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT)
1569 goto out;
1570
1571 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE)
1572 goto out;
1573
1574 if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL)
1575 goto out;
1576
1577 if (!ast || !lksb)
1578 goto out;
1579
1580 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
1581 goto out;
1582
1583 /* parent/child locks not yet supported */
1584 if (parent_lkid)
1585 goto out;
1586
1587 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
1588 goto out;
1589
1590 /* these args will be copied to the lkb in validate_lock_args,
1591 it cannot be done now because when converting locks, fields in
1592 an active lkb cannot be modified before locking the rsb */
1593
1594 args->flags = flags;
1595 args->astaddr = ast;
1596 args->astparam = (long) astarg;
1597 args->bastaddr = bast;
1598 args->mode = mode;
1599 args->lksb = lksb;
e7fd4179
DT
1600 rv = 0;
1601 out:
1602 return rv;
1603}
1604
1605static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
1606{
1607 if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK |
1608 DLM_LKF_FORCEUNLOCK))
1609 return -EINVAL;
1610
1611 args->flags = flags;
1612 args->astparam = (long) astarg;
1613 return 0;
1614}
1615
1616static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1617 struct dlm_args *args)
1618{
1619 int rv = -EINVAL;
1620
1621 if (args->flags & DLM_LKF_CONVERT) {
1622 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
1623 goto out;
1624
1625 if (args->flags & DLM_LKF_QUECVT &&
1626 !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
1627 goto out;
1628
1629 rv = -EBUSY;
1630 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
1631 goto out;
1632
1633 if (lkb->lkb_wait_type)
1634 goto out;
1635 }
1636
1637 lkb->lkb_exflags = args->flags;
1638 lkb->lkb_sbflags = 0;
1639 lkb->lkb_astaddr = args->astaddr;
1640 lkb->lkb_astparam = args->astparam;
1641 lkb->lkb_bastaddr = args->bastaddr;
1642 lkb->lkb_rqmode = args->mode;
1643 lkb->lkb_lksb = args->lksb;
1644 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
1645 lkb->lkb_ownpid = (int) current->pid;
e7fd4179
DT
1646 rv = 0;
1647 out:
1648 return rv;
1649}
1650
1651static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1652{
1653 int rv = -EINVAL;
1654
1655 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
1656 goto out;
1657
1658 if (args->flags & DLM_LKF_FORCEUNLOCK)
1659 goto out_ok;
1660
1661 if (args->flags & DLM_LKF_CANCEL &&
1662 lkb->lkb_status == DLM_LKSTS_GRANTED)
1663 goto out;
1664
1665 if (!(args->flags & DLM_LKF_CANCEL) &&
1666 lkb->lkb_status != DLM_LKSTS_GRANTED)
1667 goto out;
1668
1669 rv = -EBUSY;
1670 if (lkb->lkb_wait_type)
1671 goto out;
1672
1673 out_ok:
1674 lkb->lkb_exflags = args->flags;
1675 lkb->lkb_sbflags = 0;
1676 lkb->lkb_astparam = args->astparam;
1677
1678 rv = 0;
1679 out:
1680 return rv;
1681}
1682
1683/*
1684 * Four stage 4 varieties:
1685 * do_request(), do_convert(), do_unlock(), do_cancel()
1686 * These are called on the master node for the given lock and
1687 * from the central locking logic.
1688 */
1689
1690static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1691{
1692 int error = 0;
1693
90135925 1694 if (can_be_granted(r, lkb, 1)) {
e7fd4179
DT
1695 grant_lock(r, lkb);
1696 queue_cast(r, lkb, 0);
1697 goto out;
1698 }
1699
1700 if (can_be_queued(lkb)) {
1701 error = -EINPROGRESS;
1702 add_lkb(r, lkb, DLM_LKSTS_WAITING);
1703 send_blocking_asts(r, lkb);
1704 goto out;
1705 }
1706
1707 error = -EAGAIN;
1708 if (force_blocking_asts(lkb))
1709 send_blocking_asts_all(r, lkb);
1710 queue_cast(r, lkb, -EAGAIN);
1711
1712 out:
1713 return error;
1714}
1715
1716static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
1717{
1718 int error = 0;
1719
1720 /* changing an existing lock may allow others to be granted */
1721
90135925 1722 if (can_be_granted(r, lkb, 1)) {
e7fd4179
DT
1723 grant_lock(r, lkb);
1724 queue_cast(r, lkb, 0);
1725 grant_pending_locks(r);
1726 goto out;
1727 }
1728
1729 if (can_be_queued(lkb)) {
1730 if (is_demoted(lkb))
1731 grant_pending_locks(r);
1732 error = -EINPROGRESS;
1733 del_lkb(r, lkb);
1734 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
1735 send_blocking_asts(r, lkb);
1736 goto out;
1737 }
1738
1739 error = -EAGAIN;
1740 if (force_blocking_asts(lkb))
1741 send_blocking_asts_all(r, lkb);
1742 queue_cast(r, lkb, -EAGAIN);
1743
1744 out:
1745 return error;
1746}
1747
1748static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1749{
1750 remove_lock(r, lkb);
1751 queue_cast(r, lkb, -DLM_EUNLOCK);
1752 grant_pending_locks(r);
1753 return -DLM_EUNLOCK;
1754}
1755
597d0cae
DT
1756/* FIXME: if revert_lock() finds that the lkb is granted, we should
1757 skip the queue_cast(ECANCEL). It indicates that the request/convert
1758 completed (and queued a normal ast) just before the cancel; we don't
1759 want to clobber the sb_result for the normal ast with ECANCEL. */
907b9bce 1760
e7fd4179
DT
1761static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
1762{
1763 revert_lock(r, lkb);
1764 queue_cast(r, lkb, -DLM_ECANCEL);
1765 grant_pending_locks(r);
1766 return -DLM_ECANCEL;
1767}
1768
1769/*
1770 * Four stage 3 varieties:
1771 * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
1772 */
1773
1774/* add a new lkb to a possibly new rsb, called by requesting process */
1775
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int rv;

	/* set_master: sets lkb nodeid from r */

	rv = set_master(r, lkb);
	if (rv < 0)
		return rv;

	/* a positive return means nothing more is done with the lkb here;
	   report success */
	if (rv > 0)
		return 0;

	/* receive_request() calls do_request() on remote node */
	if (is_remote(r))
		return send_request(r, lkb);

	return do_request(r, lkb);
}
1798
3bcd3687 1799/* change some property of an existing lkb, e.g. mode */
e7fd4179
DT
1800
static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	/* receive_convert() calls do_convert() on remote node */
	if (is_remote(r))
		return send_convert(r, lkb);

	/* local rsb: do the conversion here */
	return do_convert(r, lkb);
}
1813
1814/* remove an existing lkb from the granted queue */
1815
static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	/* receive_unlock() calls do_unlock() on remote node */
	if (is_remote(r))
		return send_unlock(r, lkb);

	/* local rsb: do the unlock here */
	return do_unlock(r, lkb);
}
1828
1829/* remove an existing lkb from the convert or wait queue */
1830
static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	/* receive_cancel() calls do_cancel() on remote node */
	if (is_remote(r))
		return send_cancel(r, lkb);

	/* local rsb: do the cancel here */
	return do_cancel(r, lkb);
}
1843
1844/*
1845 * Four stage 2 varieties:
1846 * request_lock(), convert_lock(), unlock_lock(), cancel_lock()
1847 */
1848
1849static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
1850 int len, struct dlm_args *args)
1851{
1852 struct dlm_rsb *r;
1853 int error;
1854
1855 error = validate_lock_args(ls, lkb, args);
1856 if (error)
1857 goto out;
1858
1859 error = find_rsb(ls, name, len, R_CREATE, &r);
1860 if (error)
1861 goto out;
1862
1863 lock_rsb(r);
1864
1865 attach_lkb(r, lkb);
1866 lkb->lkb_lksb->sb_lkid = lkb->lkb_id;
1867
1868 error = _request_lock(r, lkb);
1869
1870 unlock_rsb(r);
1871 put_rsb(r);
1872
1873 out:
1874 return error;
1875}
1876
1877static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1878 struct dlm_args *args)
1879{
1880 struct dlm_rsb *r;
1881 int error;
1882
1883 r = lkb->lkb_resource;
1884
1885 hold_rsb(r);
1886 lock_rsb(r);
1887
1888 error = validate_lock_args(ls, lkb, args);
1889 if (error)
1890 goto out;
1891
1892 error = _convert_lock(r, lkb);
1893 out:
1894 unlock_rsb(r);
1895 put_rsb(r);
1896 return error;
1897}
1898
1899static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1900 struct dlm_args *args)
1901{
1902 struct dlm_rsb *r;
1903 int error;
1904
1905 r = lkb->lkb_resource;
1906
1907 hold_rsb(r);
1908 lock_rsb(r);
1909
1910 error = validate_unlock_args(lkb, args);
1911 if (error)
1912 goto out;
1913
1914 error = _unlock_lock(r, lkb);
1915 out:
1916 unlock_rsb(r);
1917 put_rsb(r);
1918 return error;
1919}
1920
1921static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
1922 struct dlm_args *args)
1923{
1924 struct dlm_rsb *r;
1925 int error;
1926
1927 r = lkb->lkb_resource;
1928
1929 hold_rsb(r);
1930 lock_rsb(r);
1931
1932 error = validate_unlock_args(lkb, args);
1933 if (error)
1934 goto out;
1935
1936 error = _cancel_lock(r, lkb);
1937 out:
1938 unlock_rsb(r);
1939 put_rsb(r);
1940 return error;
1941}
1942
1943/*
1944 * Two stage 1 varieties: dlm_lock() and dlm_unlock()
1945 */
1946
1947int dlm_lock(dlm_lockspace_t *lockspace,
1948 int mode,
1949 struct dlm_lksb *lksb,
1950 uint32_t flags,
1951 void *name,
1952 unsigned int namelen,
1953 uint32_t parent_lkid,
1954 void (*ast) (void *astarg),
1955 void *astarg,
3bcd3687 1956 void (*bast) (void *astarg, int mode))
e7fd4179
DT
1957{
1958 struct dlm_ls *ls;
1959 struct dlm_lkb *lkb;
1960 struct dlm_args args;
1961 int error, convert = flags & DLM_LKF_CONVERT;
1962
1963 ls = dlm_find_lockspace_local(lockspace);
1964 if (!ls)
1965 return -EINVAL;
1966
1967 lock_recovery(ls);
1968
1969 if (convert)
1970 error = find_lkb(ls, lksb->sb_lkid, &lkb);
1971 else
1972 error = create_lkb(ls, &lkb);
1973
1974 if (error)
1975 goto out;
1976
1977 error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
3bcd3687 1978 astarg, bast, &args);
e7fd4179
DT
1979 if (error)
1980 goto out_put;
1981
1982 if (convert)
1983 error = convert_lock(ls, lkb, &args);
1984 else
1985 error = request_lock(ls, lkb, name, namelen, &args);
1986
1987 if (error == -EINPROGRESS)
1988 error = 0;
1989 out_put:
1990 if (convert || error)
b3f58d8f 1991 __put_lkb(ls, lkb);
e7fd4179
DT
1992 if (error == -EAGAIN)
1993 error = 0;
1994 out:
1995 unlock_recovery(ls);
1996 dlm_put_lockspace(ls);
1997 return error;
1998}
1999
2000int dlm_unlock(dlm_lockspace_t *lockspace,
2001 uint32_t lkid,
2002 uint32_t flags,
2003 struct dlm_lksb *lksb,
2004 void *astarg)
2005{
2006 struct dlm_ls *ls;
2007 struct dlm_lkb *lkb;
2008 struct dlm_args args;
2009 int error;
2010
2011 ls = dlm_find_lockspace_local(lockspace);
2012 if (!ls)
2013 return -EINVAL;
2014
2015 lock_recovery(ls);
2016
2017 error = find_lkb(ls, lkid, &lkb);
2018 if (error)
2019 goto out;
2020
2021 error = set_unlock_args(flags, astarg, &args);
2022 if (error)
2023 goto out_put;
2024
2025 if (flags & DLM_LKF_CANCEL)
2026 error = cancel_lock(ls, lkb, &args);
2027 else
2028 error = unlock_lock(ls, lkb, &args);
2029
2030 if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
2031 error = 0;
2032 out_put:
b3f58d8f 2033 dlm_put_lkb(lkb);
e7fd4179
DT
2034 out:
2035 unlock_recovery(ls);
2036 dlm_put_lockspace(ls);
2037 return error;
2038}
2039
2040/*
2041 * send/receive routines for remote operations and replies
2042 *
2043 * send_args
2044 * send_common
2045 * send_request receive_request
2046 * send_convert receive_convert
2047 * send_unlock receive_unlock
2048 * send_cancel receive_cancel
2049 * send_grant receive_grant
2050 * send_bast receive_bast
2051 * send_lookup receive_lookup
2052 * send_remove receive_remove
2053 *
2054 * send_common_reply
2055 * receive_request_reply send_request_reply
2056 * receive_convert_reply send_convert_reply
2057 * receive_unlock_reply send_unlock_reply
2058 * receive_cancel_reply send_cancel_reply
2059 * receive_lookup_reply send_lookup_reply
2060 */
2061
2062static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
2063 int to_nodeid, int mstype,
2064 struct dlm_message **ms_ret,
2065 struct dlm_mhandle **mh_ret)
2066{
2067 struct dlm_message *ms;
2068 struct dlm_mhandle *mh;
2069 char *mb;
2070 int mb_len = sizeof(struct dlm_message);
2071
2072 switch (mstype) {
2073 case DLM_MSG_REQUEST:
2074 case DLM_MSG_LOOKUP:
2075 case DLM_MSG_REMOVE:
2076 mb_len += r->res_length;
2077 break;
2078 case DLM_MSG_CONVERT:
2079 case DLM_MSG_UNLOCK:
2080 case DLM_MSG_REQUEST_REPLY:
2081 case DLM_MSG_CONVERT_REPLY:
2082 case DLM_MSG_GRANT:
2083 if (lkb && lkb->lkb_lvbptr)
2084 mb_len += r->res_ls->ls_lvblen;
2085 break;
2086 }
2087
2088 /* get_buffer gives us a message handle (mh) that we need to
2089 pass into lowcomms_commit and a message buffer (mb) that we
2090 write our data into */
2091
2092 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
2093 if (!mh)
2094 return -ENOBUFS;
2095
2096 memset(mb, 0, mb_len);
2097
2098 ms = (struct dlm_message *) mb;
2099
2100 ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
2101 ms->m_header.h_lockspace = r->res_ls->ls_global_id;
2102 ms->m_header.h_nodeid = dlm_our_nodeid();
2103 ms->m_header.h_length = mb_len;
2104 ms->m_header.h_cmd = DLM_MSG;
2105
2106 ms->m_type = mstype;
2107
2108 *mh_ret = mh;
2109 *ms_ret = ms;
2110 return 0;
2111}
2112
/* Pass the message to dlm_message_out() and commit its lowcomms buffer.
   Currently always returns 0; further lowcomms enhancements or alternate
   implementations may make the return value from this function useful at
   some point */

static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
{
	int rv = 0;

	dlm_message_out(ms);
	dlm_lowcomms_commit_buffer(mh);

	return rv;
}
2122
2123static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
2124 struct dlm_message *ms)
2125{
2126 ms->m_nodeid = lkb->lkb_nodeid;
2127 ms->m_pid = lkb->lkb_ownpid;
2128 ms->m_lkid = lkb->lkb_id;
2129 ms->m_remid = lkb->lkb_remid;
2130 ms->m_exflags = lkb->lkb_exflags;
2131 ms->m_sbflags = lkb->lkb_sbflags;
2132 ms->m_flags = lkb->lkb_flags;
2133 ms->m_lvbseq = lkb->lkb_lvbseq;
2134 ms->m_status = lkb->lkb_status;
2135 ms->m_grmode = lkb->lkb_grmode;
2136 ms->m_rqmode = lkb->lkb_rqmode;
2137 ms->m_hash = r->res_hash;
2138
2139 /* m_result and m_bastmode are set from function args,
2140 not from lkb fields */
2141
2142 if (lkb->lkb_bastaddr)
2143 ms->m_asts |= AST_BAST;
2144 if (lkb->lkb_astaddr)
2145 ms->m_asts |= AST_COMP;
2146
da49f36f
DT
2147 /* compare with switch in create_message; send_remove() doesn't
2148 use send_args() */
e7fd4179 2149
da49f36f
DT
2150 switch (ms->m_type) {
2151 case DLM_MSG_REQUEST:
2152 case DLM_MSG_LOOKUP:
2153 memcpy(ms->m_extra, r->res_name, r->res_length);
2154 break;
2155 case DLM_MSG_CONVERT:
2156 case DLM_MSG_UNLOCK:
2157 case DLM_MSG_REQUEST_REPLY:
2158 case DLM_MSG_CONVERT_REPLY:
2159 case DLM_MSG_GRANT:
2160 if (!lkb->lkb_lvbptr)
2161 break;
e7fd4179 2162 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
da49f36f
DT
2163 break;
2164 }
e7fd4179
DT
2165}
2166
2167static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2168{
2169 struct dlm_message *ms;
2170 struct dlm_mhandle *mh;
2171 int to_nodeid, error;
2172
2173 add_to_waiters(lkb, mstype);
2174
2175 to_nodeid = r->res_nodeid;
2176
2177 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2178 if (error)
2179 goto fail;
2180
2181 send_args(r, lkb, ms);
2182
2183 error = send_message(mh, ms);
2184 if (error)
2185 goto fail;
2186 return 0;
2187
2188 fail:
2189 remove_from_waiters(lkb);
2190 return error;
2191}
2192
2193static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2194{
2195 return send_common(r, lkb, DLM_MSG_REQUEST);
2196}
2197
2198static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2199{
2200 int error;
2201
2202 error = send_common(r, lkb, DLM_MSG_CONVERT);
2203
2204 /* down conversions go without a reply from the master */
2205 if (!error && down_conversion(lkb)) {
2206 remove_from_waiters(lkb);
2207 r->res_ls->ls_stub_ms.m_result = 0;
32f105a1 2208 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
e7fd4179
DT
2209 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2210 }
2211
2212 return error;
2213}
2214
2215/* FIXME: if this lkb is the only lock we hold on the rsb, then set
2216 MASTER_UNCERTAIN to force the next request on the rsb to confirm
2217 that the master is still correct. */
2218
2219static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2220{
2221 return send_common(r, lkb, DLM_MSG_UNLOCK);
2222}
2223
2224static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
2225{
2226 return send_common(r, lkb, DLM_MSG_CANCEL);
2227}
2228
2229static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
2230{
2231 struct dlm_message *ms;
2232 struct dlm_mhandle *mh;
2233 int to_nodeid, error;
2234
2235 to_nodeid = lkb->lkb_nodeid;
2236
2237 error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
2238 if (error)
2239 goto out;
2240
2241 send_args(r, lkb, ms);
2242
2243 ms->m_result = 0;
2244
2245 error = send_message(mh, ms);
2246 out:
2247 return error;
2248}
2249
2250static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
2251{
2252 struct dlm_message *ms;
2253 struct dlm_mhandle *mh;
2254 int to_nodeid, error;
2255
2256 to_nodeid = lkb->lkb_nodeid;
2257
2258 error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
2259 if (error)
2260 goto out;
2261
2262 send_args(r, lkb, ms);
2263
2264 ms->m_bastmode = mode;
2265
2266 error = send_message(mh, ms);
2267 out:
2268 return error;
2269}
2270
2271static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2272{
2273 struct dlm_message *ms;
2274 struct dlm_mhandle *mh;
2275 int to_nodeid, error;
2276
2277 add_to_waiters(lkb, DLM_MSG_LOOKUP);
2278
2279 to_nodeid = dlm_dir_nodeid(r);
2280
2281 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2282 if (error)
2283 goto fail;
2284
2285 send_args(r, lkb, ms);
2286
2287 error = send_message(mh, ms);
2288 if (error)
2289 goto fail;
2290 return 0;
2291
2292 fail:
2293 remove_from_waiters(lkb);
2294 return error;
2295}
2296
2297static int send_remove(struct dlm_rsb *r)
2298{
2299 struct dlm_message *ms;
2300 struct dlm_mhandle *mh;
2301 int to_nodeid, error;
2302
2303 to_nodeid = dlm_dir_nodeid(r);
2304
2305 error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
2306 if (error)
2307 goto out;
2308
2309 memcpy(ms->m_extra, r->res_name, r->res_length);
2310 ms->m_hash = r->res_hash;
2311
2312 error = send_message(mh, ms);
2313 out:
2314 return error;
2315}
2316
2317static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2318 int mstype, int rv)
2319{
2320 struct dlm_message *ms;
2321 struct dlm_mhandle *mh;
2322 int to_nodeid, error;
2323
2324 to_nodeid = lkb->lkb_nodeid;
2325
2326 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2327 if (error)
2328 goto out;
2329
2330 send_args(r, lkb, ms);
2331
2332 ms->m_result = rv;
2333
2334 error = send_message(mh, ms);
2335 out:
2336 return error;
2337}
2338
2339static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2340{
2341 return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv);
2342}
2343
2344static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2345{
2346 return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv);
2347}
2348
2349static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2350{
2351 return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv);
2352}
2353
2354static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
2355{
2356 return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv);
2357}
2358
2359static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
2360 int ret_nodeid, int rv)
2361{
2362 struct dlm_rsb *r = &ls->ls_stub_rsb;
2363 struct dlm_message *ms;
2364 struct dlm_mhandle *mh;
2365 int error, nodeid = ms_in->m_header.h_nodeid;
2366
2367 error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
2368 if (error)
2369 goto out;
2370
2371 ms->m_lkid = ms_in->m_lkid;
2372 ms->m_result = rv;
2373 ms->m_nodeid = ret_nodeid;
2374
2375 error = send_message(mh, ms);
2376 out:
2377 return error;
2378}
2379
2380/* which args we save from a received message depends heavily on the type
2381 of message, unlike the send side where we can safely send everything about
2382 the lkb for any type of message */
2383
2384static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
2385{
2386 lkb->lkb_exflags = ms->m_exflags;
6f90a8b1 2387 lkb->lkb_sbflags = ms->m_sbflags;
e7fd4179
DT
2388 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
2389 (ms->m_flags & 0x0000FFFF);
2390}
2391
2392static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2393{
2394 lkb->lkb_sbflags = ms->m_sbflags;
2395 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
2396 (ms->m_flags & 0x0000FFFF);
2397}
2398
2399static int receive_extralen(struct dlm_message *ms)
2400{
2401 return (ms->m_header.h_length - sizeof(struct dlm_message));
2402}
2403
e7fd4179
DT
2404static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
2405 struct dlm_message *ms)
2406{
2407 int len;
2408
2409 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
2410 if (!lkb->lkb_lvbptr)
2411 lkb->lkb_lvbptr = allocate_lvb(ls);
2412 if (!lkb->lkb_lvbptr)
2413 return -ENOMEM;
2414 len = receive_extralen(ms);
2415 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
2416 }
2417 return 0;
2418}
2419
2420static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2421 struct dlm_message *ms)
2422{
2423 lkb->lkb_nodeid = ms->m_header.h_nodeid;
2424 lkb->lkb_ownpid = ms->m_pid;
2425 lkb->lkb_remid = ms->m_lkid;
2426 lkb->lkb_grmode = DLM_LOCK_IV;
2427 lkb->lkb_rqmode = ms->m_rqmode;
2428 lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
2429 lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);
2430
2431 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
2432
e7fd4179
DT
2433 if (receive_lvb(ls, lkb, ms))
2434 return -ENOMEM;
2435
2436 return 0;
2437}
2438
2439static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2440 struct dlm_message *ms)
2441{
2442 if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
2443 log_error(ls, "convert_args nodeid %d %d lkid %x %x",
2444 lkb->lkb_nodeid, ms->m_header.h_nodeid,
2445 lkb->lkb_id, lkb->lkb_remid);
2446 return -EINVAL;
2447 }
2448
2449 if (!is_master_copy(lkb))
2450 return -EINVAL;
2451
2452 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
2453 return -EBUSY;
2454
e7fd4179
DT
2455 if (receive_lvb(ls, lkb, ms))
2456 return -ENOMEM;
2457
2458 lkb->lkb_rqmode = ms->m_rqmode;
2459 lkb->lkb_lvbseq = ms->m_lvbseq;
2460
2461 return 0;
2462}
2463
/* Check that lkb is a master copy and pick up any lvb from the unlock
   message.  Returns -EINVAL, -ENOMEM or 0. */
static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
			       struct dlm_message *ms)
{
	if (!is_master_copy(lkb))
		return -EINVAL;

	return receive_lvb(ls, lkb, ms) ? -ENOMEM : 0;
}
2473
2474/* We fill in the stub-lkb fields with the info that send_xxxx_reply()
2475 uses to send a reply and that the remote end uses to process the reply. */
2476
2477static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
2478{
2479 struct dlm_lkb *lkb = &ls->ls_stub_lkb;
2480 lkb->lkb_nodeid = ms->m_header.h_nodeid;
2481 lkb->lkb_remid = ms->m_lkid;
2482}
2483
2484static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
2485{
2486 struct dlm_lkb *lkb;
2487 struct dlm_rsb *r;
2488 int error, namelen;
2489
2490 error = create_lkb(ls, &lkb);
2491 if (error)
2492 goto fail;
2493
2494 receive_flags(lkb, ms);
2495 lkb->lkb_flags |= DLM_IFL_MSTCPY;
2496 error = receive_request_args(ls, lkb, ms);
2497 if (error) {
b3f58d8f 2498 __put_lkb(ls, lkb);
e7fd4179
DT
2499 goto fail;
2500 }
2501
2502 namelen = receive_extralen(ms);
2503
2504 error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r);
2505 if (error) {
b3f58d8f 2506 __put_lkb(ls, lkb);
e7fd4179
DT
2507 goto fail;
2508 }
2509
2510 lock_rsb(r);
2511
2512 attach_lkb(r, lkb);
2513 error = do_request(r, lkb);
2514 send_request_reply(r, lkb, error);
2515
2516 unlock_rsb(r);
2517 put_rsb(r);
2518
2519 if (error == -EINPROGRESS)
2520 error = 0;
2521 if (error)
b3f58d8f 2522 dlm_put_lkb(lkb);
e7fd4179
DT
2523 return;
2524
2525 fail:
2526 setup_stub_lkb(ls, ms);
2527 send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
2528}
2529
2530static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
2531{
2532 struct dlm_lkb *lkb;
2533 struct dlm_rsb *r;
90135925 2534 int error, reply = 1;
e7fd4179
DT
2535
2536 error = find_lkb(ls, ms->m_remid, &lkb);
2537 if (error)
2538 goto fail;
2539
2540 r = lkb->lkb_resource;
2541
2542 hold_rsb(r);
2543 lock_rsb(r);
2544
2545 receive_flags(lkb, ms);
2546 error = receive_convert_args(ls, lkb, ms);
2547 if (error)
2548 goto out;
2549 reply = !down_conversion(lkb);
2550
2551 error = do_convert(r, lkb);
2552 out:
2553 if (reply)
2554 send_convert_reply(r, lkb, error);
2555
2556 unlock_rsb(r);
2557 put_rsb(r);
b3f58d8f 2558 dlm_put_lkb(lkb);
e7fd4179
DT
2559 return;
2560
2561 fail:
2562 setup_stub_lkb(ls, ms);
2563 send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
2564}
2565
2566static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
2567{
2568 struct dlm_lkb *lkb;
2569 struct dlm_rsb *r;
2570 int error;
2571
2572 error = find_lkb(ls, ms->m_remid, &lkb);
2573 if (error)
2574 goto fail;
2575
2576 r = lkb->lkb_resource;
2577
2578 hold_rsb(r);
2579 lock_rsb(r);
2580
2581 receive_flags(lkb, ms);
2582 error = receive_unlock_args(ls, lkb, ms);
2583 if (error)
2584 goto out;
2585
2586 error = do_unlock(r, lkb);
2587 out:
2588 send_unlock_reply(r, lkb, error);
2589
2590 unlock_rsb(r);
2591 put_rsb(r);
b3f58d8f 2592 dlm_put_lkb(lkb);
e7fd4179
DT
2593 return;
2594
2595 fail:
2596 setup_stub_lkb(ls, ms);
2597 send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
2598}
2599
2600static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
2601{
2602 struct dlm_lkb *lkb;
2603 struct dlm_rsb *r;
2604 int error;
2605
2606 error = find_lkb(ls, ms->m_remid, &lkb);
2607 if (error)
2608 goto fail;
2609
2610 receive_flags(lkb, ms);
2611
2612 r = lkb->lkb_resource;
2613
2614 hold_rsb(r);
2615 lock_rsb(r);
2616
2617 error = do_cancel(r, lkb);
2618 send_cancel_reply(r, lkb, error);
2619
2620 unlock_rsb(r);
2621 put_rsb(r);
b3f58d8f 2622 dlm_put_lkb(lkb);
e7fd4179
DT
2623 return;
2624
2625 fail:
2626 setup_stub_lkb(ls, ms);
2627 send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
2628}
2629
2630static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
2631{
2632 struct dlm_lkb *lkb;
2633 struct dlm_rsb *r;
2634 int error;
2635
2636 error = find_lkb(ls, ms->m_remid, &lkb);
2637 if (error) {
2638 log_error(ls, "receive_grant no lkb");
2639 return;
2640 }
2641 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2642
2643 r = lkb->lkb_resource;
2644
2645 hold_rsb(r);
2646 lock_rsb(r);
2647
2648 receive_flags_reply(lkb, ms);
2649 grant_lock_pc(r, lkb, ms);
2650 queue_cast(r, lkb, 0);
2651
2652 unlock_rsb(r);
2653 put_rsb(r);
b3f58d8f 2654 dlm_put_lkb(lkb);
e7fd4179
DT
2655}
2656
2657static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
2658{
2659 struct dlm_lkb *lkb;
2660 struct dlm_rsb *r;
2661 int error;
2662
2663 error = find_lkb(ls, ms->m_remid, &lkb);
2664 if (error) {
2665 log_error(ls, "receive_bast no lkb");
2666 return;
2667 }
2668 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2669
2670 r = lkb->lkb_resource;
2671
2672 hold_rsb(r);
2673 lock_rsb(r);
2674
2675 queue_bast(r, lkb, ms->m_bastmode);
2676
2677 unlock_rsb(r);
2678 put_rsb(r);
b3f58d8f 2679 dlm_put_lkb(lkb);
e7fd4179
DT
2680}
2681
2682static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms)
2683{
2684 int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid;
2685
2686 from_nodeid = ms->m_header.h_nodeid;
2687 our_nodeid = dlm_our_nodeid();
2688
2689 len = receive_extralen(ms);
2690
2691 dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
2692 if (dir_nodeid != our_nodeid) {
2693 log_error(ls, "lookup dir_nodeid %d from %d",
2694 dir_nodeid, from_nodeid);
2695 error = -EINVAL;
2696 ret_nodeid = -1;
2697 goto out;
2698 }
2699
2700 error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid);
2701
2702 /* Optimization: we're master so treat lookup as a request */
2703 if (!error && ret_nodeid == our_nodeid) {
2704 receive_request(ls, ms);
2705 return;
2706 }
2707 out:
2708 send_lookup_reply(ls, ms, ret_nodeid, error);
2709}
2710
2711static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
2712{
2713 int len, dir_nodeid, from_nodeid;
2714
2715 from_nodeid = ms->m_header.h_nodeid;
2716
2717 len = receive_extralen(ms);
2718
2719 dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
2720 if (dir_nodeid != dlm_our_nodeid()) {
2721 log_error(ls, "remove dir entry dir_nodeid %d from %d",
2722 dir_nodeid, from_nodeid);
2723 return;
2724 }
2725
2726 dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len);
2727}
2728
2729static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
2730{
2731 struct dlm_lkb *lkb;
2732 struct dlm_rsb *r;
2733 int error, mstype;
2734
2735 error = find_lkb(ls, ms->m_remid, &lkb);
2736 if (error) {
2737 log_error(ls, "receive_request_reply no lkb");
2738 return;
2739 }
2740 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2741
2742 mstype = lkb->lkb_wait_type;
2743 error = remove_from_waiters(lkb);
2744 if (error) {
2745 log_error(ls, "receive_request_reply not on waiters");
2746 goto out;
2747 }
2748
2749 /* this is the value returned from do_request() on the master */
2750 error = ms->m_result;
2751
2752 r = lkb->lkb_resource;
2753 hold_rsb(r);
2754 lock_rsb(r);
2755
2756 /* Optimization: the dir node was also the master, so it took our
2757 lookup as a request and sent request reply instead of lookup reply */
2758 if (mstype == DLM_MSG_LOOKUP) {
2759 r->res_nodeid = ms->m_header.h_nodeid;
2760 lkb->lkb_nodeid = r->res_nodeid;
2761 }
2762
2763 switch (error) {
2764 case -EAGAIN:
2765 /* request would block (be queued) on remote master;
2766 the unhold undoes the original ref from create_lkb()
2767 so it leads to the lkb being freed */
2768 queue_cast(r, lkb, -EAGAIN);
2769 confirm_master(r, -EAGAIN);
2770 unhold_lkb(lkb);
2771 break;
2772
2773 case -EINPROGRESS:
2774 case 0:
2775 /* request was queued or granted on remote master */
2776 receive_flags_reply(lkb, ms);
2777 lkb->lkb_remid = ms->m_lkid;
2778 if (error)
2779 add_lkb(r, lkb, DLM_LKSTS_WAITING);
2780 else {
2781 grant_lock_pc(r, lkb, ms);
2782 queue_cast(r, lkb, 0);
2783 }
2784 confirm_master(r, error);
2785 break;
2786
597d0cae 2787 case -EBADR:
e7fd4179
DT
2788 case -ENOTBLK:
2789 /* find_rsb failed to find rsb or rsb wasn't master */
2790 r->res_nodeid = -1;
2791 lkb->lkb_nodeid = -1;
2792 _request_lock(r, lkb);
2793 break;
2794
2795 default:
2796 log_error(ls, "receive_request_reply error %d", error);
2797 }
2798
2799 unlock_rsb(r);
2800 put_rsb(r);
2801 out:
b3f58d8f 2802 dlm_put_lkb(lkb);
e7fd4179
DT
2803}
2804
2805static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
2806 struct dlm_message *ms)
2807{
2808 int error = ms->m_result;
2809
2810 /* this is the value returned from do_convert() on the master */
2811
2812 switch (error) {
2813 case -EAGAIN:
2814 /* convert would block (be queued) on remote master */
2815 queue_cast(r, lkb, -EAGAIN);
2816 break;
2817
2818 case -EINPROGRESS:
2819 /* convert was queued on remote master */
2820 del_lkb(r, lkb);
2821 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2822 break;
2823
2824 case 0:
2825 /* convert was granted on remote master */
2826 receive_flags_reply(lkb, ms);
2827 grant_lock_pc(r, lkb, ms);
2828 queue_cast(r, lkb, 0);
2829 break;
2830
2831 default:
2832 log_error(r->res_ls, "receive_convert_reply error %d", error);
2833 }
2834}
2835
2836static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2837{
2838 struct dlm_rsb *r = lkb->lkb_resource;
2839
2840 hold_rsb(r);
2841 lock_rsb(r);
2842
2843 __receive_convert_reply(r, lkb, ms);
2844
2845 unlock_rsb(r);
2846 put_rsb(r);
2847}
2848
2849static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
2850{
2851 struct dlm_lkb *lkb;
2852 int error;
2853
2854 error = find_lkb(ls, ms->m_remid, &lkb);
2855 if (error) {
2856 log_error(ls, "receive_convert_reply no lkb");
2857 return;
2858 }
2859 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2860
2861 error = remove_from_waiters(lkb);
2862 if (error) {
2863 log_error(ls, "receive_convert_reply not on waiters");
2864 goto out;
2865 }
2866
2867 _receive_convert_reply(lkb, ms);
2868 out:
b3f58d8f 2869 dlm_put_lkb(lkb);
e7fd4179
DT
2870}
2871
2872static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2873{
2874 struct dlm_rsb *r = lkb->lkb_resource;
2875 int error = ms->m_result;
2876
2877 hold_rsb(r);
2878 lock_rsb(r);
2879
2880 /* this is the value returned from do_unlock() on the master */
2881
2882 switch (error) {
2883 case -DLM_EUNLOCK:
2884 receive_flags_reply(lkb, ms);
2885 remove_lock_pc(r, lkb);
2886 queue_cast(r, lkb, -DLM_EUNLOCK);
2887 break;
2888 default:
2889 log_error(r->res_ls, "receive_unlock_reply error %d", error);
2890 }
2891
2892 unlock_rsb(r);
2893 put_rsb(r);
2894}
2895
2896static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
2897{
2898 struct dlm_lkb *lkb;
2899 int error;
2900
2901 error = find_lkb(ls, ms->m_remid, &lkb);
2902 if (error) {
2903 log_error(ls, "receive_unlock_reply no lkb");
2904 return;
2905 }
2906 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2907
2908 error = remove_from_waiters(lkb);
2909 if (error) {
2910 log_error(ls, "receive_unlock_reply not on waiters");
2911 goto out;
2912 }
2913
2914 _receive_unlock_reply(lkb, ms);
2915 out:
b3f58d8f 2916 dlm_put_lkb(lkb);
e7fd4179
DT
2917}
2918
2919static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
2920{
2921 struct dlm_rsb *r = lkb->lkb_resource;
2922 int error = ms->m_result;
2923
2924 hold_rsb(r);
2925 lock_rsb(r);
2926
2927 /* this is the value returned from do_cancel() on the master */
2928
2929 switch (error) {
2930 case -DLM_ECANCEL:
2931 receive_flags_reply(lkb, ms);
2932 revert_lock_pc(r, lkb);
2933 queue_cast(r, lkb, -DLM_ECANCEL);
2934 break;
2935 default:
2936 log_error(r->res_ls, "receive_cancel_reply error %d", error);
2937 }
2938
2939 unlock_rsb(r);
2940 put_rsb(r);
2941}
2942
2943static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
2944{
2945 struct dlm_lkb *lkb;
2946 int error;
2947
2948 error = find_lkb(ls, ms->m_remid, &lkb);
2949 if (error) {
2950 log_error(ls, "receive_cancel_reply no lkb");
2951 return;
2952 }
2953 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
2954
2955 error = remove_from_waiters(lkb);
2956 if (error) {
2957 log_error(ls, "receive_cancel_reply not on waiters");
2958 goto out;
2959 }
2960
2961 _receive_cancel_reply(lkb, ms);
2962 out:
b3f58d8f 2963 dlm_put_lkb(lkb);
e7fd4179
DT
2964}
2965
2966static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
2967{
2968 struct dlm_lkb *lkb;
2969 struct dlm_rsb *r;
2970 int error, ret_nodeid;
2971
2972 error = find_lkb(ls, ms->m_lkid, &lkb);
2973 if (error) {
2974 log_error(ls, "receive_lookup_reply no lkb");
2975 return;
2976 }
2977
2978 error = remove_from_waiters(lkb);
2979 if (error) {
2980 log_error(ls, "receive_lookup_reply not on waiters");
2981 goto out;
2982 }
2983
2984 /* this is the value returned by dlm_dir_lookup on dir node
2985 FIXME: will a non-zero error ever be returned? */
2986 error = ms->m_result;
2987
2988 r = lkb->lkb_resource;
2989 hold_rsb(r);
2990 lock_rsb(r);
2991
2992 ret_nodeid = ms->m_nodeid;
2993 if (ret_nodeid == dlm_our_nodeid()) {
2994 r->res_nodeid = 0;
2995 ret_nodeid = 0;
2996 r->res_first_lkid = 0;
2997 } else {
2998 /* set_master() will copy res_nodeid to lkb_nodeid */
2999 r->res_nodeid = ret_nodeid;
3000 }
3001
3002 _request_lock(r, lkb);
3003
3004 if (!ret_nodeid)
3005 process_lookup_list(r);
3006
3007 unlock_rsb(r);
3008 put_rsb(r);
3009 out:
b3f58d8f 3010 dlm_put_lkb(lkb);
e7fd4179
DT
3011}
3012
3013int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3014{
3015 struct dlm_message *ms = (struct dlm_message *) hd;
3016 struct dlm_ls *ls;
3017 int error;
3018
3019 if (!recovery)
3020 dlm_message_in(ms);
3021
3022 ls = dlm_find_lockspace_global(hd->h_lockspace);
3023 if (!ls) {
3024 log_print("drop message %d from %d for unknown lockspace %d",
3025 ms->m_type, nodeid, hd->h_lockspace);
3026 return -EINVAL;
3027 }
3028
3029 /* recovery may have just ended leaving a bunch of backed-up requests
3030 in the requestqueue; wait while dlm_recoverd clears them */
3031
3032 if (!recovery)
3033 dlm_wait_requestqueue(ls);
3034
3035 /* recovery may have just started while there were a bunch of
3036 in-flight requests -- save them in requestqueue to be processed
3037 after recovery. we can't let dlm_recvd block on the recovery
3038 lock. if dlm_recoverd is calling this function to clear the
3039 requestqueue, it needs to be interrupted (-EINTR) if another
3040 recovery operation is starting. */
3041
3042 while (1) {
3043 if (dlm_locking_stopped(ls)) {
d4400156
DT
3044 if (recovery) {
3045 error = -EINTR;
3046 goto out;
3047 }
3048 error = dlm_add_requestqueue(ls, nodeid, hd);
3049 if (error == -EAGAIN)
3050 continue;
3051 else {
3052 error = -EINTR;
3053 goto out;
3054 }
e7fd4179
DT
3055 }
3056
3057 if (lock_recovery_try(ls))
3058 break;
3059 schedule();
3060 }
3061
3062 switch (ms->m_type) {
3063
3064 /* messages sent to a master node */
3065
3066 case DLM_MSG_REQUEST:
3067 receive_request(ls, ms);
3068 break;
3069
3070 case DLM_MSG_CONVERT:
3071 receive_convert(ls, ms);
3072 break;
3073
3074 case DLM_MSG_UNLOCK:
3075 receive_unlock(ls, ms);
3076 break;
3077
3078 case DLM_MSG_CANCEL:
3079 receive_cancel(ls, ms);
3080 break;
3081
3082 /* messages sent from a master node (replies to above) */
3083
3084 case DLM_MSG_REQUEST_REPLY:
3085 receive_request_reply(ls, ms);
3086 break;
3087
3088 case DLM_MSG_CONVERT_REPLY:
3089 receive_convert_reply(ls, ms);
3090 break;
3091
3092 case DLM_MSG_UNLOCK_REPLY:
3093 receive_unlock_reply(ls, ms);
3094 break;
3095
3096 case DLM_MSG_CANCEL_REPLY:
3097 receive_cancel_reply(ls, ms);
3098 break;
3099
3100 /* messages sent from a master node (only two types of async msg) */
3101
3102 case DLM_MSG_GRANT:
3103 receive_grant(ls, ms);
3104 break;
3105
3106 case DLM_MSG_BAST:
3107 receive_bast(ls, ms);
3108 break;
3109
3110 /* messages sent to a dir node */
3111
3112 case DLM_MSG_LOOKUP:
3113 receive_lookup(ls, ms);
3114 break;
3115
3116 case DLM_MSG_REMOVE:
3117 receive_remove(ls, ms);
3118 break;
3119
3120 /* messages sent from a dir node (remove has no reply) */
3121
3122 case DLM_MSG_LOOKUP_REPLY:
3123 receive_lookup_reply(ls, ms);
3124 break;
3125
3126 default:
3127 log_error(ls, "unknown message type %d", ms->m_type);
3128 }
3129
3130 unlock_recovery(ls);
3131 out:
3132 dlm_put_lockspace(ls);
3133 dlm_astd_wake();
3134 return 0;
3135}
3136
3137
3138/*
3139 * Recovery related
3140 */
3141
3142static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3143{
3144 if (middle_conversion(lkb)) {
3145 hold_lkb(lkb);
3146 ls->ls_stub_ms.m_result = -EINPROGRESS;
3147 _remove_from_waiters(lkb);
3148 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3149
3150 /* Same special case as in receive_rcom_lock_args() */
3151 lkb->lkb_grmode = DLM_LOCK_IV;
3152 rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
3153 unhold_lkb(lkb);
3154
3155 } else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
3156 lkb->lkb_flags |= DLM_IFL_RESEND;
3157 }
3158
3159 /* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down
3160 conversions are async; there's no reply from the remote master */
3161}
3162
3163/* A waiting lkb needs recovery if the master node has failed, or
3164 the master node is changing (only when no directory is used) */
3165
3166static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
3167{
3168 if (dlm_is_removed(ls, lkb->lkb_nodeid))
3169 return 1;
3170
3171 if (!dlm_no_directory(ls))
3172 return 0;
3173
3174 if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid)
3175 return 1;
3176
3177 return 0;
3178}
3179
3180/* Recovery for locks that are waiting for replies from nodes that are now
3181 gone. We can just complete unlocks and cancels by faking a reply from the
3182 dead node. Requests and up-conversions we flag to be resent after
3183 recovery. Down-conversions can just be completed with a fake reply like
3184 unlocks. Conversions between PR and CW need special attention. */
3185
3186void dlm_recover_waiters_pre(struct dlm_ls *ls)
3187{
3188 struct dlm_lkb *lkb, *safe;
3189
90135925 3190 mutex_lock(&ls->ls_waiters_mutex);
e7fd4179
DT
3191
3192 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
3193 log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
3194 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
3195
3196 /* all outstanding lookups, regardless of destination will be
3197 resent after recovery is done */
3198
3199 if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) {
3200 lkb->lkb_flags |= DLM_IFL_RESEND;
3201 continue;
3202 }
3203
3204 if (!waiter_needs_recovery(ls, lkb))
3205 continue;
3206
3207 switch (lkb->lkb_wait_type) {
3208
3209 case DLM_MSG_REQUEST:
3210 lkb->lkb_flags |= DLM_IFL_RESEND;
3211 break;
3212
3213 case DLM_MSG_CONVERT:
3214 recover_convert_waiter(ls, lkb);
3215 break;
3216
3217 case DLM_MSG_UNLOCK:
3218 hold_lkb(lkb);
3219 ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
3220 _remove_from_waiters(lkb);
3221 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
b3f58d8f 3222 dlm_put_lkb(lkb);
e7fd4179
DT
3223 break;
3224
3225 case DLM_MSG_CANCEL:
3226 hold_lkb(lkb);
3227 ls->ls_stub_ms.m_result = -DLM_ECANCEL;
3228 _remove_from_waiters(lkb);
3229 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
b3f58d8f 3230 dlm_put_lkb(lkb);
e7fd4179
DT
3231 break;
3232
3233 default:
3234 log_error(ls, "invalid lkb wait_type %d",
3235 lkb->lkb_wait_type);
3236 }
81456807 3237 schedule();
e7fd4179 3238 }
90135925 3239 mutex_unlock(&ls->ls_waiters_mutex);
e7fd4179
DT
3240}
3241
3242static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
3243{
3244 struct dlm_lkb *lkb;
3245 int rv = 0;
3246
90135925 3247 mutex_lock(&ls->ls_waiters_mutex);
e7fd4179
DT
3248 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
3249 if (lkb->lkb_flags & DLM_IFL_RESEND) {
3250 rv = lkb->lkb_wait_type;
3251 _remove_from_waiters(lkb);
3252 lkb->lkb_flags &= ~DLM_IFL_RESEND;
3253 break;
3254 }
3255 }
90135925 3256 mutex_unlock(&ls->ls_waiters_mutex);
e7fd4179
DT
3257
3258 if (!rv)
3259 lkb = NULL;
3260 *lkb_ret = lkb;
3261 return rv;
3262}
3263
3264/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
3265 master or dir-node for r. Processing the lkb may result in it being placed
3266 back on waiters. */
3267
3268int dlm_recover_waiters_post(struct dlm_ls *ls)
3269{
3270 struct dlm_lkb *lkb;
3271 struct dlm_rsb *r;
3272 int error = 0, mstype;
3273
3274 while (1) {
3275 if (dlm_locking_stopped(ls)) {
3276 log_debug(ls, "recover_waiters_post aborted");
3277 error = -EINTR;
3278 break;
3279 }
3280
3281 mstype = remove_resend_waiter(ls, &lkb);
3282 if (!mstype)
3283 break;
3284
3285 r = lkb->lkb_resource;
3286
3287 log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
3288 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
3289
3290 switch (mstype) {
3291
3292 case DLM_MSG_LOOKUP:
3293 hold_rsb(r);
3294 lock_rsb(r);
3295 _request_lock(r, lkb);
3296 if (is_master(r))
3297 confirm_master(r, 0);
3298 unlock_rsb(r);
3299 put_rsb(r);
3300 break;
3301
3302 case DLM_MSG_REQUEST:
3303 hold_rsb(r);
3304 lock_rsb(r);
3305 _request_lock(r, lkb);
fa9f0e49
DT
3306 if (is_master(r))
3307 confirm_master(r, 0);
e7fd4179
DT
3308 unlock_rsb(r);
3309 put_rsb(r);
3310 break;
3311
3312 case DLM_MSG_CONVERT:
3313 hold_rsb(r);
3314 lock_rsb(r);
3315 _convert_lock(r, lkb);
3316 unlock_rsb(r);
3317 put_rsb(r);
3318 break;
3319
3320 default:
3321 log_error(ls, "recover_waiters_post type %d", mstype);
3322 }
3323 }
3324
3325 return error;
3326}
3327
3328static void purge_queue(struct dlm_rsb *r, struct list_head *queue,
3329 int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb))
3330{
3331 struct dlm_ls *ls = r->res_ls;
3332 struct dlm_lkb *lkb, *safe;
3333
3334 list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) {
3335 if (test(ls, lkb)) {
97a35d1e 3336 rsb_set_flag(r, RSB_LOCKS_PURGED);
e7fd4179
DT
3337 del_lkb(r, lkb);
3338 /* this put should free the lkb */
b3f58d8f 3339 if (!dlm_put_lkb(lkb))
e7fd4179
DT
3340 log_error(ls, "purged lkb not released");
3341 }
3342 }
3343}
3344
3345static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
3346{
3347 return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid));
3348}
3349
/* purge_queue() predicate: any master-copy lkb (ls is not consulted). */
static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
	int is_mstcpy = is_master_copy(lkb);

	return is_mstcpy;
}
3354
3355static void purge_dead_locks(struct dlm_rsb *r)
3356{
3357 purge_queue(r, &r->res_grantqueue, &purge_dead_test);
3358 purge_queue(r, &r->res_convertqueue, &purge_dead_test);
3359 purge_queue(r, &r->res_waitqueue, &purge_dead_test);
3360}
3361
3362void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
3363{
3364 purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test);
3365 purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test);
3366 purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test);
3367}
3368
3369/* Get rid of locks held by nodes that are gone. */
3370
3371int dlm_purge_locks(struct dlm_ls *ls)
3372{
3373 struct dlm_rsb *r;
3374
3375 log_debug(ls, "dlm_purge_locks");
3376
3377 down_write(&ls->ls_root_sem);
3378 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
3379 hold_rsb(r);
3380 lock_rsb(r);
3381 if (is_master(r))
3382 purge_dead_locks(r);
3383 unlock_rsb(r);
3384 unhold_rsb(r);
3385
3386 schedule();
3387 }
3388 up_write(&ls->ls_root_sem);
3389
3390 return 0;
3391}
3392
97a35d1e
DT
3393static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket)
3394{
3395 struct dlm_rsb *r, *r_ret = NULL;
3396
3397 read_lock(&ls->ls_rsbtbl[bucket].lock);
3398 list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) {
3399 if (!rsb_flag(r, RSB_LOCKS_PURGED))
3400 continue;
3401 hold_rsb(r);
3402 rsb_clear_flag(r, RSB_LOCKS_PURGED);
3403 r_ret = r;
3404 break;
3405 }
3406 read_unlock(&ls->ls_rsbtbl[bucket].lock);
3407 return r_ret;
3408}
3409
3410void dlm_grant_after_purge(struct dlm_ls *ls)
e7fd4179
DT
3411{
3412 struct dlm_rsb *r;
2b4e926a 3413 int bucket = 0;
e7fd4179 3414
2b4e926a
DT
3415 while (1) {
3416 r = find_purged_rsb(ls, bucket);
3417 if (!r) {
3418 if (bucket == ls->ls_rsbtbl_size - 1)
3419 break;
3420 bucket++;
97a35d1e 3421 continue;
2b4e926a 3422 }
97a35d1e
DT
3423 lock_rsb(r);
3424 if (is_master(r)) {
3425 grant_pending_locks(r);
3426 confirm_master(r, 0);
e7fd4179 3427 }
97a35d1e
DT
3428 unlock_rsb(r);
3429 put_rsb(r);
2b4e926a 3430 schedule();
e7fd4179 3431 }
e7fd4179
DT
3432}
3433
3434static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
3435 uint32_t remid)
3436{
3437 struct dlm_lkb *lkb;
3438
3439 list_for_each_entry(lkb, head, lkb_statequeue) {
3440 if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid)
3441 return lkb;
3442 }
3443 return NULL;
3444}
3445
3446static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid,
3447 uint32_t remid)
3448{
3449 struct dlm_lkb *lkb;
3450
3451 lkb = search_remid_list(&r->res_grantqueue, nodeid, remid);
3452 if (lkb)
3453 return lkb;
3454 lkb = search_remid_list(&r->res_convertqueue, nodeid, remid);
3455 if (lkb)
3456 return lkb;
3457 lkb = search_remid_list(&r->res_waitqueue, nodeid, remid);
3458 if (lkb)
3459 return lkb;
3460 return NULL;
3461}
3462
3463static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3464 struct dlm_rsb *r, struct dlm_rcom *rc)
3465{
3466 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3467 int lvblen;
3468
3469 lkb->lkb_nodeid = rc->rc_header.h_nodeid;
3470 lkb->lkb_ownpid = rl->rl_ownpid;
3471 lkb->lkb_remid = rl->rl_lkid;
3472 lkb->lkb_exflags = rl->rl_exflags;
3473 lkb->lkb_flags = rl->rl_flags & 0x0000FFFF;
3474 lkb->lkb_flags |= DLM_IFL_MSTCPY;
3475 lkb->lkb_lvbseq = rl->rl_lvbseq;
3476 lkb->lkb_rqmode = rl->rl_rqmode;
3477 lkb->lkb_grmode = rl->rl_grmode;
3478 /* don't set lkb_status because add_lkb wants to itself */
3479
3480 lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST);
3481 lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);
3482
e7fd4179
DT
3483 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3484 lkb->lkb_lvbptr = allocate_lvb(ls);
3485 if (!lkb->lkb_lvbptr)
3486 return -ENOMEM;
3487 lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
3488 sizeof(struct rcom_lock);
3489 memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
3490 }
3491
3492 /* Conversions between PR and CW (middle modes) need special handling.
3493 The real granted mode of these converting locks cannot be determined
3494 until all locks have been rebuilt on the rsb (recover_conversion) */
3495
3496 if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) {
3497 rl->rl_status = DLM_LKSTS_CONVERT;
3498 lkb->lkb_grmode = DLM_LOCK_IV;
3499 rsb_set_flag(r, RSB_RECOVER_CONVERT);
3500 }
3501
3502 return 0;
3503}
3504
3505/* This lkb may have been recovered in a previous aborted recovery so we need
3506 to check if the rsb already has an lkb with the given remote nodeid/lkid.
3507 If so we just send back a standard reply. If not, we create a new lkb with
3508 the given values and send back our lkid. We send back our lkid by sending
3509 back the rcom_lock struct we got but with the remid field filled in. */
3510
3511int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3512{
3513 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3514 struct dlm_rsb *r;
3515 struct dlm_lkb *lkb;
3516 int error;
3517
3518 if (rl->rl_parent_lkid) {
3519 error = -EOPNOTSUPP;
3520 goto out;
3521 }
3522
3523 error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r);
3524 if (error)
3525 goto out;
3526
3527 lock_rsb(r);
3528
3529 lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid);
3530 if (lkb) {
3531 error = -EEXIST;
3532 goto out_remid;
3533 }
3534
3535 error = create_lkb(ls, &lkb);
3536 if (error)
3537 goto out_unlock;
3538
3539 error = receive_rcom_lock_args(ls, lkb, r, rc);
3540 if (error) {
b3f58d8f 3541 __put_lkb(ls, lkb);
e7fd4179
DT
3542 goto out_unlock;
3543 }
3544
3545 attach_lkb(r, lkb);
3546 add_lkb(r, lkb, rl->rl_status);
3547 error = 0;
3548
3549 out_remid:
3550 /* this is the new value returned to the lock holder for
3551 saving in its process-copy lkb */
3552 rl->rl_remid = lkb->lkb_id;
3553
3554 out_unlock:
3555 unlock_rsb(r);
3556 put_rsb(r);
3557 out:
3558 if (error)
3559 log_print("recover_master_copy %d %x", error, rl->rl_lkid);
3560 rl->rl_result = error;
3561 return error;
3562}
3563
3564int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3565{
3566 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
3567 struct dlm_rsb *r;
3568 struct dlm_lkb *lkb;
3569 int error;
3570
3571 error = find_lkb(ls, rl->rl_lkid, &lkb);
3572 if (error) {
3573 log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid);
3574 return error;
3575 }
3576
3577 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3578
3579 error = rl->rl_result;
3580
3581 r = lkb->lkb_resource;
3582 hold_rsb(r);
3583 lock_rsb(r);
3584
3585 switch (error) {
dc200a88
DT
3586 case -EBADR:
3587 /* There's a chance the new master received our lock before
3588 dlm_recover_master_reply(), this wouldn't happen if we did
3589 a barrier between recover_masters and recover_locks. */
3590 log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
3591 (unsigned long)r, r->res_name);
3592 dlm_send_rcom_lock(r, lkb);
3593 goto out;
e7fd4179
DT
3594 case -EEXIST:
3595 log_debug(ls, "master copy exists %x", lkb->lkb_id);
3596 /* fall through */
3597 case 0:
3598 lkb->lkb_remid = rl->rl_remid;
3599 break;
3600 default:
3601 log_error(ls, "dlm_recover_process_copy unknown error %d %x",
3602 error, lkb->lkb_id);
3603 }
3604
3605 /* an ack for dlm_recover_locks() which waits for replies from
3606 all the locks it sends to new masters */
3607 dlm_recovered_lock(r);
dc200a88 3608 out:
e7fd4179
DT
3609 unlock_rsb(r);
3610 put_rsb(r);
b3f58d8f 3611 dlm_put_lkb(lkb);
e7fd4179
DT
3612
3613 return 0;
3614}
3615
597d0cae
DT
3616int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
3617 int mode, uint32_t flags, void *name, unsigned int namelen,
3618 uint32_t parent_lkid)
3619{
3620 struct dlm_lkb *lkb;
3621 struct dlm_args args;
3622 int error;
3623
3624 lock_recovery(ls);
3625
3626 error = create_lkb(ls, &lkb);
3627 if (error) {
3628 kfree(ua);
3629 goto out;
3630 }
3631
3632 if (flags & DLM_LKF_VALBLK) {
3633 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3634 if (!ua->lksb.sb_lvbptr) {
3635 kfree(ua);
3636 __put_lkb(ls, lkb);
3637 error = -ENOMEM;
3638 goto out;
3639 }
3640 }
3641
3642 /* After ua is attached to lkb it will be freed by free_lkb().
3643 When DLM_IFL_USER is set, the dlm knows that this is a userspace
3644 lock and that lkb_astparam is the dlm_user_args structure. */
3645
3646 error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
32f105a1 3647 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
597d0cae
DT
3648 lkb->lkb_flags |= DLM_IFL_USER;
3649 ua->old_mode = DLM_LOCK_IV;
3650
3651 if (error) {
3652 __put_lkb(ls, lkb);
3653 goto out;
3654 }
3655
3656 error = request_lock(ls, lkb, name, namelen, &args);
3657
3658 switch (error) {
3659 case 0:
3660 break;
3661 case -EINPROGRESS:
3662 error = 0;
3663 break;
3664 case -EAGAIN:
3665 error = 0;
3666 /* fall through */
3667 default:
3668 __put_lkb(ls, lkb);
3669 goto out;
3670 }
3671
3672 /* add this new lkb to the per-process list of locks */
3673 spin_lock(&ua->proc->locks_spin);
3674 kref_get(&lkb->lkb_ref);
3675 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
3676 spin_unlock(&ua->proc->locks_spin);
3677 out:
3678 unlock_recovery(ls);
3679 return error;
3680}
3681
3682int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3683 int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
3684{
3685 struct dlm_lkb *lkb;
3686 struct dlm_args args;
3687 struct dlm_user_args *ua;
3688 int error;
3689
3690 lock_recovery(ls);
3691
3692 error = find_lkb(ls, lkid, &lkb);
3693 if (error)
3694 goto out;
3695
3696 /* user can change the params on its lock when it converts it, or
3697 add an lvb that didn't exist before */
3698
3699 ua = (struct dlm_user_args *)lkb->lkb_astparam;
3700
3701 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
3702 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3703 if (!ua->lksb.sb_lvbptr) {
3704 error = -ENOMEM;
3705 goto out_put;
3706 }
3707 }
3708 if (lvb_in && ua->lksb.sb_lvbptr)
3709 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
3710
3711 ua->castparam = ua_tmp->castparam;
3712 ua->castaddr = ua_tmp->castaddr;
3713 ua->bastparam = ua_tmp->bastparam;
3714 ua->bastaddr = ua_tmp->bastaddr;
10948eb4 3715 ua->user_lksb = ua_tmp->user_lksb;
597d0cae
DT
3716 ua->old_mode = lkb->lkb_grmode;
3717
32f105a1
DT
3718 error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
3719 ua, DLM_FAKE_USER_AST, &args);
597d0cae
DT
3720 if (error)
3721 goto out_put;
3722
3723 error = convert_lock(ls, lkb, &args);
3724
3725 if (error == -EINPROGRESS || error == -EAGAIN)
3726 error = 0;
3727 out_put:
3728 dlm_put_lkb(lkb);
3729 out:
3730 unlock_recovery(ls);
3731 kfree(ua_tmp);
3732 return error;
3733}
3734
3735int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3736 uint32_t flags, uint32_t lkid, char *lvb_in)
3737{
3738 struct dlm_lkb *lkb;
3739 struct dlm_args args;
3740 struct dlm_user_args *ua;
3741 int error;
3742
3743 lock_recovery(ls);
3744
3745 error = find_lkb(ls, lkid, &lkb);
3746 if (error)
3747 goto out;
3748
3749 ua = (struct dlm_user_args *)lkb->lkb_astparam;
3750
3751 if (lvb_in && ua->lksb.sb_lvbptr)
3752 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
3753 ua->castparam = ua_tmp->castparam;
cc346d55 3754 ua->user_lksb = ua_tmp->user_lksb;
597d0cae
DT
3755
3756 error = set_unlock_args(flags, ua, &args);
3757 if (error)
3758 goto out_put;
3759
3760 error = unlock_lock(ls, lkb, &args);
3761
3762 if (error == -DLM_EUNLOCK)
3763 error = 0;
3764 if (error)
3765 goto out_put;
3766
3767 spin_lock(&ua->proc->locks_spin);
34e22bed 3768 list_del_init(&lkb->lkb_ownqueue);
597d0cae
DT
3769 spin_unlock(&ua->proc->locks_spin);
3770
3771 /* this removes the reference for the proc->locks list added by
3772 dlm_user_request */
3773 unhold_lkb(lkb);
3774 out_put:
3775 dlm_put_lkb(lkb);
3776 out:
3777 unlock_recovery(ls);
3778 return error;
3779}
3780
3781int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3782 uint32_t flags, uint32_t lkid)
3783{
3784 struct dlm_lkb *lkb;
3785 struct dlm_args args;
3786 struct dlm_user_args *ua;
3787 int error;
3788
3789 lock_recovery(ls);
3790
3791 error = find_lkb(ls, lkid, &lkb);
3792 if (error)
3793 goto out;
3794
3795 ua = (struct dlm_user_args *)lkb->lkb_astparam;
3796 ua->castparam = ua_tmp->castparam;
c059f70e 3797 ua->user_lksb = ua_tmp->user_lksb;
597d0cae
DT
3798
3799 error = set_unlock_args(flags, ua, &args);
3800 if (error)
3801 goto out_put;
3802
3803 error = cancel_lock(ls, lkb, &args);
3804
3805 if (error == -DLM_ECANCEL)
3806 error = 0;
3807 if (error)
3808 goto out_put;
3809
3810 /* this lkb was removed from the WAITING queue */
3811 if (lkb->lkb_grmode == DLM_LOCK_IV) {
3812 spin_lock(&ua->proc->locks_spin);
34e22bed 3813 list_del_init(&lkb->lkb_ownqueue);
597d0cae
DT
3814 spin_unlock(&ua->proc->locks_spin);
3815 unhold_lkb(lkb);
3816 }
3817 out_put:
3818 dlm_put_lkb(lkb);
3819 out:
3820 unlock_recovery(ls);
3821 return error;
3822}
3823
3824static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
3825{
3826 struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam;
3827
3828 if (ua->lksb.sb_lvbptr)
3829 kfree(ua->lksb.sb_lvbptr);
3830 kfree(ua);
3831 lkb->lkb_astparam = (long)NULL;
3832
3833 /* TODO: propogate to master if needed */
3834 return 0;
3835}
3836
3837/* The force flag allows the unlock to go ahead even if the lkb isn't granted.
3838 Regardless of what rsb queue the lock is on, it's removed and freed. */
3839
3840static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
3841{
3842 struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam;
3843 struct dlm_args args;
3844 int error;
3845
3846 /* FIXME: we need to handle the case where the lkb is in limbo
3847 while the rsb is being looked up, currently we assert in
3848 _unlock_lock/is_remote because rsb nodeid is -1. */
3849
3850 set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args);
3851
3852 error = unlock_lock(ls, lkb, &args);
3853 if (error == -DLM_EUNLOCK)
3854 error = 0;
3855 return error;
3856}
3857
3858/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which
3859 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
3860 which we clear here. */
3861
3862/* proc CLOSING flag is set so no more device_reads should look at proc->asts
3863 list, and no more device_writes should add lkb's to proc->locks list; so we
3864 shouldn't need to take asts_spin or locks_spin here. this assumes that
3865 device reads/writes/closes are serialized -- FIXME: we may need to serialize
3866 them ourself. */
3867
3868void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3869{
3870 struct dlm_lkb *lkb, *safe;
3871
3872 lock_recovery(ls);
3873 mutex_lock(&ls->ls_clear_proc_locks);
3874
3875 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
3876 if (lkb->lkb_ast_type) {
3877 list_del(&lkb->lkb_astqueue);
3878 unhold_lkb(lkb);
3879 }
3880
34e22bed 3881 list_del_init(&lkb->lkb_ownqueue);
597d0cae
DT
3882
3883 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
3884 lkb->lkb_flags |= DLM_IFL_ORPHAN;
3885 orphan_proc_lock(ls, lkb);
3886 } else {
3887 lkb->lkb_flags |= DLM_IFL_DEAD;
3888 unlock_proc_lock(ls, lkb);
3889 }
3890
3891 /* this removes the reference for the proc->locks list
3892 added by dlm_user_request, it may result in the lkb
3893 being freed */
3894
3895 dlm_put_lkb(lkb);
3896 }
3897 mutex_unlock(&ls->ls_clear_proc_locks);
3898 unlock_recovery(ls);
3899}