]> git.proxmox.com Git - pve-cluster.git/blame - data/src/dcdb.c
remove cman, and use corosync.conf instead of cluster.conf
[pve-cluster.git] / data / src / dcdb.c
CommitLineData
fe000966
DM
1/*
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
13
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Author: Dietmar Maurer <dietmar@proxmox.com>
18
19*/
20
21#define G_LOG_DOMAIN "dcdb"
22
23#ifdef HAVE_CONFIG_H
24#include <config.h>
25#endif /* HAVE_CONFIG_H */
26
27#include <stdlib.h>
28#include <stdio.h>
29#include <string.h>
30#include <unistd.h>
31#include <glib.h>
32#include <sys/types.h>
33#include <sys/wait.h>
34#include <arpa/inet.h>
35#include <sys/epoll.h>
36#include <dirent.h>
37#include <errno.h>
38
39#include "cfs-utils.h"
40#include "loop.h"
41#include "dcdb.h"
42#include "status.h"
43
44typedef struct {
45 memdb_index_t *master;
46 memdb_index_t *idx;
47 GList *updates;
48} dcdb_sync_info_t;
49
50void
51dcdb_send_unlock(
52 dfsm_t *dfsm,
53 const char *path,
54 const guchar csum[32],
55 gboolean request)
56{
57 g_return_if_fail(dfsm != NULL);
58 g_return_if_fail(path != NULL);
59 g_return_if_fail(csum != NULL);
60
61 struct iovec iov[2];
62
63 iov[0].iov_base = (char *)csum;
64 iov[0].iov_len = 32;
65
66 iov[1].iov_base = (char *)path;
67 iov[1].iov_len = strlen(path) + 1;
68
69 if (!cfs_is_quorate())
70 return;
71
72 dcdb_message_t msg_type = request ?
73 DCDB_MESSAGE_CFS_UNLOCK_REQUEST : DCDB_MESSAGE_CFS_UNLOCK;
74
75 dfsm_send_message_sync(dfsm, msg_type, iov, 2, NULL);
76}
77
78static gboolean
79dcdb_parse_unlock_request(
80 const void *msg,
81 size_t msg_len,
82 const char **path,
83 const guchar **csum)
84
85{
86 g_return_val_if_fail(msg != NULL, FALSE);
87 g_return_val_if_fail(path != NULL, FALSE);
88 g_return_val_if_fail(csum != NULL, FALSE);
89
90 if (msg_len < 33) {
91 cfs_critical("received short unlock message (%lu < 33)", msg_len);
92 return FALSE;
93 }
94
95 *csum = msg;
96 msg += 32; msg_len -= 32;
97
98 *path = msg;
99 if ((*path)[msg_len - 1] != 0) {
100 cfs_critical("received mailformed unlock message - 'path' not terminated");
101 *path = NULL;
102 return FALSE;
103 }
104
105 return TRUE;
106}
107
108int
109dcdb_send_fuse_message(
110 dfsm_t *dfsm,
111 dcdb_message_t msg_type,
112 const char *path,
113 const char *to,
114 const char *buf,
115 guint32 size,
116 guint32 offset,
117 guint32 flags)
118{
119 struct iovec iov[8];
120
121 iov[0].iov_base = (char *)&size;
122 iov[0].iov_len = sizeof(size);
123
124 iov[1].iov_base = (char *)&offset;
125 iov[1].iov_len = sizeof(offset);
126
127 guint32 pathlen = path ? strlen(path) + 1 : 0;
128 iov[2].iov_base = (char *)&pathlen;
129 iov[2].iov_len = sizeof(pathlen);
130
131 guint32 tolen = to ? strlen(to) + 1 : 0;
132 iov[3].iov_base = (char *)&tolen;
133 iov[3].iov_len = sizeof(tolen);
134
135 iov[4].iov_base = (char *)&flags;
136 iov[4].iov_len = sizeof(flags);
137
138 iov[5].iov_base = (char *)path;
139 iov[5].iov_len = pathlen;
140
141 iov[6].iov_base = (char *)to;
142 iov[6].iov_len = tolen;
143
144 iov[7].iov_base = (char *)buf;
145 iov[7].iov_len = size;
146
147 dfsm_result_t rc;
148 memset(&rc, 0, sizeof(rc));
149 rc.result = -EBUSY;
150
151 if (!cfs_is_quorate())
152 return -EACCES;
153
154 if (dfsm_send_message_sync(dfsm, msg_type, iov, 8, &rc))
155 return rc.result;
156
157 return -EACCES;
158}
159
160static gboolean
161dcdb_parse_fuse_message(
162 const void *msg,
163 size_t msg_len,
164 const char **path,
165 const char **to,
166 const char **buf,
167 guint32 *size,
168 guint32 *offset,
169 guint32 *flags)
170
171{
172 g_return_val_if_fail(msg != NULL, FALSE);
173 g_return_val_if_fail(path != NULL, FALSE);
174 g_return_val_if_fail(to != NULL, FALSE);
175 g_return_val_if_fail(buf != NULL, FALSE);
176 g_return_val_if_fail(size != NULL, FALSE);
177 g_return_val_if_fail(offset != NULL, FALSE);
178 g_return_val_if_fail(flags != NULL, FALSE);
179
180 if (msg_len < 20) {
181 cfs_critical("received short fuse message (%lu < 20)", msg_len);
182 return FALSE;
183 }
184
185 *size = *((guint32 *)msg);
186 msg += 4; msg_len -= 4;
187
188 *offset = *((guint32 *)msg);
189 msg += 4; msg_len -= 4;
190
191 guint32 pathlen = *((guint32 *)msg);
192 msg += 4; msg_len -= 4;
193
194 guint32 tolen = *((guint32 *)msg);
195 msg += 4; msg_len -= 4;
196
197 *flags = *((guint32 *)msg);
198 msg += 4; msg_len -= 4;
199
200 if (msg_len != ((*size) + pathlen + tolen)) {
201 cfs_critical("received mailformed fuse message");
202 return FALSE;
203 }
204
205 *path = (char *)msg;
206 msg += pathlen; msg_len -= pathlen;
207
208 if (pathlen) {
209 if ((*path)[pathlen - 1] != 0) {
210 cfs_critical("received mailformed fuse message - 'path' not terminated");
211 *path = NULL;
212 return FALSE;
213 }
214 } else {
215 *path = NULL;
216 }
217
218 *to = (char *)msg;
219 msg += tolen; msg_len -= tolen;
220
221 if (tolen) {
222 if ((*to)[tolen - 1] != 0) {
223 cfs_critical("received mailformed fuse message - 'to' not terminated");
224 *to = NULL;
225 return FALSE;
226 }
227 } else {
228 *to = NULL;
229 }
230
231 *buf = (*size) ? msg : NULL;
232
233 return TRUE;
234}
235
236static gboolean
237dcdb_send_update_inode(
238 dfsm_t *dfsm,
239 memdb_tree_entry_t *te)
240{
241 g_return_val_if_fail(dfsm != NULL, FALSE);
242 g_return_val_if_fail(te != NULL, FALSE);
243
244 int len;
245 struct iovec iov[20];
246
247 uint32_t namelen = strlen(te->name) + 1;
248
249 iov[0].iov_base = (char *)&te->parent;
250 iov[0].iov_len = sizeof(te->parent);
251 iov[1].iov_base = (char *)&te->inode;
252 iov[1].iov_len = sizeof(te->inode);
253 iov[2].iov_base = (char *)&te->version;
254 iov[2].iov_len = sizeof(te->version);
255 iov[3].iov_base = (char *)&te->writer;
256 iov[3].iov_len = sizeof(te->writer);
257 iov[4].iov_base = (char *)&te->mtime;
258 iov[4].iov_len = sizeof(te->mtime);
259 iov[5].iov_base = (char *)&te->size;
260 iov[5].iov_len = sizeof(te->size);
261 iov[6].iov_base = (char *)&namelen;
262 iov[6].iov_len = sizeof(namelen);
263 iov[7].iov_base = (char *)&te->type;
264 iov[7].iov_len = sizeof(te->type);
265 iov[8].iov_base = (char *)te->name;
266 iov[8].iov_len = namelen;
267
268 len = 9;
269 if (te->type == DT_REG && te->size) {
270 iov[9].iov_base = (char *)te->data.value;
271 iov[9].iov_len = te->size;
272 len++;
273 }
274
275 if (dfsm_send_update(dfsm, iov, len) != CS_OK)
276 return FALSE;
277
278 return TRUE;
279}
280
281memdb_tree_entry_t *
282dcdb_parse_update_inode(
283 const void *msg,
284 size_t msg_len)
285{
286 if (msg_len < 40) {
287 cfs_critical("received short message (msg_len < 40)");
288 return NULL;
289 }
290
291 guint64 parent = *((guint64 *)msg);
292 msg += 8; msg_len -= 8;
293 guint64 inode = *((guint64 *)msg);
294 msg += 8; msg_len -= 8;
295 guint64 version = *((guint64 *)msg);
296 msg += 8; msg_len -= 8;
297
298 guint32 writer = *((guint32 *)msg);
299 msg += 4; msg_len -= 4;
300 guint32 mtime = *((guint32 *)msg);
301 msg += 4; msg_len -= 4;
302 guint32 size = *((guint32 *)msg);
303 msg += 4; msg_len -= 4;
304 guint32 namelen = *((guint32 *)msg);
305 msg += 4; msg_len -= 4;
306
307 char type = *((char *)msg);
308 msg += 1; msg_len -= 1;
309
310 if (!(type == DT_REG || type == DT_DIR)) {
311 cfs_critical("received mailformed message (unknown inode type %d)", type);
312 return NULL;
313 }
314
315 if (msg_len != (size + namelen)) {
316 cfs_critical("received mailformed message (msg_len != (size + namelen))");
317 return NULL;
318 }
319
320 char *name = (char *)msg;
321 msg += namelen; msg_len -= namelen;
322
323 const void *data = msg;
324
325 if (name[namelen - 1] != 0) {
326 cfs_critical("received mailformed message (name[namelen-1] != 0)");
327 return NULL;
328 }
329
330 memdb_tree_entry_t *te = memdb_tree_entry_new(name);
331 if (!te)
332 return NULL;
333
334 te->parent = parent;
335 te->version = version;
336 te->inode = inode;
337 te->writer = writer;
338 te->mtime = mtime;
339 te->size = size;
340 te->type = type;
341
342 if (te->type == DT_REG && te->size) {
343 te->data.value = g_memdup(data, te->size);
344 if (!te->data.value) {
345 memdb_tree_entry_free(te);
346 return NULL;
347 }
348 }
349
350 return te;
351}
352
353void
2113d031 354dcdb_sync_corosync_conf(
fe000966 355 memdb_t *memdb,
2113d031 356 gboolean notify_corosync)
fe000966
DM
357{
358 g_return_if_fail(memdb != NULL);
359
360 int len;
361 gpointer data = NULL;
362
2113d031 363 len = memdb_read(memdb, "corosync.conf", &data);
fe000966
DM
364 if (len <= 0)
365 return;
366
367 guint64 new_version = cluster_config_version(data, len);
2113d031
DM
368 if (!new_version) {
369 cfs_critical("unable to parse cluster config_version");
fe000966 370 return;
2113d031 371 }
fe000966
DM
372
373 char *old_data = NULL;
374 gsize old_length = 0;
375 guint64 old_version = 0;
376
377 GError *err = NULL;
378 if (!g_file_get_contents(HOST_CLUSTER_CONF_FN, &old_data, &old_length, &err)) {
f6efd6bf
DM
379 if (!g_error_matches(err, G_FILE_ERROR, G_FILE_ERROR_NOENT)) {
380 cfs_critical("unable to read cluster config file '%s' - %s",
381 HOST_CLUSTER_CONF_FN, err->message);
382 }
fe000966
DM
383 g_error_free (err);
384 } else {
385 if (old_length)
386 old_version = cluster_config_version(old_data, old_length);
387 }
388
389 /* test if something changed - return if no changes */
390 if (data && old_data && (old_length == len) &&
391 !memcmp(data, old_data, len))
392 goto ret;
393
394 if (new_version < old_version) {
2113d031 395 cfs_critical("local corosync.conf is newer");
fe000966
DM
396 goto ret;
397 }
398
2113d031 399 if (!atomic_write_file(HOST_CLUSTER_CONF_FN, data, len, 0644, 0))
fe000966
DM
400 goto ret;
401
2113d031
DM
402 cfs_message("wrote new corosync config '%s' (version = %zd)",
403 HOST_CLUSTER_CONF_FN, new_version);
404
405 if (notify_corosync && old_version) {
406 /* tell corosync that there is a new config file */
407 cfs_debug ("run corosync-cfgtool -R");
408 int status = system("corosync-cfgtool -R >/dev/null 2>&1");
fe000966 409 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
2113d031 410 cfs_critical("corosync-cfgtool -R failed with exit code %d\n", WEXITSTATUS(status));
fe000966 411 }
2113d031 412 cfs_debug ("end corosync-cfgtool -R");
fe000966
DM
413 }
414
415ret:
416
417 if (data)
418 g_free(data);
419
420 if (old_data)
421 g_free(old_data);
422}
423
424static gpointer
425dcdb_get_state(
426 dfsm_t *dfsm,
427 gpointer data,
428 unsigned int *res_len)
429{
430 g_return_val_if_fail(dfsm != NULL, FALSE);
431 g_return_val_if_fail(data != NULL, FALSE);
432
433 memdb_t *memdb = (memdb_t *)data;
434
435 g_return_val_if_fail(memdb->root != NULL, FALSE);
fe000966
DM
436
437 cfs_debug("enter %s %016zX %08X", __func__, memdb->root->version, memdb->root->mtime);
438
89fde9ac 439 g_mutex_lock (&memdb->mutex);
fe000966 440 memdb_index_t *idx = memdb_encode_index(memdb->index, memdb->root);
89fde9ac 441 g_mutex_unlock (&memdb->mutex);
fe000966
DM
442
443 if (idx) {
444 *res_len = idx->bytes;
445 }
446
447 return idx;
448}
449
450static int
451dcdb_select_leader(
452 int node_count,
453 memdb_index_t *idx[])
454{
455 g_return_val_if_fail(idx != NULL, -1);
456
457 cfs_debug("enter %s", __func__);
458
459 int leader = -1;
460
461 /* try select most actual data - compare 'version' an 'time of last write'
462 * NOTE: syncinfo members are sorted
463 */
464 for (int i = 0; i < node_count; i++) {
465 if (leader < 0) {
466 leader = i;
467 } else {
468 memdb_index_t *leaderidx = idx[leader];
469
470 if (idx[i]->version == leaderidx->version &&
471 idx[i]->mtime > leaderidx->mtime) {
472 leader = i;
473 } else if (idx[i]->version > leaderidx->version) {
474 leader = i;
475 }
476 }
477 }
478
479 cfs_debug ("leave %s (%d)", __func__, leader);
480
481 return leader;
482}
483
484static gboolean
485dcdb_create_and_send_updates(
486 dfsm_t *dfsm,
487 memdb_t *memdb,
488 memdb_index_t *master,
489 int node_count,
490 memdb_index_t *idx[])
491{
492 g_return_val_if_fail(dfsm != NULL, FALSE);
493 g_return_val_if_fail(memdb != NULL, FALSE);
fe000966
DM
494 g_return_val_if_fail(master != NULL, FALSE);
495
496 cfs_debug("enter %s", __func__);
497
498 gboolean res = FALSE;
499
500 GHashTable *updates = g_hash_table_new(g_int64_hash, g_int64_equal);
501 if (!updates)
502 goto ret;
503
89fde9ac 504 g_mutex_lock (&memdb->mutex);
fe000966
DM
505
506 for (int n = 0; n < node_count; n++) {
507 memdb_index_t *slave = idx[n];
508
509 if (slave == master)
510 continue;
511
512 int j = 0;
513
514 for (int i = 0; i < master->size; i++) {
515 guint64 inode = master->entries[i].inode;
516 while (j < slave->size && slave->entries[j].inode < inode)
517 j++;
518
519 if (memcmp(&slave->entries[j], &master->entries[i],
520 sizeof(memdb_index_extry_t)) == 0) {
521 continue;
522 }
523
524 if (g_hash_table_lookup(updates, &inode))
525 continue;
526
527 cfs_debug("found different inode %d %016zX", i, inode);
528
529 memdb_tree_entry_t *te, *cpy;
530
531 if (!(te = g_hash_table_lookup(memdb->index, &inode))) {
532 cfs_critical("can get inode data for inode %016zX", inode);
533 goto ret;
534 }
535
536 cpy = memdb_tree_entry_copy(te, 1);
537 g_hash_table_replace(updates, &cpy->inode, cpy);
538 }
539 }
540
89fde9ac 541 g_mutex_unlock (&memdb->mutex);
fe000966
DM
542
543 /* send updates */
544
545 GHashTableIter iter;
546 gpointer key, value;
547 int count = 0;
548
549 cfs_message("start sending inode updates");
550
551 g_hash_table_iter_init (&iter, updates);
552 while (g_hash_table_iter_next (&iter, &key, &value)) {
553 memdb_tree_entry_t *te = (memdb_tree_entry_t *)value;
554 count++;
555
556 if (!dcdb_send_update_inode(dfsm, te)) {
557 /* tolerate error here */
558 cfs_critical("sending update inode failed %016zX", te->inode);
559 } else {
560 cfs_debug("sent update inode %016zX", te->inode);
561 }
562
563 memdb_tree_entry_free(te);
564 }
565
566 cfs_message("sent all (%d) updates", count);
567
568 if (dfsm_send_update_complete(dfsm) != CS_OK) {
569 cfs_critical("failed to send UPDATE_COMPLETE message");
570 goto ret;
571 }
572
573 res = TRUE;
574
575 ret:
576 if (updates)
577 g_hash_table_destroy(updates);
578
579 cfs_debug("leave %s (%d)", __func__, res);
580
581 return res;
582}
583
584static int
585dcdb_process_state_update(
586 dfsm_t *dfsm,
587 gpointer data,
588 dfsm_sync_info_t *syncinfo)
589{
590 g_return_val_if_fail(dfsm != NULL, -1);
591 g_return_val_if_fail(data != NULL, -1);
592 g_return_val_if_fail(syncinfo != NULL, -1);
593
594 memdb_t *memdb = (memdb_t *)data;
595
596 cfs_debug("enter %s", __func__);
597
598 dcdb_sync_info_t *localsi = g_new0(dcdb_sync_info_t, 1);
599 if (!localsi)
600 return -1;
601
602 syncinfo->data = localsi;
603
604 memdb_index_t *idx[syncinfo->node_count];
605
606 for (int i = 0; i < syncinfo->node_count; i++) {
607 dfsm_node_info_t *ni = &syncinfo->nodes[i];
608
609 if (ni->state_len < sizeof(memdb_index_t)) {
610 cfs_critical("received short memdb index (len < sizeof(memdb_index_t))");
611 return -1;
612 }
613
614 idx[i] = (memdb_index_t *)ni->state;
615
616 if (ni->state_len != idx[i]->bytes) {
617 cfs_critical("received mailformed memdb index (len != idx->bytes)");
618 return -1;
619 }
620 }
621
622 /* select leader - set mode */
623 int leader = dcdb_select_leader(syncinfo->node_count, idx);
624 if (leader < 0) {
625 cfs_critical("unable to select leader failed");
626 return -1;
627 }
628
629 cfs_message("leader is %d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
630
631 memdb_index_t *leaderidx = idx[leader];
632 localsi->master = leaderidx;
633
634 GString *str = g_string_new("synced members:");
635 g_string_append_printf(str, " %d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
636
637 for (int i = 0; i < syncinfo->node_count; i++) {
638 dfsm_node_info_t *ni = &syncinfo->nodes[i];
639 if (i == leader) {
640 ni->synced = 1;
641 } else {
642 if (leaderidx->bytes == idx[i]->bytes &&
643 memcmp(leaderidx, idx[i], leaderidx->bytes) == 0) {
644 ni->synced = 1;
645 g_string_append_printf(str, ", %d/%d", ni->nodeid, ni->pid);
646 }
647 }
648 if (dfsm_nodeid_is_local(dfsm, ni->nodeid, ni->pid))
649 localsi->idx = idx[i];
650 }
651 cfs_message(str->str);
652 g_string_free(str, 1);
653
654 /* send update */
655 if (dfsm_nodeid_is_local(dfsm, syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid)) {
656 if (!dcdb_create_and_send_updates(dfsm, memdb, leaderidx, syncinfo->node_count, idx))
657 return -1;
658 }
659
660 return 0;
661}
662
663static int
664dcdb_process_update(
665 dfsm_t *dfsm,
666 gpointer data,
667 dfsm_sync_info_t *syncinfo,
668 uint32_t nodeid,
669 uint32_t pid,
670 const void *msg,
671 size_t msg_len)
672{
673 g_return_val_if_fail(dfsm != NULL, -1);
674 g_return_val_if_fail(data != NULL, -1);
675 g_return_val_if_fail(msg != NULL, -1);
676 g_return_val_if_fail(syncinfo != NULL, -1);
677 g_return_val_if_fail(syncinfo->data != NULL, -1);
678
679 cfs_debug("enter %s", __func__);
680
681 memdb_tree_entry_t *te;
682
683 if (!(te = dcdb_parse_update_inode(msg, msg_len)))
684 return -1;
685
686 cfs_debug("received inode update %016zX from node %d",
687 te->inode, nodeid);
688
689 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
690
691 localsi->updates = g_list_append(localsi->updates, te);
692
693 return 0;
694}
695
696static int
697dcdb_commit(
698 dfsm_t *dfsm,
699 gpointer data,
700 dfsm_sync_info_t *syncinfo)
701{
702 g_return_val_if_fail(dfsm != NULL, -1);
703 g_return_val_if_fail(data != NULL, -1);
704 g_return_val_if_fail(syncinfo != NULL, -1);
705 g_return_val_if_fail(syncinfo->data != NULL, -1);
706
707 memdb_t *memdb = (memdb_t *)data;
708
709 cfs_debug("enter %s", __func__);
710
711 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
712
713 guint count = g_list_length(localsi->updates);
714
715 cfs_message("update complete - trying to commit (got %u inode updates)", count);
716
717 if (!bdb_backend_commit_update(memdb, localsi->master, localsi->idx, localsi->updates))
718 return -1;
719
2113d031 720 dcdb_sync_corosync_conf(memdb, FALSE);
fe000966
DM
721
722 return 0;
723}
724
725static int
726dcdb_cleanup(
727 dfsm_t *dfsm,
728 gpointer data,
729 dfsm_sync_info_t *syncinfo)
730{
731 g_return_val_if_fail(dfsm != NULL, -1);
732 g_return_val_if_fail(data != NULL, -1);
733 g_return_val_if_fail(syncinfo != NULL, -1);
734 g_return_val_if_fail(syncinfo->data != NULL, -1);
735
736 cfs_debug("enter %s", __func__);
737
738 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
739
740 GList *iter = localsi->updates;
741 while (iter) {
742 memdb_tree_entry_t *te = (memdb_tree_entry_t *)iter->data;
743 memdb_tree_entry_free(te);
744 iter = g_list_next(iter);
745 }
746 g_list_free(localsi->updates);
747
748 g_free(localsi);
749
750 return 0;
751}
752
753gboolean
754dcdb_checksum(
755 dfsm_t *dfsm,
756 gpointer data,
757 unsigned char *csum,
758 size_t csum_len)
759{
760 g_return_val_if_fail(dfsm != NULL, FALSE);
761 g_return_val_if_fail(csum != NULL, FALSE);
762
763 memdb_t *memdb = (memdb_t *)data;
764
765 g_return_val_if_fail(memdb != NULL, FALSE);
fe000966
DM
766
767 cfs_debug("enter %s %016zX %08X", __func__, memdb->root->version, memdb->root->mtime);
768
89fde9ac 769 g_mutex_lock (&memdb->mutex);
fe000966 770 gboolean res = memdb_compute_checksum(memdb->index, memdb->root, csum, csum_len);
89fde9ac 771 g_mutex_unlock (&memdb->mutex);
fe000966
DM
772
773 cfs_debug("leave %s %016zX (%d)", __func__, *(uint64_t *)csum, res);
774
775 return res;
776}
777
778static int
779dcdb_deliver(
780 dfsm_t *dfsm,
781 gpointer data,
782 int *res_ptr,
783 uint32_t nodeid,
784 uint32_t pid,
785 uint16_t msg_type,
786 uint32_t msg_time,
787 const void *msg,
788 size_t msg_len)
789{
790 g_return_val_if_fail(dfsm != NULL, -1);
791 g_return_val_if_fail(msg != NULL, -1);
792
793 memdb_t *memdb = (memdb_t *)data;
794
795 g_return_val_if_fail(memdb != NULL, -1);
796 g_return_val_if_fail(res_ptr != NULL, -1);
797
798 int res = 1;
799
800 int msg_result = -ENOTSUP;
801
802 if (!DCDB_VALID_MESSAGE_TYPE(msg_type))
803 goto unknown;
804
805 cfs_debug("process message %d (length = %ld)", msg_type, msg_len);
806
807 if (!cfs_is_quorate()) {
808 cfs_critical("received write while not quorate - trigger resync");
809 msg_result = -EACCES;
810 goto leave;
811 }
812
813 const char *path, *to, *buf;
814 guint32 size, offset, flags;
815 const guchar *csum;
816
817 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK_REQUEST ||
818 msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
819 msg_result = 0; /* ignored anyways */
820
821 if (!dcdb_parse_unlock_request(msg, msg_len, &path, &csum))
822 goto leave;
823
824 guchar cur_csum[32];
825 memdb_tree_entry_t *te = memdb_getattr(memdb, path);
826
827 if (te && te->type == DT_DIR &&
828 path_is_lockdir(path) && memdb_tree_entry_csum(te, cur_csum) &&
829 (memcmp(csum, cur_csum, 32) == 0)) {
830
831 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
832
833 cfs_debug("got valid unlock message");
834
835 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
836
837 } else if (dfsm_lowest_nodeid(dfsm)) {
838
839 cfs_debug("got valid unlock request message");
840
841 if (memdb_lock_expired(memdb, path, csum)) {
842 cfs_debug("sending unlock message");
843 dcdb_send_unlock(dfsm, path, csum, FALSE);
844 }
845 }
846 }
847
848 } else if (msg_type == DCDB_MESSAGE_CFS_WRITE) {
849
850 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
851 &size, &offset, &flags))
852 goto leave;
853
854 msg_result = memdb_write(memdb, path, nodeid, msg_time,
855 buf, size, offset, flags);
856
2113d031
DM
857 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
858 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
859
860 } else if (msg_type == DCDB_MESSAGE_CFS_CREATE) {
861
862 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
863 &size, &offset, &flags))
864 goto leave;
865
866 msg_result = memdb_create(memdb, path, nodeid, msg_time);
867
2113d031
DM
868 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
869 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
870
871 } else if (msg_type == DCDB_MESSAGE_CFS_MKDIR) {
872
873 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
874 &size, &offset, &flags))
875 goto leave;
876
877 msg_result = memdb_mkdir(memdb, path, nodeid, msg_time);
878
879 } else if (msg_type == DCDB_MESSAGE_CFS_DELETE) {
880
881 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
882 &size, &offset, &flags))
883 goto leave;
884
885 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
886
887 } else if (msg_type == DCDB_MESSAGE_CFS_RENAME) {
888
889 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
890 &size, &offset, &flags))
891 goto leave;
892
893 msg_result = memdb_rename(memdb, path, to, nodeid, msg_time);
894
2113d031
DM
895 if ((msg_result >= 0) && !strcmp(to, "corosync.conf"))
896 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
897
898 } else if (msg_type == DCDB_MESSAGE_CFS_MTIME) {
899
900 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
901 &size, &offset, &flags))
902 goto leave;
903
904 /* Note: mtime is sent via offset field */
905 msg_result = memdb_mtime(memdb, path, nodeid, offset);
906
907 } else {
908 goto unknown;
909 }
910
911 *res_ptr = msg_result;
912ret:
913 if (memdb->errors) {
914 dfsm_set_errormode(dfsm);
915 res = -1;
916 }
917
918 cfs_debug("leave %s (%d)", __func__, res);
919
920 return res;
921leave:
922 res = -1;
923 goto ret;
924
925unknown:
926 cfs_critical("received unknown message type (msg_type == %d)", msg_type);
927 goto leave;
928};
929
930static dfsm_callbacks_t dcdb_dfsm_callbacks = {
931 .dfsm_deliver_fn = dcdb_deliver,
932 .dfsm_get_state_fn = dcdb_get_state,
933 .dfsm_process_state_update_fn = dcdb_process_state_update,
934 .dfsm_process_update_fn = dcdb_process_update,
935 .dfsm_commit_fn = dcdb_commit,
936 .dfsm_cleanup_fn = dcdb_cleanup,
937 .dfsm_checksum_fn = dcdb_checksum,
938};
939
940dfsm_t *dcdb_new(memdb_t *memdb)
941{
942 g_return_val_if_fail(memdb != NULL, NULL);
943
944 return dfsm_new(memdb, DCDB_CPG_GROUP_NAME, G_LOG_DOMAIN,
945 DCDB_PROTOCOL_VERSION, &dcdb_dfsm_callbacks);
946}