]> git.proxmox.com Git - pve-cluster.git/blame - data/src/dcdb.c
buildsys: add sbuild convenience target
[pve-cluster.git] / data / src / dcdb.c
CommitLineData
fe000966 1/*
84c98315 2 Copyright (C) 2010 - 2020 Proxmox Server Solutions GmbH
fe000966
DM
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
13
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Author: Dietmar Maurer <dietmar@proxmox.com>
18
19*/
20
21#define G_LOG_DOMAIN "dcdb"
22
23#ifdef HAVE_CONFIG_H
24#include <config.h>
25#endif /* HAVE_CONFIG_H */
26
27#include <stdlib.h>
28#include <stdio.h>
e5a5a3ea 29#include <inttypes.h>
fe000966
DM
30#include <string.h>
31#include <unistd.h>
32#include <glib.h>
33#include <sys/types.h>
34#include <sys/wait.h>
35#include <arpa/inet.h>
36#include <sys/epoll.h>
37#include <dirent.h>
38#include <errno.h>
39
40#include "cfs-utils.h"
41#include "loop.h"
42#include "dcdb.h"
43#include "status.h"
44
45typedef struct {
46 memdb_index_t *master;
47 memdb_index_t *idx;
48 GList *updates;
49} dcdb_sync_info_t;
50
51void
52dcdb_send_unlock(
53 dfsm_t *dfsm,
54 const char *path,
55 const guchar csum[32],
56 gboolean request)
57{
58 g_return_if_fail(dfsm != NULL);
59 g_return_if_fail(path != NULL);
60 g_return_if_fail(csum != NULL);
61
62 struct iovec iov[2];
63
64 iov[0].iov_base = (char *)csum;
65 iov[0].iov_len = 32;
66
67 iov[1].iov_base = (char *)path;
68 iov[1].iov_len = strlen(path) + 1;
69
70 if (!cfs_is_quorate())
71 return;
72
73 dcdb_message_t msg_type = request ?
74 DCDB_MESSAGE_CFS_UNLOCK_REQUEST : DCDB_MESSAGE_CFS_UNLOCK;
75
76 dfsm_send_message_sync(dfsm, msg_type, iov, 2, NULL);
77}
78
79static gboolean
80dcdb_parse_unlock_request(
81 const void *msg,
82 size_t msg_len,
83 const char **path,
84 const guchar **csum)
85
86{
87 g_return_val_if_fail(msg != NULL, FALSE);
88 g_return_val_if_fail(path != NULL, FALSE);
89 g_return_val_if_fail(csum != NULL, FALSE);
90
91 if (msg_len < 33) {
e5a5a3ea 92 cfs_critical("received short unlock message (%zu < 33)", msg_len);
fe000966
DM
93 return FALSE;
94 }
95
7b6f5ef3 96 char *msg_str = (char *) msg;
fe000966 97
7b6f5ef3
TL
98 *csum = (guchar *) msg_str;
99 msg_str += 32; msg_len -= 32;
100
101 *path = msg_str;
fe000966
DM
102 if ((*path)[msg_len - 1] != 0) {
103 cfs_critical("received mailformed unlock message - 'path' not terminated");
104 *path = NULL;
105 return FALSE;
106 }
107
108 return TRUE;
109}
110
111int
112dcdb_send_fuse_message(
113 dfsm_t *dfsm,
114 dcdb_message_t msg_type,
115 const char *path,
116 const char *to,
117 const char *buf,
118 guint32 size,
119 guint32 offset,
120 guint32 flags)
121{
122 struct iovec iov[8];
123
124 iov[0].iov_base = (char *)&size;
125 iov[0].iov_len = sizeof(size);
126
127 iov[1].iov_base = (char *)&offset;
128 iov[1].iov_len = sizeof(offset);
129
130 guint32 pathlen = path ? strlen(path) + 1 : 0;
131 iov[2].iov_base = (char *)&pathlen;
132 iov[2].iov_len = sizeof(pathlen);
133
134 guint32 tolen = to ? strlen(to) + 1 : 0;
135 iov[3].iov_base = (char *)&tolen;
136 iov[3].iov_len = sizeof(tolen);
137
138 iov[4].iov_base = (char *)&flags;
139 iov[4].iov_len = sizeof(flags);
140
141 iov[5].iov_base = (char *)path;
142 iov[5].iov_len = pathlen;
143
144 iov[6].iov_base = (char *)to;
145 iov[6].iov_len = tolen;
146
147 iov[7].iov_base = (char *)buf;
148 iov[7].iov_len = size;
149
150 dfsm_result_t rc;
151 memset(&rc, 0, sizeof(rc));
152 rc.result = -EBUSY;
153
154 if (!cfs_is_quorate())
155 return -EACCES;
156
157 if (dfsm_send_message_sync(dfsm, msg_type, iov, 8, &rc))
158 return rc.result;
159
160 return -EACCES;
161}
162
163static gboolean
164dcdb_parse_fuse_message(
165 const void *msg,
166 size_t msg_len,
167 const char **path,
168 const char **to,
169 const char **buf,
170 guint32 *size,
171 guint32 *offset,
172 guint32 *flags)
173
174{
175 g_return_val_if_fail(msg != NULL, FALSE);
176 g_return_val_if_fail(path != NULL, FALSE);
177 g_return_val_if_fail(to != NULL, FALSE);
178 g_return_val_if_fail(buf != NULL, FALSE);
179 g_return_val_if_fail(size != NULL, FALSE);
180 g_return_val_if_fail(offset != NULL, FALSE);
181 g_return_val_if_fail(flags != NULL, FALSE);
182
183 if (msg_len < 20) {
e5a5a3ea 184 cfs_critical("received short fuse message (%zu < 20)", msg_len);
fe000966
DM
185 return FALSE;
186 }
187
c0547321 188 const uint8_t *msg_ptr = msg;
fe000966 189
926f961f
TL
190 *size = *((guint32 *)msg_ptr);
191 msg_ptr += 4; msg_len -= 4;
fe000966 192
926f961f
TL
193 *offset = *((guint32 *)msg_ptr);
194 msg_ptr += 4; msg_len -= 4;
fe000966 195
926f961f
TL
196 guint32 pathlen = *((guint32 *)msg_ptr);
197 msg_ptr += 4; msg_len -= 4;
fe000966 198
926f961f
TL
199 guint32 tolen = *((guint32 *)msg_ptr);
200 msg_ptr += 4; msg_len -= 4;
201
202 *flags = *((guint32 *)msg_ptr);
203 msg_ptr += 4; msg_len -= 4;
fe000966
DM
204
205 if (msg_len != ((*size) + pathlen + tolen)) {
206 cfs_critical("received mailformed fuse message");
207 return FALSE;
208 }
209
926f961f
TL
210 *path = (char *)msg_ptr;
211 msg_ptr += pathlen; msg_len -= pathlen;
fe000966
DM
212
213 if (pathlen) {
214 if ((*path)[pathlen - 1] != 0) {
215 cfs_critical("received mailformed fuse message - 'path' not terminated");
216 *path = NULL;
217 return FALSE;
218 }
219 } else {
220 *path = NULL;
221 }
222
926f961f
TL
223 *to = (char *)msg_ptr;
224 msg_ptr += tolen; msg_len -= tolen;
fe000966
DM
225
226 if (tolen) {
227 if ((*to)[tolen - 1] != 0) {
228 cfs_critical("received mailformed fuse message - 'to' not terminated");
229 *to = NULL;
230 return FALSE;
231 }
232 } else {
233 *to = NULL;
234 }
235
c0547321 236 *buf = (*size) ? (const char*)msg_ptr : NULL;
fe000966
DM
237
238 return TRUE;
239}
240
241static gboolean
242dcdb_send_update_inode(
243 dfsm_t *dfsm,
244 memdb_tree_entry_t *te)
245{
246 g_return_val_if_fail(dfsm != NULL, FALSE);
247 g_return_val_if_fail(te != NULL, FALSE);
248
249 int len;
250 struct iovec iov[20];
251
252 uint32_t namelen = strlen(te->name) + 1;
253
254 iov[0].iov_base = (char *)&te->parent;
255 iov[0].iov_len = sizeof(te->parent);
256 iov[1].iov_base = (char *)&te->inode;
257 iov[1].iov_len = sizeof(te->inode);
258 iov[2].iov_base = (char *)&te->version;
259 iov[2].iov_len = sizeof(te->version);
260 iov[3].iov_base = (char *)&te->writer;
261 iov[3].iov_len = sizeof(te->writer);
262 iov[4].iov_base = (char *)&te->mtime;
263 iov[4].iov_len = sizeof(te->mtime);
264 iov[5].iov_base = (char *)&te->size;
265 iov[5].iov_len = sizeof(te->size);
266 iov[6].iov_base = (char *)&namelen;
267 iov[6].iov_len = sizeof(namelen);
268 iov[7].iov_base = (char *)&te->type;
269 iov[7].iov_len = sizeof(te->type);
270 iov[8].iov_base = (char *)te->name;
271 iov[8].iov_len = namelen;
272
273 len = 9;
274 if (te->type == DT_REG && te->size) {
275 iov[9].iov_base = (char *)te->data.value;
276 iov[9].iov_len = te->size;
277 len++;
278 }
279
280 if (dfsm_send_update(dfsm, iov, len) != CS_OK)
281 return FALSE;
282
283 return TRUE;
284}
285
286memdb_tree_entry_t *
287dcdb_parse_update_inode(
288 const void *msg,
289 size_t msg_len)
290{
291 if (msg_len < 40) {
292 cfs_critical("received short message (msg_len < 40)");
293 return NULL;
294 }
295
7b6f5ef3 296 char *msg_ptr = (char *) msg;
926f961f
TL
297
298 guint64 parent = *((guint64 *)msg_ptr);
299 msg_ptr += 8; msg_len -= 8;
300 guint64 inode = *((guint64 *)msg_ptr);
301 msg_ptr += 8; msg_len -= 8;
302 guint64 version = *((guint64 *)msg_ptr);
303 msg_ptr += 8; msg_len -= 8;
304
305 guint32 writer = *((guint32 *)msg_ptr);
306 msg_ptr += 4; msg_len -= 4;
307 guint32 mtime = *((guint32 *)msg_ptr);
308 msg_ptr += 4; msg_len -= 4;
309 guint32 size = *((guint32 *)msg_ptr);
310 msg_ptr += 4; msg_len -= 4;
311 guint32 namelen = *((guint32 *)msg_ptr);
312 msg_ptr += 4; msg_len -= 4;
313
314 char type = *((char *)msg_ptr);
315 msg_ptr += 1; msg_len -= 1;
fe000966
DM
316
317 if (!(type == DT_REG || type == DT_DIR)) {
318 cfs_critical("received mailformed message (unknown inode type %d)", type);
319 return NULL;
320 }
321
322 if (msg_len != (size + namelen)) {
323 cfs_critical("received mailformed message (msg_len != (size + namelen))");
324 return NULL;
325 }
326
7b6f5ef3
TL
327 char *name = msg_ptr;
328 msg_ptr += namelen; msg_len -= namelen;
fe000966 329
7b6f5ef3 330 const void *data = msg_ptr;
fe000966
DM
331
332 if (name[namelen - 1] != 0) {
333 cfs_critical("received mailformed message (name[namelen-1] != 0)");
334 return NULL;
335 }
336
337 memdb_tree_entry_t *te = memdb_tree_entry_new(name);
338 if (!te)
339 return NULL;
340
341 te->parent = parent;
342 te->version = version;
343 te->inode = inode;
344 te->writer = writer;
345 te->mtime = mtime;
346 te->size = size;
347 te->type = type;
348
349 if (te->type == DT_REG && te->size) {
2c372aee 350 te->data.value = g_memdup2(data, te->size);
fe000966
DM
351 if (!te->data.value) {
352 memdb_tree_entry_free(te);
353 return NULL;
354 }
355 }
356
357 return te;
358}
359
360void
2113d031 361dcdb_sync_corosync_conf(
fe000966 362 memdb_t *memdb,
2113d031 363 gboolean notify_corosync)
fe000966
DM
364{
365 g_return_if_fail(memdb != NULL);
366
367 int len;
368 gpointer data = NULL;
369
2113d031 370 len = memdb_read(memdb, "corosync.conf", &data);
fe000966
DM
371 if (len <= 0)
372 return;
373
374 guint64 new_version = cluster_config_version(data, len);
2113d031
DM
375 if (!new_version) {
376 cfs_critical("unable to parse cluster config_version");
fe000966 377 return;
2113d031 378 }
fe000966
DM
379
380 char *old_data = NULL;
381 gsize old_length = 0;
382 guint64 old_version = 0;
383
384 GError *err = NULL;
385 if (!g_file_get_contents(HOST_CLUSTER_CONF_FN, &old_data, &old_length, &err)) {
f6efd6bf
DM
386 if (!g_error_matches(err, G_FILE_ERROR, G_FILE_ERROR_NOENT)) {
387 cfs_critical("unable to read cluster config file '%s' - %s",
388 HOST_CLUSTER_CONF_FN, err->message);
389 }
fe000966
DM
390 g_error_free (err);
391 } else {
392 if (old_length)
393 old_version = cluster_config_version(old_data, old_length);
394 }
395
396 /* test if something changed - return if no changes */
397 if (data && old_data && (old_length == len) &&
398 !memcmp(data, old_data, len))
399 goto ret;
400
401 if (new_version < old_version) {
2113d031 402 cfs_critical("local corosync.conf is newer");
fe000966
DM
403 goto ret;
404 }
405
2113d031 406 if (!atomic_write_file(HOST_CLUSTER_CONF_FN, data, len, 0644, 0))
fe000966
DM
407 goto ret;
408
e5a5a3ea 409 cfs_message("wrote new corosync config '%s' (version = %" G_GUINT64_FORMAT ")",
2113d031
DM
410 HOST_CLUSTER_CONF_FN, new_version);
411
412 if (notify_corosync && old_version) {
f64bc729
FG
413 /*
414 * sleep for 1s to hopefully allow new config to propagate
415 * FIXME: actually query the status somehow?
416 */
417 sleep(1);
418
2113d031
DM
419 /* tell corosync that there is a new config file */
420 cfs_debug ("run corosync-cfgtool -R");
421 int status = system("corosync-cfgtool -R >/dev/null 2>&1");
fe000966 422 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
2113d031 423 cfs_critical("corosync-cfgtool -R failed with exit code %d\n", WEXITSTATUS(status));
fe000966 424 }
2113d031 425 cfs_debug ("end corosync-cfgtool -R");
fe000966
DM
426 }
427
428ret:
429
430 if (data)
431 g_free(data);
432
433 if (old_data)
434 g_free(old_data);
435}
436
437static gpointer
438dcdb_get_state(
439 dfsm_t *dfsm,
440 gpointer data,
441 unsigned int *res_len)
442{
443 g_return_val_if_fail(dfsm != NULL, FALSE);
444 g_return_val_if_fail(data != NULL, FALSE);
445
446 memdb_t *memdb = (memdb_t *)data;
447
448 g_return_val_if_fail(memdb->root != NULL, FALSE);
fe000966 449
e5a5a3ea 450 cfs_debug("enter %s %016" PRIX64 " %08X", __func__, (uint64_t) memdb->root->version, memdb->root->mtime);
fe000966 451
89fde9ac 452 g_mutex_lock (&memdb->mutex);
fe000966 453 memdb_index_t *idx = memdb_encode_index(memdb->index, memdb->root);
89fde9ac 454 g_mutex_unlock (&memdb->mutex);
fe000966
DM
455
456 if (idx) {
457 *res_len = idx->bytes;
458 }
459
460 return idx;
461}
462
463static int
464dcdb_select_leader(
465 int node_count,
466 memdb_index_t *idx[])
467{
468 g_return_val_if_fail(idx != NULL, -1);
469
470 cfs_debug("enter %s", __func__);
471
472 int leader = -1;
473
474 /* try select most actual data - compare 'version' an 'time of last write'
475 * NOTE: syncinfo members are sorted
476 */
477 for (int i = 0; i < node_count; i++) {
478 if (leader < 0) {
479 leader = i;
480 } else {
481 memdb_index_t *leaderidx = idx[leader];
482
483 if (idx[i]->version == leaderidx->version &&
484 idx[i]->mtime > leaderidx->mtime) {
485 leader = i;
486 } else if (idx[i]->version > leaderidx->version) {
487 leader = i;
488 }
489 }
490 }
491
492 cfs_debug ("leave %s (%d)", __func__, leader);
493
494 return leader;
495}
496
497static gboolean
498dcdb_create_and_send_updates(
499 dfsm_t *dfsm,
500 memdb_t *memdb,
501 memdb_index_t *master,
502 int node_count,
503 memdb_index_t *idx[])
504{
505 g_return_val_if_fail(dfsm != NULL, FALSE);
506 g_return_val_if_fail(memdb != NULL, FALSE);
fe000966
DM
507 g_return_val_if_fail(master != NULL, FALSE);
508
509 cfs_debug("enter %s", __func__);
510
511 gboolean res = FALSE;
512
513 GHashTable *updates = g_hash_table_new(g_int64_hash, g_int64_equal);
514 if (!updates)
515 goto ret;
516
89fde9ac 517 g_mutex_lock (&memdb->mutex);
fe000966
DM
518
519 for (int n = 0; n < node_count; n++) {
520 memdb_index_t *slave = idx[n];
521
522 if (slave == master)
523 continue;
524
525 int j = 0;
526
527 for (int i = 0; i < master->size; i++) {
528 guint64 inode = master->entries[i].inode;
529 while (j < slave->size && slave->entries[j].inode < inode)
530 j++;
531
532 if (memcmp(&slave->entries[j], &master->entries[i],
533 sizeof(memdb_index_extry_t)) == 0) {
534 continue;
535 }
536
537 if (g_hash_table_lookup(updates, &inode))
538 continue;
539
e5a5a3ea 540 cfs_debug("found different inode %d %016" PRIX64, i, (uint64_t) inode);
fe000966
DM
541
542 memdb_tree_entry_t *te, *cpy;
543
544 if (!(te = g_hash_table_lookup(memdb->index, &inode))) {
e5a5a3ea 545 cfs_critical("can get inode data for inode %016" PRIX64, (uint64_t) inode);
fe000966
DM
546 goto ret;
547 }
548
549 cpy = memdb_tree_entry_copy(te, 1);
550 g_hash_table_replace(updates, &cpy->inode, cpy);
551 }
552 }
553
89fde9ac 554 g_mutex_unlock (&memdb->mutex);
fe000966
DM
555
556 /* send updates */
557
558 GHashTableIter iter;
559 gpointer key, value;
560 int count = 0;
561
562 cfs_message("start sending inode updates");
563
564 g_hash_table_iter_init (&iter, updates);
565 while (g_hash_table_iter_next (&iter, &key, &value)) {
566 memdb_tree_entry_t *te = (memdb_tree_entry_t *)value;
567 count++;
568
569 if (!dcdb_send_update_inode(dfsm, te)) {
570 /* tolerate error here */
e5a5a3ea 571 cfs_critical("sending update inode failed %016" PRIX64, (uint64_t) te->inode);
fe000966 572 } else {
e5a5a3ea 573 cfs_debug("sent update inode %016" PRIX64, (uint64_t) te->inode);
fe000966
DM
574 }
575
576 memdb_tree_entry_free(te);
577 }
578
579 cfs_message("sent all (%d) updates", count);
580
581 if (dfsm_send_update_complete(dfsm) != CS_OK) {
582 cfs_critical("failed to send UPDATE_COMPLETE message");
583 goto ret;
584 }
585
586 res = TRUE;
587
588 ret:
589 if (updates)
590 g_hash_table_destroy(updates);
591
592 cfs_debug("leave %s (%d)", __func__, res);
593
594 return res;
595}
596
597static int
598dcdb_process_state_update(
599 dfsm_t *dfsm,
600 gpointer data,
601 dfsm_sync_info_t *syncinfo)
602{
603 g_return_val_if_fail(dfsm != NULL, -1);
604 g_return_val_if_fail(data != NULL, -1);
605 g_return_val_if_fail(syncinfo != NULL, -1);
606
607 memdb_t *memdb = (memdb_t *)data;
608
609 cfs_debug("enter %s", __func__);
610
611 dcdb_sync_info_t *localsi = g_new0(dcdb_sync_info_t, 1);
612 if (!localsi)
613 return -1;
614
615 syncinfo->data = localsi;
616
617 memdb_index_t *idx[syncinfo->node_count];
618
619 for (int i = 0; i < syncinfo->node_count; i++) {
620 dfsm_node_info_t *ni = &syncinfo->nodes[i];
621
622 if (ni->state_len < sizeof(memdb_index_t)) {
623 cfs_critical("received short memdb index (len < sizeof(memdb_index_t))");
624 return -1;
625 }
626
627 idx[i] = (memdb_index_t *)ni->state;
628
629 if (ni->state_len != idx[i]->bytes) {
630 cfs_critical("received mailformed memdb index (len != idx->bytes)");
631 return -1;
632 }
633 }
634
635 /* select leader - set mode */
636 int leader = dcdb_select_leader(syncinfo->node_count, idx);
637 if (leader < 0) {
638 cfs_critical("unable to select leader failed");
639 return -1;
640 }
641
642 cfs_message("leader is %d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
643
644 memdb_index_t *leaderidx = idx[leader];
645 localsi->master = leaderidx;
646
e26a1b5f
TL
647 GString *synced_member_ids = g_string_new(NULL);
648 g_string_append_printf(synced_member_ids, "%d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
fe000966
DM
649
650 for (int i = 0; i < syncinfo->node_count; i++) {
651 dfsm_node_info_t *ni = &syncinfo->nodes[i];
652 if (i == leader) {
653 ni->synced = 1;
654 } else {
655 if (leaderidx->bytes == idx[i]->bytes &&
656 memcmp(leaderidx, idx[i], leaderidx->bytes) == 0) {
657 ni->synced = 1;
e26a1b5f 658 g_string_append_printf(synced_member_ids, ", %d/%d", ni->nodeid, ni->pid);
fe000966
DM
659 }
660 }
661 if (dfsm_nodeid_is_local(dfsm, ni->nodeid, ni->pid))
662 localsi->idx = idx[i];
663 }
e26a1b5f
TL
664 cfs_message("synced members: %s", synced_member_ids->str);
665 g_string_free(synced_member_ids, 1);
fe000966
DM
666
667 /* send update */
668 if (dfsm_nodeid_is_local(dfsm, syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid)) {
669 if (!dcdb_create_and_send_updates(dfsm, memdb, leaderidx, syncinfo->node_count, idx))
670 return -1;
671 }
672
673 return 0;
674}
675
676static int
677dcdb_process_update(
678 dfsm_t *dfsm,
679 gpointer data,
680 dfsm_sync_info_t *syncinfo,
681 uint32_t nodeid,
682 uint32_t pid,
683 const void *msg,
684 size_t msg_len)
685{
686 g_return_val_if_fail(dfsm != NULL, -1);
687 g_return_val_if_fail(data != NULL, -1);
688 g_return_val_if_fail(msg != NULL, -1);
689 g_return_val_if_fail(syncinfo != NULL, -1);
690 g_return_val_if_fail(syncinfo->data != NULL, -1);
691
692 cfs_debug("enter %s", __func__);
693
694 memdb_tree_entry_t *te;
695
696 if (!(te = dcdb_parse_update_inode(msg, msg_len)))
697 return -1;
698
e5a5a3ea
TL
699 cfs_debug("received inode update %016" PRIX64 " from node %d",
700 (uint64_t) te->inode, nodeid);
fe000966
DM
701
702 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
703
704 localsi->updates = g_list_append(localsi->updates, te);
705
706 return 0;
707}
708
709static int
710dcdb_commit(
711 dfsm_t *dfsm,
712 gpointer data,
713 dfsm_sync_info_t *syncinfo)
714{
715 g_return_val_if_fail(dfsm != NULL, -1);
716 g_return_val_if_fail(data != NULL, -1);
717 g_return_val_if_fail(syncinfo != NULL, -1);
718 g_return_val_if_fail(syncinfo->data != NULL, -1);
719
720 memdb_t *memdb = (memdb_t *)data;
721
722 cfs_debug("enter %s", __func__);
723
724 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
725
726 guint count = g_list_length(localsi->updates);
727
728 cfs_message("update complete - trying to commit (got %u inode updates)", count);
729
730 if (!bdb_backend_commit_update(memdb, localsi->master, localsi->idx, localsi->updates))
731 return -1;
732
2113d031 733 dcdb_sync_corosync_conf(memdb, FALSE);
fe000966
DM
734
735 return 0;
736}
737
738static int
739dcdb_cleanup(
740 dfsm_t *dfsm,
741 gpointer data,
742 dfsm_sync_info_t *syncinfo)
743{
744 g_return_val_if_fail(dfsm != NULL, -1);
745 g_return_val_if_fail(data != NULL, -1);
746 g_return_val_if_fail(syncinfo != NULL, -1);
747 g_return_val_if_fail(syncinfo->data != NULL, -1);
748
749 cfs_debug("enter %s", __func__);
750
751 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
752
753 GList *iter = localsi->updates;
754 while (iter) {
755 memdb_tree_entry_t *te = (memdb_tree_entry_t *)iter->data;
756 memdb_tree_entry_free(te);
757 iter = g_list_next(iter);
758 }
759 g_list_free(localsi->updates);
760
761 g_free(localsi);
762
763 return 0;
764}
765
766gboolean
767dcdb_checksum(
768 dfsm_t *dfsm,
769 gpointer data,
770 unsigned char *csum,
771 size_t csum_len)
772{
773 g_return_val_if_fail(dfsm != NULL, FALSE);
774 g_return_val_if_fail(csum != NULL, FALSE);
775
776 memdb_t *memdb = (memdb_t *)data;
777
778 g_return_val_if_fail(memdb != NULL, FALSE);
fe000966 779
e5a5a3ea 780 cfs_debug("enter %s %016" PRIX64 " %08X", __func__, memdb->root->version, memdb->root->mtime);
fe000966 781
89fde9ac 782 g_mutex_lock (&memdb->mutex);
fe000966 783 gboolean res = memdb_compute_checksum(memdb->index, memdb->root, csum, csum_len);
89fde9ac 784 g_mutex_unlock (&memdb->mutex);
fe000966 785
e5a5a3ea 786 cfs_debug("leave %s %016" PRIX64 " (%d)", __func__, *( (uint64_t *) csum), res);
fe000966
DM
787
788 return res;
789}
790
791static int
792dcdb_deliver(
793 dfsm_t *dfsm,
794 gpointer data,
795 int *res_ptr,
796 uint32_t nodeid,
797 uint32_t pid,
798 uint16_t msg_type,
799 uint32_t msg_time,
800 const void *msg,
801 size_t msg_len)
802{
803 g_return_val_if_fail(dfsm != NULL, -1);
804 g_return_val_if_fail(msg != NULL, -1);
805
806 memdb_t *memdb = (memdb_t *)data;
807
808 g_return_val_if_fail(memdb != NULL, -1);
809 g_return_val_if_fail(res_ptr != NULL, -1);
810
811 int res = 1;
812
813 int msg_result = -ENOTSUP;
814
815 if (!DCDB_VALID_MESSAGE_TYPE(msg_type))
816 goto unknown;
817
e5a5a3ea 818 cfs_debug("process message %u (length = %zd)", msg_type, msg_len);
fe000966
DM
819
820 if (!cfs_is_quorate()) {
821 cfs_critical("received write while not quorate - trigger resync");
822 msg_result = -EACCES;
823 goto leave;
824 }
825
826 const char *path, *to, *buf;
827 guint32 size, offset, flags;
828 const guchar *csum;
829
830 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK_REQUEST ||
831 msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
832 msg_result = 0; /* ignored anyways */
833
834 if (!dcdb_parse_unlock_request(msg, msg_len, &path, &csum))
835 goto leave;
836
837 guchar cur_csum[32];
838 memdb_tree_entry_t *te = memdb_getattr(memdb, path);
839
840 if (te && te->type == DT_DIR &&
841 path_is_lockdir(path) && memdb_tree_entry_csum(te, cur_csum) &&
842 (memcmp(csum, cur_csum, 32) == 0)) {
843
844 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
845
846 cfs_debug("got valid unlock message");
847
848 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
849
850 } else if (dfsm_lowest_nodeid(dfsm)) {
851
852 cfs_debug("got valid unlock request message");
853
854 if (memdb_lock_expired(memdb, path, csum)) {
855 cfs_debug("sending unlock message");
856 dcdb_send_unlock(dfsm, path, csum, FALSE);
857 }
858 }
859 }
a0fce192 860 memdb_tree_entry_free(te);
fe000966
DM
861
862 } else if (msg_type == DCDB_MESSAGE_CFS_WRITE) {
863
864 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
865 &size, &offset, &flags))
866 goto leave;
867
868 msg_result = memdb_write(memdb, path, nodeid, msg_time,
869 buf, size, offset, flags);
870
2113d031
DM
871 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
872 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
873
874 } else if (msg_type == DCDB_MESSAGE_CFS_CREATE) {
875
876 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
877 &size, &offset, &flags))
878 goto leave;
879
880 msg_result = memdb_create(memdb, path, nodeid, msg_time);
881
2113d031
DM
882 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
883 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
884
885 } else if (msg_type == DCDB_MESSAGE_CFS_MKDIR) {
886
887 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
888 &size, &offset, &flags))
889 goto leave;
890
891 msg_result = memdb_mkdir(memdb, path, nodeid, msg_time);
892
893 } else if (msg_type == DCDB_MESSAGE_CFS_DELETE) {
894
895 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
896 &size, &offset, &flags))
897 goto leave;
898
899 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
900
901 } else if (msg_type == DCDB_MESSAGE_CFS_RENAME) {
902
903 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
904 &size, &offset, &flags))
905 goto leave;
906
907 msg_result = memdb_rename(memdb, path, to, nodeid, msg_time);
908
2113d031
DM
909 if ((msg_result >= 0) && !strcmp(to, "corosync.conf"))
910 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
fe000966
DM
911
912 } else if (msg_type == DCDB_MESSAGE_CFS_MTIME) {
913
914 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
915 &size, &offset, &flags))
916 goto leave;
917
918 /* Note: mtime is sent via offset field */
919 msg_result = memdb_mtime(memdb, path, nodeid, offset);
920
921 } else {
922 goto unknown;
923 }
924
925 *res_ptr = msg_result;
926ret:
927 if (memdb->errors) {
928 dfsm_set_errormode(dfsm);
929 res = -1;
930 }
931
932 cfs_debug("leave %s (%d)", __func__, res);
933
934 return res;
751a3b88
TL
935
936unknown:
e5a5a3ea 937 cfs_critical("received unknown message type (msg_type == %u)", msg_type);
fe000966
DM
938leave:
939 res = -1;
940 goto ret;
941
49af93b4 942}
fe000966
DM
943
944static dfsm_callbacks_t dcdb_dfsm_callbacks = {
945 .dfsm_deliver_fn = dcdb_deliver,
946 .dfsm_get_state_fn = dcdb_get_state,
947 .dfsm_process_state_update_fn = dcdb_process_state_update,
948 .dfsm_process_update_fn = dcdb_process_update,
949 .dfsm_commit_fn = dcdb_commit,
950 .dfsm_cleanup_fn = dcdb_cleanup,
951 .dfsm_checksum_fn = dcdb_checksum,
952};
953
954dfsm_t *dcdb_new(memdb_t *memdb)
955{
956 g_return_val_if_fail(memdb != NULL, NULL);
957
958 return dfsm_new(memdb, DCDB_CPG_GROUP_NAME, G_LOG_DOMAIN,
959 DCDB_PROTOCOL_VERSION, &dcdb_dfsm_callbacks);
960}