]> git.proxmox.com Git - pve-cluster.git/blob - data/src/dcdb.c
fix some format security issues
[pve-cluster.git] / data / src / dcdb.c
1 /*
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
13
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Author: Dietmar Maurer <dietmar@proxmox.com>
18
19 */
20
21 #define G_LOG_DOMAIN "dcdb"
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif /* HAVE_CONFIG_H */
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <inttypes.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <glib.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <arpa/inet.h>
36 #include <sys/epoll.h>
37 #include <dirent.h>
38 #include <errno.h>
39
40 #include "cfs-utils.h"
41 #include "loop.h"
42 #include "dcdb.h"
43 #include "status.h"
44
45 typedef struct {
46 memdb_index_t *master;
47 memdb_index_t *idx;
48 GList *updates;
49 } dcdb_sync_info_t;
50
51 void
52 dcdb_send_unlock(
53 dfsm_t *dfsm,
54 const char *path,
55 const guchar csum[32],
56 gboolean request)
57 {
58 g_return_if_fail(dfsm != NULL);
59 g_return_if_fail(path != NULL);
60 g_return_if_fail(csum != NULL);
61
62 struct iovec iov[2];
63
64 iov[0].iov_base = (char *)csum;
65 iov[0].iov_len = 32;
66
67 iov[1].iov_base = (char *)path;
68 iov[1].iov_len = strlen(path) + 1;
69
70 if (!cfs_is_quorate())
71 return;
72
73 dcdb_message_t msg_type = request ?
74 DCDB_MESSAGE_CFS_UNLOCK_REQUEST : DCDB_MESSAGE_CFS_UNLOCK;
75
76 dfsm_send_message_sync(dfsm, msg_type, iov, 2, NULL);
77 }
78
79 static gboolean
80 dcdb_parse_unlock_request(
81 const void *msg,
82 size_t msg_len,
83 const char **path,
84 const guchar **csum)
85
86 {
87 g_return_val_if_fail(msg != NULL, FALSE);
88 g_return_val_if_fail(path != NULL, FALSE);
89 g_return_val_if_fail(csum != NULL, FALSE);
90
91 if (msg_len < 33) {
92 cfs_critical("received short unlock message (%zu < 33)", msg_len);
93 return FALSE;
94 }
95
96 *csum = msg;
97 msg += 32; msg_len -= 32;
98
99 *path = msg;
100 if ((*path)[msg_len - 1] != 0) {
101 cfs_critical("received mailformed unlock message - 'path' not terminated");
102 *path = NULL;
103 return FALSE;
104 }
105
106 return TRUE;
107 }
108
109 int
110 dcdb_send_fuse_message(
111 dfsm_t *dfsm,
112 dcdb_message_t msg_type,
113 const char *path,
114 const char *to,
115 const char *buf,
116 guint32 size,
117 guint32 offset,
118 guint32 flags)
119 {
120 struct iovec iov[8];
121
122 iov[0].iov_base = (char *)&size;
123 iov[0].iov_len = sizeof(size);
124
125 iov[1].iov_base = (char *)&offset;
126 iov[1].iov_len = sizeof(offset);
127
128 guint32 pathlen = path ? strlen(path) + 1 : 0;
129 iov[2].iov_base = (char *)&pathlen;
130 iov[2].iov_len = sizeof(pathlen);
131
132 guint32 tolen = to ? strlen(to) + 1 : 0;
133 iov[3].iov_base = (char *)&tolen;
134 iov[3].iov_len = sizeof(tolen);
135
136 iov[4].iov_base = (char *)&flags;
137 iov[4].iov_len = sizeof(flags);
138
139 iov[5].iov_base = (char *)path;
140 iov[5].iov_len = pathlen;
141
142 iov[6].iov_base = (char *)to;
143 iov[6].iov_len = tolen;
144
145 iov[7].iov_base = (char *)buf;
146 iov[7].iov_len = size;
147
148 dfsm_result_t rc;
149 memset(&rc, 0, sizeof(rc));
150 rc.result = -EBUSY;
151
152 if (!cfs_is_quorate())
153 return -EACCES;
154
155 if (dfsm_send_message_sync(dfsm, msg_type, iov, 8, &rc))
156 return rc.result;
157
158 return -EACCES;
159 }
160
161 static gboolean
162 dcdb_parse_fuse_message(
163 const void *msg,
164 size_t msg_len,
165 const char **path,
166 const char **to,
167 const char **buf,
168 guint32 *size,
169 guint32 *offset,
170 guint32 *flags)
171
172 {
173 g_return_val_if_fail(msg != NULL, FALSE);
174 g_return_val_if_fail(path != NULL, FALSE);
175 g_return_val_if_fail(to != NULL, FALSE);
176 g_return_val_if_fail(buf != NULL, FALSE);
177 g_return_val_if_fail(size != NULL, FALSE);
178 g_return_val_if_fail(offset != NULL, FALSE);
179 g_return_val_if_fail(flags != NULL, FALSE);
180
181 if (msg_len < 20) {
182 cfs_critical("received short fuse message (%zu < 20)", msg_len);
183 return FALSE;
184 }
185
186 *size = *((guint32 *)msg);
187 msg += 4; msg_len -= 4;
188
189 *offset = *((guint32 *)msg);
190 msg += 4; msg_len -= 4;
191
192 guint32 pathlen = *((guint32 *)msg);
193 msg += 4; msg_len -= 4;
194
195 guint32 tolen = *((guint32 *)msg);
196 msg += 4; msg_len -= 4;
197
198 *flags = *((guint32 *)msg);
199 msg += 4; msg_len -= 4;
200
201 if (msg_len != ((*size) + pathlen + tolen)) {
202 cfs_critical("received mailformed fuse message");
203 return FALSE;
204 }
205
206 *path = (char *)msg;
207 msg += pathlen; msg_len -= pathlen;
208
209 if (pathlen) {
210 if ((*path)[pathlen - 1] != 0) {
211 cfs_critical("received mailformed fuse message - 'path' not terminated");
212 *path = NULL;
213 return FALSE;
214 }
215 } else {
216 *path = NULL;
217 }
218
219 *to = (char *)msg;
220 msg += tolen; msg_len -= tolen;
221
222 if (tolen) {
223 if ((*to)[tolen - 1] != 0) {
224 cfs_critical("received mailformed fuse message - 'to' not terminated");
225 *to = NULL;
226 return FALSE;
227 }
228 } else {
229 *to = NULL;
230 }
231
232 *buf = (*size) ? msg : NULL;
233
234 return TRUE;
235 }
236
237 static gboolean
238 dcdb_send_update_inode(
239 dfsm_t *dfsm,
240 memdb_tree_entry_t *te)
241 {
242 g_return_val_if_fail(dfsm != NULL, FALSE);
243 g_return_val_if_fail(te != NULL, FALSE);
244
245 int len;
246 struct iovec iov[20];
247
248 uint32_t namelen = strlen(te->name) + 1;
249
250 iov[0].iov_base = (char *)&te->parent;
251 iov[0].iov_len = sizeof(te->parent);
252 iov[1].iov_base = (char *)&te->inode;
253 iov[1].iov_len = sizeof(te->inode);
254 iov[2].iov_base = (char *)&te->version;
255 iov[2].iov_len = sizeof(te->version);
256 iov[3].iov_base = (char *)&te->writer;
257 iov[3].iov_len = sizeof(te->writer);
258 iov[4].iov_base = (char *)&te->mtime;
259 iov[4].iov_len = sizeof(te->mtime);
260 iov[5].iov_base = (char *)&te->size;
261 iov[5].iov_len = sizeof(te->size);
262 iov[6].iov_base = (char *)&namelen;
263 iov[6].iov_len = sizeof(namelen);
264 iov[7].iov_base = (char *)&te->type;
265 iov[7].iov_len = sizeof(te->type);
266 iov[8].iov_base = (char *)te->name;
267 iov[8].iov_len = namelen;
268
269 len = 9;
270 if (te->type == DT_REG && te->size) {
271 iov[9].iov_base = (char *)te->data.value;
272 iov[9].iov_len = te->size;
273 len++;
274 }
275
276 if (dfsm_send_update(dfsm, iov, len) != CS_OK)
277 return FALSE;
278
279 return TRUE;
280 }
281
282 memdb_tree_entry_t *
283 dcdb_parse_update_inode(
284 const void *msg,
285 size_t msg_len)
286 {
287 if (msg_len < 40) {
288 cfs_critical("received short message (msg_len < 40)");
289 return NULL;
290 }
291
292 guint64 parent = *((guint64 *)msg);
293 msg += 8; msg_len -= 8;
294 guint64 inode = *((guint64 *)msg);
295 msg += 8; msg_len -= 8;
296 guint64 version = *((guint64 *)msg);
297 msg += 8; msg_len -= 8;
298
299 guint32 writer = *((guint32 *)msg);
300 msg += 4; msg_len -= 4;
301 guint32 mtime = *((guint32 *)msg);
302 msg += 4; msg_len -= 4;
303 guint32 size = *((guint32 *)msg);
304 msg += 4; msg_len -= 4;
305 guint32 namelen = *((guint32 *)msg);
306 msg += 4; msg_len -= 4;
307
308 char type = *((char *)msg);
309 msg += 1; msg_len -= 1;
310
311 if (!(type == DT_REG || type == DT_DIR)) {
312 cfs_critical("received mailformed message (unknown inode type %d)", type);
313 return NULL;
314 }
315
316 if (msg_len != (size + namelen)) {
317 cfs_critical("received mailformed message (msg_len != (size + namelen))");
318 return NULL;
319 }
320
321 char *name = (char *)msg;
322 msg += namelen; msg_len -= namelen;
323
324 const void *data = msg;
325
326 if (name[namelen - 1] != 0) {
327 cfs_critical("received mailformed message (name[namelen-1] != 0)");
328 return NULL;
329 }
330
331 memdb_tree_entry_t *te = memdb_tree_entry_new(name);
332 if (!te)
333 return NULL;
334
335 te->parent = parent;
336 te->version = version;
337 te->inode = inode;
338 te->writer = writer;
339 te->mtime = mtime;
340 te->size = size;
341 te->type = type;
342
343 if (te->type == DT_REG && te->size) {
344 te->data.value = g_memdup(data, te->size);
345 if (!te->data.value) {
346 memdb_tree_entry_free(te);
347 return NULL;
348 }
349 }
350
351 return te;
352 }
353
354 void
355 dcdb_sync_corosync_conf(
356 memdb_t *memdb,
357 gboolean notify_corosync)
358 {
359 g_return_if_fail(memdb != NULL);
360
361 int len;
362 gpointer data = NULL;
363
364 len = memdb_read(memdb, "corosync.conf", &data);
365 if (len <= 0)
366 return;
367
368 guint64 new_version = cluster_config_version(data, len);
369 if (!new_version) {
370 cfs_critical("unable to parse cluster config_version");
371 return;
372 }
373
374 char *old_data = NULL;
375 gsize old_length = 0;
376 guint64 old_version = 0;
377
378 GError *err = NULL;
379 if (!g_file_get_contents(HOST_CLUSTER_CONF_FN, &old_data, &old_length, &err)) {
380 if (!g_error_matches(err, G_FILE_ERROR, G_FILE_ERROR_NOENT)) {
381 cfs_critical("unable to read cluster config file '%s' - %s",
382 HOST_CLUSTER_CONF_FN, err->message);
383 }
384 g_error_free (err);
385 } else {
386 if (old_length)
387 old_version = cluster_config_version(old_data, old_length);
388 }
389
390 /* test if something changed - return if no changes */
391 if (data && old_data && (old_length == len) &&
392 !memcmp(data, old_data, len))
393 goto ret;
394
395 if (new_version < old_version) {
396 cfs_critical("local corosync.conf is newer");
397 goto ret;
398 }
399
400 if (!atomic_write_file(HOST_CLUSTER_CONF_FN, data, len, 0644, 0))
401 goto ret;
402
403 cfs_message("wrote new corosync config '%s' (version = %" G_GUINT64_FORMAT ")",
404 HOST_CLUSTER_CONF_FN, new_version);
405
406 if (notify_corosync && old_version) {
407 /* tell corosync that there is a new config file */
408 cfs_debug ("run corosync-cfgtool -R");
409 int status = system("corosync-cfgtool -R >/dev/null 2>&1");
410 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
411 cfs_critical("corosync-cfgtool -R failed with exit code %d\n", WEXITSTATUS(status));
412 }
413 cfs_debug ("end corosync-cfgtool -R");
414 }
415
416 ret:
417
418 if (data)
419 g_free(data);
420
421 if (old_data)
422 g_free(old_data);
423 }
424
425 static gpointer
426 dcdb_get_state(
427 dfsm_t *dfsm,
428 gpointer data,
429 unsigned int *res_len)
430 {
431 g_return_val_if_fail(dfsm != NULL, FALSE);
432 g_return_val_if_fail(data != NULL, FALSE);
433
434 memdb_t *memdb = (memdb_t *)data;
435
436 g_return_val_if_fail(memdb->root != NULL, FALSE);
437
438 cfs_debug("enter %s %016" PRIX64 " %08X", __func__, (uint64_t) memdb->root->version, memdb->root->mtime);
439
440 g_mutex_lock (&memdb->mutex);
441 memdb_index_t *idx = memdb_encode_index(memdb->index, memdb->root);
442 g_mutex_unlock (&memdb->mutex);
443
444 if (idx) {
445 *res_len = idx->bytes;
446 }
447
448 return idx;
449 }
450
451 static int
452 dcdb_select_leader(
453 int node_count,
454 memdb_index_t *idx[])
455 {
456 g_return_val_if_fail(idx != NULL, -1);
457
458 cfs_debug("enter %s", __func__);
459
460 int leader = -1;
461
462 /* try select most actual data - compare 'version' an 'time of last write'
463 * NOTE: syncinfo members are sorted
464 */
465 for (int i = 0; i < node_count; i++) {
466 if (leader < 0) {
467 leader = i;
468 } else {
469 memdb_index_t *leaderidx = idx[leader];
470
471 if (idx[i]->version == leaderidx->version &&
472 idx[i]->mtime > leaderidx->mtime) {
473 leader = i;
474 } else if (idx[i]->version > leaderidx->version) {
475 leader = i;
476 }
477 }
478 }
479
480 cfs_debug ("leave %s (%d)", __func__, leader);
481
482 return leader;
483 }
484
485 static gboolean
486 dcdb_create_and_send_updates(
487 dfsm_t *dfsm,
488 memdb_t *memdb,
489 memdb_index_t *master,
490 int node_count,
491 memdb_index_t *idx[])
492 {
493 g_return_val_if_fail(dfsm != NULL, FALSE);
494 g_return_val_if_fail(memdb != NULL, FALSE);
495 g_return_val_if_fail(master != NULL, FALSE);
496
497 cfs_debug("enter %s", __func__);
498
499 gboolean res = FALSE;
500
501 GHashTable *updates = g_hash_table_new(g_int64_hash, g_int64_equal);
502 if (!updates)
503 goto ret;
504
505 g_mutex_lock (&memdb->mutex);
506
507 for (int n = 0; n < node_count; n++) {
508 memdb_index_t *slave = idx[n];
509
510 if (slave == master)
511 continue;
512
513 int j = 0;
514
515 for (int i = 0; i < master->size; i++) {
516 guint64 inode = master->entries[i].inode;
517 while (j < slave->size && slave->entries[j].inode < inode)
518 j++;
519
520 if (memcmp(&slave->entries[j], &master->entries[i],
521 sizeof(memdb_index_extry_t)) == 0) {
522 continue;
523 }
524
525 if (g_hash_table_lookup(updates, &inode))
526 continue;
527
528 cfs_debug("found different inode %d %016" PRIX64, i, (uint64_t) inode);
529
530 memdb_tree_entry_t *te, *cpy;
531
532 if (!(te = g_hash_table_lookup(memdb->index, &inode))) {
533 cfs_critical("can get inode data for inode %016" PRIX64, (uint64_t) inode);
534 goto ret;
535 }
536
537 cpy = memdb_tree_entry_copy(te, 1);
538 g_hash_table_replace(updates, &cpy->inode, cpy);
539 }
540 }
541
542 g_mutex_unlock (&memdb->mutex);
543
544 /* send updates */
545
546 GHashTableIter iter;
547 gpointer key, value;
548 int count = 0;
549
550 cfs_message("start sending inode updates");
551
552 g_hash_table_iter_init (&iter, updates);
553 while (g_hash_table_iter_next (&iter, &key, &value)) {
554 memdb_tree_entry_t *te = (memdb_tree_entry_t *)value;
555 count++;
556
557 if (!dcdb_send_update_inode(dfsm, te)) {
558 /* tolerate error here */
559 cfs_critical("sending update inode failed %016" PRIX64, (uint64_t) te->inode);
560 } else {
561 cfs_debug("sent update inode %016" PRIX64, (uint64_t) te->inode);
562 }
563
564 memdb_tree_entry_free(te);
565 }
566
567 cfs_message("sent all (%d) updates", count);
568
569 if (dfsm_send_update_complete(dfsm) != CS_OK) {
570 cfs_critical("failed to send UPDATE_COMPLETE message");
571 goto ret;
572 }
573
574 res = TRUE;
575
576 ret:
577 if (updates)
578 g_hash_table_destroy(updates);
579
580 cfs_debug("leave %s (%d)", __func__, res);
581
582 return res;
583 }
584
585 static int
586 dcdb_process_state_update(
587 dfsm_t *dfsm,
588 gpointer data,
589 dfsm_sync_info_t *syncinfo)
590 {
591 g_return_val_if_fail(dfsm != NULL, -1);
592 g_return_val_if_fail(data != NULL, -1);
593 g_return_val_if_fail(syncinfo != NULL, -1);
594
595 memdb_t *memdb = (memdb_t *)data;
596
597 cfs_debug("enter %s", __func__);
598
599 dcdb_sync_info_t *localsi = g_new0(dcdb_sync_info_t, 1);
600 if (!localsi)
601 return -1;
602
603 syncinfo->data = localsi;
604
605 memdb_index_t *idx[syncinfo->node_count];
606
607 for (int i = 0; i < syncinfo->node_count; i++) {
608 dfsm_node_info_t *ni = &syncinfo->nodes[i];
609
610 if (ni->state_len < sizeof(memdb_index_t)) {
611 cfs_critical("received short memdb index (len < sizeof(memdb_index_t))");
612 return -1;
613 }
614
615 idx[i] = (memdb_index_t *)ni->state;
616
617 if (ni->state_len != idx[i]->bytes) {
618 cfs_critical("received mailformed memdb index (len != idx->bytes)");
619 return -1;
620 }
621 }
622
623 /* select leader - set mode */
624 int leader = dcdb_select_leader(syncinfo->node_count, idx);
625 if (leader < 0) {
626 cfs_critical("unable to select leader failed");
627 return -1;
628 }
629
630 cfs_message("leader is %d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
631
632 memdb_index_t *leaderidx = idx[leader];
633 localsi->master = leaderidx;
634
635 GString *synced_member_ids = g_string_new(NULL);
636 g_string_append_printf(synced_member_ids, "%d/%d", syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid);
637
638 for (int i = 0; i < syncinfo->node_count; i++) {
639 dfsm_node_info_t *ni = &syncinfo->nodes[i];
640 if (i == leader) {
641 ni->synced = 1;
642 } else {
643 if (leaderidx->bytes == idx[i]->bytes &&
644 memcmp(leaderidx, idx[i], leaderidx->bytes) == 0) {
645 ni->synced = 1;
646 g_string_append_printf(synced_member_ids, ", %d/%d", ni->nodeid, ni->pid);
647 }
648 }
649 if (dfsm_nodeid_is_local(dfsm, ni->nodeid, ni->pid))
650 localsi->idx = idx[i];
651 }
652 cfs_message("synced members: %s", synced_member_ids->str);
653 g_string_free(synced_member_ids, 1);
654
655 /* send update */
656 if (dfsm_nodeid_is_local(dfsm, syncinfo->nodes[leader].nodeid, syncinfo->nodes[leader].pid)) {
657 if (!dcdb_create_and_send_updates(dfsm, memdb, leaderidx, syncinfo->node_count, idx))
658 return -1;
659 }
660
661 return 0;
662 }
663
664 static int
665 dcdb_process_update(
666 dfsm_t *dfsm,
667 gpointer data,
668 dfsm_sync_info_t *syncinfo,
669 uint32_t nodeid,
670 uint32_t pid,
671 const void *msg,
672 size_t msg_len)
673 {
674 g_return_val_if_fail(dfsm != NULL, -1);
675 g_return_val_if_fail(data != NULL, -1);
676 g_return_val_if_fail(msg != NULL, -1);
677 g_return_val_if_fail(syncinfo != NULL, -1);
678 g_return_val_if_fail(syncinfo->data != NULL, -1);
679
680 cfs_debug("enter %s", __func__);
681
682 memdb_tree_entry_t *te;
683
684 if (!(te = dcdb_parse_update_inode(msg, msg_len)))
685 return -1;
686
687 cfs_debug("received inode update %016" PRIX64 " from node %d",
688 (uint64_t) te->inode, nodeid);
689
690 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
691
692 localsi->updates = g_list_append(localsi->updates, te);
693
694 return 0;
695 }
696
697 static int
698 dcdb_commit(
699 dfsm_t *dfsm,
700 gpointer data,
701 dfsm_sync_info_t *syncinfo)
702 {
703 g_return_val_if_fail(dfsm != NULL, -1);
704 g_return_val_if_fail(data != NULL, -1);
705 g_return_val_if_fail(syncinfo != NULL, -1);
706 g_return_val_if_fail(syncinfo->data != NULL, -1);
707
708 memdb_t *memdb = (memdb_t *)data;
709
710 cfs_debug("enter %s", __func__);
711
712 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
713
714 guint count = g_list_length(localsi->updates);
715
716 cfs_message("update complete - trying to commit (got %u inode updates)", count);
717
718 if (!bdb_backend_commit_update(memdb, localsi->master, localsi->idx, localsi->updates))
719 return -1;
720
721 dcdb_sync_corosync_conf(memdb, FALSE);
722
723 return 0;
724 }
725
726 static int
727 dcdb_cleanup(
728 dfsm_t *dfsm,
729 gpointer data,
730 dfsm_sync_info_t *syncinfo)
731 {
732 g_return_val_if_fail(dfsm != NULL, -1);
733 g_return_val_if_fail(data != NULL, -1);
734 g_return_val_if_fail(syncinfo != NULL, -1);
735 g_return_val_if_fail(syncinfo->data != NULL, -1);
736
737 cfs_debug("enter %s", __func__);
738
739 dcdb_sync_info_t *localsi = (dcdb_sync_info_t *)syncinfo->data;
740
741 GList *iter = localsi->updates;
742 while (iter) {
743 memdb_tree_entry_t *te = (memdb_tree_entry_t *)iter->data;
744 memdb_tree_entry_free(te);
745 iter = g_list_next(iter);
746 }
747 g_list_free(localsi->updates);
748
749 g_free(localsi);
750
751 return 0;
752 }
753
754 gboolean
755 dcdb_checksum(
756 dfsm_t *dfsm,
757 gpointer data,
758 unsigned char *csum,
759 size_t csum_len)
760 {
761 g_return_val_if_fail(dfsm != NULL, FALSE);
762 g_return_val_if_fail(csum != NULL, FALSE);
763
764 memdb_t *memdb = (memdb_t *)data;
765
766 g_return_val_if_fail(memdb != NULL, FALSE);
767
768 cfs_debug("enter %s %016" PRIX64 " %08X", __func__, memdb->root->version, memdb->root->mtime);
769
770 g_mutex_lock (&memdb->mutex);
771 gboolean res = memdb_compute_checksum(memdb->index, memdb->root, csum, csum_len);
772 g_mutex_unlock (&memdb->mutex);
773
774 cfs_debug("leave %s %016" PRIX64 " (%d)", __func__, *( (uint64_t *) csum), res);
775
776 return res;
777 }
778
779 static int
780 dcdb_deliver(
781 dfsm_t *dfsm,
782 gpointer data,
783 int *res_ptr,
784 uint32_t nodeid,
785 uint32_t pid,
786 uint16_t msg_type,
787 uint32_t msg_time,
788 const void *msg,
789 size_t msg_len)
790 {
791 g_return_val_if_fail(dfsm != NULL, -1);
792 g_return_val_if_fail(msg != NULL, -1);
793
794 memdb_t *memdb = (memdb_t *)data;
795
796 g_return_val_if_fail(memdb != NULL, -1);
797 g_return_val_if_fail(res_ptr != NULL, -1);
798
799 int res = 1;
800
801 int msg_result = -ENOTSUP;
802
803 if (!DCDB_VALID_MESSAGE_TYPE(msg_type))
804 goto unknown;
805
806 cfs_debug("process message %u (length = %zd)", msg_type, msg_len);
807
808 if (!cfs_is_quorate()) {
809 cfs_critical("received write while not quorate - trigger resync");
810 msg_result = -EACCES;
811 goto leave;
812 }
813
814 const char *path, *to, *buf;
815 guint32 size, offset, flags;
816 const guchar *csum;
817
818 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK_REQUEST ||
819 msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
820 msg_result = 0; /* ignored anyways */
821
822 if (!dcdb_parse_unlock_request(msg, msg_len, &path, &csum))
823 goto leave;
824
825 guchar cur_csum[32];
826 memdb_tree_entry_t *te = memdb_getattr(memdb, path);
827
828 if (te && te->type == DT_DIR &&
829 path_is_lockdir(path) && memdb_tree_entry_csum(te, cur_csum) &&
830 (memcmp(csum, cur_csum, 32) == 0)) {
831
832 if (msg_type == DCDB_MESSAGE_CFS_UNLOCK) {
833
834 cfs_debug("got valid unlock message");
835
836 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
837
838 } else if (dfsm_lowest_nodeid(dfsm)) {
839
840 cfs_debug("got valid unlock request message");
841
842 if (memdb_lock_expired(memdb, path, csum)) {
843 cfs_debug("sending unlock message");
844 dcdb_send_unlock(dfsm, path, csum, FALSE);
845 }
846 }
847 }
848 memdb_tree_entry_free(te);
849
850 } else if (msg_type == DCDB_MESSAGE_CFS_WRITE) {
851
852 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
853 &size, &offset, &flags))
854 goto leave;
855
856 msg_result = memdb_write(memdb, path, nodeid, msg_time,
857 buf, size, offset, flags);
858
859 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
860 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
861
862 } else if (msg_type == DCDB_MESSAGE_CFS_CREATE) {
863
864 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
865 &size, &offset, &flags))
866 goto leave;
867
868 msg_result = memdb_create(memdb, path, nodeid, msg_time);
869
870 if ((msg_result >= 0) && !strcmp(path, "corosync.conf"))
871 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
872
873 } else if (msg_type == DCDB_MESSAGE_CFS_MKDIR) {
874
875 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
876 &size, &offset, &flags))
877 goto leave;
878
879 msg_result = memdb_mkdir(memdb, path, nodeid, msg_time);
880
881 } else if (msg_type == DCDB_MESSAGE_CFS_DELETE) {
882
883 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
884 &size, &offset, &flags))
885 goto leave;
886
887 msg_result = memdb_delete(memdb, path, nodeid, msg_time);
888
889 } else if (msg_type == DCDB_MESSAGE_CFS_RENAME) {
890
891 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
892 &size, &offset, &flags))
893 goto leave;
894
895 msg_result = memdb_rename(memdb, path, to, nodeid, msg_time);
896
897 if ((msg_result >= 0) && !strcmp(to, "corosync.conf"))
898 dcdb_sync_corosync_conf(memdb, dfsm_nodeid_is_local(dfsm, nodeid, pid));
899
900 } else if (msg_type == DCDB_MESSAGE_CFS_MTIME) {
901
902 if (!dcdb_parse_fuse_message(msg, msg_len, &path, &to, &buf,
903 &size, &offset, &flags))
904 goto leave;
905
906 /* Note: mtime is sent via offset field */
907 msg_result = memdb_mtime(memdb, path, nodeid, offset);
908
909 } else {
910 goto unknown;
911 }
912
913 *res_ptr = msg_result;
914 ret:
915 if (memdb->errors) {
916 dfsm_set_errormode(dfsm);
917 res = -1;
918 }
919
920 cfs_debug("leave %s (%d)", __func__, res);
921
922 return res;
923
924 unknown:
925 cfs_critical("received unknown message type (msg_type == %u)", msg_type);
926 leave:
927 res = -1;
928 goto ret;
929
930 };
931
932 static dfsm_callbacks_t dcdb_dfsm_callbacks = {
933 .dfsm_deliver_fn = dcdb_deliver,
934 .dfsm_get_state_fn = dcdb_get_state,
935 .dfsm_process_state_update_fn = dcdb_process_state_update,
936 .dfsm_process_update_fn = dcdb_process_update,
937 .dfsm_commit_fn = dcdb_commit,
938 .dfsm_cleanup_fn = dcdb_cleanup,
939 .dfsm_checksum_fn = dcdb_checksum,
940 };
941
942 dfsm_t *dcdb_new(memdb_t *memdb)
943 {
944 g_return_val_if_fail(memdb != NULL, NULL);
945
946 return dfsm_new(memdb, DCDB_CPG_GROUP_NAME, G_LOG_DOMAIN,
947 DCDB_PROTOCOL_VERSION, &dcdb_dfsm_callbacks);
948 }