]> git.proxmox.com Git - pve-cluster.git/blame - data/src/memdb.c
correctly use 'from' instead of 'to'
[pve-cluster.git] / data / src / memdb.c
CommitLineData
fe000966
DM
1/*
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
13
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Author: Dietmar Maurer <dietmar@proxmox.com>
18
19*/
20
21#ifdef HAVE_CONFIG_H
22#include <config.h>
23#endif /* HAVE_CONFIG_H */
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <fcntl.h>
30#include <sys/file.h>
31#include <unistd.h>
32#include <dirent.h>
33#include <string.h>
34#include <errno.h>
35#include <glib.h>
36
37#include "cfs-utils.h"
38#include "memdb.h"
39#include "status.h"
40
41#define CFS_LOCK_TIMEOUT (60*2)
42
43memdb_tree_entry_t *
44memdb_tree_entry_new(
45 const char *name)
46{
47 g_return_val_if_fail(name != NULL, NULL);
48
49 memdb_tree_entry_t *te = g_malloc0(sizeof(memdb_tree_entry_t) + strlen(name) + 1);
50 g_return_val_if_fail(te != NULL, NULL);
51
52 strcpy(te->name, name);
53
54 return te;
55}
56
57memdb_tree_entry_t *
58memdb_tree_entry_copy(
59 memdb_tree_entry_t *te,
60 gboolean with_data)
61{
62 g_return_val_if_fail(te != NULL, NULL);
63
64 memdb_tree_entry_t *cpy = memdb_tree_entry_new(te->name);
65
66 cpy->parent = te->parent;
67 cpy->inode = te->inode;
68 cpy->version = te->version;
69 cpy->writer = te->writer;
70 cpy->mtime = te->mtime;
71 cpy->type = te->type;
72 cpy->size = te->size;
73
74 if (with_data && te->size && te->type == DT_REG) {
75 cpy->data.value = g_memdup(te->data.value, te->size);
76 } else {
77 cpy->data.value = NULL;
78 }
79
80 return cpy;
81}
82
83void
84memdb_tree_entry_free(
85 memdb_tree_entry_t *te)
86{
87 g_return_if_fail(te != NULL);
88
89 if (te->type == DT_REG) {
90 if (te->data.value)
91 g_free(te->data.value);
92 }
93
94 if (te->type == DT_DIR) {
95 if (te->data.entries)
96 g_hash_table_destroy(te->data.entries);
97 }
98
99 g_free(te);
100}
101
102void
103memdb_lock_info_free(memdb_lock_info_t *li)
104{
105 g_return_if_fail(li != NULL);
106
107 if (li->path)
108 g_free(li->path);
109
110 g_free(li);
111}
112
113static gint
114memdb_tree_compare(
115 gconstpointer v1,
116 gconstpointer v2)
117{
118 guint64 a = ((const memdb_tree_entry_t *)v1)->inode;
119 guint64 b = ((const memdb_tree_entry_t *)v2)->inode;
120
121 if (a == b)
122 return 0;
123
124 if (a > b)
125 return 1;
126
127 return -1;
128}
129
/*
 * Split 'path' into its directory part and its last component after
 * stripping all trailing slashes.
 *
 * Both results are newly allocated strings owned by the caller.
 */
static void
split_path(
	const char *path,
	char **dirname,
	char **basename)
{
	char *trimmed = g_strdup (path);

	/* cut every trailing '/'; a path made only of slashes becomes "" */
	for (int last = strlen (trimmed) - 1; last >= 0 && trimmed[last] == '/'; last--)
		trimmed[last] = 0;

	char *dir_part = g_path_get_dirname (trimmed);
	char *base_part = g_path_get_basename (trimmed);

	g_free (trimmed);

	*dirname = dir_part;
	*basename = base_part;
}
148
149static memdb_tree_entry_t *
150memdb_lookup_dir_entry(
151 memdb_t *memdb,
152 const char *name,
153 memdb_tree_entry_t *parent)
154{
155
156 g_return_val_if_fail(memdb != NULL, NULL);
157 g_return_val_if_fail(name != NULL, NULL);
158 g_return_val_if_fail(parent != NULL, NULL);
159 g_return_val_if_fail(parent->type == DT_DIR, NULL);
160
161 GHashTable *ht = parent->data.entries;
162
163 g_return_val_if_fail(ht != NULL, NULL);
164
165 return g_hash_table_lookup(ht, name);
166}
167
168static memdb_tree_entry_t *
169memdb_lookup_path(
170 memdb_t *memdb,
171 const char *path,
172 memdb_tree_entry_t **parent)
173{
174 g_return_val_if_fail(memdb != NULL, NULL);
175 g_return_val_if_fail(path != NULL, NULL);
176 g_return_val_if_fail(parent != NULL, NULL);
177
178 memdb_tree_entry_t *cdir = memdb->root;
179 *parent = NULL;
180
181 if (path[0] == 0 || ((path[0] == '.' || path[0] == '/') && path[1] == 0))
182 return cdir;
183
184 gchar **set = g_strsplit_set(path, "/", 0);
185
186 int i = 0;
187 char *name;
188
189 while ((name = set[i++])) {
190
191 if (name[0] == 0) continue;
192
193 *parent = cdir;
194 if ((cdir = memdb_lookup_dir_entry(memdb, name, cdir)) == NULL)
195 break;
196 }
197
198 g_strfreev(set);
199
200 return cdir;
201}
202
203
204static gboolean
205name_is_vm_config(
206 const char *name,
207 guint32 *vmid_ret)
208{
209 if (!name || name[0] < '1' || name[0] > '9')
210 return FALSE;
211
212 char *end = NULL;
213 guint32 vmid = strtoul(name, &end, 10);
214
215 if (!end || end[0] != '.' || end[1] != 'c'|| end[2] != 'o' || end[3] != 'n' ||
216 end[4] != 'f' || end[5] != 0)
217 return FALSE;
218
219 if (vmid_ret)
220 *vmid_ret = vmid;
221
222 return TRUE;
223}
224
225static gboolean
226valid_nodename(
227 const char *nodename)
228{
229 g_return_val_if_fail(nodename != NULL, FALSE);
230
231 /* LDH rule (letters, digits, hyphen) */
232
233 int len = strlen(nodename);
234 for (int i = 0; i < len; i ++) {
235 char c = nodename[i];
236 if ((c >= 'A' && c <= 'Z') ||
237 (c >= 'a' && c <= 'z') ||
238 (i != 0 && c >= '0' && c <= '9') ||
239 (i != 0 && i != (len-1) && c == '-'))
240 continue;
241 return FALSE;
242 }
243
244 return TRUE;
245}
246
247static char*
248dir_contain_vm_config(
249 const char *dirname,
250 int *vmtype_ret)
251{
252 if (!dirname)
253 return NULL;
254
255 if (strncmp(dirname, "nodes/", 6) != 0)
256 return NULL;
257
258 dirname += 6;
259
260 char *nodename = NULL;
261
262 char **sa = g_strsplit(dirname, "/", 2);
263 if (sa[0] && sa[1] && valid_nodename(sa[0])) {
264 if (strcmp(sa[1], "qemu-server") == 0) {
265 *vmtype_ret = VMTYPE_QEMU;
266 nodename = g_strdup(sa[0]);
267 } else if (strcmp(sa[1], "openvz") == 0) {
268 *vmtype_ret = VMTYPE_OPENVZ;
269 nodename = g_strdup(sa[0]);
270 }
271 }
272
273 g_strfreev(sa);
274
275 return nodename;
276}
277
278static char *
279path_contain_vm_config(
280 const char *path,
281 int *vmtype_ret,
282 guint32 *vmid_ret)
283{
284 if (!path)
285 return NULL;
286
287 char *dirname = NULL;
288 char *base = NULL;
289 char *nodename = NULL;
290
291 split_path(path, &dirname, &base);
292
293 if (name_is_vm_config(base, vmid_ret))
294 nodename = dir_contain_vm_config(dirname, vmtype_ret);
295
296 if (dirname) g_free (dirname);
297 if (base) g_free (base);
298
299 return nodename;
300}
301
302static gboolean
303vmlist_add_dir(
304 memdb_t *memdb,
305 GHashTable *vmlist,
306 const char *nodename,
307 const int vmtype,
308 memdb_tree_entry_t *subdir)
309{
310 g_return_val_if_fail(memdb != NULL, FALSE);
311 g_return_val_if_fail(vmlist != NULL, FALSE);
312 g_return_val_if_fail(subdir != NULL, FALSE);
313 g_return_val_if_fail(subdir->type == DT_DIR, FALSE);
314 g_return_val_if_fail(subdir->data.entries != NULL, FALSE);
315
316 gboolean ret = TRUE;
317
318 GHashTable *ht = subdir->data.entries;
319 GHashTableIter iter;
320 gpointer key, value;
321
322 g_hash_table_iter_init (&iter, ht);
323
324 while (g_hash_table_iter_next (&iter, &key, &value)) {
325
326 memdb_tree_entry_t *node_te = (memdb_tree_entry_t *)value;
327
328 if (node_te->type != DT_REG)
329 continue;
330
331 guint32 vmid = 0;
332 if (!name_is_vm_config(node_te->name, &vmid))
333 continue;
334
335 if (!vmlist_hash_insert_vm(vmlist, vmtype, vmid, nodename, FALSE))
336 ret = FALSE;
337 }
338
339 return ret;
340}
341
342
343gboolean
344memdb_lock_expired(
345 memdb_t *memdb,
346 const char *path,
347 const guchar csum[32])
348{
349 g_return_val_if_fail(memdb != NULL, FALSE);
350 g_return_val_if_fail(memdb->locks != NULL, FALSE);
351 g_return_val_if_fail(path != NULL, FALSE);
352 g_return_val_if_fail(csum != NULL, FALSE);
353
354 memdb_lock_info_t *li;
355 uint32_t ctime = time(NULL);
356
357 if ((li = g_hash_table_lookup(memdb->locks, path))) {
358 if (memcmp(csum, li->csum, 32) != 0) {
359 li->ltime = ctime;
360 memcpy(li->csum, csum, 32);
361 g_critical("wrong lock csum - reset timeout");
362 return FALSE;
363 }
364 if ((ctime > li->ltime) && ((ctime - li->ltime) > CFS_LOCK_TIMEOUT))
365 return TRUE;
366 } else {
367 li = g_new0(memdb_lock_info_t, 1);
368 li->path = g_strdup(path);
369 li->ltime = ctime;
370 memcpy(li->csum, csum, 32);
371 g_hash_table_replace(memdb->locks, li->path, li);
372 }
373
374 return FALSE;
375}
376
377void
378memdb_update_locks(memdb_t *memdb)
379{
380 g_return_if_fail(memdb != NULL);
381 g_return_if_fail(memdb->locks != NULL);
382
383 memdb_tree_entry_t *te, *parent;
384
385 if (!(te = memdb_lookup_path(memdb, "priv/lock", &parent)))
386 return;
387
388 if (te->type != DT_DIR)
389 return;
390
391
392 GHashTable *old = memdb->locks;
393 memdb->locks = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
394 (GDestroyNotify)memdb_lock_info_free);
395 GHashTableIter iter;
396 GHashTable *ht = te->data.entries;
397
398 gpointer key, value;
399
400 g_hash_table_iter_init (&iter, ht);
401 while (g_hash_table_iter_next (&iter, &key, &value)) {
402
403 memdb_tree_entry_t *lock_te = (memdb_tree_entry_t *)value;
404 if (lock_te->type != DT_DIR)
405 continue;
406
407 memdb_lock_info_t *li;
408 li = g_new0(memdb_lock_info_t, 1);
409 li->path = g_strdup_printf("priv/lock/%s", lock_te->name);
410
411 guchar csum[32];
412 if (memdb_tree_entry_csum(lock_te, csum)) {
413 memcpy(li->csum, csum, 32);
414 memdb_lock_info_t *oldli;
415 if ((oldli = g_hash_table_lookup(memdb->locks, lock_te->name)) &&
416 (memcmp(csum, oldli->csum, 32) == 0)) {
417 li->ltime = oldli->ltime;
418 } else {
419 li->ltime = time(NULL);
420 }
421 g_hash_table_insert(memdb->locks, li->path, li);
422 } else {
423 memdb_lock_info_free(li);
424 }
425 }
426
427 if (old)
428 g_hash_table_destroy(old);
429
430}
431
432gboolean
433memdb_recreate_vmlist(
434 memdb_t *memdb)
435{
436 g_return_val_if_fail(memdb != NULL, FALSE);
437
438 memdb_tree_entry_t *te, *parent;
439
440 if (!(te = memdb_lookup_path(memdb, "nodes", &parent)))
441 return TRUE;
442
443 if (te->type != DT_DIR)
444 return TRUE;
445
446 GHashTable *vmlist = vmlist_hash_new();
447
448 GHashTable *ht = te->data.entries;
449
450 gboolean ret = TRUE;
451
452 GHashTableIter iter;
453 gpointer key, value;
454
455 g_hash_table_iter_init (&iter, ht);
456
457 while (g_hash_table_iter_next (&iter, &key, &value)) {
458
459 memdb_tree_entry_t *node_te = (memdb_tree_entry_t *)value;
460 if (node_te->type != DT_DIR)
461 continue;
462
463 if (!valid_nodename(node_te->name))
464 continue;
465
466 if ((te = g_hash_table_lookup(node_te->data.entries, "qemu-server"))) {
467 if (!vmlist_add_dir(memdb, vmlist, node_te->name, VMTYPE_QEMU, te))
468 ret = FALSE;
469 }
470 if ((te = g_hash_table_lookup(node_te->data.entries, "openvz"))) {
471 if (!vmlist_add_dir(memdb, vmlist, node_te->name, VMTYPE_OPENVZ, te))
472 ret = FALSE;
473 }
474 }
475
476 /* always update list - even if we detected duplicates */
477 cfs_status_set_vmlist(vmlist);
478
479 return ret;
480}
481
482memdb_t *
483memdb_open(const char *dbfilename)
484{
485 memdb_t *memdb = g_new0(memdb_t, 1);
486
487 memdb->mutex = g_mutex_new();
488
489 memdb->dbfilename = g_strdup(dbfilename);
490
491 memdb->root = memdb_tree_entry_new("");
492 memdb->root->data.entries = g_hash_table_new(g_str_hash, g_str_equal);
493 memdb->root->type = DT_DIR;
494
495 memdb->index = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL,
496 (GDestroyNotify)memdb_tree_entry_free);
497
498 g_hash_table_replace(memdb->index, &memdb->root->inode, memdb->root);
499
500 memdb->locks = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
501 (GDestroyNotify)memdb_lock_info_free);
502
503 if (!(memdb->bdb = bdb_backend_open(dbfilename, memdb->root, memdb->index))) {
504 memdb_close(memdb);
505 return NULL;
506 }
507
508 record_memdb_reload();
509
510 if (!memdb_recreate_vmlist(memdb)) {
511 memdb_close(memdb);
512 return NULL;
513 }
514
515 memdb_update_locks(memdb);
516
517 cfs_debug("memdb open '%s' successful (version = %016zX)",
518 dbfilename, memdb->root->version);
519
520 return memdb;
521}
522
523void
524memdb_close(memdb_t *memdb)
525{
526 g_return_if_fail(memdb != NULL);
527
528 g_mutex_lock (memdb->mutex);
529
530 if (memdb->bdb)
531 bdb_backend_close(memdb->bdb);
532
533 if (memdb->index)
534 g_hash_table_destroy(memdb->index);
535
536 if (memdb->locks)
537 g_hash_table_destroy(memdb->locks);
538
539 if (memdb->dbfilename)
540 g_free(memdb->dbfilename);
541
542 memdb->index = NULL;
543 memdb->bdb = NULL;
544 memdb->dbfilename = NULL;
545
546 g_mutex_unlock (memdb->mutex);
547
548 g_mutex_free(memdb->mutex);
549
550 g_free(memdb);
551}
552
553int memdb_mkdir(
554 memdb_t *memdb,
555 const char *path,
556 guint32 writer,
557 guint32 mtime)
558{
559 g_return_val_if_fail(memdb != NULL, -EINVAL);
560 g_return_val_if_fail(path != NULL, -EINVAL);
561
562 int ret = -EACCES;
563
564 char *dirname = NULL;
565 char *base = NULL;
566
567 g_mutex_lock (memdb->mutex);
568
569 if (memdb->errors) {
570 ret = -EIO;
571 goto ret;
572 }
573
574 split_path(path, &dirname, &base);
575
576 memdb_tree_entry_t *parent, *unused;
577
578 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
579 ret = -ENOENT;
580 goto ret;
581 }
582
583 if (parent->type != DT_DIR) {
584 ret = -ENOTDIR;
585 goto ret;
586 }
587
588 /* do not allow '.' and '..' */
589 if ((base[0] == 0) ||
590 (base[0] == '.' && base[1] == 0) ||
591 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
592 ret = -EACCES;
593 goto ret;
594 }
595
596 memdb_tree_entry_t *te;
597 if ((te = memdb_lookup_dir_entry(memdb, base, parent))) {
598 ret = -EEXIST;
599 goto ret;
600 }
601
602 memdb->root->version++;
603 memdb->root->mtime = mtime;
604 memdb->root->writer = writer;
605
606 te = memdb_tree_entry_new(base);
607 te->parent = parent->inode;
608 te->data.entries = g_hash_table_new(g_str_hash, g_str_equal);
609 te->inode = te->version = memdb->root->version;
610 te->writer = writer;
611 te->type = DT_DIR;
612 te->mtime = mtime;
613
614 g_hash_table_replace(parent->data.entries, te->name, te);
615 g_hash_table_replace(memdb->index, &te->inode, te);
616
617 cfs_debug("memdb_mkdir %s %s %016zX", dirname, base, memdb->root->version);
618
619 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
620 te->writer, te->mtime, 0, DT_DIR, te->name, NULL, 0)) {
621 memdb->errors = 1;
622 ret = -EIO;
623 goto ret;
624 }
625
626 if (strcmp(dirname, "priv/lock") == 0) {
627 g_hash_table_remove(memdb->locks, path);
628 guchar csum[32];
629 if (memdb_tree_entry_csum(te, csum)) {
630 memdb_lock_expired(memdb, path, csum); // insert a new entry
631 }
632 }
633
634 ret = 0;
635
636 ret:
637 g_mutex_unlock (memdb->mutex);
638
639 if (dirname) g_free (dirname);
640 if (base) g_free (base);
641
642 return ret;
643}
644
645int
646memdb_read(
647 memdb_t *memdb,
648 const char *path,
649 gpointer *data_ret)
650{
651 g_return_val_if_fail(memdb != NULL, -EINVAL);
652 g_return_val_if_fail(path != NULL, -EINVAL);
653 g_return_val_if_fail(data_ret != NULL, -EINVAL);
654
655 memdb_tree_entry_t *te, *parent;
656
657 g_mutex_lock (memdb->mutex);
658
659 if ((te = memdb_lookup_path(memdb, path, &parent))) {
660 if (te->type == DT_REG) {
661 *data_ret = g_memdup(te->data.value, te->size);
662 guint32 size = te->size;
663 g_mutex_unlock (memdb->mutex);
664 return size;
665 }
666 }
667
668 g_mutex_unlock (memdb->mutex);
669
670 return -ENOENT;
671}
672
673static int
674memdb_pwrite(
675 memdb_t *memdb,
676 const char *path,
677 guint32 writer,
678 guint32 mtime,
679 gconstpointer data,
680 size_t count,
681 off_t offset,
682 gboolean truncate,
683 gboolean create)
684{
685 g_return_val_if_fail(memdb != NULL, -EINVAL);
686 g_return_val_if_fail(path != NULL, -EINVAL);
687 g_return_val_if_fail(count == 0 || data != NULL, -EINVAL);
688
689 int ret = -EACCES;
690
691 char *dirname = NULL;
692 char *base = NULL;
693 char *nodename = NULL;
694
695 g_mutex_lock (memdb->mutex);
696
697 if (memdb->errors) {
698 ret = -EIO;
699 goto ret;
700 }
701
702 if ((offset + count) > MEMDB_MAX_FILE_SIZE) {
703 ret = -EFBIG;
704 goto ret;
705 }
706
707 split_path(path, &dirname, &base);
708
709 memdb_tree_entry_t *parent, *unused;
710 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
711 ret = -ENOENT;
712 goto ret;
713 }
714 if (parent->type != DT_DIR) {
715 ret = -ENOTDIR;
716 goto ret;
717 }
718
719 /* do not allow '.' and '..' */
720 if ((base[0] == 0) ||
721 (base[0] == '.' && base[1] == 0) ||
722 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
723 ret = -EACCES;
724 goto ret;
725 }
726
727 guint32 vmid = 0;
728 int vmtype = 0;
729
730 if ((nodename = path_contain_vm_config(path, &vmtype, &vmid))) {
731 if (vmlist_different_vm_exists(vmtype, vmid, nodename)) {
732 ret = -EEXIST;
733 goto ret;
734 }
735 }
736
737 gpointer olddata = NULL;
738
739 memdb_tree_entry_t *te, *old;
740 if ((old = te = memdb_lookup_dir_entry(memdb, base, parent))) {
741 if (te->type != DT_REG) {
742 ret = -ENOTDIR;
743 goto ret;
744 }
745
746 if (create) {
747 ret = -EEXIST;
748 goto ret;
749 }
750
751 memdb->root->version++;
752 memdb->root->mtime = mtime;
753 memdb->root->writer = writer;
754
755 olddata = te->data.value;
756 } else {
757
758 if (!create) {
759 ret = -ENOENT;
760 goto ret;
761 }
762
763 memdb->root->version++;
764 memdb->root->mtime = mtime;
765 memdb->root->writer = writer;
766
767 te = memdb_tree_entry_new(base);
768 te->parent = parent->inode;
769 te->type = DT_REG;
770 te->inode = memdb->root->version;
771 }
772
773 te->version = memdb->root->version;
774 te->writer = writer;
775 te->mtime = mtime;
776
777 size_t newsize = offset + count;
778
779 gpointer newdata = NULL;
780
781 if (olddata) {
782
783 if (newsize > te->size) {
784 newdata = g_malloc0(newsize);
785 memcpy(newdata, olddata, te->size);
786
787 } else {
788
789 if (!truncate) {
790 newsize = te->size;
791 }
792 newdata = g_malloc0(newsize);
793 memcpy(newdata, olddata, newsize);
794 }
795
796 if (count && data)
797 memcpy(newdata + offset, data, count);
798
799 } else {
800
801 if (count && data) {
802 newdata = g_malloc0(newsize);
803 memcpy(newdata + offset, data, count);
804 }
805 }
806
807 te->size = newsize;
808 te->data.value = newdata;
809
810 g_free(olddata);
811
812 if (!old) {
813 g_hash_table_replace(parent->data.entries, te->name, te);
814 g_hash_table_replace(memdb->index, &te->inode, te);
815 }
816
817 record_memdb_change(path);
818
819 cfs_debug("memdb_pwrite %s %s %016zX %016zX", dirname, te->name, te->inode, te->version);
820
821 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
822 te->writer, te->mtime, te->size, te->type, te->name,
823 te->data.value, 0)) {
824 memdb->errors = 1;
825 ret = -EIO;
826 goto ret;
827 }
828
829 if (nodename)
830 vmlist_register_vm(vmtype, vmid, nodename);
831
832 ret = count;
833
834 ret:
835 g_mutex_unlock (memdb->mutex);
836
837 if (nodename) g_free (nodename);
838 if (dirname) g_free (dirname);
839 if (base) g_free (base);
840
841 return ret;
842}
843
844int
845memdb_mtime(
846 memdb_t *memdb,
847 const char *path,
848 guint32 writer,
849 guint32 mtime)
850{
851 g_return_val_if_fail(memdb != NULL, -EINVAL);
852 g_return_val_if_fail(path != NULL, -EINVAL);
853
854 int ret = -EACCES;
855
856 char *dirname = NULL;
857 char *base = NULL;
858
859 g_mutex_lock (memdb->mutex);
860
861 if (memdb->errors) {
862 ret = -EIO;
863 goto ret;
864 }
865
866 split_path(path, &dirname, &base);
867
868 memdb_tree_entry_t *parent, *unused;
869 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
870 ret = -ENOENT;
871 goto ret;
872 }
873 if (parent->type != DT_DIR) {
874 ret = -ENOTDIR;
875 goto ret;
876 }
877
878 /* do not allow '.' and '..' */
879 if ((base[0] == 0) ||
880 (base[0] == '.' && base[1] == 0) ||
881 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
882 ret = -EACCES;
883 goto ret;
884 }
885
886 memdb_tree_entry_t *te;
887 if (!(te = memdb_lookup_dir_entry(memdb, base, parent))) {
888 ret = -ENOENT;
889 goto ret;
890 }
891
892 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
893 * allow to change mtime for locks (only it mtime is newer).
894 * See README for details about locks.
895 */
896 if (mtime < te->mtime && te->type == DT_DIR &&
897 strcmp(dirname, "priv/lock") == 0) {
898 cfs_debug("dir is locked");
899 ret = -EACCES;
900 goto ret;
901 }
902
903
904 memdb->root->version++;
905 memdb->root->mtime = mtime;
906 memdb->root->writer = writer;
907
908 te->version = memdb->root->version;
909 te->writer = writer;
910 te->mtime = mtime;
911
912 record_memdb_change(path);
913
914 cfs_debug("memdb_mtime %s %s %016zX %016zX", dirname, te->name, te->inode, te->version);
915
916 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
917 te->writer, te->mtime, te->size, te->type, te->name,
918 te->data.value, 0)) {
919 memdb->errors = 1;
920 ret = -EIO;
921 goto ret;
922 }
923
924 ret = 0;
925
926 ret:
927 g_mutex_unlock (memdb->mutex);
928
929 if (dirname) g_free (dirname);
930 if (base) g_free (base);
931
932 return ret;
933}
934
935int
936memdb_create(
937 memdb_t *memdb,
938 const char *path,
939 guint32 writer,
940 guint32 mtime)
941{
942 return memdb_pwrite(memdb, path, writer, mtime, NULL, 0, 0, FALSE, TRUE);
943}
944
945int
946memdb_write(
947 memdb_t *memdb,
948 const char *path,
949 guint32 writer,
950 guint32 mtime,
951 gconstpointer data,
952 size_t count,
953 off_t offset,
954 gboolean truncate)
955{
956 return memdb_pwrite(memdb, path, writer, mtime, data, count, offset, truncate, FALSE);
957}
958
959memdb_tree_entry_t *
960memdb_getattr(
961 memdb_t *memdb,
962 const char *path)
963{
964 memdb_tree_entry_t *te, *parent;
965
966 g_mutex_lock (memdb->mutex);
967
968 if ((te = memdb_lookup_path(memdb, path, &parent))) {
969
970 memdb_tree_entry_t *cpy = memdb_tree_entry_copy(te, 0);
971
972 g_mutex_unlock (memdb->mutex);
973
974 return cpy;
975 }
976
977 g_mutex_unlock (memdb->mutex);
978
979 return NULL;
980}
981
982GList *
983memdb_readdir(
984 memdb_t *memdb,
985 const char *path)
986{
987 g_return_val_if_fail(memdb != NULL, NULL);
988 g_return_val_if_fail(path != NULL, NULL);
989 g_assert(memdb->mutex != NULL);
990
991 memdb_tree_entry_t *te, *parent;
992
993 GList *list = NULL;
994
995 g_mutex_lock (memdb->mutex);
996
997 if (!(te = memdb_lookup_path(memdb, path, &parent)))
998 goto ret;
999
1000 if (te->type != DT_DIR)
1001 goto ret;
1002
1003 GHashTable *ht = te->data.entries;
1004
1005 GHashTableIter iter;
1006 gpointer key, value;
1007
1008 g_hash_table_iter_init (&iter, ht);
1009
1010 while (g_hash_table_iter_next (&iter, &key, &value)) {
1011
1012 te = (memdb_tree_entry_t *)value;
1013
1014 memdb_tree_entry_t *cpy = memdb_tree_entry_copy(te, 0);
1015
1016 list = g_list_append(list, cpy);
1017 }
1018
1019 ret:
1020 g_mutex_unlock (memdb->mutex);
1021
1022 return list;
1023}
1024
1025void
1026memdb_dirlist_free(GList *dirlist)
1027{
1028 GList *l = dirlist;
1029
1030 while (l) {
1031 if (l->data)
1032 g_free (l->data);
1033
1034 l = g_list_next(l);
1035 }
1036
1037 if (dirlist)
1038 g_list_free(dirlist);
1039}
1040
1041static int
1042unlink_tree_entry(
1043 memdb_t *memdb,
1044 memdb_tree_entry_t *parent,
1045 memdb_tree_entry_t *te)
1046{
1047 g_return_val_if_fail(parent != NULL, -EACCES);
1048 g_return_val_if_fail(parent->inode == te->parent, -EACCES);
1049
1050 if (te->type == DT_DIR)
1051 if (g_hash_table_size(te->data.entries))
1052 return -ENOTEMPTY;
1053
1054 if (!g_hash_table_steal(parent->data.entries, te->name)) {
1055 cfs_critical("internal error - can't delete entry");
1056 memdb->errors = 1;
1057 return -EIO;
1058 }
1059
1060 if (!g_hash_table_steal(memdb->index, &te->inode)) {
1061 cfs_critical("internal error - can't delete entry");
1062 memdb->errors = 1;
1063 return -EIO;
1064 }
1065
1066 return 0;
1067}
1068
1069int
1070memdb_rename(
1071 memdb_t *memdb,
1072 const char *from,
1073 const char *to,
1074 guint32 writer,
1075 guint32 mtime)
1076{
1077 int ret = -EACCES;
1078
1079 char *nodename = NULL;
1080 char *dirname = NULL;
1081 char *base = NULL;
1082
1083 g_mutex_lock (memdb->mutex);
1084
1085 if (memdb->errors) {
1086 ret = -EIO;
1087 goto ret;
1088 }
1089
1090 memdb_tree_entry_t *from_te, *from_parent;
1091 memdb_tree_entry_t *to_te, *to_parent;
1092 memdb_tree_entry_t *target_te, *target_parent;
1093
1094 guint64 delete_inode = 0;
1095
1096 if (!(from_te = memdb_lookup_path(memdb, from, &from_parent))) {
1097 ret = -ENOENT;
1098 goto ret;
1099 }
1100
1101 if (!from_parent) { /* can't rename root */
1102 ret = -EACCES;
1103 goto ret;
1104 }
1105
1106 guint32 vmid = 0;
1107 int vmtype = 0;
1108 guint32 from_vmid = 0;
1109
1110 if (from_te->type == DT_REG && (nodename = path_contain_vm_config(to, &vmtype, &vmid))) {
1111 if (vmlist_vm_exists(vmid)) {
1112 int from_vmtype = 0;
9bb50abd 1113 char *from_node = path_contain_vm_config(from, &from_vmtype, &from_vmid);
fe000966
DM
1114 if (from_node) {
1115 g_free(from_node);
1116 if (!(vmid == from_vmid)) {
1117 ret = -EEXIST;
1118 goto ret;
1119 }
1120
1121 }
1122 }
1123 }
1124
1125 /* we do not allow rename for locks */
1126 if (from_te->type == DT_DIR && path_is_lockdir(from)) {
1127 ret = -EACCES;
1128 goto ret;
1129 }
1130
1131 if ((to_te = memdb_lookup_path(memdb, to, &to_parent))) {
1132
1133 if ((ret = unlink_tree_entry(memdb, to_parent, to_te)) != 0)
1134 goto ret;
1135
1136 base = strdup(to_te->name);
1137
1138 delete_inode = to_te->inode;
1139
1140 target_te = to_parent;
1141
1142 memdb_tree_entry_free(to_te);
1143
1144 } else {
1145
1146 split_path(to, &dirname, &base);
1147
1148 if (!(target_te = memdb_lookup_path(memdb, dirname, &target_parent))) {
1149 ret = -ENOENT;
1150 goto ret;
1151 }
1152
1153 if (target_te->type != DT_DIR) {
1154 ret = -ENOTDIR;
1155 goto ret;
1156 }
1157 }
1158
1159 record_memdb_change(from);
1160 record_memdb_change(to);
1161
1162 /* NOTE: unlink_tree_entry() make sure that we can only
1163 rename emtpy directories */
1164
1165 if ((ret = unlink_tree_entry(memdb, from_parent, from_te)) != 0)
1166 goto ret;
1167
1168 memdb->root->version++;
1169 memdb->root->mtime = mtime;
1170 memdb->root->writer = writer;
1171
1172 memdb_tree_entry_t *new = memdb_tree_entry_new(base);
1173 new->parent = target_te->inode;
1174 new->inode = from_te->inode;
1175 new->version = memdb->root->version;
1176 new->writer = writer;
1177 new->mtime = mtime;
1178 new->size = from_te->size;
1179 new->type = from_te->type;
1180 new->data = from_te->data;
1181
1182 g_free(from_te);
1183
1184 g_hash_table_replace(target_te->data.entries, new->name, new);
1185 g_hash_table_replace(memdb->index, &new->inode, new);
1186
1187 if (bdb_backend_write(memdb->bdb, new->inode, new->parent,
1188 new->version, new->writer, new->mtime,
1189 new->size, new->type, new->name,
1190 new->data.value, delete_inode)) {
1191 memdb->errors = 1;
1192 ret = -EIO;
1193 goto ret;
1194 }
1195
1196 if (new->type == DT_REG) {
1197
1198 if (from_vmid)
1199 vmlist_delete_vm(from_vmid);
1200
1201 if (nodename)
1202 vmlist_register_vm(vmtype, vmid, nodename);
1203
1204 } else if (new->type == DT_DIR) {
1205 /* directories are alwayse empty (see unlink_tree_entry) */
1206 }
1207
1208 ret = 0;
1209
1210 ret:
1211 g_mutex_unlock (memdb->mutex);
1212
1213 if (nodename) g_free (nodename);
1214 if (dirname) g_free (dirname);
1215 if (base) g_free (base);
1216
1217 return ret;
1218}
1219
1220int
1221memdb_delete(
1222 memdb_t *memdb,
1223 const char *path,
1224 guint32 writer,
1225 guint32 mtime)
1226{
1227 memdb_tree_entry_t *te, *parent;
1228
1229 g_mutex_lock (memdb->mutex);
1230
1231 int ret = -EACCES;
1232
1233 if (memdb->errors) {
1234 ret = -EIO;
1235 goto ret;
1236 }
1237
1238 if (!(te = memdb_lookup_path(memdb, path, &parent))) {
1239 ret = -ENOENT;
1240 goto ret;
1241 }
1242
1243 if (!parent) { /* cant remove root */
1244 ret = -EACCES;
1245 goto ret;
1246 }
1247
1248 if (te->type == DT_DIR) {
1249 if (g_hash_table_size(te->data.entries)) {
1250 ret = -ENOTEMPTY;
1251 goto ret;
1252 }
1253
1254 g_hash_table_remove(memdb->locks, path);
1255 }
1256
1257 record_memdb_change(path);
1258
1259 if ((ret = unlink_tree_entry(memdb, parent, te)) != 0)
1260 goto ret;
1261
1262 memdb->root->version++;
1263 memdb->root->mtime = mtime;
1264 memdb->root->writer = writer;
1265
1266 if (bdb_backend_write(memdb->bdb, 0, 0, memdb->root->version, writer, mtime, 0,
1267 DT_REG, NULL, NULL, te->inode)) {
1268 memdb->errors = 1;
1269 memdb_tree_entry_free(te);
1270 ret = -EIO;
1271 goto ret;
1272 }
1273
1274 memdb_tree_entry_free(te);
1275
1276 int vmtype = 0;
1277 guint32 vmid = 0;
1278 char *nodename;
1279 if ((nodename = path_contain_vm_config(path, &vmtype, &vmid))) {
1280 g_free(nodename);
1281 vmlist_delete_vm(vmid);
1282 }
1283
1284 ret = 0;
1285
1286 ret:
1287 g_mutex_unlock (memdb->mutex);
1288
1289 return ret;
1290}
1291
1292int
1293memdb_statfs(
1294 memdb_t *memdb,
1295 struct statvfs *stbuf)
1296{
1297 g_return_val_if_fail(memdb != NULL, -EINVAL);
1298 g_return_val_if_fail(stbuf != NULL, -EINVAL);
1299
1300 g_mutex_lock (memdb->mutex);
1301
1302 GHashTableIter iter;
1303 gpointer key, value;
1304
1305 size_t size = 0;
1306 size_t files = 0;
1307
1308 g_hash_table_iter_init (&iter, memdb->index);
1309
1310 while (g_hash_table_iter_next (&iter, &key, &value)) {
1311 memdb_tree_entry_t *te = (memdb_tree_entry_t *)value;
1312 files++;
1313 size += te->size;
1314 }
1315
1316 g_mutex_unlock (memdb->mutex);
1317
1318 stbuf->f_bsize = MEMDB_BLOCKSIZE;
1319 stbuf->f_blocks = MEMDB_BLOCKS;
1320 stbuf->f_bfree = stbuf->f_bavail = stbuf->f_blocks -
1321 ((size + stbuf->f_bsize - 1)/stbuf->f_bsize);
1322 stbuf->f_files = MEMDB_MAX_INODES;
1323 stbuf->f_ffree = stbuf->f_files - files;
1324
1325 stbuf->f_namemax = 256;
1326
1327 return 0;
1328}
1329
1330void
1331tree_entry_debug(memdb_tree_entry_t *te)
1332{
1333 g_return_if_fail(te != NULL);
1334
1335 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1336
1337 cfs_debug("%016zX %c %016zX %016zX %08X %08X %08X %s\n",
1338 te->inode, te->type == DT_DIR ? 'D' : 'R', te->parent, te->version,
1339 te->writer, te->mtime, te->size, te->name);
1340}
1341
1342void
1343tree_entry_print(memdb_tree_entry_t *te)
1344{
1345 g_return_if_fail(te != NULL);
1346
1347 g_print("%016zX %c %016zX %016zX %08X %08X %08X %s\n",
1348 te->inode, te->type == DT_DIR ? 'D' : 'R', te->parent, te->version,
1349 te->writer, te->mtime, te->size, te->name);
1350}
1351
1352void
1353memdb_dump(memdb_t *memdb)
1354{
1355 g_return_if_fail(memdb != NULL);
1356
1357 g_mutex_lock (memdb->mutex);
1358
1359 GList *list = g_hash_table_get_values(memdb->index);
1360
1361 list = g_list_sort(list, memdb_tree_compare);
1362
1363 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1364 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1365
1366 GList *l = list;
1367 while (l) {
1368 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1369
1370 tree_entry_print(te);
1371
1372 l = g_list_next(l);
1373 }
1374
1375 g_list_free(list);
1376
1377 g_mutex_unlock (memdb->mutex);
1378}
1379
1380void
1381memdb_dump_index (memdb_index_t *idx)
1382{
1383 g_return_if_fail(idx != NULL);
1384
1385 g_print ("INDEX DUMP %016zX\n", idx->version);
1386
1387 int i;
1388 for (i = 0; i < idx->size; i++) {
1389 g_print ("%016zX %016zX%016zX%016zX%016zX\n", idx->entries[i].inode,
1390 *((guint64 *)idx->entries[i].digest),
1391 *((guint64 *)(idx->entries[i].digest + 8)),
1392 *((guint64 *)(idx->entries[i].digest + 16)),
1393 *((guint64 *)(idx->entries[i].digest + 24)));
1394 }
1395}
1396
1397memdb_index_t *
1398memdb_index_copy(memdb_index_t *idx)
1399{
1400 g_return_val_if_fail(idx != NULL, NULL);
1401
1402 int bytes = sizeof(memdb_index_t) + idx->size*sizeof(memdb_index_extry_t);
1403 if (idx->bytes != bytes) {
1404 cfs_critical("memdb index contains wrong number of bytes");
1405 return NULL;
1406 }
1407
1408 memdb_index_t *copy = (memdb_index_t *)g_memdup(idx, bytes);
1409
1410 return copy;
1411}
1412
1413gboolean
1414memdb_tree_entry_csum(
1415 memdb_tree_entry_t *te,
1416 guchar csum[32])
1417{
1418 g_return_val_if_fail(te != NULL, FALSE);
1419 g_return_val_if_fail(csum != NULL, FALSE);
1420
1421 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1422
1423 g_checksum_update(sha256, (unsigned char*)&te->inode, sizeof(te->inode));
1424 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1425 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1426 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1427 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1428 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1429 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1430 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1431
1432 if (te->type == DT_REG && te->size)
1433 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1434
1435 size_t csum_len = 32;
1436 g_checksum_get_digest(sha256, csum, &csum_len);
1437 g_checksum_free(sha256);
1438
1439 return TRUE;
1440}
1441
1442gboolean
1443memdb_compute_checksum(
1444 GHashTable *index,
1445 memdb_tree_entry_t *root,
1446 guchar *csum,
1447 size_t csum_len)
1448{
1449 g_return_val_if_fail(index != NULL, FALSE);
1450 g_return_val_if_fail(root != NULL, FALSE);
1451
1452 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1453
1454 GList *list = g_hash_table_get_values(index);
1455
1456 list = g_list_sort(list, memdb_tree_compare);
1457
1458 GList *l = list;
1459 while (l) {
1460 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1461
1462 g_checksum_update(sha256, (unsigned char*)&te->inode, sizeof(te->inode));
1463 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1464 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1465 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1466 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1467 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1468 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1469 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1470
1471 if (te->type == DT_REG && te->size)
1472 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1473
1474 l = g_list_next(l);
1475 }
1476
1477 g_list_free(list);
1478
1479 g_checksum_get_digest(sha256, csum, &csum_len);
1480
1481 cfs_debug("checksum: %s", g_checksum_get_string(sha256));
1482
1483 g_checksum_free(sha256);
1484
1485 return TRUE;
1486}
1487
1488memdb_index_t *
1489memdb_encode_index(
1490 GHashTable *index,
1491 memdb_tree_entry_t *root)
1492{
1493 g_return_val_if_fail(index != NULL, NULL);
1494 g_return_val_if_fail(root != NULL, NULL);
1495
1496 memdb_index_t *idx = NULL;
1497
1498 int count = g_hash_table_size(index);
1499 if (!count) {
1500 cfs_critical("memdb index has no entires");
1501 return NULL;
1502 }
1503
1504 int bytes = sizeof(memdb_index_t) + count*sizeof(memdb_index_extry_t);
1505 idx = g_malloc0(bytes);
1506
1507 idx->size = count;
1508 idx->bytes = bytes;
1509 idx->version = root->version;
1510 idx->mtime = root->mtime;
1511 idx->writer = root->writer;
1512
1513 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1514
1515 GList *list = g_hash_table_get_values(index);
1516
1517 list = g_list_sort(list, memdb_tree_compare);
1518
1519 int ind = 0;
1520 GList *l = list;
1521 while (l) {
1522 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1523
1524 if (te->inode > idx->last_inode)
1525 idx->last_inode = te->inode;
1526
1527 idx->entries[ind].inode = te->inode;
1528
1529 g_checksum_reset (sha256);
1530
1531 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1532 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1533 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1534 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1535 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1536 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1537 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1538
1539 if (te->type == DT_REG && te->size)
1540 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1541
1542 gsize len = 32;
1543 g_checksum_get_digest(sha256, (guint8 *)idx->entries[ind].digest, &len);
1544
1545 ind++;
1546
1547 l = g_list_next(l);
1548 }
1549
1550 g_list_free(list);
1551
1552 g_checksum_free(sha256);
1553
1554 return idx;
1555}