]> git.proxmox.com Git - pve-cluster.git/blob - data/src/memdb.c
use unsigned long for strtoul result
[pve-cluster.git] / data / src / memdb.c
1 /*
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
13
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 Author: Dietmar Maurer <dietmar@proxmox.com>
18
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif /* HAVE_CONFIG_H */
24
25 #include <stdio.h>
26 #include <inttypes.h>
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <fcntl.h>
31 #include <sys/file.h>
32 #include <unistd.h>
33 #include <dirent.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <glib.h>
37
38 #include "cfs-utils.h"
39 #include "memdb.h"
40 #include "status.h"
41
42 #define CFS_LOCK_TIMEOUT (60*2)
43
44 memdb_tree_entry_t *
45 memdb_tree_entry_new(
46 const char *name)
47 {
48 g_return_val_if_fail(name != NULL, NULL);
49
50 memdb_tree_entry_t *te = g_malloc0(sizeof(memdb_tree_entry_t) + strlen(name) + 1);
51 g_return_val_if_fail(te != NULL, NULL);
52
53 strcpy(te->name, name);
54
55 return te;
56 }
57
58 memdb_tree_entry_t *
59 memdb_tree_entry_copy(
60 memdb_tree_entry_t *te,
61 gboolean with_data)
62 {
63 g_return_val_if_fail(te != NULL, NULL);
64
65 memdb_tree_entry_t *cpy = memdb_tree_entry_new(te->name);
66
67 cpy->parent = te->parent;
68 cpy->inode = te->inode;
69 cpy->version = te->version;
70 cpy->writer = te->writer;
71 cpy->mtime = te->mtime;
72 cpy->type = te->type;
73 cpy->size = te->size;
74
75 if (with_data && te->size && te->type == DT_REG) {
76 cpy->data.value = g_memdup(te->data.value, te->size);
77 } else {
78 cpy->data.value = NULL;
79 }
80
81 return cpy;
82 }
83
84 void
85 memdb_tree_entry_free(
86 memdb_tree_entry_t *te)
87 {
88 g_return_if_fail(te != NULL);
89
90 if (te->type == DT_REG) {
91 if (te->data.value)
92 g_free(te->data.value);
93 }
94
95 if (te->type == DT_DIR) {
96 if (te->data.entries)
97 g_hash_table_destroy(te->data.entries);
98 }
99
100 g_free(te);
101 }
102
103 void
104 memdb_lock_info_free(memdb_lock_info_t *li)
105 {
106 g_return_if_fail(li != NULL);
107
108 if (li->path)
109 g_free(li->path);
110
111 g_free(li);
112 }
113
114 static gint
115 memdb_tree_compare(
116 gconstpointer v1,
117 gconstpointer v2)
118 {
119 guint64 a = ((const memdb_tree_entry_t *)v1)->inode;
120 guint64 b = ((const memdb_tree_entry_t *)v2)->inode;
121
122 if (a == b)
123 return 0;
124
125 if (a > b)
126 return 1;
127
128 return -1;
129 }
130
/*
 * Split 'path' into its directory part and its last component.
 *
 * Trailing '/' characters are stripped first. Both results are newly
 * allocated strings stored in *dirname and *basename; the callers in this
 * file release them with g_free().
 */
static void
split_path(
	const char *path,
	char **dirname,
	char **basename)
{
	char *trimmed = g_strdup (path);

	/* cut off every trailing slash (a path of only slashes becomes "") */
	for (int last = strlen (trimmed) - 1; last >= 0 && trimmed[last] == '/'; last--)
		trimmed[last] = 0;

	char *dir_part = g_path_get_dirname (trimmed);
	char *name_part = g_path_get_basename (trimmed);

	g_free (trimmed);

	*dirname = dir_part;
	*basename = name_part;
}
149
150 static memdb_tree_entry_t *
151 memdb_lookup_dir_entry(
152 memdb_t *memdb,
153 const char *name,
154 memdb_tree_entry_t *parent)
155 {
156
157 g_return_val_if_fail(memdb != NULL, NULL);
158 g_return_val_if_fail(name != NULL, NULL);
159 g_return_val_if_fail(parent != NULL, NULL);
160 g_return_val_if_fail(parent->type == DT_DIR, NULL);
161
162 GHashTable *ht = parent->data.entries;
163
164 g_return_val_if_fail(ht != NULL, NULL);
165
166 return g_hash_table_lookup(ht, name);
167 }
168
169 static memdb_tree_entry_t *
170 memdb_lookup_path(
171 memdb_t *memdb,
172 const char *path,
173 memdb_tree_entry_t **parent)
174 {
175 g_return_val_if_fail(memdb != NULL, NULL);
176 g_return_val_if_fail(path != NULL, NULL);
177 g_return_val_if_fail(parent != NULL, NULL);
178
179 memdb_tree_entry_t *cdir = memdb->root;
180 *parent = NULL;
181
182 if (path[0] == 0 || ((path[0] == '.' || path[0] == '/') && path[1] == 0))
183 return cdir;
184
185 gchar **set = g_strsplit_set(path, "/", 0);
186
187 int i = 0;
188 char *name;
189
190 while ((name = set[i++])) {
191
192 if (name[0] == 0) continue;
193
194 *parent = cdir;
195 if ((cdir = memdb_lookup_dir_entry(memdb, name, cdir)) == NULL)
196 break;
197 }
198
199 g_strfreev(set);
200
201 return cdir;
202 }
203
204
205 static gboolean
206 name_is_vm_config(
207 const char *name,
208 guint32 *vmid_ret)
209 {
210 if (!name || name[0] < '1' || name[0] > '9')
211 return FALSE;
212
213 char *end = NULL;
214
215 errno = 0; /* see man strtoul */
216
217 unsigned long int vmid = strtoul(name, &end, 10);
218
219 if (!end || end[0] != '.' || end[1] != 'c'|| end[2] != 'o' || end[3] != 'n' ||
220 end[4] != 'f' || end[5] != 0 || errno != 0 || vmid > G_MAXUINT32)
221 return FALSE;
222
223 if (vmid_ret)
224 *vmid_ret = (guint32)vmid;
225
226 return TRUE;
227 }
228
229 static gboolean
230 valid_nodename(
231 const char *nodename)
232 {
233 g_return_val_if_fail(nodename != NULL, FALSE);
234
235 /* LDH rule (letters, digits, hyphen) */
236
237 int len = strlen(nodename);
238
239 if (len < 1) {
240 return FALSE;
241 }
242
243 for (int i = 0; i < len; i ++) {
244 char c = nodename[i];
245 if ((c >= 'A' && c <= 'Z') ||
246 (c >= 'a' && c <= 'z') ||
247 (c >= '0' && c <= '9') ||
248 (i != 0 && i != (len-1) && c == '-'))
249 continue;
250 return FALSE;
251 }
252
253 return TRUE;
254 }
255
256 static char*
257 dir_contain_vm_config(
258 const char *dirname,
259 int *vmtype_ret)
260 {
261 if (!dirname)
262 return NULL;
263
264 if (strncmp(dirname, "nodes/", 6) != 0)
265 return NULL;
266
267 dirname += 6;
268
269 char *nodename = NULL;
270
271 char **sa = g_strsplit(dirname, "/", 2);
272 if (sa[0] && sa[1] && valid_nodename(sa[0])) {
273 if (strcmp(sa[1], "qemu-server") == 0) {
274 *vmtype_ret = VMTYPE_QEMU;
275 nodename = g_strdup(sa[0]);
276 } else if (strcmp(sa[1], "openvz") == 0) {
277 *vmtype_ret = VMTYPE_OPENVZ;
278 nodename = g_strdup(sa[0]);
279 } else if (strcmp(sa[1], "lxc") == 0) {
280 *vmtype_ret = VMTYPE_LXC;
281 nodename = g_strdup(sa[0]);
282 }
283 }
284
285 g_strfreev(sa);
286
287 return nodename;
288 }
289
290 static char *
291 path_contain_vm_config(
292 const char *path,
293 int *vmtype_ret,
294 guint32 *vmid_ret)
295 {
296 if (!path)
297 return NULL;
298
299 char *dirname = NULL;
300 char *base = NULL;
301 char *nodename = NULL;
302
303 split_path(path, &dirname, &base);
304
305 if (name_is_vm_config(base, vmid_ret))
306 nodename = dir_contain_vm_config(dirname, vmtype_ret);
307
308 g_free (dirname);
309 g_free (base);
310
311 return nodename;
312 }
313
314 static gboolean
315 vmlist_add_dir(
316 memdb_t *memdb,
317 GHashTable *vmlist,
318 const char *nodename,
319 const int vmtype,
320 memdb_tree_entry_t *subdir)
321 {
322 g_return_val_if_fail(memdb != NULL, FALSE);
323 g_return_val_if_fail(vmlist != NULL, FALSE);
324 g_return_val_if_fail(subdir != NULL, FALSE);
325 g_return_val_if_fail(subdir->type == DT_DIR, FALSE);
326 g_return_val_if_fail(subdir->data.entries != NULL, FALSE);
327
328 gboolean ret = TRUE;
329
330 GHashTable *ht = subdir->data.entries;
331 GHashTableIter iter;
332 gpointer key, value;
333
334 g_hash_table_iter_init (&iter, ht);
335
336 while (g_hash_table_iter_next (&iter, &key, &value)) {
337
338 memdb_tree_entry_t *node_te = (memdb_tree_entry_t *)value;
339
340 if (node_te->type != DT_REG)
341 continue;
342
343 guint32 vmid = 0;
344 if (!name_is_vm_config(node_te->name, &vmid))
345 continue;
346
347 if (!vmlist_hash_insert_vm(vmlist, vmtype, vmid, nodename, FALSE))
348 ret = FALSE;
349 }
350
351 return ret;
352 }
353
354
355 gboolean
356 memdb_lock_expired(
357 memdb_t *memdb,
358 const char *path,
359 const guchar csum[32])
360 {
361 g_return_val_if_fail(memdb != NULL, FALSE);
362 g_return_val_if_fail(memdb->locks != NULL, FALSE);
363 g_return_val_if_fail(path != NULL, FALSE);
364 g_return_val_if_fail(csum != NULL, FALSE);
365
366 memdb_lock_info_t *li;
367 uint32_t ctime = time(NULL);
368
369 if ((li = g_hash_table_lookup(memdb->locks, path))) {
370 if (memcmp(csum, li->csum, 32) != 0) {
371 li->ltime = ctime;
372 memcpy(li->csum, csum, 32);
373 g_critical("wrong lock csum - reset timeout");
374 return FALSE;
375 }
376 if ((ctime > li->ltime) && ((ctime - li->ltime) > CFS_LOCK_TIMEOUT))
377 return TRUE;
378 } else {
379 li = g_new0(memdb_lock_info_t, 1);
380 li->path = g_strdup(path);
381 li->ltime = ctime;
382 memcpy(li->csum, csum, 32);
383 g_hash_table_replace(memdb->locks, li->path, li);
384 }
385
386 return FALSE;
387 }
388
389 void
390 memdb_update_locks(memdb_t *memdb)
391 {
392 g_return_if_fail(memdb != NULL);
393 g_return_if_fail(memdb->locks != NULL);
394
395 memdb_tree_entry_t *te, *parent;
396
397 if (!(te = memdb_lookup_path(memdb, "priv/lock", &parent)))
398 return;
399
400 if (te->type != DT_DIR)
401 return;
402
403
404 GHashTable *old = memdb->locks;
405 memdb->locks = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
406 (GDestroyNotify)memdb_lock_info_free);
407 GHashTableIter iter;
408 GHashTable *ht = te->data.entries;
409
410 gpointer key, value;
411
412 g_hash_table_iter_init (&iter, ht);
413 while (g_hash_table_iter_next (&iter, &key, &value)) {
414
415 memdb_tree_entry_t *lock_te = (memdb_tree_entry_t *)value;
416 if (lock_te->type != DT_DIR)
417 continue;
418
419 memdb_lock_info_t *li;
420 li = g_new0(memdb_lock_info_t, 1);
421 li->path = g_strdup_printf("priv/lock/%s", lock_te->name);
422
423 guchar csum[32];
424 if (memdb_tree_entry_csum(lock_te, csum)) {
425 memcpy(li->csum, csum, 32);
426 memdb_lock_info_t *oldli;
427 if ((oldli = g_hash_table_lookup(memdb->locks, lock_te->name)) &&
428 (memcmp(csum, oldli->csum, 32) == 0)) {
429 li->ltime = oldli->ltime;
430 } else {
431 li->ltime = time(NULL);
432 }
433 g_hash_table_insert(memdb->locks, li->path, li);
434 } else {
435 memdb_lock_info_free(li);
436 }
437 }
438
439 if (old)
440 g_hash_table_destroy(old);
441
442 }
443
444 gboolean
445 memdb_recreate_vmlist(
446 memdb_t *memdb)
447 {
448 g_return_val_if_fail(memdb != NULL, FALSE);
449
450 memdb_tree_entry_t *te, *parent;
451
452 if (!(te = memdb_lookup_path(memdb, "nodes", &parent)))
453 return TRUE;
454
455 if (te->type != DT_DIR)
456 return TRUE;
457
458 GHashTable *vmlist = vmlist_hash_new();
459
460 GHashTable *ht = te->data.entries;
461
462 gboolean ret = TRUE;
463
464 GHashTableIter iter;
465 gpointer key, value;
466
467 g_hash_table_iter_init (&iter, ht);
468
469 while (g_hash_table_iter_next (&iter, &key, &value)) {
470
471 memdb_tree_entry_t *node_te = (memdb_tree_entry_t *)value;
472 if (node_te->type != DT_DIR)
473 continue;
474
475 if (!valid_nodename(node_te->name))
476 continue;
477
478 if ((te = g_hash_table_lookup(node_te->data.entries, "qemu-server"))) {
479 if (!vmlist_add_dir(memdb, vmlist, node_te->name, VMTYPE_QEMU, te))
480 ret = FALSE;
481 }
482 if ((te = g_hash_table_lookup(node_te->data.entries, "openvz"))) {
483 if (!vmlist_add_dir(memdb, vmlist, node_te->name, VMTYPE_OPENVZ, te))
484 ret = FALSE;
485 }
486 if ((te = g_hash_table_lookup(node_te->data.entries, "lxc"))) {
487 if (!vmlist_add_dir(memdb, vmlist, node_te->name, VMTYPE_LXC, te))
488 ret = FALSE;
489 }
490 }
491
492 /* always update list - even if we detected duplicates */
493 cfs_status_set_vmlist(vmlist);
494
495 return ret;
496 }
497
498 memdb_t *
499 memdb_open(const char *dbfilename)
500 {
501 memdb_t *memdb = g_new0(memdb_t, 1);
502
503 g_mutex_init(&memdb->mutex);
504
505 memdb->dbfilename = g_strdup(dbfilename);
506
507 memdb->root = memdb_tree_entry_new("");
508 memdb->root->data.entries = g_hash_table_new(g_str_hash, g_str_equal);
509 memdb->root->type = DT_DIR;
510
511 memdb->index = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL,
512 (GDestroyNotify)memdb_tree_entry_free);
513
514 g_hash_table_replace(memdb->index, &memdb->root->inode, memdb->root);
515
516 memdb->locks = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
517 (GDestroyNotify)memdb_lock_info_free);
518
519 if (!(memdb->bdb = bdb_backend_open(dbfilename, memdb->root, memdb->index))) {
520 memdb_close(memdb);
521 return NULL;
522 }
523
524 record_memdb_reload();
525
526 if (!memdb_recreate_vmlist(memdb)) {
527 memdb_close(memdb);
528 return NULL;
529 }
530
531 memdb_update_locks(memdb);
532
533 cfs_debug("memdb open '%s' successful (version = %016" PRIX64 ")",
534 dbfilename, memdb->root->version);
535
536 return memdb;
537 }
538
539 void
540 memdb_close(memdb_t *memdb)
541 {
542 g_return_if_fail(memdb != NULL);
543
544 g_mutex_lock (&memdb->mutex);
545
546 if (memdb->bdb)
547 bdb_backend_close(memdb->bdb);
548
549 if (memdb->index)
550 g_hash_table_destroy(memdb->index);
551
552 if (memdb->locks)
553 g_hash_table_destroy(memdb->locks);
554
555 if (memdb->dbfilename)
556 g_free(memdb->dbfilename);
557
558 memdb->index = NULL;
559 memdb->bdb = NULL;
560 memdb->dbfilename = NULL;
561
562 g_mutex_unlock (&memdb->mutex);
563
564 g_mutex_clear (&memdb->mutex);
565
566 g_free(memdb);
567 }
568
569 int memdb_mkdir(
570 memdb_t *memdb,
571 const char *path,
572 guint32 writer,
573 guint32 mtime)
574 {
575 g_return_val_if_fail(memdb != NULL, -EINVAL);
576 g_return_val_if_fail(path != NULL, -EINVAL);
577
578 int ret = -EACCES;
579
580 char *dirname = NULL;
581 char *base = NULL;
582
583 g_mutex_lock (&memdb->mutex);
584
585 if (memdb->errors) {
586 ret = -EIO;
587 goto ret;
588 }
589
590 split_path(path, &dirname, &base);
591
592 memdb_tree_entry_t *parent, *unused;
593
594 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
595 ret = -ENOENT;
596 goto ret;
597 }
598
599 if (parent->type != DT_DIR) {
600 ret = -ENOTDIR;
601 goto ret;
602 }
603
604 /* do not allow '.' and '..' */
605 if ((base[0] == 0) ||
606 (base[0] == '.' && base[1] == 0) ||
607 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
608 ret = -EACCES;
609 goto ret;
610 }
611
612 memdb_tree_entry_t *te;
613 if ((te = memdb_lookup_dir_entry(memdb, base, parent))) {
614 ret = -EEXIST;
615 goto ret;
616 }
617
618 memdb->root->version++;
619 memdb->root->mtime = mtime;
620 memdb->root->writer = writer;
621
622 te = memdb_tree_entry_new(base);
623 te->parent = parent->inode;
624 te->data.entries = g_hash_table_new(g_str_hash, g_str_equal);
625 te->inode = te->version = memdb->root->version;
626 te->writer = writer;
627 te->type = DT_DIR;
628 te->mtime = mtime;
629
630 g_hash_table_replace(parent->data.entries, te->name, te);
631 g_hash_table_replace(memdb->index, &te->inode, te);
632
633 cfs_debug("memdb_mkdir %s %s %016" PRIX64, dirname, base, memdb->root->version);
634
635 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
636 te->writer, te->mtime, 0, DT_DIR, te->name, NULL, 0)) {
637 memdb->errors = 1;
638 ret = -EIO;
639 goto ret;
640 }
641
642 if (strcmp(dirname, "priv/lock") == 0) {
643 g_hash_table_remove(memdb->locks, path);
644 guchar csum[32];
645 if (memdb_tree_entry_csum(te, csum)) {
646 memdb_lock_expired(memdb, path, csum); // insert a new entry
647 }
648 }
649
650 ret = 0;
651
652 ret:
653 g_mutex_unlock (&memdb->mutex);
654
655 g_free (dirname);
656 g_free (base);
657
658 return ret;
659 }
660
661 int
662 memdb_read(
663 memdb_t *memdb,
664 const char *path,
665 gpointer *data_ret)
666 {
667 g_return_val_if_fail(memdb != NULL, -EINVAL);
668 g_return_val_if_fail(path != NULL, -EINVAL);
669 g_return_val_if_fail(data_ret != NULL, -EINVAL);
670
671 memdb_tree_entry_t *te, *parent;
672
673 g_mutex_lock (&memdb->mutex);
674
675 if ((te = memdb_lookup_path(memdb, path, &parent))) {
676 if (te->type == DT_REG) {
677 *data_ret = g_memdup(te->data.value, te->size);
678 guint32 size = te->size;
679 g_mutex_unlock (&memdb->mutex);
680 return size;
681 }
682 }
683
684 g_mutex_unlock (&memdb->mutex);
685
686 return -ENOENT;
687 }
688
689 static int
690 memdb_pwrite(
691 memdb_t *memdb,
692 const char *path,
693 guint32 writer,
694 guint32 mtime,
695 gconstpointer data,
696 size_t count,
697 off_t offset,
698 gboolean truncate,
699 gboolean create)
700 {
701 g_return_val_if_fail(memdb != NULL, -EINVAL);
702 g_return_val_if_fail(path != NULL, -EINVAL);
703 g_return_val_if_fail(count == 0 || data != NULL, -EINVAL);
704
705 int ret = -EACCES;
706
707 char *dirname = NULL;
708 char *base = NULL;
709 char *nodename = NULL;
710
711 g_mutex_lock (&memdb->mutex);
712
713 if (memdb->errors) {
714 ret = -EIO;
715 goto ret;
716 }
717
718 if ((offset + count) > MEMDB_MAX_FILE_SIZE) {
719 ret = -EFBIG;
720 goto ret;
721 }
722
723 split_path(path, &dirname, &base);
724
725 memdb_tree_entry_t *parent, *unused;
726 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
727 ret = -ENOENT;
728 goto ret;
729 }
730 if (parent->type != DT_DIR) {
731 ret = -ENOTDIR;
732 goto ret;
733 }
734
735 /* do not allow '.' and '..' */
736 if ((base[0] == 0) ||
737 (base[0] == '.' && base[1] == 0) ||
738 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
739 ret = -EACCES;
740 goto ret;
741 }
742
743 guint32 vmid = 0;
744 int vmtype = 0;
745
746 if ((nodename = path_contain_vm_config(path, &vmtype, &vmid))) {
747 if (vmlist_different_vm_exists(vmtype, vmid, nodename)) {
748 ret = -EEXIST;
749 goto ret;
750 }
751 }
752
753 gpointer olddata = NULL;
754
755 memdb_tree_entry_t *te, *old;
756 if ((old = te = memdb_lookup_dir_entry(memdb, base, parent))) {
757 if (te->type != DT_REG) {
758 ret = -ENOTDIR;
759 goto ret;
760 }
761
762 if (create) {
763 ret = -EEXIST;
764 goto ret;
765 }
766
767 memdb->root->version++;
768 memdb->root->mtime = mtime;
769 memdb->root->writer = writer;
770
771 olddata = te->data.value;
772 } else {
773
774 if (!create) {
775 ret = -ENOENT;
776 goto ret;
777 }
778
779 memdb->root->version++;
780 memdb->root->mtime = mtime;
781 memdb->root->writer = writer;
782
783 te = memdb_tree_entry_new(base);
784 te->parent = parent->inode;
785 te->type = DT_REG;
786 te->inode = memdb->root->version;
787 }
788
789 te->version = memdb->root->version;
790 te->writer = writer;
791 te->mtime = mtime;
792
793 size_t newsize = offset + count;
794
795 gpointer newdata = NULL;
796
797 if (olddata) {
798
799 if (newsize > te->size) {
800 newdata = g_malloc0(newsize);
801 memcpy(newdata, olddata, te->size);
802
803 } else {
804
805 if (!truncate) {
806 newsize = te->size;
807 }
808 newdata = g_malloc0(newsize);
809 memcpy(newdata, olddata, newsize);
810 }
811
812 if (count && data)
813 memcpy(newdata + offset, data, count);
814
815 } else {
816
817 if (count && data) {
818 newdata = g_malloc0(newsize);
819 memcpy(newdata + offset, data, count);
820 }
821 }
822
823 te->size = newsize;
824 te->data.value = newdata;
825
826 g_free(olddata);
827
828 if (!old) {
829 g_hash_table_replace(parent->data.entries, te->name, te);
830 g_hash_table_replace(memdb->index, &te->inode, te);
831 }
832
833 record_memdb_change(path);
834
835 cfs_debug("memdb_pwrite %s %s %016" PRIX64 " %016" PRIX64, dirname, te->name, te->inode, te->version);
836
837 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
838 te->writer, te->mtime, te->size, te->type, te->name,
839 te->data.value, 0)) {
840 memdb->errors = 1;
841 ret = -EIO;
842 goto ret;
843 }
844
845 if (nodename)
846 vmlist_register_vm(vmtype, vmid, nodename);
847
848 ret = count;
849
850 ret:
851 g_mutex_unlock (&memdb->mutex);
852
853 g_free (nodename);
854 g_free (dirname);
855 g_free (base);
856
857 return ret;
858 }
859
860 int
861 memdb_mtime(
862 memdb_t *memdb,
863 const char *path,
864 guint32 writer,
865 guint32 mtime)
866 {
867 g_return_val_if_fail(memdb != NULL, -EINVAL);
868 g_return_val_if_fail(path != NULL, -EINVAL);
869
870 int ret = -EACCES;
871
872 char *dirname = NULL;
873 char *base = NULL;
874
875 g_mutex_lock (&memdb->mutex);
876
877 if (memdb->errors) {
878 ret = -EIO;
879 goto ret;
880 }
881
882 split_path(path, &dirname, &base);
883
884 memdb_tree_entry_t *parent, *unused;
885 if (!(parent = memdb_lookup_path(memdb, dirname, &unused))) {
886 ret = -ENOENT;
887 goto ret;
888 }
889 if (parent->type != DT_DIR) {
890 ret = -ENOTDIR;
891 goto ret;
892 }
893
894 /* do not allow '.' and '..' */
895 if ((base[0] == 0) ||
896 (base[0] == '.' && base[1] == 0) ||
897 (base[0] == '.' && base[1] == '.' && base[2] == 0)) {
898 ret = -EACCES;
899 goto ret;
900 }
901
902 memdb_tree_entry_t *te;
903 if (!(te = memdb_lookup_dir_entry(memdb, base, parent))) {
904 ret = -ENOENT;
905 goto ret;
906 }
907
908 int is_lock = (strcmp(dirname, "priv/lock") == 0) && (te->type == DT_DIR);
909
910 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
911 * allow to change mtime for locks (only if mtime is newer).
912 * See README for details about locks.
913 */
914 if (is_lock) {
915 if (mtime < te->mtime) {
916 cfs_debug("dir is locked");
917 ret = -EACCES;
918 goto ret;
919 } else {
920 /* only allow lock updates if the writer is the same */
921 if (te->writer != writer) {
922 ret = -EACCES;
923 goto ret;
924 }
925 }
926 }
927
928 memdb->root->version++;
929 memdb->root->mtime = mtime;
930 memdb->root->writer = writer;
931
932 te->version = memdb->root->version;
933 te->writer = writer;
934 te->mtime = mtime;
935
936 record_memdb_change(path);
937
938 cfs_debug("memdb_mtime %s %s %016" PRIX64 " %016" PRIX64, dirname, te->name, te->inode, te->version);
939
940 if (bdb_backend_write(memdb->bdb, te->inode, te->parent, te->version,
941 te->writer, te->mtime, te->size, te->type, te->name,
942 te->data.value, 0)) {
943 memdb->errors = 1;
944 ret = -EIO;
945 goto ret;
946 }
947
948 if (is_lock) {
949 cfs_debug("update cfs lock");
950 g_hash_table_remove(memdb->locks, path);
951 guchar csum[32];
952 if (memdb_tree_entry_csum(te, csum)) {
953 memdb_lock_expired(memdb, path, csum); // insert a new entry
954 }
955 }
956
957 ret = 0;
958
959 ret:
960 g_mutex_unlock (&memdb->mutex);
961
962 g_free (dirname);
963 g_free (base);
964
965 return ret;
966 }
967
968 int
969 memdb_create(
970 memdb_t *memdb,
971 const char *path,
972 guint32 writer,
973 guint32 mtime)
974 {
975 return memdb_pwrite(memdb, path, writer, mtime, NULL, 0, 0, FALSE, TRUE);
976 }
977
978 int
979 memdb_write(
980 memdb_t *memdb,
981 const char *path,
982 guint32 writer,
983 guint32 mtime,
984 gconstpointer data,
985 size_t count,
986 off_t offset,
987 gboolean truncate)
988 {
989 return memdb_pwrite(memdb, path, writer, mtime, data, count, offset, truncate, FALSE);
990 }
991
992 memdb_tree_entry_t *
993 memdb_getattr(
994 memdb_t *memdb,
995 const char *path)
996 {
997 memdb_tree_entry_t *te, *parent;
998
999 g_mutex_lock (&memdb->mutex);
1000
1001 if ((te = memdb_lookup_path(memdb, path, &parent))) {
1002
1003 memdb_tree_entry_t *cpy = memdb_tree_entry_copy(te, 0);
1004
1005 g_mutex_unlock (&memdb->mutex);
1006
1007 return cpy;
1008 }
1009
1010 g_mutex_unlock (&memdb->mutex);
1011
1012 return NULL;
1013 }
1014
1015 GList *
1016 memdb_readdir(
1017 memdb_t *memdb,
1018 const char *path)
1019 {
1020 g_return_val_if_fail(memdb != NULL, NULL);
1021 g_return_val_if_fail(path != NULL, NULL);
1022
1023 memdb_tree_entry_t *te, *parent;
1024
1025 GList *list = NULL;
1026
1027 g_mutex_lock (&memdb->mutex);
1028
1029 if (!(te = memdb_lookup_path(memdb, path, &parent)))
1030 goto ret;
1031
1032 if (te->type != DT_DIR)
1033 goto ret;
1034
1035 GHashTable *ht = te->data.entries;
1036
1037 GHashTableIter iter;
1038 gpointer key, value;
1039
1040 g_hash_table_iter_init (&iter, ht);
1041
1042 while (g_hash_table_iter_next (&iter, &key, &value)) {
1043
1044 te = (memdb_tree_entry_t *)value;
1045
1046 memdb_tree_entry_t *cpy = memdb_tree_entry_copy(te, 0);
1047
1048 list = g_list_append(list, cpy);
1049 }
1050
1051 ret:
1052 g_mutex_unlock (&memdb->mutex);
1053
1054 return list;
1055 }
1056
1057 void
1058 memdb_dirlist_free(GList *dirlist)
1059 {
1060 GList *l = dirlist;
1061
1062 while (l) {
1063 if (l->data)
1064 g_free (l->data);
1065
1066 l = g_list_next(l);
1067 }
1068
1069 if (dirlist)
1070 g_list_free(dirlist);
1071 }
1072
1073 static int
1074 unlink_tree_entry(
1075 memdb_t *memdb,
1076 memdb_tree_entry_t *parent,
1077 memdb_tree_entry_t *te)
1078 {
1079 g_return_val_if_fail(parent != NULL, -EACCES);
1080 g_return_val_if_fail(parent->inode == te->parent, -EACCES);
1081
1082 if (te->type == DT_DIR)
1083 if (g_hash_table_size(te->data.entries))
1084 return -ENOTEMPTY;
1085
1086 if (!g_hash_table_steal(parent->data.entries, te->name)) {
1087 cfs_critical("internal error - can't delete entry");
1088 memdb->errors = 1;
1089 return -EIO;
1090 }
1091
1092 if (!g_hash_table_steal(memdb->index, &te->inode)) {
1093 cfs_critical("internal error - can't delete entry");
1094 memdb->errors = 1;
1095 return -EIO;
1096 }
1097
1098 return 0;
1099 }
1100
1101 int
1102 memdb_rename(
1103 memdb_t *memdb,
1104 const char *from,
1105 const char *to,
1106 guint32 writer,
1107 guint32 mtime)
1108 {
1109 int ret = -EACCES;
1110
1111 char *nodename = NULL;
1112 char *dirname = NULL;
1113 char *base = NULL;
1114
1115 guint32 vmid = 0;
1116 guint32 from_vmid = 0;
1117 int vmtype = 0;
1118 int from_vmtype = 0;
1119 char *from_node = NULL;
1120
1121 g_mutex_lock (&memdb->mutex);
1122
1123 if (memdb->errors) {
1124 ret = -EIO;
1125 goto ret;
1126 }
1127
1128 memdb_tree_entry_t *from_te, *from_parent;
1129 memdb_tree_entry_t *to_te, *to_parent;
1130 memdb_tree_entry_t *target_te, *target_parent;
1131
1132 guint64 delete_inode = 0;
1133
1134 if (!(from_te = memdb_lookup_path(memdb, from, &from_parent))) {
1135 ret = -ENOENT;
1136 goto ret;
1137 }
1138
1139 if (!from_parent) { /* can't rename root */
1140 ret = -EACCES;
1141 goto ret;
1142 }
1143
1144 from_node = path_contain_vm_config(from, &from_vmtype, &from_vmid);
1145
1146 if (from_te->type == DT_REG && (nodename = path_contain_vm_config(to, &vmtype, &vmid))) {
1147 if (vmlist_different_vm_exists(vmtype, vmid, nodename)) {
1148 if (!(from_node && vmid == from_vmid)) {
1149 ret = -EEXIST;
1150 goto ret;
1151 }
1152 }
1153 }
1154
1155 /* we do not allow rename for locks */
1156 if (from_te->type == DT_DIR && path_is_lockdir(from)) {
1157 ret = -EACCES;
1158 goto ret;
1159 }
1160
1161 if ((to_te = memdb_lookup_path(memdb, to, &to_parent))) {
1162
1163 if ((ret = unlink_tree_entry(memdb, to_parent, to_te)) != 0)
1164 goto ret;
1165
1166 base = strdup(to_te->name);
1167
1168 delete_inode = to_te->inode;
1169
1170 target_te = to_parent;
1171
1172 memdb_tree_entry_free(to_te);
1173
1174 } else {
1175
1176 split_path(to, &dirname, &base);
1177
1178 if (!(target_te = memdb_lookup_path(memdb, dirname, &target_parent))) {
1179 ret = -ENOENT;
1180 goto ret;
1181 }
1182
1183 if (target_te->type != DT_DIR) {
1184 ret = -ENOTDIR;
1185 goto ret;
1186 }
1187 }
1188
1189 record_memdb_change(from);
1190 record_memdb_change(to);
1191
1192 /* NOTE: unlink_tree_entry() make sure that we can only
1193 rename emtpy directories */
1194
1195 if ((ret = unlink_tree_entry(memdb, from_parent, from_te)) != 0)
1196 goto ret;
1197
1198 memdb->root->version++;
1199 memdb->root->mtime = mtime;
1200 memdb->root->writer = writer;
1201
1202 memdb_tree_entry_t *new = memdb_tree_entry_new(base);
1203 new->parent = target_te->inode;
1204 new->inode = from_te->inode;
1205 new->version = memdb->root->version;
1206 new->writer = writer;
1207 new->mtime = mtime;
1208 new->size = from_te->size;
1209 new->type = from_te->type;
1210 new->data = from_te->data;
1211
1212 g_free(from_te);
1213
1214 g_hash_table_replace(target_te->data.entries, new->name, new);
1215 g_hash_table_replace(memdb->index, &new->inode, new);
1216
1217 if (bdb_backend_write(memdb->bdb, new->inode, new->parent,
1218 new->version, new->writer, new->mtime,
1219 new->size, new->type, new->name,
1220 new->data.value, delete_inode)) {
1221 memdb->errors = 1;
1222 ret = -EIO;
1223 goto ret;
1224 }
1225
1226 if (new->type == DT_REG) {
1227
1228 if (from_node)
1229 vmlist_delete_vm(from_vmid);
1230
1231 if (nodename)
1232 vmlist_register_vm(vmtype, vmid, nodename);
1233
1234 } else if (new->type == DT_DIR) {
1235 /* directories are alwayse empty (see unlink_tree_entry) */
1236 }
1237
1238 ret = 0;
1239
1240 ret:
1241 g_mutex_unlock (&memdb->mutex);
1242
1243 g_free(from_node);
1244 g_free (nodename);
1245 g_free (dirname);
1246 g_free (base);
1247
1248 return ret;
1249 }
1250
1251 int
1252 memdb_delete(
1253 memdb_t *memdb,
1254 const char *path,
1255 guint32 writer,
1256 guint32 mtime)
1257 {
1258 memdb_tree_entry_t *te, *parent;
1259
1260 g_mutex_lock (&memdb->mutex);
1261
1262 int ret = -EACCES;
1263
1264 if (memdb->errors) {
1265 ret = -EIO;
1266 goto ret;
1267 }
1268
1269 if (!(te = memdb_lookup_path(memdb, path, &parent))) {
1270 ret = -ENOENT;
1271 goto ret;
1272 }
1273
1274 if (!parent) { /* cant remove root */
1275 ret = -EACCES;
1276 goto ret;
1277 }
1278
1279 if (te->type == DT_DIR) {
1280 if (g_hash_table_size(te->data.entries)) {
1281 ret = -ENOTEMPTY;
1282 goto ret;
1283 }
1284
1285 g_hash_table_remove(memdb->locks, path);
1286 }
1287
1288 record_memdb_change(path);
1289
1290 if ((ret = unlink_tree_entry(memdb, parent, te)) != 0)
1291 goto ret;
1292
1293 memdb->root->version++;
1294 memdb->root->mtime = mtime;
1295 memdb->root->writer = writer;
1296
1297 if (bdb_backend_write(memdb->bdb, 0, 0, memdb->root->version, writer, mtime, 0,
1298 DT_REG, NULL, NULL, te->inode)) {
1299 memdb->errors = 1;
1300 memdb_tree_entry_free(te);
1301 ret = -EIO;
1302 goto ret;
1303 }
1304
1305 memdb_tree_entry_free(te);
1306
1307 int vmtype = 0;
1308 guint32 vmid = 0;
1309 char *nodename;
1310 if ((nodename = path_contain_vm_config(path, &vmtype, &vmid))) {
1311 g_free(nodename);
1312 vmlist_delete_vm(vmid);
1313 }
1314
1315 ret = 0;
1316
1317 ret:
1318 g_mutex_unlock (&memdb->mutex);
1319
1320 return ret;
1321 }
1322
1323 int
1324 memdb_statfs(
1325 memdb_t *memdb,
1326 struct statvfs *stbuf)
1327 {
1328 g_return_val_if_fail(memdb != NULL, -EINVAL);
1329 g_return_val_if_fail(stbuf != NULL, -EINVAL);
1330
1331 g_mutex_lock (&memdb->mutex);
1332
1333 GHashTableIter iter;
1334 gpointer key, value;
1335
1336 size_t size = 0;
1337 size_t files = 0;
1338
1339 g_hash_table_iter_init (&iter, memdb->index);
1340
1341 while (g_hash_table_iter_next (&iter, &key, &value)) {
1342 memdb_tree_entry_t *te = (memdb_tree_entry_t *)value;
1343 files++;
1344 size += te->size;
1345 }
1346
1347 g_mutex_unlock (&memdb->mutex);
1348
1349 stbuf->f_bsize = MEMDB_BLOCKSIZE;
1350 stbuf->f_blocks = MEMDB_BLOCKS;
1351 stbuf->f_bfree = stbuf->f_bavail = stbuf->f_blocks -
1352 ((size + stbuf->f_bsize - 1)/stbuf->f_bsize);
1353 stbuf->f_files = MEMDB_MAX_INODES;
1354 stbuf->f_ffree = stbuf->f_files - files;
1355
1356 stbuf->f_namemax = 256;
1357
1358 return 0;
1359 }
1360
1361 void
1362 tree_entry_debug(memdb_tree_entry_t *te)
1363 {
1364 g_return_if_fail(te != NULL);
1365
1366 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1367
1368 cfs_debug("%016" PRIX64 " %c %016" PRIX64 " %016" PRIX64 " %08X %08X %08X %s\n",
1369 te->inode, te->type == DT_DIR ? 'D' : 'R', te->parent, te->version,
1370 te->writer, te->mtime, te->size, te->name);
1371 }
1372
1373 void
1374 tree_entry_print(memdb_tree_entry_t *te)
1375 {
1376 g_return_if_fail(te != NULL);
1377
1378 g_print("%016" PRIX64 " %c %016" PRIX64 " %016" PRIX64 " %08X %08X %08X %s\n",
1379 te->inode, te->type == DT_DIR ? 'D' : 'R', te->parent, te->version,
1380 te->writer, te->mtime, te->size, te->name);
1381 }
1382
1383 void
1384 memdb_dump(memdb_t *memdb)
1385 {
1386 g_return_if_fail(memdb != NULL);
1387
1388 g_mutex_lock (&memdb->mutex);
1389
1390 GList *list = g_hash_table_get_values(memdb->index);
1391
1392 list = g_list_sort(list, memdb_tree_compare);
1393
1394 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1395 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1396
1397 GList *l = list;
1398 while (l) {
1399 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1400
1401 tree_entry_print(te);
1402
1403 l = g_list_next(l);
1404 }
1405
1406 g_list_free(list);
1407
1408 g_mutex_unlock (&memdb->mutex);
1409 }
1410
1411 void
1412 memdb_dump_index (memdb_index_t *idx)
1413 {
1414 g_return_if_fail(idx != NULL);
1415
1416 g_print ("INDEX DUMP %016" PRIX64 "\n", idx->version);
1417
1418 int i;
1419 for (i = 0; i < idx->size; i++) {
1420 g_print ("%016" PRIX64 " %016" PRIX64 "%016" PRIX64 "%016" PRIX64 "%016" PRIX64 "\n", idx->entries[i].inode,
1421 *((guint64 *)idx->entries[i].digest),
1422 *((guint64 *)(idx->entries[i].digest + 8)),
1423 *((guint64 *)(idx->entries[i].digest + 16)),
1424 *((guint64 *)(idx->entries[i].digest + 24)));
1425 }
1426 }
1427
1428 memdb_index_t *
1429 memdb_index_copy(memdb_index_t *idx)
1430 {
1431 g_return_val_if_fail(idx != NULL, NULL);
1432
1433 int bytes = sizeof(memdb_index_t) + idx->size*sizeof(memdb_index_extry_t);
1434 if (idx->bytes != bytes) {
1435 cfs_critical("memdb index contains wrong number of bytes");
1436 return NULL;
1437 }
1438
1439 memdb_index_t *copy = (memdb_index_t *)g_memdup(idx, bytes);
1440
1441 return copy;
1442 }
1443
1444 gboolean
1445 memdb_tree_entry_csum(
1446 memdb_tree_entry_t *te,
1447 guchar csum[32])
1448 {
1449 g_return_val_if_fail(te != NULL, FALSE);
1450 g_return_val_if_fail(csum != NULL, FALSE);
1451
1452 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1453
1454 g_checksum_update(sha256, (unsigned char*)&te->inode, sizeof(te->inode));
1455 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1456 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1457 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1458 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1459 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1460 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1461 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1462
1463 if (te->type == DT_REG && te->size)
1464 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1465
1466 size_t csum_len = 32;
1467 g_checksum_get_digest(sha256, csum, &csum_len);
1468 g_checksum_free(sha256);
1469
1470 return TRUE;
1471 }
1472
1473 gboolean
1474 memdb_compute_checksum(
1475 GHashTable *index,
1476 memdb_tree_entry_t *root,
1477 guchar *csum,
1478 size_t csum_len)
1479 {
1480 g_return_val_if_fail(index != NULL, FALSE);
1481 g_return_val_if_fail(root != NULL, FALSE);
1482
1483 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1484
1485 GList *list = g_hash_table_get_values(index);
1486
1487 list = g_list_sort(list, memdb_tree_compare);
1488
1489 GList *l = list;
1490 while (l) {
1491 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1492
1493 g_checksum_update(sha256, (unsigned char*)&te->inode, sizeof(te->inode));
1494 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1495 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1496 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1497 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1498 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1499 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1500 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1501
1502 if (te->type == DT_REG && te->size)
1503 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1504
1505 l = g_list_next(l);
1506 }
1507
1508 g_list_free(list);
1509
1510 g_checksum_get_digest(sha256, csum, &csum_len);
1511
1512 cfs_debug("checksum: %s", g_checksum_get_string(sha256));
1513
1514 g_checksum_free(sha256);
1515
1516 return TRUE;
1517 }
1518
1519 memdb_index_t *
1520 memdb_encode_index(
1521 GHashTable *index,
1522 memdb_tree_entry_t *root)
1523 {
1524 g_return_val_if_fail(index != NULL, NULL);
1525 g_return_val_if_fail(root != NULL, NULL);
1526
1527 memdb_index_t *idx = NULL;
1528
1529 int count = g_hash_table_size(index);
1530 if (!count) {
1531 cfs_critical("memdb index has no entires");
1532 return NULL;
1533 }
1534
1535 int bytes = sizeof(memdb_index_t) + count*sizeof(memdb_index_extry_t);
1536 idx = g_malloc0(bytes);
1537
1538 idx->size = count;
1539 idx->bytes = bytes;
1540 idx->version = root->version;
1541 idx->mtime = root->mtime;
1542 idx->writer = root->writer;
1543
1544 GChecksum *sha256 = g_checksum_new(G_CHECKSUM_SHA256);
1545
1546 GList *list = g_hash_table_get_values(index);
1547
1548 list = g_list_sort(list, memdb_tree_compare);
1549
1550 int ind = 0;
1551 GList *l = list;
1552 while (l) {
1553 memdb_tree_entry_t *te = (memdb_tree_entry_t *)l->data;
1554
1555 if (te->inode > idx->last_inode)
1556 idx->last_inode = te->inode;
1557
1558 idx->entries[ind].inode = te->inode;
1559
1560 g_checksum_reset (sha256);
1561
1562 g_checksum_update(sha256, (unsigned char*)&te->version, sizeof(te->version));
1563 g_checksum_update(sha256, (unsigned char*)&te->writer, sizeof(te->writer));
1564 g_checksum_update(sha256, (unsigned char*)&te->mtime, sizeof(te->mtime));
1565 g_checksum_update(sha256, (unsigned char*)&te->size, sizeof(te->size));
1566 g_checksum_update(sha256, (unsigned char*)&te->type, sizeof(te->type));
1567 g_checksum_update(sha256, (unsigned char*)&te->parent, sizeof(te->parent));
1568 g_checksum_update(sha256, (unsigned char*)te->name, strlen(te->name));
1569
1570 if (te->type == DT_REG && te->size)
1571 g_checksum_update(sha256, (unsigned char*)te->data.value, te->size);
1572
1573 gsize len = 32;
1574 g_checksum_get_digest(sha256, (guint8 *)idx->entries[ind].digest, &len);
1575
1576 ind++;
1577
1578 l = g_list_next(l);
1579 }
1580
1581 g_list_free(list);
1582
1583 g_checksum_free(sha256);
1584
1585 return idx;
1586 }