]>
git.proxmox.com Git - pve-cluster.git/blob - data/src/memdb.c
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Author: Dietmar Maurer <dietmar@proxmox.com>
23 #endif /* HAVE_CONFIG_H */
28 #include <sys/types.h>
38 #include "cfs-utils.h"
42 #define CFS_LOCK_TIMEOUT (60*2)
48 g_return_val_if_fail(name
!= NULL
, NULL
);
50 memdb_tree_entry_t
*te
= g_malloc0(sizeof(memdb_tree_entry_t
) + strlen(name
) + 1);
51 g_return_val_if_fail(te
!= NULL
, NULL
);
53 strcpy(te
->name
, name
);
59 memdb_tree_entry_copy(
60 memdb_tree_entry_t
*te
,
63 g_return_val_if_fail(te
!= NULL
, NULL
);
65 memdb_tree_entry_t
*cpy
= memdb_tree_entry_new(te
->name
);
67 cpy
->parent
= te
->parent
;
68 cpy
->inode
= te
->inode
;
69 cpy
->version
= te
->version
;
70 cpy
->writer
= te
->writer
;
71 cpy
->mtime
= te
->mtime
;
75 if (with_data
&& te
->size
&& te
->type
== DT_REG
) {
76 cpy
->data
.value
= g_memdup(te
->data
.value
, te
->size
);
78 cpy
->data
.value
= NULL
;
85 memdb_tree_entry_free(
86 memdb_tree_entry_t
*te
)
91 if (te
->type
== DT_REG
) {
93 g_free(te
->data
.value
);
96 if (te
->type
== DT_DIR
) {
98 g_hash_table_destroy(te
->data
.entries
);
105 memdb_lock_info_free(memdb_lock_info_t
*li
)
107 g_return_if_fail(li
!= NULL
);
120 guint64 a
= ((const memdb_tree_entry_t
*)v1
)->inode
;
121 guint64 b
= ((const memdb_tree_entry_t
*)v2
)->inode
;
138 char *dup
= g_strdup (path
);
139 int len
= strlen (dup
) - 1;
140 while (len
>= 0 && dup
[len
] == '/') dup
[len
--] = 0;
142 char *dn
= g_path_get_dirname (dup
);
143 char *bn
= g_path_get_basename (dup
);
151 static memdb_tree_entry_t
*
152 memdb_lookup_dir_entry(
155 memdb_tree_entry_t
*parent
)
158 g_return_val_if_fail(memdb
!= NULL
, NULL
);
159 g_return_val_if_fail(name
!= NULL
, NULL
);
160 g_return_val_if_fail(parent
!= NULL
, NULL
);
161 g_return_val_if_fail(parent
->type
== DT_DIR
, NULL
);
163 GHashTable
*ht
= parent
->data
.entries
;
165 g_return_val_if_fail(ht
!= NULL
, NULL
);
167 return g_hash_table_lookup(ht
, name
);
170 static memdb_tree_entry_t
*
174 memdb_tree_entry_t
**parent
)
176 g_return_val_if_fail(memdb
!= NULL
, NULL
);
177 g_return_val_if_fail(path
!= NULL
, NULL
);
178 g_return_val_if_fail(parent
!= NULL
, NULL
);
180 memdb_tree_entry_t
*cdir
= memdb
->root
;
183 if (path
[0] == 0 || ((path
[0] == '.' || path
[0] == '/') && path
[1] == 0))
186 gchar
**set
= g_strsplit_set(path
, "/", 0);
191 while ((name
= set
[i
++])) {
193 if (name
[0] == 0) continue;
196 if ((cdir
= memdb_lookup_dir_entry(memdb
, name
, cdir
)) == NULL
)
211 if (!name
|| name
[0] < '1' || name
[0] > '9')
216 errno
= 0; /* see man strtoul */
218 unsigned long int vmid
= strtoul(name
, &end
, 10);
220 if (!end
|| end
[0] != '.' || end
[1] != 'c'|| end
[2] != 'o' || end
[3] != 'n' ||
221 end
[4] != 'f' || end
[5] != 0 || errno
!= 0 || vmid
> G_MAXUINT32
)
225 *vmid_ret
= (guint32
)vmid
;
232 const char *nodename
)
234 g_return_val_if_fail(nodename
!= NULL
, FALSE
);
236 /* LDH rule (letters, digits, hyphen) */
238 int len
= strlen(nodename
);
244 for (int i
= 0; i
< len
; i
++) {
245 char c
= nodename
[i
];
246 if ((c
>= 'A' && c
<= 'Z') ||
247 (c
>= 'a' && c
<= 'z') ||
248 (c
>= '0' && c
<= '9') ||
249 (i
!= 0 && i
!= (len
-1) && c
== '-'))
258 dir_contain_vm_config(
265 if (strncmp(dirname
, "nodes/", 6) != 0)
270 char *nodename
= NULL
;
272 char **sa
= g_strsplit(dirname
, "/", 2);
273 if (sa
[0] && sa
[1] && valid_nodename(sa
[0])) {
274 if (strcmp(sa
[1], "qemu-server") == 0) {
275 *vmtype_ret
= VMTYPE_QEMU
;
276 nodename
= g_strdup(sa
[0]);
277 } else if (strcmp(sa
[1], "openvz") == 0) {
278 *vmtype_ret
= VMTYPE_OPENVZ
;
279 nodename
= g_strdup(sa
[0]);
280 } else if (strcmp(sa
[1], "lxc") == 0) {
281 *vmtype_ret
= VMTYPE_LXC
;
282 nodename
= g_strdup(sa
[0]);
292 path_contain_vm_config(
300 char *dirname
= NULL
;
302 char *nodename
= NULL
;
304 split_path(path
, &dirname
, &base
);
306 if (name_is_vm_config(base
, vmid_ret
))
307 nodename
= dir_contain_vm_config(dirname
, vmtype_ret
);
319 const char *nodename
,
321 memdb_tree_entry_t
*subdir
)
323 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
324 g_return_val_if_fail(vmlist
!= NULL
, FALSE
);
325 g_return_val_if_fail(subdir
!= NULL
, FALSE
);
326 g_return_val_if_fail(subdir
->type
== DT_DIR
, FALSE
);
327 g_return_val_if_fail(subdir
->data
.entries
!= NULL
, FALSE
);
331 GHashTable
*ht
= subdir
->data
.entries
;
335 g_hash_table_iter_init (&iter
, ht
);
337 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
339 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
341 if (node_te
->type
!= DT_REG
)
345 if (!name_is_vm_config(node_te
->name
, &vmid
))
348 if (!vmlist_hash_insert_vm(vmlist
, vmtype
, vmid
, nodename
, FALSE
))
360 const guchar csum
[32])
362 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
363 g_return_val_if_fail(memdb
->locks
!= NULL
, FALSE
);
364 g_return_val_if_fail(path
!= NULL
, FALSE
);
365 g_return_val_if_fail(csum
!= NULL
, FALSE
);
367 memdb_lock_info_t
*li
;
368 uint32_t ctime
= time(NULL
);
370 if ((li
= g_hash_table_lookup(memdb
->locks
, path
))) {
371 if (memcmp(csum
, li
->csum
, 32) != 0) {
373 memcpy(li
->csum
, csum
, 32);
374 g_critical("wrong lock csum - reset timeout");
377 if ((ctime
> li
->ltime
) && ((ctime
- li
->ltime
) > CFS_LOCK_TIMEOUT
))
380 li
= g_new0(memdb_lock_info_t
, 1);
381 li
->path
= g_strdup(path
);
383 memcpy(li
->csum
, csum
, 32);
384 g_hash_table_replace(memdb
->locks
, li
->path
, li
);
391 memdb_update_locks(memdb_t
*memdb
)
393 g_return_if_fail(memdb
!= NULL
);
394 g_return_if_fail(memdb
->locks
!= NULL
);
396 memdb_tree_entry_t
*te
, *parent
;
398 if (!(te
= memdb_lookup_path(memdb
, "priv/lock", &parent
)))
401 if (te
->type
!= DT_DIR
)
405 GHashTable
*old
= memdb
->locks
;
406 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
407 (GDestroyNotify
)memdb_lock_info_free
);
409 GHashTable
*ht
= te
->data
.entries
;
413 g_hash_table_iter_init (&iter
, ht
);
414 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
416 memdb_tree_entry_t
*lock_te
= (memdb_tree_entry_t
*)value
;
417 if (lock_te
->type
!= DT_DIR
)
420 memdb_lock_info_t
*li
;
421 li
= g_new0(memdb_lock_info_t
, 1);
422 li
->path
= g_strdup_printf("priv/lock/%s", lock_te
->name
);
425 if (memdb_tree_entry_csum(lock_te
, csum
)) {
426 memcpy(li
->csum
, csum
, 32);
427 memdb_lock_info_t
*oldli
;
428 if ((oldli
= g_hash_table_lookup(memdb
->locks
, lock_te
->name
)) &&
429 (memcmp(csum
, oldli
->csum
, 32) == 0)) {
430 li
->ltime
= oldli
->ltime
;
432 li
->ltime
= time(NULL
);
434 g_hash_table_insert(memdb
->locks
, li
->path
, li
);
436 memdb_lock_info_free(li
);
441 g_hash_table_destroy(old
);
446 memdb_recreate_vmlist(
449 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
451 memdb_tree_entry_t
*te
, *parent
;
453 if (!(te
= memdb_lookup_path(memdb
, "nodes", &parent
)))
456 if (te
->type
!= DT_DIR
)
459 GHashTable
*vmlist
= vmlist_hash_new();
461 GHashTable
*ht
= te
->data
.entries
;
468 g_hash_table_iter_init (&iter
, ht
);
470 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
472 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
473 if (node_te
->type
!= DT_DIR
)
476 if (!valid_nodename(node_te
->name
))
479 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "qemu-server"))) {
480 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_QEMU
, te
))
483 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "openvz"))) {
484 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_OPENVZ
, te
))
487 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "lxc"))) {
488 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_LXC
, te
))
493 /* always update list - even if we detected duplicates */
494 cfs_status_set_vmlist(vmlist
);
500 memdb_open(const char *dbfilename
)
502 memdb_t
*memdb
= g_new0(memdb_t
, 1);
504 g_mutex_init(&memdb
->mutex
);
506 memdb
->dbfilename
= g_strdup(dbfilename
);
508 memdb
->root
= memdb_tree_entry_new("");
509 memdb
->root
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
510 memdb
->root
->type
= DT_DIR
;
512 memdb
->index
= g_hash_table_new_full(g_int64_hash
, g_int64_equal
, NULL
,
513 (GDestroyNotify
)memdb_tree_entry_free
);
515 g_hash_table_replace(memdb
->index
, &memdb
->root
->inode
, memdb
->root
);
517 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
518 (GDestroyNotify
)memdb_lock_info_free
);
520 if (!(memdb
->bdb
= bdb_backend_open(dbfilename
, memdb
->root
, memdb
->index
))) {
525 record_memdb_reload();
527 if (!memdb_recreate_vmlist(memdb
)) {
532 memdb_update_locks(memdb
);
534 cfs_debug("memdb open '%s' successful (version = %016" PRIX64
")",
535 dbfilename
, memdb
->root
->version
);
541 memdb_close(memdb_t
*memdb
)
543 g_return_if_fail(memdb
!= NULL
);
545 g_mutex_lock (&memdb
->mutex
);
548 bdb_backend_close(memdb
->bdb
);
551 g_hash_table_destroy(memdb
->index
);
554 g_hash_table_destroy(memdb
->locks
);
556 if (memdb
->dbfilename
)
557 g_free(memdb
->dbfilename
);
561 memdb
->dbfilename
= NULL
;
563 g_mutex_unlock (&memdb
->mutex
);
565 g_mutex_clear (&memdb
->mutex
);
576 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
577 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
581 char *dirname
= NULL
;
584 g_mutex_lock (&memdb
->mutex
);
591 split_path(path
, &dirname
, &base
);
593 memdb_tree_entry_t
*parent
, *unused
;
595 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
600 if (parent
->type
!= DT_DIR
) {
605 /* do not allow '.' and '..' */
606 if ((base
[0] == 0) ||
607 (base
[0] == '.' && base
[1] == 0) ||
608 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
613 memdb_tree_entry_t
*te
;
614 if ((te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
619 memdb
->root
->version
++;
620 memdb
->root
->mtime
= mtime
;
621 memdb
->root
->writer
= writer
;
623 te
= memdb_tree_entry_new(base
);
624 te
->parent
= parent
->inode
;
625 te
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
626 te
->inode
= te
->version
= memdb
->root
->version
;
631 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
632 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
634 cfs_debug("memdb_mkdir %s %s %016" PRIX64
, dirname
, base
, memdb
->root
->version
);
636 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
637 te
->writer
, te
->mtime
, 0, DT_DIR
, te
->name
, NULL
, 0)) {
643 if (strcmp(dirname
, "priv/lock") == 0) {
644 g_hash_table_remove(memdb
->locks
, path
);
646 if (memdb_tree_entry_csum(te
, csum
)) {
647 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
654 g_mutex_unlock (&memdb
->mutex
);
668 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
669 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
670 g_return_val_if_fail(data_ret
!= NULL
, -EINVAL
);
672 memdb_tree_entry_t
*te
, *parent
;
674 g_mutex_lock (&memdb
->mutex
);
676 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
677 if (te
->type
== DT_REG
) {
678 *data_ret
= g_memdup(te
->data
.value
, te
->size
);
679 guint32 size
= te
->size
;
680 g_mutex_unlock (&memdb
->mutex
);
685 g_mutex_unlock (&memdb
->mutex
);
702 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
703 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
704 g_return_val_if_fail(count
== 0 || data
!= NULL
, -EINVAL
);
708 char *dirname
= NULL
;
710 char *nodename
= NULL
;
712 g_mutex_lock (&memdb
->mutex
);
719 if ((offset
+ count
) > MEMDB_MAX_FILE_SIZE
) {
724 split_path(path
, &dirname
, &base
);
726 memdb_tree_entry_t
*parent
, *unused
;
727 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
731 if (parent
->type
!= DT_DIR
) {
736 /* do not allow '.' and '..' */
737 if ((base
[0] == 0) ||
738 (base
[0] == '.' && base
[1] == 0) ||
739 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
747 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
748 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
754 gpointer olddata
= NULL
;
756 memdb_tree_entry_t
*te
, *old
;
757 if ((old
= te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
758 if (te
->type
!= DT_REG
) {
768 memdb
->root
->version
++;
769 memdb
->root
->mtime
= mtime
;
770 memdb
->root
->writer
= writer
;
772 olddata
= te
->data
.value
;
780 memdb
->root
->version
++;
781 memdb
->root
->mtime
= mtime
;
782 memdb
->root
->writer
= writer
;
784 te
= memdb_tree_entry_new(base
);
785 te
->parent
= parent
->inode
;
787 te
->inode
= memdb
->root
->version
;
790 te
->version
= memdb
->root
->version
;
794 size_t newsize
= offset
+ count
;
796 gpointer newdata
= NULL
;
800 if (newsize
> te
->size
) {
801 newdata
= g_malloc0(newsize
);
802 memcpy(newdata
, olddata
, te
->size
);
809 newdata
= g_malloc0(newsize
);
810 memcpy(newdata
, olddata
, newsize
);
814 memcpy(newdata
+ offset
, data
, count
);
819 newdata
= g_malloc0(newsize
);
820 memcpy(newdata
+ offset
, data
, count
);
825 te
->data
.value
= newdata
;
830 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
831 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
834 record_memdb_change(path
);
836 cfs_debug("memdb_pwrite %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
838 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
839 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
840 te
->data
.value
, 0)) {
847 vmlist_register_vm(vmtype
, vmid
, nodename
);
852 g_mutex_unlock (&memdb
->mutex
);
868 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
869 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
873 char *dirname
= NULL
;
876 g_mutex_lock (&memdb
->mutex
);
883 split_path(path
, &dirname
, &base
);
885 memdb_tree_entry_t
*parent
, *unused
;
886 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
890 if (parent
->type
!= DT_DIR
) {
895 /* do not allow '.' and '..' */
896 if ((base
[0] == 0) ||
897 (base
[0] == '.' && base
[1] == 0) ||
898 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
903 memdb_tree_entry_t
*te
;
904 if (!(te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
909 int is_lock
= (strcmp(dirname
, "priv/lock") == 0) && (te
->type
== DT_DIR
);
911 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
912 * allow changing the mtime of locks (only if mtime is newer).
913 * See README for details about locks.
916 if (mtime
< te
->mtime
) {
917 cfs_debug("dir is locked");
921 /* only allow lock updates if the writer is the same */
922 if (te
->writer
!= writer
) {
929 memdb
->root
->version
++;
930 memdb
->root
->mtime
= mtime
;
931 memdb
->root
->writer
= writer
;
933 te
->version
= memdb
->root
->version
;
937 record_memdb_change(path
);
939 cfs_debug("memdb_mtime %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
941 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
942 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
943 te
->data
.value
, 0)) {
950 cfs_debug("update cfs lock");
951 g_hash_table_remove(memdb
->locks
, path
);
953 if (memdb_tree_entry_csum(te
, csum
)) {
954 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
961 g_mutex_unlock (&memdb
->mutex
);
976 return memdb_pwrite(memdb
, path
, writer
, mtime
, NULL
, 0, 0, FALSE
, TRUE
);
990 return memdb_pwrite(memdb
, path
, writer
, mtime
, data
, count
, offset
, truncate
, FALSE
);
998 memdb_tree_entry_t
*te
, *parent
;
1000 g_mutex_lock (&memdb
->mutex
);
1002 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1004 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1006 g_mutex_unlock (&memdb
->mutex
);
1011 g_mutex_unlock (&memdb
->mutex
);
1021 g_return_val_if_fail(memdb
!= NULL
, NULL
);
1022 g_return_val_if_fail(path
!= NULL
, NULL
);
1024 memdb_tree_entry_t
*te
, *parent
;
1028 g_mutex_lock (&memdb
->mutex
);
1030 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
)))
1033 if (te
->type
!= DT_DIR
)
1036 GHashTable
*ht
= te
->data
.entries
;
1038 GHashTableIter iter
;
1039 gpointer key
, value
;
1041 g_hash_table_iter_init (&iter
, ht
);
1043 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1045 te
= (memdb_tree_entry_t
*)value
;
1047 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1049 list
= g_list_append(list
, cpy
);
1053 g_mutex_unlock (&memdb
->mutex
);
1059 memdb_dirlist_free(GList
*dirlist
)
1071 g_list_free(dirlist
);
1077 memdb_tree_entry_t
*parent
,
1078 memdb_tree_entry_t
*te
)
1080 g_return_val_if_fail(parent
!= NULL
, -EACCES
);
1081 g_return_val_if_fail(parent
->inode
== te
->parent
, -EACCES
);
1083 if (te
->type
== DT_DIR
)
1084 if (g_hash_table_size(te
->data
.entries
))
1087 if (!g_hash_table_steal(parent
->data
.entries
, te
->name
)) {
1088 cfs_critical("internal error - can't delete entry");
1093 if (!g_hash_table_steal(memdb
->index
, &te
->inode
)) {
1094 cfs_critical("internal error - can't delete entry");
1112 char *nodename
= NULL
;
1113 char *dirname
= NULL
;
1117 guint32 from_vmid
= 0;
1119 int from_vmtype
= 0;
1120 char *from_node
= NULL
;
1122 g_mutex_lock (&memdb
->mutex
);
1124 if (memdb
->errors
) {
1129 memdb_tree_entry_t
*from_te
, *from_parent
;
1130 memdb_tree_entry_t
*to_te
, *to_parent
;
1131 memdb_tree_entry_t
*target_te
, *target_parent
;
1133 guint64 delete_inode
= 0;
1135 if (!(from_te
= memdb_lookup_path(memdb
, from
, &from_parent
))) {
1140 if (!from_parent
) { /* can't rename root */
1145 from_node
= path_contain_vm_config(from
, &from_vmtype
, &from_vmid
);
1147 if (from_te
->type
== DT_REG
&& (nodename
= path_contain_vm_config(to
, &vmtype
, &vmid
))) {
1148 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
1149 if (!(from_node
&& vmid
== from_vmid
)) {
1156 /* we do not allow rename for locks */
1157 if (from_te
->type
== DT_DIR
&& path_is_lockdir(from
)) {
1162 if ((to_te
= memdb_lookup_path(memdb
, to
, &to_parent
))) {
1164 if ((ret
= unlink_tree_entry(memdb
, to_parent
, to_te
)) != 0)
1167 base
= strdup(to_te
->name
);
1169 delete_inode
= to_te
->inode
;
1171 target_te
= to_parent
;
1173 memdb_tree_entry_free(to_te
);
1177 split_path(to
, &dirname
, &base
);
1179 if (!(target_te
= memdb_lookup_path(memdb
, dirname
, &target_parent
))) {
1184 if (target_te
->type
!= DT_DIR
) {
1190 record_memdb_change(from
);
1191 record_memdb_change(to
);
1193 /* NOTE: unlink_tree_entry() makes sure that we can only
1194 rename empty directories */
1196 if ((ret
= unlink_tree_entry(memdb
, from_parent
, from_te
)) != 0)
1199 memdb
->root
->version
++;
1200 memdb
->root
->mtime
= mtime
;
1201 memdb
->root
->writer
= writer
;
1203 memdb_tree_entry_t
*new = memdb_tree_entry_new(base
);
1204 new->parent
= target_te
->inode
;
1205 new->inode
= from_te
->inode
;
1206 new->version
= memdb
->root
->version
;
1207 new->writer
= writer
;
1209 new->size
= from_te
->size
;
1210 new->type
= from_te
->type
;
1211 new->data
= from_te
->data
;
1215 g_hash_table_replace(target_te
->data
.entries
, new->name
, new);
1216 g_hash_table_replace(memdb
->index
, &new->inode
, new);
1218 if (bdb_backend_write(memdb
->bdb
, new->inode
, new->parent
,
1219 new->version
, new->writer
, new->mtime
,
1220 new->size
, new->type
, new->name
,
1221 new->data
.value
, delete_inode
)) {
1227 if (new->type
== DT_REG
) {
1230 vmlist_delete_vm(from_vmid
);
1233 vmlist_register_vm(vmtype
, vmid
, nodename
);
1235 } else if (new->type
== DT_DIR
) {
1236 /* directories are always empty (see unlink_tree_entry) */
1242 g_mutex_unlock (&memdb
->mutex
);
1259 memdb_tree_entry_t
*te
, *parent
;
1261 g_mutex_lock (&memdb
->mutex
);
1265 if (memdb
->errors
) {
1270 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1275 if (!parent
) { /* cant remove root */
1280 if (te
->type
== DT_DIR
) {
1281 if (g_hash_table_size(te
->data
.entries
)) {
1286 g_hash_table_remove(memdb
->locks
, path
);
1289 record_memdb_change(path
);
1291 if ((ret
= unlink_tree_entry(memdb
, parent
, te
)) != 0)
1294 memdb
->root
->version
++;
1295 memdb
->root
->mtime
= mtime
;
1296 memdb
->root
->writer
= writer
;
1298 if (bdb_backend_write(memdb
->bdb
, 0, 0, memdb
->root
->version
, writer
, mtime
, 0,
1299 DT_REG
, NULL
, NULL
, te
->inode
)) {
1301 memdb_tree_entry_free(te
);
1306 memdb_tree_entry_free(te
);
1311 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
1313 vmlist_delete_vm(vmid
);
1319 g_mutex_unlock (&memdb
->mutex
);
1327 struct statvfs
*stbuf
)
1329 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
1330 g_return_val_if_fail(stbuf
!= NULL
, -EINVAL
);
1332 g_mutex_lock (&memdb
->mutex
);
1334 GHashTableIter iter
;
1335 gpointer key
, value
;
1340 g_hash_table_iter_init (&iter
, memdb
->index
);
1342 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1343 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)value
;
1348 g_mutex_unlock (&memdb
->mutex
);
1350 stbuf
->f_bsize
= MEMDB_BLOCKSIZE
;
1351 stbuf
->f_blocks
= MEMDB_BLOCKS
;
1352 stbuf
->f_bfree
= stbuf
->f_bavail
= stbuf
->f_blocks
-
1353 ((size
+ stbuf
->f_bsize
- 1)/stbuf
->f_bsize
);
1354 stbuf
->f_files
= MEMDB_MAX_INODES
;
1355 stbuf
->f_ffree
= stbuf
->f_files
- files
;
1357 stbuf
->f_namemax
= 256;
1363 tree_entry_debug(memdb_tree_entry_t
*te
)
1365 g_return_if_fail(te
!= NULL
);
1367 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1369 cfs_debug("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1370 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1371 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1375 tree_entry_print(memdb_tree_entry_t
*te
)
1377 g_return_if_fail(te
!= NULL
);
1379 g_print("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1380 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1381 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1385 memdb_dump(memdb_t
*memdb
)
1387 g_return_if_fail(memdb
!= NULL
);
1389 g_mutex_lock (&memdb
->mutex
);
1391 GList
*list
= g_hash_table_get_values(memdb
->index
);
1393 list
= g_list_sort(list
, memdb_tree_compare
);
1395 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1396 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1400 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1402 tree_entry_print(te
);
1409 g_mutex_unlock (&memdb
->mutex
);
1413 memdb_dump_index (memdb_index_t
*idx
)
1415 g_return_if_fail(idx
!= NULL
);
1417 g_print ("INDEX DUMP %016" PRIX64
"\n", idx
->version
);
1420 for (i
= 0; i
< idx
->size
; i
++) {
1421 g_print ("%016" PRIX64
" %016" PRIX64
"%016" PRIX64
"%016" PRIX64
"%016" PRIX64
"\n", idx
->entries
[i
].inode
,
1422 *((guint64
*)idx
->entries
[i
].digest
),
1423 *((guint64
*)(idx
->entries
[i
].digest
+ 8)),
1424 *((guint64
*)(idx
->entries
[i
].digest
+ 16)),
1425 *((guint64
*)(idx
->entries
[i
].digest
+ 24)));
1430 memdb_index_copy(memdb_index_t
*idx
)
1432 g_return_val_if_fail(idx
!= NULL
, NULL
);
1434 int bytes
= sizeof(memdb_index_t
) + idx
->size
*sizeof(memdb_index_extry_t
);
1435 if (idx
->bytes
!= bytes
) {
1436 cfs_critical("memdb index contains wrong number of bytes");
1440 memdb_index_t
*copy
= (memdb_index_t
*)g_memdup(idx
, bytes
);
1446 memdb_tree_entry_csum(
1447 memdb_tree_entry_t
*te
,
1450 g_return_val_if_fail(te
!= NULL
, FALSE
);
1451 g_return_val_if_fail(csum
!= NULL
, FALSE
);
1453 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1455 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1456 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1457 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1458 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1459 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1460 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1461 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1462 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1464 if (te
->type
== DT_REG
&& te
->size
)
1465 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1467 size_t csum_len
= 32;
1468 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1469 g_checksum_free(sha256
);
1475 memdb_compute_checksum(
1477 memdb_tree_entry_t
*root
,
1481 g_return_val_if_fail(index
!= NULL
, FALSE
);
1482 g_return_val_if_fail(root
!= NULL
, FALSE
);
1484 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1486 GList
*list
= g_hash_table_get_values(index
);
1488 list
= g_list_sort(list
, memdb_tree_compare
);
1492 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1494 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1495 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1496 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1497 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1498 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1499 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1500 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1501 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1503 if (te
->type
== DT_REG
&& te
->size
)
1504 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1511 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1513 cfs_debug("checksum: %s", g_checksum_get_string(sha256
));
1515 g_checksum_free(sha256
);
1523 memdb_tree_entry_t
*root
)
1525 g_return_val_if_fail(index
!= NULL
, NULL
);
1526 g_return_val_if_fail(root
!= NULL
, NULL
);
1528 memdb_index_t
*idx
= NULL
;
1530 int count
= g_hash_table_size(index
);
1532 cfs_critical("memdb index has no entires");
1536 int bytes
= sizeof(memdb_index_t
) + count
*sizeof(memdb_index_extry_t
);
1537 idx
= g_malloc0(bytes
);
1541 idx
->version
= root
->version
;
1542 idx
->mtime
= root
->mtime
;
1543 idx
->writer
= root
->writer
;
1545 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1547 GList
*list
= g_hash_table_get_values(index
);
1549 list
= g_list_sort(list
, memdb_tree_compare
);
1554 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1556 if (te
->inode
> idx
->last_inode
)
1557 idx
->last_inode
= te
->inode
;
1559 idx
->entries
[ind
].inode
= te
->inode
;
1561 g_checksum_reset (sha256
);
1563 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1564 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1565 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1566 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1567 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1568 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1569 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1571 if (te
->type
== DT_REG
&& te
->size
)
1572 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1575 g_checksum_get_digest(sha256
, (guint8
*)idx
->entries
[ind
].digest
, &len
);
1584 g_checksum_free(sha256
);