]>
git.proxmox.com Git - pve-cluster.git/blob - data/src/memdb.c
56044b4b1a1bfecb9a84012265c8a90e559f87f5
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Author: Dietmar Maurer <dietmar@proxmox.com>
23 #endif /* HAVE_CONFIG_H */
28 #include <sys/types.h>
38 #include "cfs-utils.h"
42 #define CFS_LOCK_TIMEOUT (60*2)
48 g_return_val_if_fail(name
!= NULL
, NULL
);
50 memdb_tree_entry_t
*te
= g_malloc0(sizeof(memdb_tree_entry_t
) + strlen(name
) + 1);
51 g_return_val_if_fail(te
!= NULL
, NULL
);
53 strcpy(te
->name
, name
);
59 memdb_tree_entry_copy(
60 memdb_tree_entry_t
*te
,
63 g_return_val_if_fail(te
!= NULL
, NULL
);
65 memdb_tree_entry_t
*cpy
= memdb_tree_entry_new(te
->name
);
67 cpy
->parent
= te
->parent
;
68 cpy
->inode
= te
->inode
;
69 cpy
->version
= te
->version
;
70 cpy
->writer
= te
->writer
;
71 cpy
->mtime
= te
->mtime
;
75 if (with_data
&& te
->size
&& te
->type
== DT_REG
) {
76 cpy
->data
.value
= g_memdup(te
->data
.value
, te
->size
);
78 cpy
->data
.value
= NULL
;
85 memdb_tree_entry_free(
86 memdb_tree_entry_t
*te
)
88 g_return_if_fail(te
!= NULL
);
90 if (te
->type
== DT_REG
) {
92 g_free(te
->data
.value
);
95 if (te
->type
== DT_DIR
) {
97 g_hash_table_destroy(te
->data
.entries
);
104 memdb_lock_info_free(memdb_lock_info_t
*li
)
106 g_return_if_fail(li
!= NULL
);
119 guint64 a
= ((const memdb_tree_entry_t
*)v1
)->inode
;
120 guint64 b
= ((const memdb_tree_entry_t
*)v2
)->inode
;
137 char *dup
= g_strdup (path
);
138 int len
= strlen (dup
) - 1;
139 while (len
>= 0 && dup
[len
] == '/') dup
[len
--] = 0;
141 char *dn
= g_path_get_dirname (dup
);
142 char *bn
= g_path_get_basename (dup
);
150 static memdb_tree_entry_t
*
151 memdb_lookup_dir_entry(
154 memdb_tree_entry_t
*parent
)
157 g_return_val_if_fail(memdb
!= NULL
, NULL
);
158 g_return_val_if_fail(name
!= NULL
, NULL
);
159 g_return_val_if_fail(parent
!= NULL
, NULL
);
160 g_return_val_if_fail(parent
->type
== DT_DIR
, NULL
);
162 GHashTable
*ht
= parent
->data
.entries
;
164 g_return_val_if_fail(ht
!= NULL
, NULL
);
166 return g_hash_table_lookup(ht
, name
);
169 static memdb_tree_entry_t
*
173 memdb_tree_entry_t
**parent
)
175 g_return_val_if_fail(memdb
!= NULL
, NULL
);
176 g_return_val_if_fail(path
!= NULL
, NULL
);
177 g_return_val_if_fail(parent
!= NULL
, NULL
);
179 memdb_tree_entry_t
*cdir
= memdb
->root
;
182 if (path
[0] == 0 || ((path
[0] == '.' || path
[0] == '/') && path
[1] == 0))
185 gchar
**set
= g_strsplit_set(path
, "/", 0);
190 while ((name
= set
[i
++])) {
192 if (name
[0] == 0) continue;
195 if ((cdir
= memdb_lookup_dir_entry(memdb
, name
, cdir
)) == NULL
)
210 if (!name
|| name
[0] < '1' || name
[0] > '9')
215 errno
= 0; /* see man strtoul */
217 unsigned long int vmid
= strtoul(name
, &end
, 10);
219 if (!end
|| end
[0] != '.' || end
[1] != 'c'|| end
[2] != 'o' || end
[3] != 'n' ||
220 end
[4] != 'f' || end
[5] != 0 || errno
!= 0 || vmid
> G_MAXUINT32
)
224 *vmid_ret
= (guint32
)vmid
;
231 const char *nodename
)
233 g_return_val_if_fail(nodename
!= NULL
, FALSE
);
235 /* LDH rule (letters, digits, hyphen) */
237 int len
= strlen(nodename
);
243 for (int i
= 0; i
< len
; i
++) {
244 char c
= nodename
[i
];
245 if ((c
>= 'A' && c
<= 'Z') ||
246 (c
>= 'a' && c
<= 'z') ||
247 (c
>= '0' && c
<= '9') ||
248 (i
!= 0 && i
!= (len
-1) && c
== '-'))
257 dir_contain_vm_config(
264 if (strncmp(dirname
, "nodes/", 6) != 0)
269 char *nodename
= NULL
;
271 char **sa
= g_strsplit(dirname
, "/", 2);
272 if (sa
[0] && sa
[1] && valid_nodename(sa
[0])) {
273 if (strcmp(sa
[1], "qemu-server") == 0) {
274 *vmtype_ret
= VMTYPE_QEMU
;
275 nodename
= g_strdup(sa
[0]);
276 } else if (strcmp(sa
[1], "openvz") == 0) {
277 *vmtype_ret
= VMTYPE_OPENVZ
;
278 nodename
= g_strdup(sa
[0]);
279 } else if (strcmp(sa
[1], "lxc") == 0) {
280 *vmtype_ret
= VMTYPE_LXC
;
281 nodename
= g_strdup(sa
[0]);
291 path_contain_vm_config(
299 char *dirname
= NULL
;
301 char *nodename
= NULL
;
303 split_path(path
, &dirname
, &base
);
305 if (name_is_vm_config(base
, vmid_ret
))
306 nodename
= dir_contain_vm_config(dirname
, vmtype_ret
);
318 const char *nodename
,
320 memdb_tree_entry_t
*subdir
)
322 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
323 g_return_val_if_fail(vmlist
!= NULL
, FALSE
);
324 g_return_val_if_fail(subdir
!= NULL
, FALSE
);
325 g_return_val_if_fail(subdir
->type
== DT_DIR
, FALSE
);
326 g_return_val_if_fail(subdir
->data
.entries
!= NULL
, FALSE
);
330 GHashTable
*ht
= subdir
->data
.entries
;
334 g_hash_table_iter_init (&iter
, ht
);
336 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
338 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
340 if (node_te
->type
!= DT_REG
)
344 if (!name_is_vm_config(node_te
->name
, &vmid
))
347 if (!vmlist_hash_insert_vm(vmlist
, vmtype
, vmid
, nodename
, FALSE
))
359 const guchar csum
[32])
361 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
362 g_return_val_if_fail(memdb
->locks
!= NULL
, FALSE
);
363 g_return_val_if_fail(path
!= NULL
, FALSE
);
364 g_return_val_if_fail(csum
!= NULL
, FALSE
);
366 memdb_lock_info_t
*li
;
367 uint32_t ctime
= time(NULL
);
369 if ((li
= g_hash_table_lookup(memdb
->locks
, path
))) {
370 if (memcmp(csum
, li
->csum
, 32) != 0) {
372 memcpy(li
->csum
, csum
, 32);
373 g_critical("wrong lock csum - reset timeout");
376 if ((ctime
> li
->ltime
) && ((ctime
- li
->ltime
) > CFS_LOCK_TIMEOUT
))
379 li
= g_new0(memdb_lock_info_t
, 1);
380 li
->path
= g_strdup(path
);
382 memcpy(li
->csum
, csum
, 32);
383 g_hash_table_replace(memdb
->locks
, li
->path
, li
);
390 memdb_update_locks(memdb_t
*memdb
)
392 g_return_if_fail(memdb
!= NULL
);
393 g_return_if_fail(memdb
->locks
!= NULL
);
395 memdb_tree_entry_t
*te
, *parent
;
397 if (!(te
= memdb_lookup_path(memdb
, "priv/lock", &parent
)))
400 if (te
->type
!= DT_DIR
)
404 GHashTable
*old
= memdb
->locks
;
405 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
406 (GDestroyNotify
)memdb_lock_info_free
);
408 GHashTable
*ht
= te
->data
.entries
;
412 g_hash_table_iter_init (&iter
, ht
);
413 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
415 memdb_tree_entry_t
*lock_te
= (memdb_tree_entry_t
*)value
;
416 if (lock_te
->type
!= DT_DIR
)
419 memdb_lock_info_t
*li
;
420 li
= g_new0(memdb_lock_info_t
, 1);
421 li
->path
= g_strdup_printf("priv/lock/%s", lock_te
->name
);
424 if (memdb_tree_entry_csum(lock_te
, csum
)) {
425 memcpy(li
->csum
, csum
, 32);
426 memdb_lock_info_t
*oldli
;
427 if ((oldli
= g_hash_table_lookup(memdb
->locks
, lock_te
->name
)) &&
428 (memcmp(csum
, oldli
->csum
, 32) == 0)) {
429 li
->ltime
= oldli
->ltime
;
431 li
->ltime
= time(NULL
);
433 g_hash_table_insert(memdb
->locks
, li
->path
, li
);
435 memdb_lock_info_free(li
);
440 g_hash_table_destroy(old
);
445 memdb_recreate_vmlist(
448 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
450 memdb_tree_entry_t
*te
, *parent
;
452 if (!(te
= memdb_lookup_path(memdb
, "nodes", &parent
)))
455 if (te
->type
!= DT_DIR
)
458 GHashTable
*vmlist
= vmlist_hash_new();
460 GHashTable
*ht
= te
->data
.entries
;
467 g_hash_table_iter_init (&iter
, ht
);
469 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
471 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
472 if (node_te
->type
!= DT_DIR
)
475 if (!valid_nodename(node_te
->name
))
478 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "qemu-server"))) {
479 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_QEMU
, te
))
482 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "openvz"))) {
483 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_OPENVZ
, te
))
486 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "lxc"))) {
487 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_LXC
, te
))
492 /* always update list - even if we detected duplicates */
493 cfs_status_set_vmlist(vmlist
);
499 memdb_open(const char *dbfilename
)
501 memdb_t
*memdb
= g_new0(memdb_t
, 1);
503 g_mutex_init(&memdb
->mutex
);
505 memdb
->dbfilename
= g_strdup(dbfilename
);
507 memdb
->root
= memdb_tree_entry_new("");
508 memdb
->root
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
509 memdb
->root
->type
= DT_DIR
;
511 memdb
->index
= g_hash_table_new_full(g_int64_hash
, g_int64_equal
, NULL
,
512 (GDestroyNotify
)memdb_tree_entry_free
);
514 g_hash_table_replace(memdb
->index
, &memdb
->root
->inode
, memdb
->root
);
516 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
517 (GDestroyNotify
)memdb_lock_info_free
);
519 if (!(memdb
->bdb
= bdb_backend_open(dbfilename
, memdb
->root
, memdb
->index
))) {
524 record_memdb_reload();
526 if (!memdb_recreate_vmlist(memdb
)) {
531 memdb_update_locks(memdb
);
533 cfs_debug("memdb open '%s' successful (version = %016" PRIX64
")",
534 dbfilename
, memdb
->root
->version
);
540 memdb_close(memdb_t
*memdb
)
542 g_return_if_fail(memdb
!= NULL
);
544 g_mutex_lock (&memdb
->mutex
);
547 bdb_backend_close(memdb
->bdb
);
550 g_hash_table_destroy(memdb
->index
);
553 g_hash_table_destroy(memdb
->locks
);
555 if (memdb
->dbfilename
)
556 g_free(memdb
->dbfilename
);
560 memdb
->dbfilename
= NULL
;
562 g_mutex_unlock (&memdb
->mutex
);
564 g_mutex_clear (&memdb
->mutex
);
575 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
576 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
580 char *dirname
= NULL
;
583 g_mutex_lock (&memdb
->mutex
);
590 split_path(path
, &dirname
, &base
);
592 memdb_tree_entry_t
*parent
, *unused
;
594 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
599 if (parent
->type
!= DT_DIR
) {
604 /* do not allow '.' and '..' */
605 if ((base
[0] == 0) ||
606 (base
[0] == '.' && base
[1] == 0) ||
607 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
612 memdb_tree_entry_t
*te
;
613 if ((te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
618 memdb
->root
->version
++;
619 memdb
->root
->mtime
= mtime
;
620 memdb
->root
->writer
= writer
;
622 te
= memdb_tree_entry_new(base
);
623 te
->parent
= parent
->inode
;
624 te
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
625 te
->inode
= te
->version
= memdb
->root
->version
;
630 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
631 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
633 cfs_debug("memdb_mkdir %s %s %016" PRIX64
, dirname
, base
, memdb
->root
->version
);
635 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
636 te
->writer
, te
->mtime
, 0, DT_DIR
, te
->name
, NULL
, 0)) {
642 if (strcmp(dirname
, "priv/lock") == 0) {
643 g_hash_table_remove(memdb
->locks
, path
);
645 if (memdb_tree_entry_csum(te
, csum
)) {
646 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
653 g_mutex_unlock (&memdb
->mutex
);
667 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
668 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
669 g_return_val_if_fail(data_ret
!= NULL
, -EINVAL
);
671 memdb_tree_entry_t
*te
, *parent
;
673 g_mutex_lock (&memdb
->mutex
);
675 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
676 if (te
->type
== DT_REG
) {
677 *data_ret
= g_memdup(te
->data
.value
, te
->size
);
678 guint32 size
= te
->size
;
679 g_mutex_unlock (&memdb
->mutex
);
684 g_mutex_unlock (&memdb
->mutex
);
701 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
702 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
703 g_return_val_if_fail(count
== 0 || data
!= NULL
, -EINVAL
);
707 char *dirname
= NULL
;
709 char *nodename
= NULL
;
711 g_mutex_lock (&memdb
->mutex
);
718 if ((offset
+ count
) > MEMDB_MAX_FILE_SIZE
) {
723 split_path(path
, &dirname
, &base
);
725 memdb_tree_entry_t
*parent
, *unused
;
726 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
730 if (parent
->type
!= DT_DIR
) {
735 /* do not allow '.' and '..' */
736 if ((base
[0] == 0) ||
737 (base
[0] == '.' && base
[1] == 0) ||
738 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
746 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
747 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
753 gpointer olddata
= NULL
;
755 memdb_tree_entry_t
*te
, *old
;
756 if ((old
= te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
757 if (te
->type
!= DT_REG
) {
767 memdb
->root
->version
++;
768 memdb
->root
->mtime
= mtime
;
769 memdb
->root
->writer
= writer
;
771 olddata
= te
->data
.value
;
779 memdb
->root
->version
++;
780 memdb
->root
->mtime
= mtime
;
781 memdb
->root
->writer
= writer
;
783 te
= memdb_tree_entry_new(base
);
784 te
->parent
= parent
->inode
;
786 te
->inode
= memdb
->root
->version
;
789 te
->version
= memdb
->root
->version
;
793 size_t newsize
= offset
+ count
;
795 gpointer newdata
= NULL
;
799 if (newsize
> te
->size
) {
800 newdata
= g_malloc0(newsize
);
801 memcpy(newdata
, olddata
, te
->size
);
808 newdata
= g_malloc0(newsize
);
809 memcpy(newdata
, olddata
, newsize
);
813 memcpy(newdata
+ offset
, data
, count
);
818 newdata
= g_malloc0(newsize
);
819 memcpy(newdata
+ offset
, data
, count
);
824 te
->data
.value
= newdata
;
829 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
830 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
833 record_memdb_change(path
);
835 cfs_debug("memdb_pwrite %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
837 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
838 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
839 te
->data
.value
, 0)) {
846 vmlist_register_vm(vmtype
, vmid
, nodename
);
851 g_mutex_unlock (&memdb
->mutex
);
867 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
868 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
872 char *dirname
= NULL
;
875 g_mutex_lock (&memdb
->mutex
);
882 split_path(path
, &dirname
, &base
);
884 memdb_tree_entry_t
*parent
, *unused
;
885 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
889 if (parent
->type
!= DT_DIR
) {
894 /* do not allow '.' and '..' */
895 if ((base
[0] == 0) ||
896 (base
[0] == '.' && base
[1] == 0) ||
897 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
902 memdb_tree_entry_t
*te
;
903 if (!(te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
908 int is_lock
= (strcmp(dirname
, "priv/lock") == 0) && (te
->type
== DT_DIR
);
910 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
911 * allow to change mtime for locks (only if mtime is newer).
912 * See README for details about locks.
915 if (mtime
< te
->mtime
) {
916 cfs_debug("dir is locked");
920 /* only allow lock updates if the writer is the same */
921 if (te
->writer
!= writer
) {
928 memdb
->root
->version
++;
929 memdb
->root
->mtime
= mtime
;
930 memdb
->root
->writer
= writer
;
932 te
->version
= memdb
->root
->version
;
936 record_memdb_change(path
);
938 cfs_debug("memdb_mtime %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
940 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
941 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
942 te
->data
.value
, 0)) {
949 cfs_debug("update cfs lock");
950 g_hash_table_remove(memdb
->locks
, path
);
952 if (memdb_tree_entry_csum(te
, csum
)) {
953 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
960 g_mutex_unlock (&memdb
->mutex
);
975 return memdb_pwrite(memdb
, path
, writer
, mtime
, NULL
, 0, 0, FALSE
, TRUE
);
989 return memdb_pwrite(memdb
, path
, writer
, mtime
, data
, count
, offset
, truncate
, FALSE
);
997 memdb_tree_entry_t
*te
, *parent
;
999 g_mutex_lock (&memdb
->mutex
);
1001 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1003 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1005 g_mutex_unlock (&memdb
->mutex
);
1010 g_mutex_unlock (&memdb
->mutex
);
1020 g_return_val_if_fail(memdb
!= NULL
, NULL
);
1021 g_return_val_if_fail(path
!= NULL
, NULL
);
1023 memdb_tree_entry_t
*te
, *parent
;
1027 g_mutex_lock (&memdb
->mutex
);
1029 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
)))
1032 if (te
->type
!= DT_DIR
)
1035 GHashTable
*ht
= te
->data
.entries
;
1037 GHashTableIter iter
;
1038 gpointer key
, value
;
1040 g_hash_table_iter_init (&iter
, ht
);
1042 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1044 te
= (memdb_tree_entry_t
*)value
;
1046 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1048 list
= g_list_append(list
, cpy
);
1052 g_mutex_unlock (&memdb
->mutex
);
1058 memdb_dirlist_free(GList
*dirlist
)
1070 g_list_free(dirlist
);
1076 memdb_tree_entry_t
*parent
,
1077 memdb_tree_entry_t
*te
)
1079 g_return_val_if_fail(parent
!= NULL
, -EACCES
);
1080 g_return_val_if_fail(parent
->inode
== te
->parent
, -EACCES
);
1082 if (te
->type
== DT_DIR
)
1083 if (g_hash_table_size(te
->data
.entries
))
1086 if (!g_hash_table_steal(parent
->data
.entries
, te
->name
)) {
1087 cfs_critical("internal error - can't delete entry");
1092 if (!g_hash_table_steal(memdb
->index
, &te
->inode
)) {
1093 cfs_critical("internal error - can't delete entry");
1111 char *nodename
= NULL
;
1112 char *dirname
= NULL
;
1116 guint32 from_vmid
= 0;
1118 int from_vmtype
= 0;
1119 char *from_node
= NULL
;
1121 g_mutex_lock (&memdb
->mutex
);
1123 if (memdb
->errors
) {
1128 memdb_tree_entry_t
*from_te
, *from_parent
;
1129 memdb_tree_entry_t
*to_te
, *to_parent
;
1130 memdb_tree_entry_t
*target_te
, *target_parent
;
1132 guint64 delete_inode
= 0;
1134 if (!(from_te
= memdb_lookup_path(memdb
, from
, &from_parent
))) {
1139 if (!from_parent
) { /* can't rename root */
1144 from_node
= path_contain_vm_config(from
, &from_vmtype
, &from_vmid
);
1146 if (from_te
->type
== DT_REG
&& (nodename
= path_contain_vm_config(to
, &vmtype
, &vmid
))) {
1147 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
1148 if (!(from_node
&& vmid
== from_vmid
)) {
1155 /* we do not allow rename for locks */
1156 if (from_te
->type
== DT_DIR
&& path_is_lockdir(from
)) {
1161 if ((to_te
= memdb_lookup_path(memdb
, to
, &to_parent
))) {
1163 if ((ret
= unlink_tree_entry(memdb
, to_parent
, to_te
)) != 0)
1166 base
= strdup(to_te
->name
);
1168 delete_inode
= to_te
->inode
;
1170 target_te
= to_parent
;
1172 memdb_tree_entry_free(to_te
);
1176 split_path(to
, &dirname
, &base
);
1178 if (!(target_te
= memdb_lookup_path(memdb
, dirname
, &target_parent
))) {
1183 if (target_te
->type
!= DT_DIR
) {
1189 record_memdb_change(from
);
1190 record_memdb_change(to
);
1192 /* NOTE: unlink_tree_entry() make sure that we can only
1193 rename emtpy directories */
1195 if ((ret
= unlink_tree_entry(memdb
, from_parent
, from_te
)) != 0)
1198 memdb
->root
->version
++;
1199 memdb
->root
->mtime
= mtime
;
1200 memdb
->root
->writer
= writer
;
1202 memdb_tree_entry_t
*new = memdb_tree_entry_new(base
);
1203 new->parent
= target_te
->inode
;
1204 new->inode
= from_te
->inode
;
1205 new->version
= memdb
->root
->version
;
1206 new->writer
= writer
;
1208 new->size
= from_te
->size
;
1209 new->type
= from_te
->type
;
1210 new->data
= from_te
->data
;
1214 g_hash_table_replace(target_te
->data
.entries
, new->name
, new);
1215 g_hash_table_replace(memdb
->index
, &new->inode
, new);
1217 if (bdb_backend_write(memdb
->bdb
, new->inode
, new->parent
,
1218 new->version
, new->writer
, new->mtime
,
1219 new->size
, new->type
, new->name
,
1220 new->data
.value
, delete_inode
)) {
1226 if (new->type
== DT_REG
) {
1229 vmlist_delete_vm(from_vmid
);
1232 vmlist_register_vm(vmtype
, vmid
, nodename
);
1234 } else if (new->type
== DT_DIR
) {
1235 /* directories are alwayse empty (see unlink_tree_entry) */
1241 g_mutex_unlock (&memdb
->mutex
);
1258 memdb_tree_entry_t
*te
, *parent
;
1260 g_mutex_lock (&memdb
->mutex
);
1264 if (memdb
->errors
) {
1269 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1274 if (!parent
) { /* cant remove root */
1279 if (te
->type
== DT_DIR
) {
1280 if (g_hash_table_size(te
->data
.entries
)) {
1285 g_hash_table_remove(memdb
->locks
, path
);
1288 record_memdb_change(path
);
1290 if ((ret
= unlink_tree_entry(memdb
, parent
, te
)) != 0)
1293 memdb
->root
->version
++;
1294 memdb
->root
->mtime
= mtime
;
1295 memdb
->root
->writer
= writer
;
1297 if (bdb_backend_write(memdb
->bdb
, 0, 0, memdb
->root
->version
, writer
, mtime
, 0,
1298 DT_REG
, NULL
, NULL
, te
->inode
)) {
1300 memdb_tree_entry_free(te
);
1305 memdb_tree_entry_free(te
);
1310 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
1312 vmlist_delete_vm(vmid
);
1318 g_mutex_unlock (&memdb
->mutex
);
1326 struct statvfs
*stbuf
)
1328 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
1329 g_return_val_if_fail(stbuf
!= NULL
, -EINVAL
);
1331 g_mutex_lock (&memdb
->mutex
);
1333 GHashTableIter iter
;
1334 gpointer key
, value
;
1339 g_hash_table_iter_init (&iter
, memdb
->index
);
1341 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1342 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)value
;
1347 g_mutex_unlock (&memdb
->mutex
);
1349 stbuf
->f_bsize
= MEMDB_BLOCKSIZE
;
1350 stbuf
->f_blocks
= MEMDB_BLOCKS
;
1351 stbuf
->f_bfree
= stbuf
->f_bavail
= stbuf
->f_blocks
-
1352 ((size
+ stbuf
->f_bsize
- 1)/stbuf
->f_bsize
);
1353 stbuf
->f_files
= MEMDB_MAX_INODES
;
1354 stbuf
->f_ffree
= stbuf
->f_files
- files
;
1356 stbuf
->f_namemax
= 256;
1362 tree_entry_debug(memdb_tree_entry_t
*te
)
1364 g_return_if_fail(te
!= NULL
);
1366 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1368 cfs_debug("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1369 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1370 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1374 tree_entry_print(memdb_tree_entry_t
*te
)
1376 g_return_if_fail(te
!= NULL
);
1378 g_print("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1379 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1380 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1384 memdb_dump(memdb_t
*memdb
)
1386 g_return_if_fail(memdb
!= NULL
);
1388 g_mutex_lock (&memdb
->mutex
);
1390 GList
*list
= g_hash_table_get_values(memdb
->index
);
1392 list
= g_list_sort(list
, memdb_tree_compare
);
1394 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1395 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1399 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1401 tree_entry_print(te
);
1408 g_mutex_unlock (&memdb
->mutex
);
1412 memdb_dump_index (memdb_index_t
*idx
)
1414 g_return_if_fail(idx
!= NULL
);
1416 g_print ("INDEX DUMP %016" PRIX64
"\n", idx
->version
);
1419 for (i
= 0; i
< idx
->size
; i
++) {
1420 g_print ("%016" PRIX64
" %016" PRIX64
"%016" PRIX64
"%016" PRIX64
"%016" PRIX64
"\n", idx
->entries
[i
].inode
,
1421 *((guint64
*)idx
->entries
[i
].digest
),
1422 *((guint64
*)(idx
->entries
[i
].digest
+ 8)),
1423 *((guint64
*)(idx
->entries
[i
].digest
+ 16)),
1424 *((guint64
*)(idx
->entries
[i
].digest
+ 24)));
1429 memdb_index_copy(memdb_index_t
*idx
)
1431 g_return_val_if_fail(idx
!= NULL
, NULL
);
1433 int bytes
= sizeof(memdb_index_t
) + idx
->size
*sizeof(memdb_index_extry_t
);
1434 if (idx
->bytes
!= bytes
) {
1435 cfs_critical("memdb index contains wrong number of bytes");
1439 memdb_index_t
*copy
= (memdb_index_t
*)g_memdup(idx
, bytes
);
1445 memdb_tree_entry_csum(
1446 memdb_tree_entry_t
*te
,
1449 g_return_val_if_fail(te
!= NULL
, FALSE
);
1450 g_return_val_if_fail(csum
!= NULL
, FALSE
);
1452 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1454 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1455 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1456 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1457 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1458 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1459 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1460 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1461 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1463 if (te
->type
== DT_REG
&& te
->size
)
1464 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1466 size_t csum_len
= 32;
1467 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1468 g_checksum_free(sha256
);
1474 memdb_compute_checksum(
1476 memdb_tree_entry_t
*root
,
1480 g_return_val_if_fail(index
!= NULL
, FALSE
);
1481 g_return_val_if_fail(root
!= NULL
, FALSE
);
1483 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1485 GList
*list
= g_hash_table_get_values(index
);
1487 list
= g_list_sort(list
, memdb_tree_compare
);
1491 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1493 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1494 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1495 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1496 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1497 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1498 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1499 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1500 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1502 if (te
->type
== DT_REG
&& te
->size
)
1503 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1510 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1512 cfs_debug("checksum: %s", g_checksum_get_string(sha256
));
1514 g_checksum_free(sha256
);
1522 memdb_tree_entry_t
*root
)
1524 g_return_val_if_fail(index
!= NULL
, NULL
);
1525 g_return_val_if_fail(root
!= NULL
, NULL
);
1527 memdb_index_t
*idx
= NULL
;
1529 int count
= g_hash_table_size(index
);
1531 cfs_critical("memdb index has no entires");
1535 int bytes
= sizeof(memdb_index_t
) + count
*sizeof(memdb_index_extry_t
);
1536 idx
= g_malloc0(bytes
);
1540 idx
->version
= root
->version
;
1541 idx
->mtime
= root
->mtime
;
1542 idx
->writer
= root
->writer
;
1544 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1546 GList
*list
= g_hash_table_get_values(index
);
1548 list
= g_list_sort(list
, memdb_tree_compare
);
1553 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1555 if (te
->inode
> idx
->last_inode
)
1556 idx
->last_inode
= te
->inode
;
1558 idx
->entries
[ind
].inode
= te
->inode
;
1560 g_checksum_reset (sha256
);
1562 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1563 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1564 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1565 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1566 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1567 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1568 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1570 if (te
->type
== DT_REG
&& te
->size
)
1571 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1574 g_checksum_get_digest(sha256
, (guint8
*)idx
->entries
[ind
].digest
, &len
);
1583 g_checksum_free(sha256
);