]>
git.proxmox.com Git - pve-cluster.git/blob - data/src/memdb.c
2 Copyright (C) 2010 Proxmox Server Solutions GmbH
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Author: Dietmar Maurer <dietmar@proxmox.com>
23 #endif /* HAVE_CONFIG_H */
27 #include <sys/types.h>
37 #include "cfs-utils.h"
41 #define CFS_LOCK_TIMEOUT (60*2)
47 g_return_val_if_fail(name
!= NULL
, NULL
);
49 memdb_tree_entry_t
*te
= g_malloc0(sizeof(memdb_tree_entry_t
) + strlen(name
) + 1);
50 g_return_val_if_fail(te
!= NULL
, NULL
);
52 strcpy(te
->name
, name
);
58 memdb_tree_entry_copy(
59 memdb_tree_entry_t
*te
,
62 g_return_val_if_fail(te
!= NULL
, NULL
);
64 memdb_tree_entry_t
*cpy
= memdb_tree_entry_new(te
->name
);
66 cpy
->parent
= te
->parent
;
67 cpy
->inode
= te
->inode
;
68 cpy
->version
= te
->version
;
69 cpy
->writer
= te
->writer
;
70 cpy
->mtime
= te
->mtime
;
74 if (with_data
&& te
->size
&& te
->type
== DT_REG
) {
75 cpy
->data
.value
= g_memdup(te
->data
.value
, te
->size
);
77 cpy
->data
.value
= NULL
;
84 memdb_tree_entry_free(
85 memdb_tree_entry_t
*te
)
87 g_return_if_fail(te
!= NULL
);
89 if (te
->type
== DT_REG
) {
91 g_free(te
->data
.value
);
94 if (te
->type
== DT_DIR
) {
96 g_hash_table_destroy(te
->data
.entries
);
103 memdb_lock_info_free(memdb_lock_info_t
*li
)
105 g_return_if_fail(li
!= NULL
);
118 guint64 a
= ((const memdb_tree_entry_t
*)v1
)->inode
;
119 guint64 b
= ((const memdb_tree_entry_t
*)v2
)->inode
;
136 char *dup
= g_strdup (path
);
137 int len
= strlen (dup
) - 1;
138 while (len
>= 0 && dup
[len
] == '/') dup
[len
--] = 0;
140 char *dn
= g_path_get_dirname (dup
);
141 char *bn
= g_path_get_basename (dup
);
149 static memdb_tree_entry_t
*
150 memdb_lookup_dir_entry(
153 memdb_tree_entry_t
*parent
)
156 g_return_val_if_fail(memdb
!= NULL
, NULL
);
157 g_return_val_if_fail(name
!= NULL
, NULL
);
158 g_return_val_if_fail(parent
!= NULL
, NULL
);
159 g_return_val_if_fail(parent
->type
== DT_DIR
, NULL
);
161 GHashTable
*ht
= parent
->data
.entries
;
163 g_return_val_if_fail(ht
!= NULL
, NULL
);
165 return g_hash_table_lookup(ht
, name
);
168 static memdb_tree_entry_t
*
172 memdb_tree_entry_t
**parent
)
174 g_return_val_if_fail(memdb
!= NULL
, NULL
);
175 g_return_val_if_fail(path
!= NULL
, NULL
);
176 g_return_val_if_fail(parent
!= NULL
, NULL
);
178 memdb_tree_entry_t
*cdir
= memdb
->root
;
181 if (path
[0] == 0 || ((path
[0] == '.' || path
[0] == '/') && path
[1] == 0))
184 gchar
**set
= g_strsplit_set(path
, "/", 0);
189 while ((name
= set
[i
++])) {
191 if (name
[0] == 0) continue;
194 if ((cdir
= memdb_lookup_dir_entry(memdb
, name
, cdir
)) == NULL
)
209 if (!name
|| name
[0] < '1' || name
[0] > '9')
213 guint32 vmid
= strtoul(name
, &end
, 10);
215 if (!end
|| end
[0] != '.' || end
[1] != 'c'|| end
[2] != 'o' || end
[3] != 'n' ||
216 end
[4] != 'f' || end
[5] != 0)
227 const char *nodename
)
229 g_return_val_if_fail(nodename
!= NULL
, FALSE
);
231 /* LDH rule (letters, digits, hyphen) */
233 int len
= strlen(nodename
);
239 for (int i
= 0; i
< len
; i
++) {
240 char c
= nodename
[i
];
241 if ((c
>= 'A' && c
<= 'Z') ||
242 (c
>= 'a' && c
<= 'z') ||
243 (c
>= '0' && c
<= '9') ||
244 (i
!= 0 && i
!= (len
-1) && c
== '-'))
253 dir_contain_vm_config(
260 if (strncmp(dirname
, "nodes/", 6) != 0)
265 char *nodename
= NULL
;
267 char **sa
= g_strsplit(dirname
, "/", 2);
268 if (sa
[0] && sa
[1] && valid_nodename(sa
[0])) {
269 if (strcmp(sa
[1], "qemu-server") == 0) {
270 *vmtype_ret
= VMTYPE_QEMU
;
271 nodename
= g_strdup(sa
[0]);
272 } else if (strcmp(sa
[1], "openvz") == 0) {
273 *vmtype_ret
= VMTYPE_OPENVZ
;
274 nodename
= g_strdup(sa
[0]);
284 path_contain_vm_config(
292 char *dirname
= NULL
;
294 char *nodename
= NULL
;
296 split_path(path
, &dirname
, &base
);
298 if (name_is_vm_config(base
, vmid_ret
))
299 nodename
= dir_contain_vm_config(dirname
, vmtype_ret
);
301 if (dirname
) g_free (dirname
);
302 if (base
) g_free (base
);
311 const char *nodename
,
313 memdb_tree_entry_t
*subdir
)
315 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
316 g_return_val_if_fail(vmlist
!= NULL
, FALSE
);
317 g_return_val_if_fail(subdir
!= NULL
, FALSE
);
318 g_return_val_if_fail(subdir
->type
== DT_DIR
, FALSE
);
319 g_return_val_if_fail(subdir
->data
.entries
!= NULL
, FALSE
);
323 GHashTable
*ht
= subdir
->data
.entries
;
327 g_hash_table_iter_init (&iter
, ht
);
329 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
331 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
333 if (node_te
->type
!= DT_REG
)
337 if (!name_is_vm_config(node_te
->name
, &vmid
))
340 if (!vmlist_hash_insert_vm(vmlist
, vmtype
, vmid
, nodename
, FALSE
))
352 const guchar csum
[32])
354 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
355 g_return_val_if_fail(memdb
->locks
!= NULL
, FALSE
);
356 g_return_val_if_fail(path
!= NULL
, FALSE
);
357 g_return_val_if_fail(csum
!= NULL
, FALSE
);
359 memdb_lock_info_t
*li
;
360 uint32_t ctime
= time(NULL
);
362 if ((li
= g_hash_table_lookup(memdb
->locks
, path
))) {
363 if (memcmp(csum
, li
->csum
, 32) != 0) {
365 memcpy(li
->csum
, csum
, 32);
366 g_critical("wrong lock csum - reset timeout");
369 if ((ctime
> li
->ltime
) && ((ctime
- li
->ltime
) > CFS_LOCK_TIMEOUT
))
372 li
= g_new0(memdb_lock_info_t
, 1);
373 li
->path
= g_strdup(path
);
375 memcpy(li
->csum
, csum
, 32);
376 g_hash_table_replace(memdb
->locks
, li
->path
, li
);
383 memdb_update_locks(memdb_t
*memdb
)
385 g_return_if_fail(memdb
!= NULL
);
386 g_return_if_fail(memdb
->locks
!= NULL
);
388 memdb_tree_entry_t
*te
, *parent
;
390 if (!(te
= memdb_lookup_path(memdb
, "priv/lock", &parent
)))
393 if (te
->type
!= DT_DIR
)
397 GHashTable
*old
= memdb
->locks
;
398 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
399 (GDestroyNotify
)memdb_lock_info_free
);
401 GHashTable
*ht
= te
->data
.entries
;
405 g_hash_table_iter_init (&iter
, ht
);
406 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
408 memdb_tree_entry_t
*lock_te
= (memdb_tree_entry_t
*)value
;
409 if (lock_te
->type
!= DT_DIR
)
412 memdb_lock_info_t
*li
;
413 li
= g_new0(memdb_lock_info_t
, 1);
414 li
->path
= g_strdup_printf("priv/lock/%s", lock_te
->name
);
417 if (memdb_tree_entry_csum(lock_te
, csum
)) {
418 memcpy(li
->csum
, csum
, 32);
419 memdb_lock_info_t
*oldli
;
420 if ((oldli
= g_hash_table_lookup(memdb
->locks
, lock_te
->name
)) &&
421 (memcmp(csum
, oldli
->csum
, 32) == 0)) {
422 li
->ltime
= oldli
->ltime
;
424 li
->ltime
= time(NULL
);
426 g_hash_table_insert(memdb
->locks
, li
->path
, li
);
428 memdb_lock_info_free(li
);
433 g_hash_table_destroy(old
);
438 memdb_recreate_vmlist(
441 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
443 memdb_tree_entry_t
*te
, *parent
;
445 if (!(te
= memdb_lookup_path(memdb
, "nodes", &parent
)))
448 if (te
->type
!= DT_DIR
)
451 GHashTable
*vmlist
= vmlist_hash_new();
453 GHashTable
*ht
= te
->data
.entries
;
460 g_hash_table_iter_init (&iter
, ht
);
462 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
464 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
465 if (node_te
->type
!= DT_DIR
)
468 if (!valid_nodename(node_te
->name
))
471 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "qemu-server"))) {
472 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_QEMU
, te
))
475 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "openvz"))) {
476 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_OPENVZ
, te
))
481 /* always update list - even if we detected duplicates */
482 cfs_status_set_vmlist(vmlist
);
488 memdb_open(const char *dbfilename
)
490 memdb_t
*memdb
= g_new0(memdb_t
, 1);
492 g_mutex_init(&memdb
->mutex
);
494 memdb
->dbfilename
= g_strdup(dbfilename
);
496 memdb
->root
= memdb_tree_entry_new("");
497 memdb
->root
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
498 memdb
->root
->type
= DT_DIR
;
500 memdb
->index
= g_hash_table_new_full(g_int64_hash
, g_int64_equal
, NULL
,
501 (GDestroyNotify
)memdb_tree_entry_free
);
503 g_hash_table_replace(memdb
->index
, &memdb
->root
->inode
, memdb
->root
);
505 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
506 (GDestroyNotify
)memdb_lock_info_free
);
508 if (!(memdb
->bdb
= bdb_backend_open(dbfilename
, memdb
->root
, memdb
->index
))) {
513 record_memdb_reload();
515 if (!memdb_recreate_vmlist(memdb
)) {
520 memdb_update_locks(memdb
);
522 cfs_debug("memdb open '%s' successful (version = %016zX)",
523 dbfilename
, memdb
->root
->version
);
529 memdb_close(memdb_t
*memdb
)
531 g_return_if_fail(memdb
!= NULL
);
533 g_mutex_lock (&memdb
->mutex
);
536 bdb_backend_close(memdb
->bdb
);
539 g_hash_table_destroy(memdb
->index
);
542 g_hash_table_destroy(memdb
->locks
);
544 if (memdb
->dbfilename
)
545 g_free(memdb
->dbfilename
);
549 memdb
->dbfilename
= NULL
;
551 g_mutex_unlock (&memdb
->mutex
);
553 g_mutex_clear (&memdb
->mutex
);
564 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
565 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
569 char *dirname
= NULL
;
572 g_mutex_lock (&memdb
->mutex
);
579 split_path(path
, &dirname
, &base
);
581 memdb_tree_entry_t
*parent
, *unused
;
583 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
588 if (parent
->type
!= DT_DIR
) {
593 /* do not allow '.' and '..' */
594 if ((base
[0] == 0) ||
595 (base
[0] == '.' && base
[1] == 0) ||
596 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
601 memdb_tree_entry_t
*te
;
602 if ((te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
607 memdb
->root
->version
++;
608 memdb
->root
->mtime
= mtime
;
609 memdb
->root
->writer
= writer
;
611 te
= memdb_tree_entry_new(base
);
612 te
->parent
= parent
->inode
;
613 te
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
614 te
->inode
= te
->version
= memdb
->root
->version
;
619 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
620 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
622 cfs_debug("memdb_mkdir %s %s %016zX", dirname
, base
, memdb
->root
->version
);
624 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
625 te
->writer
, te
->mtime
, 0, DT_DIR
, te
->name
, NULL
, 0)) {
631 if (strcmp(dirname
, "priv/lock") == 0) {
632 g_hash_table_remove(memdb
->locks
, path
);
634 if (memdb_tree_entry_csum(te
, csum
)) {
635 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
642 g_mutex_unlock (&memdb
->mutex
);
644 if (dirname
) g_free (dirname
);
645 if (base
) g_free (base
);
656 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
657 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
658 g_return_val_if_fail(data_ret
!= NULL
, -EINVAL
);
660 memdb_tree_entry_t
*te
, *parent
;
662 g_mutex_lock (&memdb
->mutex
);
664 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
665 if (te
->type
== DT_REG
) {
666 *data_ret
= g_memdup(te
->data
.value
, te
->size
);
667 guint32 size
= te
->size
;
668 g_mutex_unlock (&memdb
->mutex
);
673 g_mutex_unlock (&memdb
->mutex
);
690 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
691 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
692 g_return_val_if_fail(count
== 0 || data
!= NULL
, -EINVAL
);
696 char *dirname
= NULL
;
698 char *nodename
= NULL
;
700 g_mutex_lock (&memdb
->mutex
);
707 if ((offset
+ count
) > MEMDB_MAX_FILE_SIZE
) {
712 split_path(path
, &dirname
, &base
);
714 memdb_tree_entry_t
*parent
, *unused
;
715 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
719 if (parent
->type
!= DT_DIR
) {
724 /* do not allow '.' and '..' */
725 if ((base
[0] == 0) ||
726 (base
[0] == '.' && base
[1] == 0) ||
727 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
735 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
736 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
742 gpointer olddata
= NULL
;
744 memdb_tree_entry_t
*te
, *old
;
745 if ((old
= te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
746 if (te
->type
!= DT_REG
) {
756 memdb
->root
->version
++;
757 memdb
->root
->mtime
= mtime
;
758 memdb
->root
->writer
= writer
;
760 olddata
= te
->data
.value
;
768 memdb
->root
->version
++;
769 memdb
->root
->mtime
= mtime
;
770 memdb
->root
->writer
= writer
;
772 te
= memdb_tree_entry_new(base
);
773 te
->parent
= parent
->inode
;
775 te
->inode
= memdb
->root
->version
;
778 te
->version
= memdb
->root
->version
;
782 size_t newsize
= offset
+ count
;
784 gpointer newdata
= NULL
;
788 if (newsize
> te
->size
) {
789 newdata
= g_malloc0(newsize
);
790 memcpy(newdata
, olddata
, te
->size
);
797 newdata
= g_malloc0(newsize
);
798 memcpy(newdata
, olddata
, newsize
);
802 memcpy(newdata
+ offset
, data
, count
);
807 newdata
= g_malloc0(newsize
);
808 memcpy(newdata
+ offset
, data
, count
);
813 te
->data
.value
= newdata
;
818 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
819 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
822 record_memdb_change(path
);
824 cfs_debug("memdb_pwrite %s %s %016zX %016zX", dirname
, te
->name
, te
->inode
, te
->version
);
826 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
827 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
828 te
->data
.value
, 0)) {
835 vmlist_register_vm(vmtype
, vmid
, nodename
);
840 g_mutex_unlock (&memdb
->mutex
);
842 if (nodename
) g_free (nodename
);
843 if (dirname
) g_free (dirname
);
844 if (base
) g_free (base
);
856 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
857 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
861 char *dirname
= NULL
;
864 g_mutex_lock (&memdb
->mutex
);
871 split_path(path
, &dirname
, &base
);
873 memdb_tree_entry_t
*parent
, *unused
;
874 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
878 if (parent
->type
!= DT_DIR
) {
883 /* do not allow '.' and '..' */
884 if ((base
[0] == 0) ||
885 (base
[0] == '.' && base
[1] == 0) ||
886 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
891 memdb_tree_entry_t
*te
;
892 if (!(te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
897 int is_lock
= (strcmp(dirname
, "priv/lock") == 0) && (te
->type
== DT_DIR
);
899 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
900 * allow to change mtime for locks (only it mtime is newer).
901 * See README for details about locks.
904 if (mtime
< te
->mtime
) {
905 cfs_debug("dir is locked");
909 /* only allow lock updates if the writer is the same */
910 if (te
->writer
!= writer
) {
917 memdb
->root
->version
++;
918 memdb
->root
->mtime
= mtime
;
919 memdb
->root
->writer
= writer
;
921 te
->version
= memdb
->root
->version
;
925 record_memdb_change(path
);
927 cfs_debug("memdb_mtime %s %s %016zX %016zX", dirname
, te
->name
, te
->inode
, te
->version
);
929 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
930 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
931 te
->data
.value
, 0)) {
938 cfs_debug("update cfs lock");
939 g_hash_table_remove(memdb
->locks
, path
);
941 if (memdb_tree_entry_csum(te
, csum
)) {
942 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
949 g_mutex_unlock (&memdb
->mutex
);
951 if (dirname
) g_free (dirname
);
952 if (base
) g_free (base
);
964 return memdb_pwrite(memdb
, path
, writer
, mtime
, NULL
, 0, 0, FALSE
, TRUE
);
978 return memdb_pwrite(memdb
, path
, writer
, mtime
, data
, count
, offset
, truncate
, FALSE
);
986 memdb_tree_entry_t
*te
, *parent
;
988 g_mutex_lock (&memdb
->mutex
);
990 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
992 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
994 g_mutex_unlock (&memdb
->mutex
);
999 g_mutex_unlock (&memdb
->mutex
);
1009 g_return_val_if_fail(memdb
!= NULL
, NULL
);
1010 g_return_val_if_fail(path
!= NULL
, NULL
);
1012 memdb_tree_entry_t
*te
, *parent
;
1016 g_mutex_lock (&memdb
->mutex
);
1018 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
)))
1021 if (te
->type
!= DT_DIR
)
1024 GHashTable
*ht
= te
->data
.entries
;
1026 GHashTableIter iter
;
1027 gpointer key
, value
;
1029 g_hash_table_iter_init (&iter
, ht
);
1031 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1033 te
= (memdb_tree_entry_t
*)value
;
1035 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1037 list
= g_list_append(list
, cpy
);
1041 g_mutex_unlock (&memdb
->mutex
);
1047 memdb_dirlist_free(GList
*dirlist
)
1059 g_list_free(dirlist
);
1065 memdb_tree_entry_t
*parent
,
1066 memdb_tree_entry_t
*te
)
1068 g_return_val_if_fail(parent
!= NULL
, -EACCES
);
1069 g_return_val_if_fail(parent
->inode
== te
->parent
, -EACCES
);
1071 if (te
->type
== DT_DIR
)
1072 if (g_hash_table_size(te
->data
.entries
))
1075 if (!g_hash_table_steal(parent
->data
.entries
, te
->name
)) {
1076 cfs_critical("internal error - can't delete entry");
1081 if (!g_hash_table_steal(memdb
->index
, &te
->inode
)) {
1082 cfs_critical("internal error - can't delete entry");
1100 char *nodename
= NULL
;
1101 char *dirname
= NULL
;
1105 guint32 from_vmid
= 0;
1107 int from_vmtype
= 0;
1108 char *from_node
= NULL
;
1110 g_mutex_lock (&memdb
->mutex
);
1112 if (memdb
->errors
) {
1117 memdb_tree_entry_t
*from_te
, *from_parent
;
1118 memdb_tree_entry_t
*to_te
, *to_parent
;
1119 memdb_tree_entry_t
*target_te
, *target_parent
;
1121 guint64 delete_inode
= 0;
1123 if (!(from_te
= memdb_lookup_path(memdb
, from
, &from_parent
))) {
1128 if (!from_parent
) { /* can't rename root */
1133 from_node
= path_contain_vm_config(from
, &from_vmtype
, &from_vmid
);
1135 if (from_te
->type
== DT_REG
&& (nodename
= path_contain_vm_config(to
, &vmtype
, &vmid
))) {
1136 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
1137 if (!(from_node
&& vmid
== from_vmid
)) {
1144 /* we do not allow rename for locks */
1145 if (from_te
->type
== DT_DIR
&& path_is_lockdir(from
)) {
1150 if ((to_te
= memdb_lookup_path(memdb
, to
, &to_parent
))) {
1152 if ((ret
= unlink_tree_entry(memdb
, to_parent
, to_te
)) != 0)
1155 base
= strdup(to_te
->name
);
1157 delete_inode
= to_te
->inode
;
1159 target_te
= to_parent
;
1161 memdb_tree_entry_free(to_te
);
1165 split_path(to
, &dirname
, &base
);
1167 if (!(target_te
= memdb_lookup_path(memdb
, dirname
, &target_parent
))) {
1172 if (target_te
->type
!= DT_DIR
) {
1178 record_memdb_change(from
);
1179 record_memdb_change(to
);
1181 /* NOTE: unlink_tree_entry() make sure that we can only
1182 rename emtpy directories */
1184 if ((ret
= unlink_tree_entry(memdb
, from_parent
, from_te
)) != 0)
1187 memdb
->root
->version
++;
1188 memdb
->root
->mtime
= mtime
;
1189 memdb
->root
->writer
= writer
;
1191 memdb_tree_entry_t
*new = memdb_tree_entry_new(base
);
1192 new->parent
= target_te
->inode
;
1193 new->inode
= from_te
->inode
;
1194 new->version
= memdb
->root
->version
;
1195 new->writer
= writer
;
1197 new->size
= from_te
->size
;
1198 new->type
= from_te
->type
;
1199 new->data
= from_te
->data
;
1203 g_hash_table_replace(target_te
->data
.entries
, new->name
, new);
1204 g_hash_table_replace(memdb
->index
, &new->inode
, new);
1206 if (bdb_backend_write(memdb
->bdb
, new->inode
, new->parent
,
1207 new->version
, new->writer
, new->mtime
,
1208 new->size
, new->type
, new->name
,
1209 new->data
.value
, delete_inode
)) {
1215 if (new->type
== DT_REG
) {
1218 vmlist_delete_vm(from_vmid
);
1221 vmlist_register_vm(vmtype
, vmid
, nodename
);
1223 } else if (new->type
== DT_DIR
) {
1224 /* directories are alwayse empty (see unlink_tree_entry) */
1230 g_mutex_unlock (&memdb
->mutex
);
1232 if (from_node
) g_free(from_node
);
1233 if (nodename
) g_free (nodename
);
1234 if (dirname
) g_free (dirname
);
1235 if (base
) g_free (base
);
1247 memdb_tree_entry_t
*te
, *parent
;
1249 g_mutex_lock (&memdb
->mutex
);
1253 if (memdb
->errors
) {
1258 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1263 if (!parent
) { /* cant remove root */
1268 if (te
->type
== DT_DIR
) {
1269 if (g_hash_table_size(te
->data
.entries
)) {
1274 g_hash_table_remove(memdb
->locks
, path
);
1277 record_memdb_change(path
);
1279 if ((ret
= unlink_tree_entry(memdb
, parent
, te
)) != 0)
1282 memdb
->root
->version
++;
1283 memdb
->root
->mtime
= mtime
;
1284 memdb
->root
->writer
= writer
;
1286 if (bdb_backend_write(memdb
->bdb
, 0, 0, memdb
->root
->version
, writer
, mtime
, 0,
1287 DT_REG
, NULL
, NULL
, te
->inode
)) {
1289 memdb_tree_entry_free(te
);
1294 memdb_tree_entry_free(te
);
1299 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
1301 vmlist_delete_vm(vmid
);
1307 g_mutex_unlock (&memdb
->mutex
);
1315 struct statvfs
*stbuf
)
1317 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
1318 g_return_val_if_fail(stbuf
!= NULL
, -EINVAL
);
1320 g_mutex_lock (&memdb
->mutex
);
1322 GHashTableIter iter
;
1323 gpointer key
, value
;
1328 g_hash_table_iter_init (&iter
, memdb
->index
);
1330 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1331 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)value
;
1336 g_mutex_unlock (&memdb
->mutex
);
1338 stbuf
->f_bsize
= MEMDB_BLOCKSIZE
;
1339 stbuf
->f_blocks
= MEMDB_BLOCKS
;
1340 stbuf
->f_bfree
= stbuf
->f_bavail
= stbuf
->f_blocks
-
1341 ((size
+ stbuf
->f_bsize
- 1)/stbuf
->f_bsize
);
1342 stbuf
->f_files
= MEMDB_MAX_INODES
;
1343 stbuf
->f_ffree
= stbuf
->f_files
- files
;
1345 stbuf
->f_namemax
= 256;
1351 tree_entry_debug(memdb_tree_entry_t
*te
)
1353 g_return_if_fail(te
!= NULL
);
1355 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1357 cfs_debug("%016zX %c %016zX %016zX %08X %08X %08X %s\n",
1358 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1359 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1363 tree_entry_print(memdb_tree_entry_t
*te
)
1365 g_return_if_fail(te
!= NULL
);
1367 g_print("%016zX %c %016zX %016zX %08X %08X %08X %s\n",
1368 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1369 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1373 memdb_dump(memdb_t
*memdb
)
1375 g_return_if_fail(memdb
!= NULL
);
1377 g_mutex_lock (&memdb
->mutex
);
1379 GList
*list
= g_hash_table_get_values(memdb
->index
);
1381 list
= g_list_sort(list
, memdb_tree_compare
);
1383 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1384 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1388 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1390 tree_entry_print(te
);
1397 g_mutex_unlock (&memdb
->mutex
);
1401 memdb_dump_index (memdb_index_t
*idx
)
1403 g_return_if_fail(idx
!= NULL
);
1405 g_print ("INDEX DUMP %016zX\n", idx
->version
);
1408 for (i
= 0; i
< idx
->size
; i
++) {
1409 g_print ("%016zX %016zX%016zX%016zX%016zX\n", idx
->entries
[i
].inode
,
1410 *((guint64
*)idx
->entries
[i
].digest
),
1411 *((guint64
*)(idx
->entries
[i
].digest
+ 8)),
1412 *((guint64
*)(idx
->entries
[i
].digest
+ 16)),
1413 *((guint64
*)(idx
->entries
[i
].digest
+ 24)));
1418 memdb_index_copy(memdb_index_t
*idx
)
1420 g_return_val_if_fail(idx
!= NULL
, NULL
);
1422 int bytes
= sizeof(memdb_index_t
) + idx
->size
*sizeof(memdb_index_extry_t
);
1423 if (idx
->bytes
!= bytes
) {
1424 cfs_critical("memdb index contains wrong number of bytes");
1428 memdb_index_t
*copy
= (memdb_index_t
*)g_memdup(idx
, bytes
);
1434 memdb_tree_entry_csum(
1435 memdb_tree_entry_t
*te
,
1438 g_return_val_if_fail(te
!= NULL
, FALSE
);
1439 g_return_val_if_fail(csum
!= NULL
, FALSE
);
1441 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1443 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1444 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1445 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1446 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1447 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1448 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1449 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1450 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1452 if (te
->type
== DT_REG
&& te
->size
)
1453 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1455 size_t csum_len
= 32;
1456 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1457 g_checksum_free(sha256
);
1463 memdb_compute_checksum(
1465 memdb_tree_entry_t
*root
,
1469 g_return_val_if_fail(index
!= NULL
, FALSE
);
1470 g_return_val_if_fail(root
!= NULL
, FALSE
);
1472 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1474 GList
*list
= g_hash_table_get_values(index
);
1476 list
= g_list_sort(list
, memdb_tree_compare
);
1480 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1482 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1483 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1484 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1485 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1486 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1487 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1488 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1489 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1491 if (te
->type
== DT_REG
&& te
->size
)
1492 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1499 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1501 cfs_debug("checksum: %s", g_checksum_get_string(sha256
));
1503 g_checksum_free(sha256
);
1511 memdb_tree_entry_t
*root
)
1513 g_return_val_if_fail(index
!= NULL
, NULL
);
1514 g_return_val_if_fail(root
!= NULL
, NULL
);
1516 memdb_index_t
*idx
= NULL
;
1518 int count
= g_hash_table_size(index
);
1520 cfs_critical("memdb index has no entires");
1524 int bytes
= sizeof(memdb_index_t
) + count
*sizeof(memdb_index_extry_t
);
1525 idx
= g_malloc0(bytes
);
1529 idx
->version
= root
->version
;
1530 idx
->mtime
= root
->mtime
;
1531 idx
->writer
= root
->writer
;
1533 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1535 GList
*list
= g_hash_table_get_values(index
);
1537 list
= g_list_sort(list
, memdb_tree_compare
);
1542 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1544 if (te
->inode
> idx
->last_inode
)
1545 idx
->last_inode
= te
->inode
;
1547 idx
->entries
[ind
].inode
= te
->inode
;
1549 g_checksum_reset (sha256
);
1551 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1552 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1553 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1554 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1555 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1556 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1557 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1559 if (te
->type
== DT_REG
&& te
->size
)
1560 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1563 g_checksum_get_digest(sha256
, (guint8
*)idx
->entries
[ind
].digest
, &len
);
1572 g_checksum_free(sha256
);