]>
git.proxmox.com Git - pve-cluster.git/blob - data/src/memdb.c
69532275412a95e953d155bd254f2ae03da329a3
2 Copyright (C) 2010 - 2020 Proxmox Server Solutions GmbH
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Affero General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Affero General Public License for more details.
14 You should have received a copy of the GNU Affero General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Author: Dietmar Maurer <dietmar@proxmox.com>
23 #endif /* HAVE_CONFIG_H */
28 #include <sys/types.h>
38 #include "cfs-utils.h"
42 #define CFS_LOCK_TIMEOUT (60*2)
48 g_return_val_if_fail(name
!= NULL
, NULL
);
50 memdb_tree_entry_t
*te
= g_malloc0(sizeof(memdb_tree_entry_t
) + strlen(name
) + 1);
51 g_return_val_if_fail(te
!= NULL
, NULL
);
53 strcpy(te
->name
, name
);
59 memdb_tree_entry_copy(
60 memdb_tree_entry_t
*te
,
63 g_return_val_if_fail(te
!= NULL
, NULL
);
65 memdb_tree_entry_t
*cpy
= memdb_tree_entry_new(te
->name
);
67 cpy
->parent
= te
->parent
;
68 cpy
->inode
= te
->inode
;
69 cpy
->version
= te
->version
;
70 cpy
->writer
= te
->writer
;
71 cpy
->mtime
= te
->mtime
;
75 if (with_data
&& te
->size
&& te
->type
== DT_REG
) {
76 cpy
->data
.value
= g_memdup2(te
->data
.value
, te
->size
);
78 cpy
->data
.value
= NULL
;
85 memdb_tree_entry_free(
86 memdb_tree_entry_t
*te
)
91 if (te
->type
== DT_REG
) {
93 g_free(te
->data
.value
);
96 if (te
->type
== DT_DIR
) {
98 g_hash_table_destroy(te
->data
.entries
);
105 memdb_lock_info_free(memdb_lock_info_t
*li
)
107 g_return_if_fail(li
!= NULL
);
120 guint64 a
= ((const memdb_tree_entry_t
*)v1
)->inode
;
121 guint64 b
= ((const memdb_tree_entry_t
*)v2
)->inode
;
138 char *dup
= g_strdup (path
);
139 int len
= strlen (dup
) - 1;
140 while (len
>= 0 && dup
[len
] == '/') dup
[len
--] = 0;
142 char *dn
= g_path_get_dirname (dup
);
143 char *bn
= g_path_get_basename (dup
);
151 static memdb_tree_entry_t
*
152 memdb_lookup_dir_entry(
155 memdb_tree_entry_t
*parent
)
158 g_return_val_if_fail(memdb
!= NULL
, NULL
);
159 g_return_val_if_fail(name
!= NULL
, NULL
);
160 g_return_val_if_fail(parent
!= NULL
, NULL
);
161 g_return_val_if_fail(parent
->type
== DT_DIR
, NULL
);
163 GHashTable
*ht
= parent
->data
.entries
;
165 g_return_val_if_fail(ht
!= NULL
, NULL
);
167 return g_hash_table_lookup(ht
, name
);
170 static memdb_tree_entry_t
*
174 memdb_tree_entry_t
**parent
)
176 g_return_val_if_fail(memdb
!= NULL
, NULL
);
177 g_return_val_if_fail(path
!= NULL
, NULL
);
178 g_return_val_if_fail(parent
!= NULL
, NULL
);
180 memdb_tree_entry_t
*cdir
= memdb
->root
;
183 if (path
[0] == 0 || ((path
[0] == '.' || path
[0] == '/') && path
[1] == 0))
186 gchar
**set
= g_strsplit_set(path
, "/", 0);
191 while ((name
= set
[i
++])) {
193 if (name
[0] == 0) continue;
196 if ((cdir
= memdb_lookup_dir_entry(memdb
, name
, cdir
)) == NULL
)
211 if (!name
|| name
[0] < '1' || name
[0] > '9')
216 errno
= 0; /* see man strtoul */
218 unsigned long int vmid
= strtoul(name
, &end
, 10);
220 if (!end
|| end
[0] != '.' || end
[1] != 'c'|| end
[2] != 'o' || end
[3] != 'n' ||
221 end
[4] != 'f' || end
[5] != 0 || errno
!= 0 || vmid
> G_MAXUINT32
)
225 *vmid_ret
= (guint32
)vmid
;
232 const char *nodename
)
234 g_return_val_if_fail(nodename
!= NULL
, FALSE
);
236 /* LDH rule (letters, digits, hyphen) */
238 int len
= strlen(nodename
);
244 for (int i
= 0; i
< len
; i
++) {
245 char c
= nodename
[i
];
246 if ((c
>= 'A' && c
<= 'Z') ||
247 (c
>= 'a' && c
<= 'z') ||
248 (c
>= '0' && c
<= '9') ||
249 (i
!= 0 && i
!= (len
-1) && c
== '-'))
258 dir_contain_vm_config(
265 if (strncmp(dirname
, "nodes/", 6) != 0)
270 char *nodename
= NULL
;
272 char **sa
= g_strsplit(dirname
, "/", 2);
273 if (sa
[0] && sa
[1] && valid_nodename(sa
[0])) {
274 if (strcmp(sa
[1], "qemu-server") == 0) {
275 *vmtype_ret
= VMTYPE_QEMU
;
276 nodename
= g_strdup(sa
[0]);
277 } else if (strcmp(sa
[1], "openvz") == 0) {
278 // FIXME: remove openvz stuff for 7.x
279 *vmtype_ret
= VMTYPE_OPENVZ
;
280 nodename
= g_strdup(sa
[0]);
281 } else if (strcmp(sa
[1], "lxc") == 0) {
282 *vmtype_ret
= VMTYPE_LXC
;
283 nodename
= g_strdup(sa
[0]);
293 path_contain_vm_config(
301 char *dirname
= NULL
;
303 char *nodename
= NULL
;
305 split_path(path
, &dirname
, &base
);
307 if (name_is_vm_config(base
, vmid_ret
))
308 nodename
= dir_contain_vm_config(dirname
, vmtype_ret
);
320 const char *nodename
,
322 memdb_tree_entry_t
*subdir
)
324 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
325 g_return_val_if_fail(vmlist
!= NULL
, FALSE
);
326 g_return_val_if_fail(subdir
!= NULL
, FALSE
);
327 g_return_val_if_fail(subdir
->type
== DT_DIR
, FALSE
);
328 g_return_val_if_fail(subdir
->data
.entries
!= NULL
, FALSE
);
332 GHashTable
*ht
= subdir
->data
.entries
;
336 g_hash_table_iter_init (&iter
, ht
);
338 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
340 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
342 if (node_te
->type
!= DT_REG
)
346 if (!name_is_vm_config(node_te
->name
, &vmid
))
349 if (!vmlist_hash_insert_vm(vmlist
, vmtype
, vmid
, nodename
, FALSE
))
361 const guchar csum
[32])
363 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
364 g_return_val_if_fail(memdb
->locks
!= NULL
, FALSE
);
365 g_return_val_if_fail(path
!= NULL
, FALSE
);
366 g_return_val_if_fail(csum
!= NULL
, FALSE
);
368 memdb_lock_info_t
*li
;
369 uint32_t ctime
= time(NULL
);
371 if ((li
= g_hash_table_lookup(memdb
->locks
, path
))) {
372 if (memcmp(csum
, li
->csum
, 32) != 0) {
374 memcpy(li
->csum
, csum
, 32);
375 g_critical("wrong lock csum - reset timeout");
378 if ((ctime
> li
->ltime
) && ((ctime
- li
->ltime
) > CFS_LOCK_TIMEOUT
))
381 li
= g_new0(memdb_lock_info_t
, 1);
382 li
->path
= g_strdup(path
);
384 memcpy(li
->csum
, csum
, 32);
385 g_hash_table_replace(memdb
->locks
, li
->path
, li
);
392 memdb_update_locks(memdb_t
*memdb
)
394 g_return_if_fail(memdb
!= NULL
);
395 g_return_if_fail(memdb
->locks
!= NULL
);
397 memdb_tree_entry_t
*te
, *parent
;
399 if (!(te
= memdb_lookup_path(memdb
, "priv/lock", &parent
)))
402 if (te
->type
!= DT_DIR
)
406 GHashTable
*old
= memdb
->locks
;
407 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
408 (GDestroyNotify
)memdb_lock_info_free
);
410 GHashTable
*ht
= te
->data
.entries
;
414 g_hash_table_iter_init (&iter
, ht
);
415 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
417 memdb_tree_entry_t
*lock_te
= (memdb_tree_entry_t
*)value
;
418 if (lock_te
->type
!= DT_DIR
)
421 memdb_lock_info_t
*li
;
422 li
= g_new0(memdb_lock_info_t
, 1);
423 li
->path
= g_strdup_printf("priv/lock/%s", lock_te
->name
);
426 if (memdb_tree_entry_csum(lock_te
, csum
)) {
427 memcpy(li
->csum
, csum
, 32);
428 memdb_lock_info_t
*oldli
;
429 if ((oldli
= g_hash_table_lookup(memdb
->locks
, lock_te
->name
)) &&
430 (memcmp(csum
, oldli
->csum
, 32) == 0)) {
431 li
->ltime
= oldli
->ltime
;
433 li
->ltime
= time(NULL
);
435 g_hash_table_insert(memdb
->locks
, li
->path
, li
);
437 memdb_lock_info_free(li
);
442 g_hash_table_destroy(old
);
447 memdb_recreate_vmlist(
450 g_return_val_if_fail(memdb
!= NULL
, FALSE
);
452 memdb_tree_entry_t
*te
, *parent
;
454 if (!(te
= memdb_lookup_path(memdb
, "nodes", &parent
)))
457 if (te
->type
!= DT_DIR
)
460 GHashTable
*vmlist
= vmlist_hash_new();
462 GHashTable
*ht
= te
->data
.entries
;
469 g_hash_table_iter_init (&iter
, ht
);
471 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
473 memdb_tree_entry_t
*node_te
= (memdb_tree_entry_t
*)value
;
474 if (node_te
->type
!= DT_DIR
)
477 if (!valid_nodename(node_te
->name
))
480 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "qemu-server"))) {
481 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_QEMU
, te
))
484 // FIXME: remove openvz stuff for 7.x
485 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "openvz"))) {
486 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_OPENVZ
, te
))
489 if ((te
= g_hash_table_lookup(node_te
->data
.entries
, "lxc"))) {
490 if (!vmlist_add_dir(memdb
, vmlist
, node_te
->name
, VMTYPE_LXC
, te
))
495 /* always update list - even if we detected duplicates */
496 cfs_status_set_vmlist(vmlist
);
502 memdb_open(const char *dbfilename
)
504 memdb_t
*memdb
= g_new0(memdb_t
, 1);
506 g_mutex_init(&memdb
->mutex
);
508 memdb
->dbfilename
= g_strdup(dbfilename
);
510 memdb
->root
= memdb_tree_entry_new("");
511 memdb
->root
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
512 memdb
->root
->type
= DT_DIR
;
514 memdb
->index
= g_hash_table_new_full(g_int64_hash
, g_int64_equal
, NULL
,
515 (GDestroyNotify
)memdb_tree_entry_free
);
517 g_hash_table_replace(memdb
->index
, &memdb
->root
->inode
, memdb
->root
);
519 memdb
->locks
= g_hash_table_new_full(g_str_hash
, g_str_equal
, NULL
,
520 (GDestroyNotify
)memdb_lock_info_free
);
522 if (!(memdb
->bdb
= bdb_backend_open(dbfilename
, memdb
->root
, memdb
->index
))) {
527 record_memdb_reload();
529 if (!memdb_recreate_vmlist(memdb
)) {
534 memdb_update_locks(memdb
);
536 cfs_debug("memdb open '%s' successful (version = %016" PRIX64
")",
537 dbfilename
, memdb
->root
->version
);
543 memdb_close(memdb_t
*memdb
)
545 g_return_if_fail(memdb
!= NULL
);
547 g_mutex_lock (&memdb
->mutex
);
550 bdb_backend_close(memdb
->bdb
);
553 g_hash_table_destroy(memdb
->index
);
556 g_hash_table_destroy(memdb
->locks
);
558 if (memdb
->dbfilename
)
559 g_free(memdb
->dbfilename
);
563 memdb
->dbfilename
= NULL
;
565 g_mutex_unlock (&memdb
->mutex
);
567 g_mutex_clear (&memdb
->mutex
);
578 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
579 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
583 char *dirname
= NULL
;
586 g_mutex_lock (&memdb
->mutex
);
593 split_path(path
, &dirname
, &base
);
595 memdb_tree_entry_t
*parent
, *unused
;
597 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
602 if (parent
->type
!= DT_DIR
) {
607 /* do not allow '.' and '..' */
608 if ((base
[0] == 0) ||
609 (base
[0] == '.' && base
[1] == 0) ||
610 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
615 memdb_tree_entry_t
*te
;
616 if ((te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
621 memdb
->root
->version
++;
622 memdb
->root
->mtime
= mtime
;
623 memdb
->root
->writer
= writer
;
625 te
= memdb_tree_entry_new(base
);
626 te
->parent
= parent
->inode
;
627 te
->data
.entries
= g_hash_table_new(g_str_hash
, g_str_equal
);
628 te
->inode
= te
->version
= memdb
->root
->version
;
633 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
634 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
636 cfs_debug("memdb_mkdir %s %s %016" PRIX64
, dirname
, base
, memdb
->root
->version
);
638 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
639 te
->writer
, te
->mtime
, 0, DT_DIR
, te
->name
, NULL
, 0)) {
645 if (strcmp(dirname
, "priv/lock") == 0) {
646 g_hash_table_remove(memdb
->locks
, path
);
648 if (memdb_tree_entry_csum(te
, csum
)) {
649 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
656 g_mutex_unlock (&memdb
->mutex
);
664 // Original memdb_read without locking - Caller MUST handle the locking
671 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
672 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
673 g_return_val_if_fail(data_ret
!= NULL
, -EINVAL
);
675 memdb_tree_entry_t
*te
, *parent
;
677 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
678 if (te
->type
== DT_REG
) {
679 *data_ret
= g_memdup2(te
->data
.value
, te
->size
);
680 guint32 size
= te
->size
;
695 g_mutex_lock (&memdb
->mutex
);
697 res
= memdb_read_nolock(memdb
, path
, data_ret
);
699 g_mutex_unlock (&memdb
->mutex
);
716 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
717 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
718 g_return_val_if_fail(count
== 0 || data
!= NULL
, -EINVAL
);
722 char *dirname
= NULL
;
724 char *nodename
= NULL
;
726 g_mutex_lock (&memdb
->mutex
);
733 if ((offset
+ count
) > MEMDB_MAX_FILE_SIZE
) {
738 split_path(path
, &dirname
, &base
);
740 memdb_tree_entry_t
*parent
, *unused
;
741 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
745 if (parent
->type
!= DT_DIR
) {
750 /* do not allow '.' and '..' */
751 if ((base
[0] == 0) ||
752 (base
[0] == '.' && base
[1] == 0) ||
753 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
761 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
762 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
768 gpointer olddata
= NULL
;
770 memdb_tree_entry_t
*te
, *old
;
771 if ((old
= te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
772 if (te
->type
!= DT_REG
) {
782 memdb
->root
->version
++;
783 memdb
->root
->mtime
= mtime
;
784 memdb
->root
->writer
= writer
;
786 olddata
= te
->data
.value
;
794 memdb
->root
->version
++;
795 memdb
->root
->mtime
= mtime
;
796 memdb
->root
->writer
= writer
;
798 te
= memdb_tree_entry_new(base
);
799 te
->parent
= parent
->inode
;
801 te
->inode
= memdb
->root
->version
;
804 te
->version
= memdb
->root
->version
;
808 size_t newsize
= offset
+ count
;
810 gpointer newdata
= NULL
;
814 if (newsize
> te
->size
) {
815 newdata
= g_malloc0(newsize
);
816 memcpy(newdata
, olddata
, te
->size
);
823 newdata
= g_malloc0(newsize
);
824 memcpy(newdata
, olddata
, newsize
);
828 memcpy((uint8_t *) newdata
+ offset
, data
, count
);
833 newdata
= g_malloc0(newsize
);
834 memcpy((uint8_t *) newdata
+ offset
, data
, count
);
839 te
->data
.value
= newdata
;
844 g_hash_table_replace(parent
->data
.entries
, te
->name
, te
);
845 g_hash_table_replace(memdb
->index
, &te
->inode
, te
);
848 record_memdb_change(path
);
850 cfs_debug("memdb_pwrite %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
852 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
853 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
854 te
->data
.value
, 0)) {
861 vmlist_register_vm(vmtype
, vmid
, nodename
);
866 g_mutex_unlock (&memdb
->mutex
);
882 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
883 g_return_val_if_fail(path
!= NULL
, -EINVAL
);
887 char *dirname
= NULL
;
890 g_mutex_lock (&memdb
->mutex
);
897 split_path(path
, &dirname
, &base
);
899 memdb_tree_entry_t
*parent
, *unused
;
900 if (!(parent
= memdb_lookup_path(memdb
, dirname
, &unused
))) {
904 if (parent
->type
!= DT_DIR
) {
909 /* do not allow '.' and '..' */
910 if ((base
[0] == 0) ||
911 (base
[0] == '.' && base
[1] == 0) ||
912 (base
[0] == '.' && base
[1] == '.' && base
[2] == 0)) {
917 memdb_tree_entry_t
*te
;
918 if (!(te
= memdb_lookup_dir_entry(memdb
, base
, parent
))) {
923 int is_lock
= (strcmp(dirname
, "priv/lock") == 0) && (te
->type
== DT_DIR
);
925 /* NOTE: we use utime(0,0) to trigger 'unlock', so we do not
926 * allow to change mtime for locks (only if mtime is newer).
927 * See README for details about locks.
930 if (mtime
< te
->mtime
) {
931 cfs_debug("dir is locked");
935 /* only allow lock updates if the writer is the same */
936 if (te
->writer
!= writer
) {
943 memdb
->root
->version
++;
944 memdb
->root
->mtime
= mtime
;
945 memdb
->root
->writer
= writer
;
947 te
->version
= memdb
->root
->version
;
951 record_memdb_change(path
);
953 cfs_debug("memdb_mtime %s %s %016" PRIX64
" %016" PRIX64
, dirname
, te
->name
, te
->inode
, te
->version
);
955 if (bdb_backend_write(memdb
->bdb
, te
->inode
, te
->parent
, te
->version
,
956 te
->writer
, te
->mtime
, te
->size
, te
->type
, te
->name
,
957 te
->data
.value
, 0)) {
964 cfs_debug("update cfs lock");
965 g_hash_table_remove(memdb
->locks
, path
);
967 if (memdb_tree_entry_csum(te
, csum
)) {
968 memdb_lock_expired(memdb
, path
, csum
); // insert a new entry
975 g_mutex_unlock (&memdb
->mutex
);
990 return memdb_pwrite(memdb
, path
, writer
, mtime
, NULL
, 0, 0, FALSE
, TRUE
);
1004 return memdb_pwrite(memdb
, path
, writer
, mtime
, data
, count
, offset
, truncate
, FALSE
);
1007 memdb_tree_entry_t
*
1012 memdb_tree_entry_t
*te
, *parent
;
1014 g_mutex_lock (&memdb
->mutex
);
1016 if ((te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1018 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1020 g_mutex_unlock (&memdb
->mutex
);
1025 g_mutex_unlock (&memdb
->mutex
);
1035 g_return_val_if_fail(memdb
!= NULL
, NULL
);
1036 g_return_val_if_fail(path
!= NULL
, NULL
);
1038 memdb_tree_entry_t
*te
, *parent
;
1042 g_mutex_lock (&memdb
->mutex
);
1044 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
)))
1047 if (te
->type
!= DT_DIR
)
1050 GHashTable
*ht
= te
->data
.entries
;
1052 GHashTableIter iter
;
1053 gpointer key
, value
;
1055 g_hash_table_iter_init (&iter
, ht
);
1057 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1059 te
= (memdb_tree_entry_t
*)value
;
1061 memdb_tree_entry_t
*cpy
= memdb_tree_entry_copy(te
, 0);
1063 list
= g_list_append(list
, cpy
);
1067 g_mutex_unlock (&memdb
->mutex
);
1073 memdb_dirlist_free(GList
*dirlist
)
1085 g_list_free(dirlist
);
1091 memdb_tree_entry_t
*parent
,
1092 memdb_tree_entry_t
*te
)
1094 g_return_val_if_fail(parent
!= NULL
, -EACCES
);
1095 g_return_val_if_fail(parent
->inode
== te
->parent
, -EACCES
);
1097 if (te
->type
== DT_DIR
)
1098 if (g_hash_table_size(te
->data
.entries
))
1101 if (!g_hash_table_steal(parent
->data
.entries
, te
->name
)) {
1102 cfs_critical("internal error - can't delete entry");
1107 if (!g_hash_table_steal(memdb
->index
, &te
->inode
)) {
1108 cfs_critical("internal error - can't delete entry");
1126 char *nodename
= NULL
;
1127 char *dirname
= NULL
;
1131 guint32 from_vmid
= 0;
1133 int from_vmtype
= 0;
1134 char *from_node
= NULL
;
1136 g_mutex_lock (&memdb
->mutex
);
1138 if (memdb
->errors
) {
1143 memdb_tree_entry_t
*from_te
, *from_parent
;
1144 memdb_tree_entry_t
*to_te
, *to_parent
;
1145 memdb_tree_entry_t
*target_te
, *target_parent
;
1147 guint64 delete_inode
= 0;
1149 if (!(from_te
= memdb_lookup_path(memdb
, from
, &from_parent
))) {
1154 if (!from_parent
) { /* can't rename root */
1159 from_node
= path_contain_vm_config(from
, &from_vmtype
, &from_vmid
);
1161 if (from_te
->type
== DT_REG
&& (nodename
= path_contain_vm_config(to
, &vmtype
, &vmid
))) {
1162 if (vmlist_different_vm_exists(vmtype
, vmid
, nodename
)) {
1163 if (!(from_node
&& vmid
== from_vmid
)) {
1170 /* we do not allow rename for locks */
1171 if (from_te
->type
== DT_DIR
&& path_is_lockdir(from
)) {
1176 if ((to_te
= memdb_lookup_path(memdb
, to
, &to_parent
))) {
1178 if ((ret
= unlink_tree_entry(memdb
, to_parent
, to_te
)) != 0)
1181 base
= strdup(to_te
->name
);
1183 delete_inode
= to_te
->inode
;
1185 target_te
= to_parent
;
1187 memdb_tree_entry_free(to_te
);
1191 split_path(to
, &dirname
, &base
);
1193 if (!(target_te
= memdb_lookup_path(memdb
, dirname
, &target_parent
))) {
1198 if (target_te
->type
!= DT_DIR
) {
1204 record_memdb_change(from
);
1205 record_memdb_change(to
);
1207 /* NOTE: unlink_tree_entry() makes sure that we can only
1208 rename empty directories */
1210 if ((ret
= unlink_tree_entry(memdb
, from_parent
, from_te
)) != 0)
1213 memdb
->root
->version
++;
1214 memdb
->root
->mtime
= mtime
;
1215 memdb
->root
->writer
= writer
;
1217 memdb_tree_entry_t
*new = memdb_tree_entry_new(base
);
1218 new->parent
= target_te
->inode
;
1219 new->inode
= from_te
->inode
;
1220 new->version
= memdb
->root
->version
;
1221 new->writer
= writer
;
1223 new->size
= from_te
->size
;
1224 new->type
= from_te
->type
;
1225 new->data
= from_te
->data
;
1229 g_hash_table_replace(target_te
->data
.entries
, new->name
, new);
1230 g_hash_table_replace(memdb
->index
, &new->inode
, new);
1232 if (bdb_backend_write(memdb
->bdb
, new->inode
, new->parent
,
1233 new->version
, new->writer
, new->mtime
,
1234 new->size
, new->type
, new->name
,
1235 new->data
.value
, delete_inode
)) {
1241 if (new->type
== DT_REG
) {
1244 vmlist_delete_vm(from_vmid
);
1247 vmlist_register_vm(vmtype
, vmid
, nodename
);
1249 } else if (new->type
== DT_DIR
) {
1250 /* directories are always empty (see unlink_tree_entry) */
1256 g_mutex_unlock (&memdb
->mutex
);
1273 memdb_tree_entry_t
*te
, *parent
;
1275 g_mutex_lock (&memdb
->mutex
);
1279 if (memdb
->errors
) {
1284 if (!(te
= memdb_lookup_path(memdb
, path
, &parent
))) {
1289 if (!parent
) { /* cant remove root */
1294 if (te
->type
== DT_DIR
) {
1295 if (g_hash_table_size(te
->data
.entries
)) {
1300 g_hash_table_remove(memdb
->locks
, path
);
1303 record_memdb_change(path
);
1305 if ((ret
= unlink_tree_entry(memdb
, parent
, te
)) != 0)
1308 memdb
->root
->version
++;
1309 memdb
->root
->mtime
= mtime
;
1310 memdb
->root
->writer
= writer
;
1312 if (bdb_backend_write(memdb
->bdb
, 0, 0, memdb
->root
->version
, writer
, mtime
, 0,
1313 DT_REG
, NULL
, NULL
, te
->inode
)) {
1315 memdb_tree_entry_free(te
);
1320 memdb_tree_entry_free(te
);
1325 if ((nodename
= path_contain_vm_config(path
, &vmtype
, &vmid
))) {
1327 vmlist_delete_vm(vmid
);
1333 g_mutex_unlock (&memdb
->mutex
);
1341 struct statvfs
*stbuf
)
1343 g_return_val_if_fail(memdb
!= NULL
, -EINVAL
);
1344 g_return_val_if_fail(stbuf
!= NULL
, -EINVAL
);
1346 g_mutex_lock (&memdb
->mutex
);
1348 GHashTableIter iter
;
1349 gpointer key
, value
;
1354 g_hash_table_iter_init (&iter
, memdb
->index
);
1356 while (g_hash_table_iter_next (&iter
, &key
, &value
)) {
1357 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)value
;
1362 g_mutex_unlock (&memdb
->mutex
);
1364 stbuf
->f_bsize
= MEMDB_BLOCKSIZE
;
1365 stbuf
->f_blocks
= MEMDB_BLOCKS
;
1366 stbuf
->f_bfree
= stbuf
->f_bavail
= stbuf
->f_blocks
-
1367 ((size
+ stbuf
->f_bsize
- 1)/stbuf
->f_bsize
);
1368 stbuf
->f_files
= MEMDB_MAX_INODES
;
1369 stbuf
->f_ffree
= stbuf
->f_files
- files
;
1371 stbuf
->f_namemax
= 256;
1377 tree_entry_debug(memdb_tree_entry_t
*te
)
1379 g_return_if_fail(te
!= NULL
);
1381 // same as tree_entry_print(), but use cfs_debug() instead of g_print()
1383 cfs_debug("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1384 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1385 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1389 tree_entry_print(memdb_tree_entry_t
*te
)
1391 g_return_if_fail(te
!= NULL
);
1393 g_print("%016" PRIX64
" %c %016" PRIX64
" %016" PRIX64
" %08X %08X %08X %s\n",
1394 te
->inode
, te
->type
== DT_DIR
? 'D' : 'R', te
->parent
, te
->version
,
1395 te
->writer
, te
->mtime
, te
->size
, te
->name
);
1399 memdb_dump(memdb_t
*memdb
)
1401 g_return_if_fail(memdb
!= NULL
);
1403 g_mutex_lock (&memdb
->mutex
);
1405 GList
*list
= g_hash_table_get_values(memdb
->index
);
1407 list
= g_list_sort(list
, memdb_tree_compare
);
1409 g_print("%16s %c %16s %16s %8s %8s %8s %s\n",
1410 "INODE", 'T', "PARENT", "VERSION", "WRITER", "MTIME", "SIZE", "NAME");
1414 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1416 tree_entry_print(te
);
1423 g_mutex_unlock (&memdb
->mutex
);
1427 memdb_dump_index (memdb_index_t
*idx
)
1429 g_return_if_fail(idx
!= NULL
);
1431 g_print ("INDEX DUMP %016" PRIX64
"\n", idx
->version
);
1434 for (i
= 0; i
< idx
->size
; i
++) {
1435 g_print ("%016" PRIX64
" %016" PRIX64
"%016" PRIX64
"%016" PRIX64
"%016" PRIX64
"\n", idx
->entries
[i
].inode
,
1436 *((guint64
*)idx
->entries
[i
].digest
),
1437 *((guint64
*)(idx
->entries
[i
].digest
+ 8)),
1438 *((guint64
*)(idx
->entries
[i
].digest
+ 16)),
1439 *((guint64
*)(idx
->entries
[i
].digest
+ 24)));
1444 memdb_index_copy(memdb_index_t
*idx
)
1446 g_return_val_if_fail(idx
!= NULL
, NULL
);
1448 int bytes
= sizeof(memdb_index_t
) + idx
->size
*sizeof(memdb_index_extry_t
);
1449 if (idx
->bytes
!= bytes
) {
1450 cfs_critical("memdb index contains wrong number of bytes");
1454 memdb_index_t
*copy
= (memdb_index_t
*)g_memdup2(idx
, bytes
);
1460 memdb_tree_entry_csum(
1461 memdb_tree_entry_t
*te
,
1464 g_return_val_if_fail(te
!= NULL
, FALSE
);
1465 g_return_val_if_fail(csum
!= NULL
, FALSE
);
1467 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1469 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1470 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1471 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1472 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1473 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1474 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1475 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1476 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1478 if (te
->type
== DT_REG
&& te
->size
)
1479 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1481 size_t csum_len
= 32;
1482 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1483 g_checksum_free(sha256
);
1489 memdb_compute_checksum(
1491 memdb_tree_entry_t
*root
,
1495 g_return_val_if_fail(index
!= NULL
, FALSE
);
1496 g_return_val_if_fail(root
!= NULL
, FALSE
);
1498 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1500 GList
*list
= g_hash_table_get_values(index
);
1502 list
= g_list_sort(list
, memdb_tree_compare
);
1506 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1508 g_checksum_update(sha256
, (unsigned char*)&te
->inode
, sizeof(te
->inode
));
1509 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1510 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1511 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1512 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1513 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1514 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1515 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1517 if (te
->type
== DT_REG
&& te
->size
)
1518 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1525 g_checksum_get_digest(sha256
, csum
, &csum_len
);
1527 cfs_debug("checksum: %s", g_checksum_get_string(sha256
));
1529 g_checksum_free(sha256
);
1537 memdb_tree_entry_t
*root
)
1539 g_return_val_if_fail(index
!= NULL
, NULL
);
1540 g_return_val_if_fail(root
!= NULL
, NULL
);
1542 memdb_index_t
*idx
= NULL
;
1544 int count
= g_hash_table_size(index
);
1546 cfs_critical("memdb index has no entires");
1550 int bytes
= sizeof(memdb_index_t
) + count
*sizeof(memdb_index_extry_t
);
1551 idx
= g_malloc0(bytes
);
1555 idx
->version
= root
->version
;
1556 idx
->mtime
= root
->mtime
;
1557 idx
->writer
= root
->writer
;
1559 GChecksum
*sha256
= g_checksum_new(G_CHECKSUM_SHA256
);
1561 GList
*list
= g_hash_table_get_values(index
);
1563 list
= g_list_sort(list
, memdb_tree_compare
);
1568 memdb_tree_entry_t
*te
= (memdb_tree_entry_t
*)l
->data
;
1570 if (te
->inode
> idx
->last_inode
)
1571 idx
->last_inode
= te
->inode
;
1573 idx
->entries
[ind
].inode
= te
->inode
;
1575 g_checksum_reset (sha256
);
1577 g_checksum_update(sha256
, (unsigned char*)&te
->version
, sizeof(te
->version
));
1578 g_checksum_update(sha256
, (unsigned char*)&te
->writer
, sizeof(te
->writer
));
1579 g_checksum_update(sha256
, (unsigned char*)&te
->mtime
, sizeof(te
->mtime
));
1580 g_checksum_update(sha256
, (unsigned char*)&te
->size
, sizeof(te
->size
));
1581 g_checksum_update(sha256
, (unsigned char*)&te
->type
, sizeof(te
->type
));
1582 g_checksum_update(sha256
, (unsigned char*)&te
->parent
, sizeof(te
->parent
));
1583 g_checksum_update(sha256
, (unsigned char*)te
->name
, strlen(te
->name
));
1585 if (te
->type
== DT_REG
&& te
->size
)
1586 g_checksum_update(sha256
, (unsigned char*)te
->data
.value
, te
->size
);
1589 g_checksum_get_digest(sha256
, (guint8
*)idx
->entries
[ind
].digest
, &len
);
1598 g_checksum_free(sha256
);