]>
Commit | Line | Data |
---|---|---|
663996b3 MS |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2012 Kay Sievers <kay@vrfy.org> | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU Lesser General Public License as published by | |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | Lesser General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU Lesser General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <stdlib.h> | |
23 | #include <string.h> | |
24 | ||
25 | #include "util.h" | |
26 | #include "strbuf.h" | |
27 | ||
28 | /* | |
29 | * Strbuf stores given strings in a single continuous allocated memory | |
30 | * area. Identical strings are de-duplicated and return the same offset | |
31 | * as the first string stored. If the tail of a string already exists | |
32 | * in the buffer, the tail is returned. | |
33 | * | |
34 | * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the | |
35 | * information about the stored strings. | |
36 | * | |
37 | * Example of udev rules: | |
38 | * $ ./udevadm test . | |
39 | * ... | |
40 | * read rules file: /usr/lib/udev/rules.d/99-systemd.rules | |
41 | * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings | |
42 | * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used | |
43 | * ... | |
44 | */ | |
45 | ||
46 | struct strbuf *strbuf_new(void) { | |
47 | struct strbuf *str; | |
48 | ||
49 | str = new0(struct strbuf, 1); | |
50 | if (!str) | |
51 | return NULL; | |
52 | ||
53 | str->buf = new0(char, 1); | |
54 | if (!str->buf) | |
55 | goto err; | |
56 | str->len = 1; | |
57 | ||
58 | str->root = new0(struct strbuf_node, 1); | |
59 | if (!str->root) | |
60 | goto err; | |
61 | str->nodes_count = 1; | |
62 | return str; | |
63 | err: | |
64 | free(str->buf); | |
65 | free(str->root); | |
66 | free(str); | |
67 | return NULL; | |
68 | } | |
69 | ||
70 | static void strbuf_node_cleanup(struct strbuf_node *node) { | |
71 | size_t i; | |
72 | ||
73 | for (i = 0; i < node->children_count; i++) | |
74 | strbuf_node_cleanup(node->children[i].child); | |
75 | free(node->children); | |
76 | free(node); | |
77 | } | |
78 | ||
79 | /* clean up trie data, leave only the string buffer */ | |
80 | void strbuf_complete(struct strbuf *str) { | |
81 | if (!str) | |
82 | return; | |
83 | if (str->root) | |
84 | strbuf_node_cleanup(str->root); | |
85 | str->root = NULL; | |
86 | } | |
87 | ||
88 | /* clean up everything */ | |
89 | void strbuf_cleanup(struct strbuf *str) { | |
90 | if (!str) | |
91 | return; | |
92 | if (str->root) | |
93 | strbuf_node_cleanup(str->root); | |
94 | free(str->buf); | |
95 | free(str); | |
96 | } | |
97 | ||
98 | static int strbuf_children_cmp(const struct strbuf_child_entry *n1, | |
99 | const struct strbuf_child_entry *n2) { | |
100 | return n1->c - n2->c; | |
101 | } | |
102 | ||
103 | static void bubbleinsert(struct strbuf_node *node, | |
104 | uint8_t c, | |
105 | struct strbuf_node *node_child) { | |
106 | ||
107 | struct strbuf_child_entry new = { | |
108 | .c = c, | |
109 | .child = node_child, | |
110 | }; | |
111 | int left = 0, right = node->children_count; | |
112 | ||
113 | while (right > left) { | |
114 | int middle = (right + left) / 2 ; | |
115 | if (strbuf_children_cmp(&node->children[middle], &new) <= 0) | |
116 | left = middle + 1; | |
117 | else | |
118 | right = middle; | |
119 | } | |
120 | ||
121 | memmove(node->children + left + 1, node->children + left, | |
122 | sizeof(struct strbuf_child_entry) * (node->children_count - left)); | |
123 | node->children[left] = new; | |
124 | ||
125 | node->children_count ++; | |
126 | } | |
127 | ||
128 | /* add string, return the index/offset into the buffer */ | |
129 | ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { | |
130 | uint8_t c; | |
131 | struct strbuf_node *node; | |
132 | size_t depth; | |
133 | char *buf_new; | |
134 | struct strbuf_child_entry *child; | |
135 | struct strbuf_node *node_child; | |
136 | ssize_t off; | |
137 | ||
138 | if (!str->root) | |
139 | return -EINVAL; | |
140 | ||
141 | /* search string; start from last character to find possibly matching tails */ | |
142 | if (len == 0) | |
143 | return 0; | |
144 | str->in_count++; | |
145 | str->in_len += len; | |
146 | ||
147 | node = str->root; | |
148 | c = s[len-1]; | |
149 | for (depth = 0; depth <= len; depth++) { | |
150 | struct strbuf_child_entry search; | |
151 | ||
152 | /* match against current node */ | |
153 | off = node->value_off + node->value_len - len; | |
154 | if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { | |
155 | str->dedup_len += len; | |
156 | str->dedup_count++; | |
157 | return off; | |
158 | } | |
159 | ||
160 | /* lookup child node */ | |
161 | c = s[len - 1 - depth]; | |
162 | search.c = c; | |
163 | child = bsearch(&search, node->children, node->children_count, | |
164 | sizeof(struct strbuf_child_entry), | |
165 | (__compar_fn_t) strbuf_children_cmp); | |
166 | if (!child) | |
167 | break; | |
168 | node = child->child; | |
169 | } | |
170 | ||
171 | /* add new string */ | |
172 | buf_new = realloc(str->buf, str->len + len+1); | |
173 | if (!buf_new) | |
174 | return -ENOMEM; | |
175 | str->buf = buf_new; | |
176 | off = str->len; | |
177 | memcpy(str->buf + off, s, len); | |
178 | str->len += len; | |
179 | str->buf[str->len++] = '\0'; | |
180 | ||
181 | /* new node */ | |
182 | node_child = new0(struct strbuf_node, 1); | |
183 | if (!node_child) | |
184 | return -ENOMEM; | |
185 | node_child->value_off = off; | |
186 | node_child->value_len = len; | |
187 | ||
188 | /* extend array, add new entry, sort for bisection */ | |
189 | child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry)); | |
190 | if (!child) { | |
191 | free(node_child); | |
192 | return -ENOMEM; | |
193 | } | |
194 | ||
195 | str->nodes_count++; | |
196 | ||
197 | node->children = child; | |
198 | bubbleinsert(node, c, node_child); | |
199 | ||
200 | return off; | |
201 | } |