git.proxmox.com Git - libgit2.git/blob - src/buf_text.c
treebuilder: fix memory leaks in `write_with_buffer`
[libgit2.git] / src / buf_text.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7 #include "buf_text.h"
8
9 int git_buf_text_puts_escaped(
10 git_buf *buf,
11 const char *string,
12 const char *esc_chars,
13 const char *esc_with)
14 {
15 const char *scan;
16 size_t total = 0, esc_len = strlen(esc_with), count, alloclen;
17
18 if (!string)
19 return 0;
20
21 for (scan = string; *scan; ) {
22 /* count run of non-escaped characters */
23 count = strcspn(scan, esc_chars);
24 total += count;
25 scan += count;
26 /* count run of escaped characters */
27 count = strspn(scan, esc_chars);
28 total += count * (esc_len + 1);
29 scan += count;
30 }
31
32 GITERR_CHECK_ALLOC_ADD(&alloclen, total, 1);
33 if (git_buf_grow_by(buf, alloclen) < 0)
34 return -1;
35
36 for (scan = string; *scan; ) {
37 count = strcspn(scan, esc_chars);
38
39 memmove(buf->ptr + buf->size, scan, count);
40 scan += count;
41 buf->size += count;
42
43 for (count = strspn(scan, esc_chars); count > 0; --count) {
44 /* copy escape sequence */
45 memmove(buf->ptr + buf->size, esc_with, esc_len);
46 buf->size += esc_len;
47 /* copy character to be escaped */
48 buf->ptr[buf->size] = *scan;
49 buf->size++;
50 scan++;
51 }
52 }
53
54 buf->ptr[buf->size] = '\0';
55
56 return 0;
57 }
58
59 void git_buf_text_unescape(git_buf *buf)
60 {
61 buf->size = git__unescape(buf->ptr);
62 }
63
64 int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
65 {
66 const char *scan = src->ptr;
67 const char *scan_end = src->ptr + src->size;
68 const char *next = memchr(scan, '\r', src->size);
69 size_t new_size;
70 char *out;
71
72 assert(tgt != src);
73
74 if (!next)
75 return git_buf_set(tgt, src->ptr, src->size);
76
77 /* reduce reallocs while in the loop */
78 GITERR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
79 if (git_buf_grow(tgt, new_size) < 0)
80 return -1;
81
82 out = tgt->ptr;
83 tgt->size = 0;
84
85 /* Find the next \r and copy whole chunk up to there to tgt */
86 for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) {
87 if (next > scan) {
88 size_t copylen = (size_t)(next - scan);
89 memcpy(out, scan, copylen);
90 out += copylen;
91 }
92
93 /* Do not drop \r unless it is followed by \n */
94 if (next + 1 == scan_end || next[1] != '\n')
95 *out++ = '\r';
96 }
97
98 /* Copy remaining input into dest */
99 if (scan < scan_end) {
100 size_t remaining = (size_t)(scan_end - scan);
101 memcpy(out, scan, remaining);
102 out += remaining;
103 }
104
105 tgt->size = (size_t)(out - tgt->ptr);
106 tgt->ptr[tgt->size] = '\0';
107
108 return 0;
109 }
110
111 int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
112 {
113 const char *start = src->ptr;
114 const char *end = start + src->size;
115 const char *scan = start;
116 const char *next = memchr(scan, '\n', src->size);
117 size_t alloclen;
118
119 assert(tgt != src);
120
121 if (!next)
122 return git_buf_set(tgt, src->ptr, src->size);
123
124 /* attempt to reduce reallocs while in the loop */
125 GITERR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
126 GITERR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
127 if (git_buf_grow(tgt, alloclen) < 0)
128 return -1;
129 tgt->size = 0;
130
131 for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) {
132 size_t copylen = next - scan;
133
134 /* if we find mixed line endings, carry on */
135 if (copylen && next[-1] == '\r')
136 copylen--;
137
138 GITERR_CHECK_ALLOC_ADD(&alloclen, copylen, 3);
139 if (git_buf_grow_by(tgt, alloclen) < 0)
140 return -1;
141
142 if (copylen) {
143 memcpy(tgt->ptr + tgt->size, scan, copylen);
144 tgt->size += copylen;
145 }
146
147 tgt->ptr[tgt->size++] = '\r';
148 tgt->ptr[tgt->size++] = '\n';
149 }
150
151 tgt->ptr[tgt->size] = '\0';
152 return git_buf_put(tgt, scan, end - scan);
153 }
154
155 int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
156 {
157 size_t i;
158 const char *str, *pfx;
159
160 git_buf_clear(buf);
161
162 if (!strings || !strings->count)
163 return 0;
164
165 /* initialize common prefix to first string */
166 if (git_buf_sets(buf, strings->strings[0]) < 0)
167 return -1;
168
169 /* go through the rest of the strings, truncating to shared prefix */
170 for (i = 1; i < strings->count; ++i) {
171
172 for (str = strings->strings[i], pfx = buf->ptr;
173 *str && *str == *pfx; str++, pfx++)
174 /* scanning */;
175
176 git_buf_truncate(buf, pfx - buf->ptr);
177
178 if (!buf->size)
179 break;
180 }
181
182 return 0;
183 }
184
185 bool git_buf_text_is_binary(const git_buf *buf)
186 {
187 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
188 git_bom_t bom;
189 int printable = 0, nonprintable = 0;
190
191 scan += git_buf_text_detect_bom(&bom, buf, 0);
192
193 if (bom > GIT_BOM_UTF8)
194 return 1;
195
196 while (scan < end) {
197 unsigned char c = *scan++;
198
199 /* Printable characters are those above SPACE (0x1F) excluding DEL,
200 * and including BS, ESC and FF.
201 */
202 if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
203 printable++;
204 else if (c == '\0')
205 return true;
206 else if (!git__isspace(c))
207 nonprintable++;
208 }
209
210 return ((printable >> 7) < nonprintable);
211 }
212
213 bool git_buf_text_contains_nul(const git_buf *buf)
214 {
215 return (memchr(buf->ptr, '\0', buf->size) != NULL);
216 }
217
218 int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
219 {
220 const char *ptr;
221 size_t len;
222
223 *bom = GIT_BOM_NONE;
224 /* need at least 2 bytes after offset to look for any BOM */
225 if (buf->size < offset + 2)
226 return 0;
227
228 ptr = buf->ptr + offset;
229 len = buf->size - offset;
230
231 switch (*ptr++) {
232 case 0:
233 if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
234 *bom = GIT_BOM_UTF32_BE;
235 return 4;
236 }
237 break;
238 case '\xEF':
239 if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
240 *bom = GIT_BOM_UTF8;
241 return 3;
242 }
243 break;
244 case '\xFE':
245 if (*ptr == '\xFF') {
246 *bom = GIT_BOM_UTF16_BE;
247 return 2;
248 }
249 break;
250 case '\xFF':
251 if (*ptr != '\xFE')
252 break;
253 if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
254 *bom = GIT_BOM_UTF32_LE;
255 return 4;
256 } else {
257 *bom = GIT_BOM_UTF16_LE;
258 return 2;
259 }
260 break;
261 default:
262 break;
263 }
264
265 return 0;
266 }
267
268 bool git_buf_text_gather_stats(
269 git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
270 {
271 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
272 int skip;
273
274 memset(stats, 0, sizeof(*stats));
275
276 /* BOM detection */
277 skip = git_buf_text_detect_bom(&stats->bom, buf, 0);
278 if (skip_bom)
279 scan += skip;
280
281 /* Ignore EOF character */
282 if (buf->size > 0 && end[-1] == '\032')
283 end--;
284
285 /* Counting loop */
286 while (scan < end) {
287 unsigned char c = *scan++;
288
289 if (c > 0x1F && c != 0x7F)
290 stats->printable++;
291 else switch (c) {
292 case '\0':
293 stats->nul++;
294 stats->nonprintable++;
295 break;
296 case '\n':
297 stats->lf++;
298 break;
299 case '\r':
300 stats->cr++;
301 if (scan < end && *scan == '\n')
302 stats->crlf++;
303 break;
304 case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
305 stats->printable++;
306 break;
307 default:
308 stats->nonprintable++;
309 break;
310 }
311 }
312
313 return (stats->nul > 0 ||
314 ((stats->printable >> 7) < stats->nonprintable));
315 }