]> git.proxmox.com Git - libgit2.git/blob - src/str.c
0d405bfda500f404591b5b05780c2a70c5909269
[libgit2.git] / src / str.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "str.h"
9 #include "posix.h"
10 #include <ctype.h>
11
/*
 * Default value for git_str->ptr, so that callers can always assume
 * that ptr is non-NULL and zero terminated, even for a git_str that
 * has only just been initialized.
 */
char git_str__initstr[1];

/*
 * Sentinel that git_str->ptr is pointed at once growing the buffer
 * has failed; the functions below test for it to refuse further work.
 */
char git_str__oom[1];

/*
 * Make sure that `b` has at least `d` bytes allocated.  On failure (or
 * if `b` is already marked as out-of-memory) this returns -1 from the
 * *enclosing* function.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement and cannot capture a following `else`.
 */
#define ENSURE_SIZE(b, d) \
	do { \
		if ((b)->ptr == git_str__oom || \
		    ((d) > (b)->asize && git_str_grow((b), (d)) < 0)) \
			return -1; \
	} while (0)
23
24
25 int git_str_init(git_str *buf, size_t initial_size)
26 {
27 buf->asize = 0;
28 buf->size = 0;
29 buf->ptr = git_str__initstr;
30
31 ENSURE_SIZE(buf, initial_size);
32
33 return 0;
34 }
35
36 int git_str_try_grow(
37 git_str *buf, size_t target_size, bool mark_oom)
38 {
39 char *new_ptr;
40 size_t new_size;
41
42 if (buf->ptr == git_str__oom)
43 return -1;
44
45 if (buf->asize == 0 && buf->size != 0) {
46 git_error_set(GIT_ERROR_INVALID, "cannot grow a borrowed buffer");
47 return GIT_EINVALID;
48 }
49
50 if (!target_size)
51 target_size = buf->size;
52
53 if (target_size <= buf->asize)
54 return 0;
55
56 if (buf->asize == 0) {
57 new_size = target_size;
58 new_ptr = NULL;
59 } else {
60 new_size = buf->asize;
61 /*
62 * Grow the allocated buffer by 1.5 to allow
63 * re-use of memory holes resulting from the
64 * realloc. If this is still too small, then just
65 * use the target size.
66 */
67 if ((new_size = (new_size << 1) - (new_size >> 1)) < target_size)
68 new_size = target_size;
69 new_ptr = buf->ptr;
70 }
71
72 /* round allocation up to multiple of 8 */
73 new_size = (new_size + 7) & ~7;
74
75 if (new_size < buf->size) {
76 if (mark_oom) {
77 if (buf->ptr && buf->ptr != git_str__initstr)
78 git__free(buf->ptr);
79 buf->ptr = git_str__oom;
80 }
81
82 git_error_set_oom();
83 return -1;
84 }
85
86 new_ptr = git__realloc(new_ptr, new_size);
87
88 if (!new_ptr) {
89 if (mark_oom) {
90 if (buf->ptr && (buf->ptr != git_str__initstr))
91 git__free(buf->ptr);
92 buf->ptr = git_str__oom;
93 }
94 return -1;
95 }
96
97 buf->asize = new_size;
98 buf->ptr = new_ptr;
99
100 /* truncate the existing buffer size if necessary */
101 if (buf->size >= buf->asize)
102 buf->size = buf->asize - 1;
103 buf->ptr[buf->size] = '\0';
104
105 return 0;
106 }
107
108 int git_str_grow(git_str *buffer, size_t target_size)
109 {
110 return git_str_try_grow(buffer, target_size, true);
111 }
112
113 int git_str_grow_by(git_str *buffer, size_t additional_size)
114 {
115 size_t newsize;
116
117 if (GIT_ADD_SIZET_OVERFLOW(&newsize, buffer->size, additional_size)) {
118 buffer->ptr = git_str__oom;
119 return -1;
120 }
121
122 return git_str_try_grow(buffer, newsize, true);
123 }
124
125 void git_str_dispose(git_str *buf)
126 {
127 if (!buf) return;
128
129 if (buf->asize > 0 && buf->ptr != NULL && buf->ptr != git_str__oom)
130 git__free(buf->ptr);
131
132 git_str_init(buf, 0);
133 }
134
135 void git_str_clear(git_str *buf)
136 {
137 buf->size = 0;
138
139 if (!buf->ptr) {
140 buf->ptr = git_str__initstr;
141 buf->asize = 0;
142 }
143
144 if (buf->asize > 0)
145 buf->ptr[0] = '\0';
146 }
147
148 int git_str_set(git_str *buf, const void *data, size_t len)
149 {
150 size_t alloclen;
151
152 if (len == 0 || data == NULL) {
153 git_str_clear(buf);
154 } else {
155 if (data != buf->ptr) {
156 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, len, 1);
157 ENSURE_SIZE(buf, alloclen);
158 memmove(buf->ptr, data, len);
159 }
160
161 buf->size = len;
162 if (buf->asize > buf->size)
163 buf->ptr[buf->size] = '\0';
164
165 }
166 return 0;
167 }
168
169 int git_str_sets(git_str *buf, const char *string)
170 {
171 return git_str_set(buf, string, string ? strlen(string) : 0);
172 }
173
174 int git_str_putc(git_str *buf, char c)
175 {
176 size_t new_size;
177 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, 2);
178 ENSURE_SIZE(buf, new_size);
179 buf->ptr[buf->size++] = c;
180 buf->ptr[buf->size] = '\0';
181 return 0;
182 }
183
184 int git_str_putcn(git_str *buf, char c, size_t len)
185 {
186 size_t new_size;
187 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
188 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
189 ENSURE_SIZE(buf, new_size);
190 memset(buf->ptr + buf->size, c, len);
191 buf->size += len;
192 buf->ptr[buf->size] = '\0';
193 return 0;
194 }
195
196 int git_str_put(git_str *buf, const char *data, size_t len)
197 {
198 if (len) {
199 size_t new_size;
200
201 GIT_ASSERT_ARG(data);
202
203 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
204 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
205 ENSURE_SIZE(buf, new_size);
206 memmove(buf->ptr + buf->size, data, len);
207 buf->size += len;
208 buf->ptr[buf->size] = '\0';
209 }
210 return 0;
211 }
212
213 int git_str_puts(git_str *buf, const char *string)
214 {
215 GIT_ASSERT_ARG(string);
216
217 return git_str_put(buf, string, strlen(string));
218 }
219
220 static char hex_encode[] = "0123456789abcdef";
221
222 int git_str_encode_hexstr(git_str *str, const char *data, size_t len)
223 {
224 size_t new_size, i;
225 char *s;
226
227 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&new_size, len, 2);
228 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
229
230 if (git_str_grow_by(str, new_size) < 0)
231 return -1;
232
233 s = str->ptr + str->size;
234
235 for (i = 0; i < len; i++) {
236 *s++ = hex_encode[(data[i] & 0xf0) >> 4];
237 *s++ = hex_encode[(data[i] & 0x0f)];
238 }
239
240 str->size += (len * 2);
241 str->ptr[str->size] = '\0';
242
243 return 0;
244 }
245
246 static const char base64_encode[] =
247 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
248
249 int git_str_encode_base64(git_str *buf, const char *data, size_t len)
250 {
251 size_t extra = len % 3;
252 uint8_t *write, a, b, c;
253 const uint8_t *read = (const uint8_t *)data;
254 size_t blocks = (len / 3) + !!extra, alloclen;
255
256 GIT_ERROR_CHECK_ALLOC_ADD(&blocks, blocks, 1);
257 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&alloclen, blocks, 4);
258 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, buf->size);
259
260 ENSURE_SIZE(buf, alloclen);
261 write = (uint8_t *)&buf->ptr[buf->size];
262
263 /* convert each run of 3 bytes into 4 output bytes */
264 for (len -= extra; len > 0; len -= 3) {
265 a = *read++;
266 b = *read++;
267 c = *read++;
268
269 *write++ = base64_encode[a >> 2];
270 *write++ = base64_encode[(a & 0x03) << 4 | b >> 4];
271 *write++ = base64_encode[(b & 0x0f) << 2 | c >> 6];
272 *write++ = base64_encode[c & 0x3f];
273 }
274
275 if (extra > 0) {
276 a = *read++;
277 b = (extra > 1) ? *read++ : 0;
278
279 *write++ = base64_encode[a >> 2];
280 *write++ = base64_encode[(a & 0x03) << 4 | b >> 4];
281 *write++ = (extra > 1) ? base64_encode[(b & 0x0f) << 2] : '=';
282 *write++ = '=';
283 }
284
285 buf->size = ((char *)write) - buf->ptr;
286 buf->ptr[buf->size] = '\0';
287
288 return 0;
289 }
290
291 /* The inverse of base64_encode */
292 static const int8_t base64_decode[] = {
293 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
294 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
295 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
296 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, 0, -1, -1,
297 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
298 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
299 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
300 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
301 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
302 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
303 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
304 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
305 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
306 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
307 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
308 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
309 };
310
311 int git_str_decode_base64(git_str *buf, const char *base64, size_t len)
312 {
313 size_t i;
314 int8_t a, b, c, d;
315 size_t orig_size = buf->size, new_size;
316
317 if (len % 4) {
318 git_error_set(GIT_ERROR_INVALID, "invalid base64 input");
319 return -1;
320 }
321
322 GIT_ASSERT_ARG(len % 4 == 0);
323 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, (len / 4 * 3), buf->size);
324 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
325 ENSURE_SIZE(buf, new_size);
326
327 for (i = 0; i < len; i += 4) {
328 if ((a = base64_decode[(unsigned char)base64[i]]) < 0 ||
329 (b = base64_decode[(unsigned char)base64[i+1]]) < 0 ||
330 (c = base64_decode[(unsigned char)base64[i+2]]) < 0 ||
331 (d = base64_decode[(unsigned char)base64[i+3]]) < 0) {
332 buf->size = orig_size;
333 buf->ptr[buf->size] = '\0';
334
335 git_error_set(GIT_ERROR_INVALID, "invalid base64 input");
336 return -1;
337 }
338
339 buf->ptr[buf->size++] = ((a << 2) | (b & 0x30) >> 4);
340 buf->ptr[buf->size++] = ((b & 0x0f) << 4) | ((c & 0x3c) >> 2);
341 buf->ptr[buf->size++] = (c & 0x03) << 6 | (d & 0x3f);
342 }
343
344 buf->ptr[buf->size] = '\0';
345 return 0;
346 }
347
348 static const char base85_encode[] =
349 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
350
351 int git_str_encode_base85(git_str *buf, const char *data, size_t len)
352 {
353 size_t blocks = (len / 4) + !!(len % 4), alloclen;
354
355 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&alloclen, blocks, 5);
356 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, buf->size);
357 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
358
359 ENSURE_SIZE(buf, alloclen);
360
361 while (len) {
362 uint32_t acc = 0;
363 char b85[5];
364 int i;
365
366 for (i = 24; i >= 0; i -= 8) {
367 uint8_t ch = *data++;
368 acc |= (uint32_t)ch << i;
369
370 if (--len == 0)
371 break;
372 }
373
374 for (i = 4; i >= 0; i--) {
375 int val = acc % 85;
376 acc /= 85;
377
378 b85[i] = base85_encode[val];
379 }
380
381 for (i = 0; i < 5; i++)
382 buf->ptr[buf->size++] = b85[i];
383 }
384
385 buf->ptr[buf->size] = '\0';
386
387 return 0;
388 }
389
390 /* The inverse of base85_encode */
391 static const int8_t base85_decode[] = {
392 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
393 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
394 -1, 63, -1, 64, 65, 66, 67, -1, 68, 69, 70, 71, -1, 72, -1, -1,
395 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 73, 74, 75, 76, 77,
396 78, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
397 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, 79, 80,
398 81, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
399 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 82, 83, 84, 85, -1,
400 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
401 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
402 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
403 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
404 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
405 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
406 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
407 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
408 };
409
410 int git_str_decode_base85(
411 git_str *buf,
412 const char *base85,
413 size_t base85_len,
414 size_t output_len)
415 {
416 size_t orig_size = buf->size, new_size;
417
418 if (base85_len % 5 ||
419 output_len > base85_len * 4 / 5) {
420 git_error_set(GIT_ERROR_INVALID, "invalid base85 input");
421 return -1;
422 }
423
424 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, output_len, buf->size);
425 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
426 ENSURE_SIZE(buf, new_size);
427
428 while (output_len) {
429 unsigned acc = 0;
430 int de, cnt = 4;
431 unsigned char ch;
432 do {
433 ch = *base85++;
434 de = base85_decode[ch];
435 if (--de < 0)
436 goto on_error;
437
438 acc = acc * 85 + de;
439 } while (--cnt);
440 ch = *base85++;
441 de = base85_decode[ch];
442 if (--de < 0)
443 goto on_error;
444
445 /* Detect overflow. */
446 if (0xffffffff / 85 < acc ||
447 0xffffffff - de < (acc *= 85))
448 goto on_error;
449
450 acc += de;
451
452 cnt = (output_len < 4) ? (int)output_len : 4;
453 output_len -= cnt;
454 do {
455 acc = (acc << 8) | (acc >> 24);
456 buf->ptr[buf->size++] = acc;
457 } while (--cnt);
458 }
459
460 buf->ptr[buf->size] = 0;
461
462 return 0;
463
464 on_error:
465 buf->size = orig_size;
466 buf->ptr[buf->size] = '\0';
467
468 git_error_set(GIT_ERROR_INVALID, "invalid base85 input");
469 return -1;
470 }
471
472 #define HEX_DECODE(c) ((c | 32) % 39 - 9)
473
474 int git_str_decode_percent(
475 git_str *buf,
476 const char *str,
477 size_t str_len)
478 {
479 size_t str_pos, new_size;
480
481 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, str_len);
482 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
483 ENSURE_SIZE(buf, new_size);
484
485 for (str_pos = 0; str_pos < str_len; buf->size++, str_pos++) {
486 if (str[str_pos] == '%' &&
487 str_len > str_pos + 2 &&
488 isxdigit(str[str_pos + 1]) &&
489 isxdigit(str[str_pos + 2])) {
490 buf->ptr[buf->size] = (HEX_DECODE(str[str_pos + 1]) << 4) +
491 HEX_DECODE(str[str_pos + 2]);
492 str_pos += 2;
493 } else {
494 buf->ptr[buf->size] = str[str_pos];
495 }
496 }
497
498 buf->ptr[buf->size] = '\0';
499 return 0;
500 }
501
502 int git_str_vprintf(git_str *buf, const char *format, va_list ap)
503 {
504 size_t expected_size, new_size;
505 int len;
506
507 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&expected_size, strlen(format), 2);
508 GIT_ERROR_CHECK_ALLOC_ADD(&expected_size, expected_size, buf->size);
509 ENSURE_SIZE(buf, expected_size);
510
511 while (1) {
512 va_list args;
513 va_copy(args, ap);
514
515 len = p_vsnprintf(
516 buf->ptr + buf->size,
517 buf->asize - buf->size,
518 format, args
519 );
520
521 va_end(args);
522
523 if (len < 0) {
524 git__free(buf->ptr);
525 buf->ptr = git_str__oom;
526 return -1;
527 }
528
529 if ((size_t)len + 1 <= buf->asize - buf->size) {
530 buf->size += len;
531 break;
532 }
533
534 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
535 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
536 ENSURE_SIZE(buf, new_size);
537 }
538
539 return 0;
540 }
541
542 int git_str_printf(git_str *buf, const char *format, ...)
543 {
544 int r;
545 va_list ap;
546
547 va_start(ap, format);
548 r = git_str_vprintf(buf, format, ap);
549 va_end(ap);
550
551 return r;
552 }
553
554 int git_str_copy_cstr(char *data, size_t datasize, const git_str *buf)
555 {
556 size_t copylen;
557
558 GIT_ASSERT_ARG(data);
559 GIT_ASSERT_ARG(datasize);
560 GIT_ASSERT_ARG(buf);
561
562 data[0] = '\0';
563
564 if (buf->size == 0 || buf->asize <= 0)
565 return 0;
566
567 copylen = buf->size;
568 if (copylen > datasize - 1)
569 copylen = datasize - 1;
570 memmove(data, buf->ptr, copylen);
571 data[copylen] = '\0';
572
573 return 0;
574 }
575
576 void git_str_consume_bytes(git_str *buf, size_t len)
577 {
578 git_str_consume(buf, buf->ptr + len);
579 }
580
581 void git_str_consume(git_str *buf, const char *end)
582 {
583 if (end > buf->ptr && end <= buf->ptr + buf->size) {
584 size_t consumed = end - buf->ptr;
585 memmove(buf->ptr, end, buf->size - consumed);
586 buf->size -= consumed;
587 buf->ptr[buf->size] = '\0';
588 }
589 }
590
591 void git_str_truncate(git_str *buf, size_t len)
592 {
593 if (len >= buf->size)
594 return;
595
596 buf->size = len;
597 if (buf->size < buf->asize)
598 buf->ptr[buf->size] = '\0';
599 }
600
601 void git_str_shorten(git_str *buf, size_t amount)
602 {
603 if (buf->size > amount)
604 git_str_truncate(buf, buf->size - amount);
605 else
606 git_str_clear(buf);
607 }
608
609 void git_str_truncate_at_char(git_str *buf, char separator)
610 {
611 ssize_t idx = git_str_find(buf, separator);
612 if (idx >= 0)
613 git_str_truncate(buf, (size_t)idx);
614 }
615
616 void git_str_rtruncate_at_char(git_str *buf, char separator)
617 {
618 ssize_t idx = git_str_rfind_next(buf, separator);
619 git_str_truncate(buf, idx < 0 ? 0 : (size_t)idx);
620 }
621
622 void git_str_swap(git_str *str_a, git_str *str_b)
623 {
624 git_str t = *str_a;
625 *str_a = *str_b;
626 *str_b = t;
627 }
628
629 char *git_str_detach(git_str *buf)
630 {
631 char *data = buf->ptr;
632
633 if (buf->asize == 0 || buf->ptr == git_str__oom)
634 return NULL;
635
636 git_str_init(buf, 0);
637
638 return data;
639 }
640
641 int git_str_attach(git_str *buf, char *ptr, size_t asize)
642 {
643 git_str_dispose(buf);
644
645 if (ptr) {
646 buf->ptr = ptr;
647 buf->size = strlen(ptr);
648 if (asize)
649 buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
650 else /* pass 0 to fall back on strlen + 1 */
651 buf->asize = buf->size + 1;
652 }
653
654 ENSURE_SIZE(buf, asize);
655 return 0;
656 }
657
658 void git_str_attach_notowned(git_str *buf, const char *ptr, size_t size)
659 {
660 if (git_str_is_allocated(buf))
661 git_str_dispose(buf);
662
663 if (!size) {
664 git_str_init(buf, 0);
665 } else {
666 buf->ptr = (char *)ptr;
667 buf->asize = 0;
668 buf->size = size;
669 }
670 }
671
672 int git_str_join_n(git_str *buf, char separator, int nbuf, ...)
673 {
674 va_list ap;
675 int i;
676 size_t total_size = 0, original_size = buf->size;
677 char *out, *original = buf->ptr;
678
679 if (buf->size > 0 && buf->ptr[buf->size - 1] != separator)
680 ++total_size; /* space for initial separator */
681
682 /* Make two passes to avoid multiple reallocation */
683
684 va_start(ap, nbuf);
685 for (i = 0; i < nbuf; ++i) {
686 const char *segment;
687 size_t segment_len;
688
689 segment = va_arg(ap, const char *);
690 if (!segment)
691 continue;
692
693 segment_len = strlen(segment);
694
695 GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, segment_len);
696
697 if (segment_len == 0 || segment[segment_len - 1] != separator)
698 GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, 1);
699 }
700 va_end(ap);
701
702 /* expand buffer if needed */
703 if (total_size == 0)
704 return 0;
705
706 GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, 1);
707 if (git_str_grow_by(buf, total_size) < 0)
708 return -1;
709
710 out = buf->ptr + buf->size;
711
712 /* append separator to existing buf if needed */
713 if (buf->size > 0 && out[-1] != separator)
714 *out++ = separator;
715
716 va_start(ap, nbuf);
717 for (i = 0; i < nbuf; ++i) {
718 const char *segment;
719 size_t segment_len;
720
721 segment = va_arg(ap, const char *);
722 if (!segment)
723 continue;
724
725 /* deal with join that references buffer's original content */
726 if (segment >= original && segment < original + original_size) {
727 size_t offset = (segment - original);
728 segment = buf->ptr + offset;
729 segment_len = original_size - offset;
730 } else {
731 segment_len = strlen(segment);
732 }
733
734 /* skip leading separators */
735 if (out > buf->ptr && out[-1] == separator)
736 while (segment_len > 0 && *segment == separator) {
737 segment++;
738 segment_len--;
739 }
740
741 /* copy over next buffer */
742 if (segment_len > 0) {
743 memmove(out, segment, segment_len);
744 out += segment_len;
745 }
746
747 /* append trailing separator (except for last item) */
748 if (i < nbuf - 1 && out > buf->ptr && out[-1] != separator)
749 *out++ = separator;
750 }
751 va_end(ap);
752
753 /* set size based on num characters actually written */
754 buf->size = out - buf->ptr;
755 buf->ptr[buf->size] = '\0';
756
757 return 0;
758 }
759
760 int git_str_join(
761 git_str *buf,
762 char separator,
763 const char *str_a,
764 const char *str_b)
765 {
766 size_t strlen_a = str_a ? strlen(str_a) : 0;
767 size_t strlen_b = strlen(str_b);
768 size_t alloc_len;
769 int need_sep = 0;
770 ssize_t offset_a = -1;
771
772 /* not safe to have str_b point internally to the buffer */
773 if (buf->size)
774 GIT_ASSERT_ARG(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
775
776 /* figure out if we need to insert a separator */
777 if (separator && strlen_a) {
778 while (*str_b == separator) { str_b++; strlen_b--; }
779 if (str_a[strlen_a - 1] != separator)
780 need_sep = 1;
781 }
782
783 /* str_a could be part of the buffer */
784 if (buf->size && str_a >= buf->ptr && str_a < buf->ptr + buf->size)
785 offset_a = str_a - buf->ptr;
786
787 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, strlen_a, strlen_b);
788 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, need_sep);
789 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
790 ENSURE_SIZE(buf, alloc_len);
791
792 /* fix up internal pointers */
793 if (offset_a >= 0)
794 str_a = buf->ptr + offset_a;
795
796 /* do the actual copying */
797 if (offset_a != 0 && str_a)
798 memmove(buf->ptr, str_a, strlen_a);
799 if (need_sep)
800 buf->ptr[strlen_a] = separator;
801 memcpy(buf->ptr + strlen_a + need_sep, str_b, strlen_b);
802
803 buf->size = strlen_a + strlen_b + need_sep;
804 buf->ptr[buf->size] = '\0';
805
806 return 0;
807 }
808
809 int git_str_join3(
810 git_str *buf,
811 char separator,
812 const char *str_a,
813 const char *str_b,
814 const char *str_c)
815 {
816 size_t len_a = strlen(str_a),
817 len_b = strlen(str_b),
818 len_c = strlen(str_c),
819 len_total;
820 int sep_a = 0, sep_b = 0;
821 char *tgt;
822
823 /* for this function, disallow pointers into the existing buffer */
824 GIT_ASSERT(str_a < buf->ptr || str_a >= buf->ptr + buf->size);
825 GIT_ASSERT(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
826 GIT_ASSERT(str_c < buf->ptr || str_c >= buf->ptr + buf->size);
827
828 if (separator) {
829 if (len_a > 0) {
830 while (*str_b == separator) { str_b++; len_b--; }
831 sep_a = (str_a[len_a - 1] != separator);
832 }
833 if (len_a > 0 || len_b > 0)
834 while (*str_c == separator) { str_c++; len_c--; }
835 if (len_b > 0)
836 sep_b = (str_b[len_b - 1] != separator);
837 }
838
839 GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_a, sep_a);
840 GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, len_b);
841 GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, sep_b);
842 GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, len_c);
843 GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, 1);
844 ENSURE_SIZE(buf, len_total);
845
846 tgt = buf->ptr;
847
848 if (len_a) {
849 memcpy(tgt, str_a, len_a);
850 tgt += len_a;
851 }
852 if (sep_a)
853 *tgt++ = separator;
854 if (len_b) {
855 memcpy(tgt, str_b, len_b);
856 tgt += len_b;
857 }
858 if (sep_b)
859 *tgt++ = separator;
860 if (len_c)
861 memcpy(tgt, str_c, len_c);
862
863 buf->size = len_a + sep_a + len_b + sep_b + len_c;
864 buf->ptr[buf->size] = '\0';
865
866 return 0;
867 }
868
869 void git_str_rtrim(git_str *buf)
870 {
871 while (buf->size > 0) {
872 if (!git__isspace(buf->ptr[buf->size - 1]))
873 break;
874
875 buf->size--;
876 }
877
878 if (buf->asize > buf->size)
879 buf->ptr[buf->size] = '\0';
880 }
881
882 int git_str_cmp(const git_str *a, const git_str *b)
883 {
884 int result = memcmp(a->ptr, b->ptr, min(a->size, b->size));
885 return (result != 0) ? result :
886 (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
887 }
888
889 int git_str_splice(
890 git_str *buf,
891 size_t where,
892 size_t nb_to_remove,
893 const char *data,
894 size_t nb_to_insert)
895 {
896 char *splice_loc;
897 size_t new_size, alloc_size;
898
899 GIT_ASSERT(buf);
900 GIT_ASSERT(where <= buf->size);
901 GIT_ASSERT(nb_to_remove <= buf->size - where);
902
903 splice_loc = buf->ptr + where;
904
905 /* Ported from git.git
906 * https://github.com/git/git/blob/16eed7c/strbuf.c#L159-176
907 */
908 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, (buf->size - nb_to_remove), nb_to_insert);
909 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_size, new_size, 1);
910 ENSURE_SIZE(buf, alloc_size);
911
912 memmove(splice_loc + nb_to_insert,
913 splice_loc + nb_to_remove,
914 buf->size - where - nb_to_remove);
915
916 memcpy(splice_loc, data, nb_to_insert);
917
918 buf->size = new_size;
919 buf->ptr[buf->size] = '\0';
920 return 0;
921 }
922
923 /* Quote per http://marc.info/?l=git&m=112927316408690&w=2 */
924 int git_str_quote(git_str *buf)
925 {
926 const char whitespace[] = { 'a', 'b', 't', 'n', 'v', 'f', 'r' };
927 git_str quoted = GIT_STR_INIT;
928 size_t i = 0;
929 bool quote = false;
930 int error = 0;
931
932 /* walk to the first char that needs quoting */
933 if (buf->size && buf->ptr[0] == '!')
934 quote = true;
935
936 for (i = 0; !quote && i < buf->size; i++) {
937 if (buf->ptr[i] == '"' || buf->ptr[i] == '\\' ||
938 buf->ptr[i] < ' ' || buf->ptr[i] > '~') {
939 quote = true;
940 break;
941 }
942 }
943
944 if (!quote)
945 goto done;
946
947 git_str_putc(&quoted, '"');
948 git_str_put(&quoted, buf->ptr, i);
949
950 for (; i < buf->size; i++) {
951 /* whitespace - use the map above, which is ordered by ascii value */
952 if (buf->ptr[i] >= '\a' && buf->ptr[i] <= '\r') {
953 git_str_putc(&quoted, '\\');
954 git_str_putc(&quoted, whitespace[buf->ptr[i] - '\a']);
955 }
956
957 /* double quote and backslash must be escaped */
958 else if (buf->ptr[i] == '"' || buf->ptr[i] == '\\') {
959 git_str_putc(&quoted, '\\');
960 git_str_putc(&quoted, buf->ptr[i]);
961 }
962
963 /* escape anything unprintable as octal */
964 else if (buf->ptr[i] != ' ' &&
965 (buf->ptr[i] < '!' || buf->ptr[i] > '~')) {
966 git_str_printf(&quoted, "\\%03o", (unsigned char)buf->ptr[i]);
967 }
968
969 /* yay, printable! */
970 else {
971 git_str_putc(&quoted, buf->ptr[i]);
972 }
973 }
974
975 git_str_putc(&quoted, '"');
976
977 if (git_str_oom(&quoted)) {
978 error = -1;
979 goto done;
980 }
981
982 git_str_swap(&quoted, buf);
983
984 done:
985 git_str_dispose(&quoted);
986 return error;
987 }
988
989 /* Unquote per http://marc.info/?l=git&m=112927316408690&w=2 */
990 int git_str_unquote(git_str *buf)
991 {
992 size_t i, j;
993 char ch;
994
995 git_str_rtrim(buf);
996
997 if (buf->size < 2 || buf->ptr[0] != '"' || buf->ptr[buf->size-1] != '"')
998 goto invalid;
999
1000 for (i = 0, j = 1; j < buf->size-1; i++, j++) {
1001 ch = buf->ptr[j];
1002
1003 if (ch == '\\') {
1004 if (j == buf->size-2)
1005 goto invalid;
1006
1007 ch = buf->ptr[++j];
1008
1009 switch (ch) {
1010 /* \" or \\ simply copy the char in */
1011 case '"': case '\\':
1012 break;
1013
1014 /* add the appropriate escaped char */
1015 case 'a': ch = '\a'; break;
1016 case 'b': ch = '\b'; break;
1017 case 'f': ch = '\f'; break;
1018 case 'n': ch = '\n'; break;
1019 case 'r': ch = '\r'; break;
1020 case 't': ch = '\t'; break;
1021 case 'v': ch = '\v'; break;
1022
1023 /* \xyz digits convert to the char*/
1024 case '0': case '1': case '2': case '3':
1025 if (j == buf->size-3) {
1026 git_error_set(GIT_ERROR_INVALID,
1027 "truncated quoted character \\%c", ch);
1028 return -1;
1029 }
1030
1031 if (buf->ptr[j+1] < '0' || buf->ptr[j+1] > '7' ||
1032 buf->ptr[j+2] < '0' || buf->ptr[j+2] > '7') {
1033 git_error_set(GIT_ERROR_INVALID,
1034 "truncated quoted character \\%c%c%c",
1035 buf->ptr[j], buf->ptr[j+1], buf->ptr[j+2]);
1036 return -1;
1037 }
1038
1039 ch = ((buf->ptr[j] - '0') << 6) |
1040 ((buf->ptr[j+1] - '0') << 3) |
1041 (buf->ptr[j+2] - '0');
1042 j += 2;
1043 break;
1044
1045 default:
1046 git_error_set(GIT_ERROR_INVALID, "invalid quoted character \\%c", ch);
1047 return -1;
1048 }
1049 }
1050
1051 buf->ptr[i] = ch;
1052 }
1053
1054 buf->ptr[i] = '\0';
1055 buf->size = i;
1056
1057 return 0;
1058
1059 invalid:
1060 git_error_set(GIT_ERROR_INVALID, "invalid quoted line");
1061 return -1;
1062 }
1063
1064 int git_str_puts_escaped(
1065 git_str *buf,
1066 const char *string,
1067 const char *esc_chars,
1068 const char *esc_with)
1069 {
1070 const char *scan;
1071 size_t total = 0, esc_len = strlen(esc_with), count, alloclen;
1072
1073 if (!string)
1074 return 0;
1075
1076 for (scan = string; *scan; ) {
1077 /* count run of non-escaped characters */
1078 count = strcspn(scan, esc_chars);
1079 total += count;
1080 scan += count;
1081 /* count run of escaped characters */
1082 count = strspn(scan, esc_chars);
1083 total += count * (esc_len + 1);
1084 scan += count;
1085 }
1086
1087 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1);
1088 if (git_str_grow_by(buf, alloclen) < 0)
1089 return -1;
1090
1091 for (scan = string; *scan; ) {
1092 count = strcspn(scan, esc_chars);
1093
1094 memmove(buf->ptr + buf->size, scan, count);
1095 scan += count;
1096 buf->size += count;
1097
1098 for (count = strspn(scan, esc_chars); count > 0; --count) {
1099 /* copy escape sequence */
1100 memmove(buf->ptr + buf->size, esc_with, esc_len);
1101 buf->size += esc_len;
1102 /* copy character to be escaped */
1103 buf->ptr[buf->size] = *scan;
1104 buf->size++;
1105 scan++;
1106 }
1107 }
1108
1109 buf->ptr[buf->size] = '\0';
1110
1111 return 0;
1112 }
1113
1114 void git_str_unescape(git_str *buf)
1115 {
1116 buf->size = git__unescape(buf->ptr);
1117 }
1118
1119 int git_str_crlf_to_lf(git_str *tgt, const git_str *src)
1120 {
1121 const char *scan = src->ptr;
1122 const char *scan_end = src->ptr + src->size;
1123 const char *next = memchr(scan, '\r', src->size);
1124 size_t new_size;
1125 char *out;
1126
1127 GIT_ASSERT(tgt != src);
1128
1129 if (!next)
1130 return git_str_set(tgt, src->ptr, src->size);
1131
1132 /* reduce reallocs while in the loop */
1133 GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
1134 if (git_str_grow(tgt, new_size) < 0)
1135 return -1;
1136
1137 out = tgt->ptr;
1138 tgt->size = 0;
1139
1140 /* Find the next \r and copy whole chunk up to there to tgt */
1141 for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) {
1142 if (next > scan) {
1143 size_t copylen = (size_t)(next - scan);
1144 memcpy(out, scan, copylen);
1145 out += copylen;
1146 }
1147
1148 /* Do not drop \r unless it is followed by \n */
1149 if (next + 1 == scan_end || next[1] != '\n')
1150 *out++ = '\r';
1151 }
1152
1153 /* Copy remaining input into dest */
1154 if (scan < scan_end) {
1155 size_t remaining = (size_t)(scan_end - scan);
1156 memcpy(out, scan, remaining);
1157 out += remaining;
1158 }
1159
1160 tgt->size = (size_t)(out - tgt->ptr);
1161 tgt->ptr[tgt->size] = '\0';
1162
1163 return 0;
1164 }
1165
1166 int git_str_lf_to_crlf(git_str *tgt, const git_str *src)
1167 {
1168 const char *start = src->ptr;
1169 const char *end = start + src->size;
1170 const char *scan = start;
1171 const char *next = memchr(scan, '\n', src->size);
1172 size_t alloclen;
1173
1174 GIT_ASSERT(tgt != src);
1175
1176 if (!next)
1177 return git_str_set(tgt, src->ptr, src->size);
1178
1179 /* attempt to reduce reallocs while in the loop */
1180 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
1181 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
1182 if (git_str_grow(tgt, alloclen) < 0)
1183 return -1;
1184 tgt->size = 0;
1185
1186 for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) {
1187 size_t copylen = next - scan;
1188
1189 /* if we find mixed line endings, carry on */
1190 if (copylen && next[-1] == '\r')
1191 copylen--;
1192
1193 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3);
1194 if (git_str_grow_by(tgt, alloclen) < 0)
1195 return -1;
1196
1197 if (copylen) {
1198 memcpy(tgt->ptr + tgt->size, scan, copylen);
1199 tgt->size += copylen;
1200 }
1201
1202 tgt->ptr[tgt->size++] = '\r';
1203 tgt->ptr[tgt->size++] = '\n';
1204 }
1205
1206 tgt->ptr[tgt->size] = '\0';
1207 return git_str_put(tgt, scan, end - scan);
1208 }
1209
1210 int git_str_common_prefix(git_str *buf, char *const *const strings, size_t count)
1211 {
1212 size_t i;
1213 const char *str, *pfx;
1214
1215 git_str_clear(buf);
1216
1217 if (!strings || !count)
1218 return 0;
1219
1220 /* initialize common prefix to first string */
1221 if (git_str_sets(buf, strings[0]) < 0)
1222 return -1;
1223
1224 /* go through the rest of the strings, truncating to shared prefix */
1225 for (i = 1; i < count; ++i) {
1226
1227 for (str = strings[i], pfx = buf->ptr;
1228 *str && *str == *pfx;
1229 str++, pfx++)
1230 /* scanning */;
1231
1232 git_str_truncate(buf, pfx - buf->ptr);
1233
1234 if (!buf->size)
1235 break;
1236 }
1237
1238 return 0;
1239 }
1240
1241 int git_str_is_binary(const git_str *buf)
1242 {
1243 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
1244 git_str_bom_t bom;
1245 int printable = 0, nonprintable = 0;
1246
1247 scan += git_str_detect_bom(&bom, buf);
1248
1249 if (bom > GIT_STR_BOM_UTF8)
1250 return 1;
1251
1252 while (scan < end) {
1253 unsigned char c = *scan++;
1254
1255 /* Printable characters are those above SPACE (0x1F) excluding DEL,
1256 * and including BS, ESC and FF.
1257 */
1258 if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
1259 printable++;
1260 else if (c == '\0')
1261 return true;
1262 else if (!git__isspace(c))
1263 nonprintable++;
1264 }
1265
1266 return ((printable >> 7) < nonprintable);
1267 }
1268
1269 int git_str_contains_nul(const git_str *buf)
1270 {
1271 return (memchr(buf->ptr, '\0', buf->size) != NULL);
1272 }
1273
1274 int git_str_detect_bom(git_str_bom_t *bom, const git_str *buf)
1275 {
1276 const char *ptr;
1277 size_t len;
1278
1279 *bom = GIT_STR_BOM_NONE;
1280 /* need at least 2 bytes to look for any BOM */
1281 if (buf->size < 2)
1282 return 0;
1283
1284 ptr = buf->ptr;
1285 len = buf->size;
1286
1287 switch (*ptr++) {
1288 case 0:
1289 if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
1290 *bom = GIT_STR_BOM_UTF32_BE;
1291 return 4;
1292 }
1293 break;
1294 case '\xEF':
1295 if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
1296 *bom = GIT_STR_BOM_UTF8;
1297 return 3;
1298 }
1299 break;
1300 case '\xFE':
1301 if (*ptr == '\xFF') {
1302 *bom = GIT_STR_BOM_UTF16_BE;
1303 return 2;
1304 }
1305 break;
1306 case '\xFF':
1307 if (*ptr != '\xFE')
1308 break;
1309 if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
1310 *bom = GIT_STR_BOM_UTF32_LE;
1311 return 4;
1312 } else {
1313 *bom = GIT_STR_BOM_UTF16_LE;
1314 return 2;
1315 }
1316 break;
1317 default:
1318 break;
1319 }
1320
1321 return 0;
1322 }
1323
1324 bool git_str_gather_text_stats(
1325 git_str_text_stats *stats, const git_str *buf, bool skip_bom)
1326 {
1327 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
1328 int skip;
1329
1330 memset(stats, 0, sizeof(*stats));
1331
1332 /* BOM detection */
1333 skip = git_str_detect_bom(&stats->bom, buf);
1334 if (skip_bom)
1335 scan += skip;
1336
1337 /* Ignore EOF character */
1338 if (buf->size > 0 && end[-1] == '\032')
1339 end--;
1340
1341 /* Counting loop */
1342 while (scan < end) {
1343 unsigned char c = *scan++;
1344
1345 if (c > 0x1F && c != 0x7F)
1346 stats->printable++;
1347 else switch (c) {
1348 case '\0':
1349 stats->nul++;
1350 stats->nonprintable++;
1351 break;
1352 case '\n':
1353 stats->lf++;
1354 break;
1355 case '\r':
1356 stats->cr++;
1357 if (scan < end && *scan == '\n')
1358 stats->crlf++;
1359 break;
1360 case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
1361 stats->printable++;
1362 break;
1363 default:
1364 stats->nonprintable++;
1365 break;
1366 }
1367 }
1368
1369 /* Treat files with a bare CR as binary */
1370 return (stats->cr != stats->crlf || stats->nul > 0 ||
1371 ((stats->printable >> 7) < stats->nonprintable));
1372 }