]> git.proxmox.com Git - libgit2.git/blob - src/config_parse.c
ed2c87f6bcf5d74604c41f5e7ea2fdeac994e0eb
[libgit2.git] / src / config_parse.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "config_parse.h"
9
10 #include "buf_text.h"
11
12 #include <ctype.h>
13
/*
 * Escape tables used when reading config values. The two strings are
 * parallel: the character at index i of `git_config_escapes` is the
 * letter that follows a backslash in the file, and the character at
 * the same index of `git_config_escaped` is the byte it stands for.
 */
const char *git_config_escapes = "n" "t" "b" "\"" "\\";
const char *git_config_escaped = "\n" "\t" "\b" "\"" "\\";
16
17 static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
18 {
19 if (col)
20 git_error_set(GIT_ERROR_CONFIG,
21 "failed to parse config file: %s (in %s:%"PRIuZ", column %d)",
22 error_str, reader->path, reader->ctx.line_num, col);
23 else
24 git_error_set(GIT_ERROR_CONFIG,
25 "failed to parse config file: %s (in %s:%"PRIuZ")",
26 error_str, reader->path, reader->ctx.line_num);
27 }
28
29
30 GIT_INLINE(int) config_keychar(int c)
31 {
32 return isalnum(c) || c == '-';
33 }
34
/*
 * Cut `line` (modified in place) at the first comment character (';'
 * or '#') that is neither inside a quoted string nor escaped by a
 * backslash, then trim any whitespace left at the end.
 *
 * `in_quotes` seeds the quote counter; only its parity matters (odd
 * means the line starts inside an open quoted string).
 *
 * Returns the seed plus the number of unescaped double quotes seen
 * before the cut, so an odd result means the line ends inside quotes.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quote_count = in_quotes, backslash_count = 0;
	char *ptr;

	for (ptr = line; *ptr; ++ptr) {
		/*
		 * `backslash_count` is the length of the backslash run
		 * immediately before `ptr`. A quote is escaped only when
		 * that run is odd; this also counts a quote sitting at
		 * the very start of the line and one that follows an
		 * escaped backslash (`\\"`).
		 */
		if (ptr[0] == '"' && (backslash_count % 2) == 0)
			quote_count++;

		if ((ptr[0] == ';' || ptr[0] == '#') &&
		    (quote_count % 2) == 0 &&
		    (backslash_count % 2) == 0) {
			ptr[0] = '\0';
			break;
		}

		if (ptr[0] == '\\')
			backslash_count++;
		else
			backslash_count = 0;
	}

	/* skip any space at the end */
	while (ptr > line && git__isspace(ptr[-1]))
		ptr--;
	ptr[0] = '\0';

	return quote_count;
}
65
66
67 static int parse_subsection_header(git_config_parser *reader, const char *line, size_t pos, const char *base_name, char **section_name)
68 {
69 int c, rpos;
70 const char *first_quote, *last_quote;
71 const char *line_start = line;
72 git_buf buf = GIT_BUF_INIT;
73 size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
74
75 /* Skip any additional whitespace before our section name */
76 while (git__isspace(line[pos]))
77 pos++;
78
79 /* We should be at the first quotation mark. */
80 if (line[pos] != '"') {
81 set_parse_error(reader, 0, "missing quotation marks in section header");
82 goto end_error;
83 }
84
85 first_quote = &line[pos];
86 last_quote = strrchr(line, '"');
87 quoted_len = last_quote - first_quote;
88
89 if ((last_quote - line) > INT_MAX) {
90 set_parse_error(reader, 0, "invalid section header, line too long");
91 goto end_error;
92 }
93
94 if (quoted_len == 0) {
95 set_parse_error(reader, 0, "missing closing quotation mark in section header");
96 goto end_error;
97 }
98
99 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
100 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
101
102 if (git_buf_grow(&buf, alloc_len) < 0 ||
103 git_buf_printf(&buf, "%s.", base_name) < 0)
104 goto end_error;
105
106 rpos = 0;
107
108 line = first_quote;
109 c = line[++rpos];
110
111 /*
112 * At the end of each iteration, whatever is stored in c will be
113 * added to the string. In case of error, jump to out
114 */
115 do {
116
117 switch (c) {
118 case 0:
119 set_parse_error(reader, 0, "unexpected end-of-line in section header");
120 goto end_error;
121
122 case '"':
123 goto end_parse;
124
125 case '\\':
126 c = line[++rpos];
127
128 if (c == 0) {
129 set_parse_error(reader, rpos, "unexpected end-of-line in section header");
130 goto end_error;
131 }
132
133 default:
134 break;
135 }
136
137 git_buf_putc(&buf, (char)c);
138 c = line[++rpos];
139 } while (line + rpos < last_quote);
140
141 end_parse:
142 if (git_buf_oom(&buf))
143 goto end_error;
144
145 if (line[rpos] != '"' || line[rpos + 1] != ']') {
146 set_parse_error(reader, rpos, "unexpected text after closing quotes");
147 git_buf_dispose(&buf);
148 return -1;
149 }
150
151 *section_name = git_buf_detach(&buf);
152 return (int)(&line[rpos + 2] - line_start); /* rpos is at the closing quote */
153
154 end_error:
155 git_buf_dispose(&buf);
156
157 return -1;
158 }
159
160 static int parse_section_header(git_config_parser *reader, char **section_out)
161 {
162 char *name, *name_end;
163 int name_length, c, pos;
164 int result;
165 char *line;
166 size_t line_len;
167
168 git_parse_advance_ws(&reader->ctx);
169 line = git__strndup(reader->ctx.line, reader->ctx.line_len);
170 if (line == NULL)
171 return -1;
172
173 /* find the end of the variable's name */
174 name_end = strrchr(line, ']');
175 if (name_end == NULL) {
176 git__free(line);
177 set_parse_error(reader, 0, "missing ']' in section header");
178 return -1;
179 }
180
181 GIT_ERROR_CHECK_ALLOC_ADD(&line_len, (size_t)(name_end - line), 1);
182 name = git__malloc(line_len);
183 GIT_ERROR_CHECK_ALLOC(name);
184
185 name_length = 0;
186 pos = 0;
187
188 /* Make sure we were given a section header */
189 c = line[pos++];
190 assert(c == '[');
191
192 c = line[pos++];
193
194 do {
195 if (git__isspace(c)){
196 name[name_length] = '\0';
197 result = parse_subsection_header(reader, line, pos, name, section_out);
198 git__free(line);
199 git__free(name);
200 return result;
201 }
202
203 if (!config_keychar(c) && c != '.') {
204 set_parse_error(reader, pos, "unexpected character in header");
205 goto fail_parse;
206 }
207
208 name[name_length++] = (char)git__tolower(c);
209
210 } while ((c = line[pos++]) != ']');
211
212 if (line[pos - 1] != ']') {
213 set_parse_error(reader, pos, "unexpected end of file");
214 goto fail_parse;
215 }
216
217 git__free(line);
218
219 name[name_length] = 0;
220 *section_out = name;
221
222 return pos;
223
224 fail_parse:
225 git__free(line);
226 git__free(name);
227 return -1;
228 }
229
230 static int skip_bom(git_parse_ctx *parser)
231 {
232 git_buf buf = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
233 git_bom_t bom;
234 int bom_offset = git_buf_text_detect_bom(&bom, &buf);
235
236 if (bom == GIT_BOM_UTF8)
237 git_parse_advance_chars(parser, bom_offset);
238
239 /* TODO: reference implementation is pretty stupid with BoM */
240
241 return 0;
242 }
243
244 /*
245 (* basic types *)
246 digit = "0".."9"
247 integer = digit { digit }
248 alphabet = "a".."z" + "A" .. "Z"
249
250 section_char = alphabet | "." | "-"
251 extension_char = (* any character except newline *)
252 any_char = (* any character *)
variable_char = alphabet | digit | "-"
254
255
256 (* actual grammar *)
257 config = { section }
258
259 section = header { definition }
260
261 header = "[" section [subsection | subsection_ext] "]"
262
263 subsection = "." section
264 subsection_ext = "\"" extension "\""
265
266 section = section_char { section_char }
267 extension = extension_char { extension_char }
268
269 definition = variable_name ["=" variable_value] "\n"
270
271 variable_name = variable_char { variable_char }
272 variable_value = string | boolean | integer
273
274 string = quoted_string | plain_string
275 quoted_string = "\"" plain_string "\""
276 plain_string = { any_char }
277
278 boolean = boolean_true | boolean_false
279 boolean_true = "yes" | "1" | "true" | "on"
280 boolean_false = "no" | "0" | "false" | "off"
281 */
282
283 /* '\"' -> '"' etc */
284 static int unescape_line(
285 char **out, bool *is_multi, const char *ptr, int quote_count)
286 {
287 char *str, *fixed, *esc;
288 size_t ptr_len = strlen(ptr), alloc_len;
289
290 *is_multi = false;
291
292 if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
293 (str = git__malloc(alloc_len)) == NULL) {
294 return -1;
295 }
296
297 fixed = str;
298
299 while (*ptr != '\0') {
300 if (*ptr == '"') {
301 quote_count++;
302 } else if (*ptr != '\\') {
303 *fixed++ = *ptr;
304 } else {
305 /* backslash, check the next char */
306 ptr++;
307 /* if we're at the end, it's a multiline, so keep the backslash */
308 if (*ptr == '\0') {
309 *is_multi = true;
310 goto done;
311 }
312 if ((esc = strchr(git_config_escapes, *ptr)) != NULL) {
313 *fixed++ = git_config_escaped[esc - git_config_escapes];
314 } else {
315 git__free(str);
316 git_error_set(GIT_ERROR_CONFIG, "invalid escape at %s", ptr);
317 return -1;
318 }
319 }
320 ptr++;
321 }
322
323 done:
324 *fixed = '\0';
325 *out = str;
326
327 return 0;
328 }
329
330 static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
331 {
332 int quote_count;
333 bool multiline = true;
334
335 while (multiline) {
336 char *line = NULL, *proc_line = NULL;
337 int error;
338
339 /* Check that the next line exists */
340 git_parse_advance_line(&reader->ctx);
341 line = git__strndup(reader->ctx.line, reader->ctx.line_len);
342 GIT_ERROR_CHECK_ALLOC(line);
343
344 /*
345 * We've reached the end of the file, there is no continuation.
346 * (this is not an error).
347 */
348 if (line[0] == '\0') {
349 error = 0;
350 goto out;
351 }
352
353 /* If it was just a comment, pretend it didn't exist */
354 quote_count = strip_comments(line, !!in_quotes);
355 if (line[0] == '\0')
356 goto next;
357
358 if ((error = unescape_line(&proc_line, &multiline,
359 line, in_quotes)) < 0)
360 goto out;
361
362 /* Add this line to the multiline var */
363 if ((error = git_buf_puts(value, proc_line)) < 0)
364 goto out;
365
366 next:
367 git__free(line);
368 git__free(proc_line);
369 in_quotes = quote_count;
370 continue;
371
372 out:
373 git__free(line);
374 git__free(proc_line);
375 return error;
376 }
377
378 return 0;
379 }
380
381 GIT_INLINE(bool) is_namechar(char c)
382 {
383 return isalnum(c) || c == '-';
384 }
385
386 static int parse_name(
387 char **name, const char **value, git_config_parser *reader, const char *line)
388 {
389 const char *name_end = line, *value_start;
390
391 *name = NULL;
392 *value = NULL;
393
394 while (*name_end && is_namechar(*name_end))
395 name_end++;
396
397 if (line == name_end) {
398 set_parse_error(reader, 0, "invalid configuration key");
399 return -1;
400 }
401
402 value_start = name_end;
403
404 while (*value_start && git__isspace(*value_start))
405 value_start++;
406
407 if (*value_start == '=') {
408 *value = value_start + 1;
409 } else if (*value_start) {
410 set_parse_error(reader, 0, "invalid configuration key");
411 return -1;
412 }
413
414 if ((*name = git__strndup(line, name_end - line)) == NULL)
415 return -1;
416
417 return 0;
418 }
419
420 static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
421 {
422 const char *value_start = NULL;
423 char *line = NULL, *name = NULL, *value = NULL;
424 int quote_count, error;
425 bool multiline;
426
427 *var_name = NULL;
428 *var_value = NULL;
429
430 git_parse_advance_ws(&reader->ctx);
431 line = git__strndup(reader->ctx.line, reader->ctx.line_len);
432 GIT_ERROR_CHECK_ALLOC(line);
433
434 quote_count = strip_comments(line, 0);
435
436 if ((error = parse_name(&name, &value_start, reader, line)) < 0)
437 goto out;
438
439 /*
440 * Now, let's try to parse the value
441 */
442 if (value_start != NULL) {
443 while (git__isspace(value_start[0]))
444 value_start++;
445
446 if ((error = unescape_line(&value, &multiline, value_start, 0)) < 0)
447 goto out;
448
449 if (multiline) {
450 git_buf multi_value = GIT_BUF_INIT;
451 git_buf_attach(&multi_value, value, 0);
452 value = NULL;
453
454 if (parse_multiline_variable(reader, &multi_value, quote_count % 2) < 0 ||
455 git_buf_oom(&multi_value)) {
456 error = -1;
457 git_buf_dispose(&multi_value);
458 goto out;
459 }
460
461 value = git_buf_detach(&multi_value);
462 }
463 }
464
465 *var_name = name;
466 *var_value = value;
467 name = NULL;
468 value = NULL;
469
470 out:
471 git__free(name);
472 git__free(value);
473 git__free(line);
474 return error;
475 }
476
477 int git_config_parser_init(git_config_parser *out, const char *path, const char *data, size_t datalen)
478 {
479 out->path = path;
480 return git_parse_ctx_init(&out->ctx, data, datalen);
481 }
482
483 void git_config_parser_dispose(git_config_parser *parser)
484 {
485 git_parse_ctx_clear(&parser->ctx);
486 }
487
488 int git_config_parse(
489 git_config_parser *parser,
490 git_config_parser_section_cb on_section,
491 git_config_parser_variable_cb on_variable,
492 git_config_parser_comment_cb on_comment,
493 git_config_parser_eof_cb on_eof,
494 void *payload)
495 {
496 git_parse_ctx *ctx;
497 char *current_section = NULL, *var_name = NULL, *var_value = NULL;
498 int result = 0;
499
500 ctx = &parser->ctx;
501
502 skip_bom(ctx);
503
504 for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) {
505 const char *line_start;
506 size_t line_len;
507 char c;
508
509 restart:
510 line_start = ctx->line;
511 line_len = ctx->line_len;
512
513 /*
514 * Get either first non-whitespace character or, if that does
515 * not exist, the first whitespace character. This is required
516 * to preserve whitespaces when writing back the file.
517 */
518 if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 &&
519 git_parse_peek(&c, ctx, 0) < 0)
520 continue;
521
522 switch (c) {
523 case '[': /* section header, new section begins */
524 git__free(current_section);
525 current_section = NULL;
526
527 result = parse_section_header(parser, &current_section);
528 if (result < 0)
529 break;
530
531 git_parse_advance_chars(ctx, result);
532
533 if (on_section)
534 result = on_section(parser, current_section, line_start, line_len, payload);
535 /*
536 * After we've parsed the section header we may not be
537 * done with the line. If there's still data in there,
538 * run the next loop with the rest of the current line
539 * instead of moving forward.
540 */
541
542 if (!git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE))
543 goto restart;
544
545 break;
546
547 case '\n': /* comment or whitespace-only */
548 case '\r':
549 case ' ':
550 case '\t':
551 case ';':
552 case '#':
553 if (on_comment) {
554 result = on_comment(parser, line_start, line_len, payload);
555 }
556 break;
557
558 default: /* assume variable declaration */
559 if ((result = parse_variable(parser, &var_name, &var_value)) == 0 && on_variable) {
560 result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, payload);
561 git__free(var_name);
562 git__free(var_value);
563 }
564
565 break;
566 }
567
568 if (result < 0)
569 goto out;
570 }
571
572 if (on_eof)
573 result = on_eof(parser, current_section, payload);
574
575 out:
576 git__free(current_section);
577 return result;
578 }