]>
Commit | Line | Data |
---|---|---|
eae0bfdc PP |
1 | /* |
2 | * Copyright (C) the libgit2 contributors. All rights reserved. | |
3 | * | |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | ||
8 | #include "config_parse.h" | |
9 | ||
eae0bfdc PP |
10 | #include <ctype.h> |
11 | ||
ac3d33df JK |
/*
 * Escape tables used when reading config values.
 *
 * The two strings are index-aligned: the character at position i of
 * `git_config_escapes` is the letter that follows a backslash in the
 * file, and the character at position i of `git_config_escaped` is the
 * byte that escape sequence stands for (e.g. 'n' <-> '\n').
 */
const char *git_config_escapes = "ntb\"\\";
const char *git_config_escaped = "\n\t\b\"\\";
14 | ||
eae0bfdc PP |
15 | static void set_parse_error(git_config_parser *reader, int col, const char *error_str) |
16 | { | |
22a2d3d5 UG |
17 | if (col) |
18 | git_error_set(GIT_ERROR_CONFIG, | |
19 | "failed to parse config file: %s (in %s:%"PRIuZ", column %d)", | |
20 | error_str, reader->path, reader->ctx.line_num, col); | |
21 | else | |
22 | git_error_set(GIT_ERROR_CONFIG, | |
23 | "failed to parse config file: %s (in %s:%"PRIuZ")", | |
24 | error_str, reader->path, reader->ctx.line_num); | |
eae0bfdc PP |
25 | } |
26 | ||
27 | ||
28 | GIT_INLINE(int) config_keychar(int c) | |
29 | { | |
30 | return isalnum(c) || c == '-'; | |
31 | } | |
32 | ||
/*
 * Cut `line` at the first comment character (';' or '#') that is
 * neither inside a quoted string nor backslash-escaped, then trim
 * trailing whitespace.  The buffer is modified in place.
 *
 * `in_quotes` seeds the quote counter; only its parity matters here, so
 * an odd value means the line starts inside a string opened on an
 * earlier line.
 *
 * Returns the quote counter after the scan.  An odd result means the
 * line ends inside an open quoted string.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quote_count = in_quotes, backslash_count = 0;
	char *ptr;

	for (ptr = line; *ptr; ++ptr) {
		/*
		 * A character is escaped only when it is preceded by an odd
		 * number of backslashes: in `\\"` the two backslashes form an
		 * escaped backslash and the quote is a real delimiter.
		 */
		int escaped = (backslash_count % 2) != 0;

		if (ptr[0] == '"' && !escaped)
			quote_count++;

		if ((ptr[0] == ';' || ptr[0] == '#') &&
		    (quote_count % 2) == 0 &&
		    !escaped) {
			ptr[0] = '\0';
			break;
		}

		/* track the length of the current run of backslashes */
		if (ptr[0] == '\\')
			backslash_count++;
		else
			backslash_count = 0;
	}

	/* skip any space at the end */
	while (ptr > line && git__isspace(ptr[-1]))
		ptr--;
	ptr[0] = '\0';

	return quote_count;
}
63 | ||
64 | ||
22a2d3d5 | 65 | static int parse_subsection_header(git_config_parser *reader, const char *line, size_t pos, const char *base_name, char **section_name) |
eae0bfdc PP |
66 | { |
67 | int c, rpos; | |
22a2d3d5 | 68 | const char *first_quote, *last_quote; |
ac3d33df | 69 | const char *line_start = line; |
e579e0f7 | 70 | git_str buf = GIT_STR_INIT; |
eae0bfdc PP |
71 | size_t quoted_len, alloc_len, base_name_len = strlen(base_name); |
72 | ||
22a2d3d5 UG |
73 | /* Skip any additional whitespace before our section name */ |
74 | while (git__isspace(line[pos])) | |
75 | pos++; | |
eae0bfdc | 76 | |
22a2d3d5 UG |
77 | /* We should be at the first quotation mark. */ |
78 | if (line[pos] != '"') { | |
79 | set_parse_error(reader, 0, "missing quotation marks in section header"); | |
eae0bfdc PP |
80 | goto end_error; |
81 | } | |
82 | ||
22a2d3d5 | 83 | first_quote = &line[pos]; |
eae0bfdc PP |
84 | last_quote = strrchr(line, '"'); |
85 | quoted_len = last_quote - first_quote; | |
86 | ||
22a2d3d5 UG |
87 | if ((last_quote - line) > INT_MAX) { |
88 | set_parse_error(reader, 0, "invalid section header, line too long"); | |
89 | goto end_error; | |
90 | } | |
91 | ||
eae0bfdc | 92 | if (quoted_len == 0) { |
22a2d3d5 | 93 | set_parse_error(reader, 0, "missing closing quotation mark in section header"); |
eae0bfdc PP |
94 | goto end_error; |
95 | } | |
96 | ||
ac3d33df JK |
97 | GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len); |
98 | GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2); | |
eae0bfdc | 99 | |
e579e0f7 MB |
100 | if (git_str_grow(&buf, alloc_len) < 0 || |
101 | git_str_printf(&buf, "%s.", base_name) < 0) | |
eae0bfdc PP |
102 | goto end_error; |
103 | ||
104 | rpos = 0; | |
105 | ||
106 | line = first_quote; | |
107 | c = line[++rpos]; | |
108 | ||
109 | /* | |
110 | * At the end of each iteration, whatever is stored in c will be | |
111 | * added to the string. In case of error, jump to out | |
112 | */ | |
113 | do { | |
114 | ||
115 | switch (c) { | |
116 | case 0: | |
22a2d3d5 | 117 | set_parse_error(reader, 0, "unexpected end-of-line in section header"); |
eae0bfdc PP |
118 | goto end_error; |
119 | ||
120 | case '"': | |
121 | goto end_parse; | |
122 | ||
123 | case '\\': | |
124 | c = line[++rpos]; | |
125 | ||
126 | if (c == 0) { | |
22a2d3d5 | 127 | set_parse_error(reader, rpos, "unexpected end-of-line in section header"); |
eae0bfdc PP |
128 | goto end_error; |
129 | } | |
130 | ||
131 | default: | |
132 | break; | |
133 | } | |
134 | ||
e579e0f7 | 135 | git_str_putc(&buf, (char)c); |
eae0bfdc PP |
136 | c = line[++rpos]; |
137 | } while (line + rpos < last_quote); | |
138 | ||
139 | end_parse: | |
e579e0f7 | 140 | if (git_str_oom(&buf)) |
eae0bfdc PP |
141 | goto end_error; |
142 | ||
143 | if (line[rpos] != '"' || line[rpos + 1] != ']') { | |
22a2d3d5 | 144 | set_parse_error(reader, rpos, "unexpected text after closing quotes"); |
e579e0f7 | 145 | git_str_dispose(&buf); |
eae0bfdc PP |
146 | return -1; |
147 | } | |
148 | ||
e579e0f7 | 149 | *section_name = git_str_detach(&buf); |
22a2d3d5 | 150 | return (int)(&line[rpos + 2] - line_start); /* rpos is at the closing quote */ |
eae0bfdc PP |
151 | |
152 | end_error: | |
e579e0f7 | 153 | git_str_dispose(&buf); |
eae0bfdc PP |
154 | |
155 | return -1; | |
156 | } | |
157 | ||
158 | static int parse_section_header(git_config_parser *reader, char **section_out) | |
159 | { | |
160 | char *name, *name_end; | |
161 | int name_length, c, pos; | |
162 | int result; | |
163 | char *line; | |
164 | size_t line_len; | |
165 | ||
166 | git_parse_advance_ws(&reader->ctx); | |
167 | line = git__strndup(reader->ctx.line, reader->ctx.line_len); | |
168 | if (line == NULL) | |
169 | return -1; | |
170 | ||
171 | /* find the end of the variable's name */ | |
172 | name_end = strrchr(line, ']'); | |
173 | if (name_end == NULL) { | |
174 | git__free(line); | |
22a2d3d5 | 175 | set_parse_error(reader, 0, "missing ']' in section header"); |
eae0bfdc PP |
176 | return -1; |
177 | } | |
178 | ||
ac3d33df | 179 | GIT_ERROR_CHECK_ALLOC_ADD(&line_len, (size_t)(name_end - line), 1); |
eae0bfdc | 180 | name = git__malloc(line_len); |
ac3d33df | 181 | GIT_ERROR_CHECK_ALLOC(name); |
eae0bfdc PP |
182 | |
183 | name_length = 0; | |
184 | pos = 0; | |
185 | ||
186 | /* Make sure we were given a section header */ | |
187 | c = line[pos++]; | |
c25aa7cd | 188 | GIT_ASSERT(c == '['); |
eae0bfdc PP |
189 | |
190 | c = line[pos++]; | |
191 | ||
192 | do { | |
193 | if (git__isspace(c)){ | |
194 | name[name_length] = '\0'; | |
22a2d3d5 | 195 | result = parse_subsection_header(reader, line, pos, name, section_out); |
eae0bfdc PP |
196 | git__free(line); |
197 | git__free(name); | |
198 | return result; | |
199 | } | |
200 | ||
201 | if (!config_keychar(c) && c != '.') { | |
22a2d3d5 | 202 | set_parse_error(reader, pos, "unexpected character in header"); |
eae0bfdc PP |
203 | goto fail_parse; |
204 | } | |
205 | ||
206 | name[name_length++] = (char)git__tolower(c); | |
207 | ||
208 | } while ((c = line[pos++]) != ']'); | |
209 | ||
210 | if (line[pos - 1] != ']') { | |
22a2d3d5 | 211 | set_parse_error(reader, pos, "unexpected end of file"); |
eae0bfdc PP |
212 | goto fail_parse; |
213 | } | |
214 | ||
215 | git__free(line); | |
216 | ||
217 | name[name_length] = 0; | |
218 | *section_out = name; | |
219 | ||
ac3d33df | 220 | return pos; |
eae0bfdc PP |
221 | |
222 | fail_parse: | |
223 | git__free(line); | |
224 | git__free(name); | |
225 | return -1; | |
226 | } | |
227 | ||
228 | static int skip_bom(git_parse_ctx *parser) | |
229 | { | |
e579e0f7 MB |
230 | git_str buf = GIT_STR_INIT_CONST(parser->content, parser->content_len); |
231 | git_str_bom_t bom; | |
232 | int bom_offset = git_str_detect_bom(&bom, &buf); | |
eae0bfdc | 233 | |
e579e0f7 | 234 | if (bom == GIT_STR_BOM_UTF8) |
eae0bfdc PP |
235 | git_parse_advance_chars(parser, bom_offset); |
236 | ||
237 | /* TODO: reference implementation is pretty stupid with BoM */ | |
238 | ||
239 | return 0; | |
240 | } | |
241 | ||
242 | /* | |
243 | (* basic types *) | |
244 | digit = "0".."9" | |
245 | integer = digit { digit } | |
246 | alphabet = "a".."z" + "A" .. "Z" | |
247 | ||
248 | section_char = alphabet | digit | "." | "-" | |
249 | extension_char = (* any character except newline *) | |
250 | any_char = (* any character *) | |
251 | variable_char = alphabet | digit | "-" | |
252 | ||
253 | ||
254 | (* actual grammar *) | |
255 | config = { section } | |
256 | ||
257 | section = header { definition } | |
258 | ||
259 | header = "[" section [subsection | subsection_ext] "]" | |
260 | ||
261 | subsection = "." section | |
262 | subsection_ext = "\"" extension "\"" | |
263 | ||
264 | section = section_char { section_char } | |
265 | extension = extension_char { extension_char } | |
266 | ||
267 | definition = variable_name ["=" variable_value] "\n" | |
268 | ||
269 | variable_name = variable_char { variable_char } | |
270 | variable_value = string | boolean | integer | |
271 | ||
272 | string = quoted_string | plain_string | |
273 | quoted_string = "\"" plain_string "\"" | |
274 | plain_string = { any_char } | |
275 | ||
276 | boolean = boolean_true | boolean_false | |
277 | boolean_true = "yes" | "1" | "true" | "on" | |
278 | boolean_false = "no" | "0" | "false" | "off" | |
279 | */ | |
280 | ||
281 | /* '\"' -> '"' etc */ | |
282 | static int unescape_line( | |
283 | char **out, bool *is_multi, const char *ptr, int quote_count) | |
284 | { | |
285 | char *str, *fixed, *esc; | |
286 | size_t ptr_len = strlen(ptr), alloc_len; | |
287 | ||
288 | *is_multi = false; | |
289 | ||
290 | if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) || | |
291 | (str = git__malloc(alloc_len)) == NULL) { | |
292 | return -1; | |
293 | } | |
294 | ||
295 | fixed = str; | |
296 | ||
297 | while (*ptr != '\0') { | |
298 | if (*ptr == '"') { | |
299 | quote_count++; | |
300 | } else if (*ptr != '\\') { | |
301 | *fixed++ = *ptr; | |
302 | } else { | |
303 | /* backslash, check the next char */ | |
304 | ptr++; | |
305 | /* if we're at the end, it's a multiline, so keep the backslash */ | |
306 | if (*ptr == '\0') { | |
307 | *is_multi = true; | |
308 | goto done; | |
309 | } | |
310 | if ((esc = strchr(git_config_escapes, *ptr)) != NULL) { | |
311 | *fixed++ = git_config_escaped[esc - git_config_escapes]; | |
312 | } else { | |
313 | git__free(str); | |
ac3d33df | 314 | git_error_set(GIT_ERROR_CONFIG, "invalid escape at %s", ptr); |
eae0bfdc PP |
315 | return -1; |
316 | } | |
317 | } | |
318 | ptr++; | |
319 | } | |
320 | ||
321 | done: | |
322 | *fixed = '\0'; | |
323 | *out = str; | |
324 | ||
325 | return 0; | |
326 | } | |
327 | ||
e579e0f7 | 328 | static int parse_multiline_variable(git_config_parser *reader, git_str *value, int in_quotes, size_t *line_len) |
eae0bfdc | 329 | { |
eae0bfdc | 330 | int quote_count; |
6c7cee42 RD |
331 | bool multiline = true; |
332 | ||
333 | while (multiline) { | |
334 | char *line = NULL, *proc_line = NULL; | |
335 | int error; | |
336 | ||
337 | /* Check that the next line exists */ | |
338 | git_parse_advance_line(&reader->ctx); | |
339 | line = git__strndup(reader->ctx.line, reader->ctx.line_len); | |
ac3d33df | 340 | GIT_ERROR_CHECK_ALLOC(line); |
e579e0f7 MB |
341 | if (GIT_ADD_SIZET_OVERFLOW(line_len, *line_len, reader->ctx.line_len)) { |
342 | error = -1; | |
343 | goto out; | |
344 | } | |
6c7cee42 RD |
345 | |
346 | /* | |
347 | * We've reached the end of the file, there is no continuation. | |
348 | * (this is not an error). | |
349 | */ | |
350 | if (line[0] == '\0') { | |
351 | error = 0; | |
352 | goto out; | |
353 | } | |
eae0bfdc | 354 | |
6c7cee42 | 355 | /* If it was just a comment, pretend it didn't exist */ |
c25aa7cd | 356 | quote_count = strip_comments(line, in_quotes); |
6c7cee42 RD |
357 | if (line[0] == '\0') |
358 | goto next; | |
eae0bfdc | 359 | |
6c7cee42 RD |
360 | if ((error = unescape_line(&proc_line, &multiline, |
361 | line, in_quotes)) < 0) | |
362 | goto out; | |
eae0bfdc | 363 | |
6c7cee42 | 364 | /* Add this line to the multiline var */ |
e579e0f7 | 365 | if ((error = git_str_puts(value, proc_line)) < 0) |
6c7cee42 | 366 | goto out; |
eae0bfdc | 367 | |
6c7cee42 | 368 | next: |
eae0bfdc | 369 | git__free(line); |
6c7cee42 RD |
370 | git__free(proc_line); |
371 | in_quotes = quote_count; | |
372 | continue; | |
eae0bfdc | 373 | |
6c7cee42 | 374 | out: |
eae0bfdc | 375 | git__free(line); |
6c7cee42 RD |
376 | git__free(proc_line); |
377 | return error; | |
eae0bfdc | 378 | } |
eae0bfdc PP |
379 | |
380 | return 0; | |
381 | } | |
382 | ||
383 | GIT_INLINE(bool) is_namechar(char c) | |
384 | { | |
385 | return isalnum(c) || c == '-'; | |
386 | } | |
387 | ||
388 | static int parse_name( | |
389 | char **name, const char **value, git_config_parser *reader, const char *line) | |
390 | { | |
391 | const char *name_end = line, *value_start; | |
392 | ||
393 | *name = NULL; | |
394 | *value = NULL; | |
395 | ||
396 | while (*name_end && is_namechar(*name_end)) | |
397 | name_end++; | |
398 | ||
399 | if (line == name_end) { | |
22a2d3d5 | 400 | set_parse_error(reader, 0, "invalid configuration key"); |
eae0bfdc PP |
401 | return -1; |
402 | } | |
403 | ||
404 | value_start = name_end; | |
405 | ||
406 | while (*value_start && git__isspace(*value_start)) | |
407 | value_start++; | |
408 | ||
409 | if (*value_start == '=') { | |
410 | *value = value_start + 1; | |
411 | } else if (*value_start) { | |
22a2d3d5 | 412 | set_parse_error(reader, 0, "invalid configuration key"); |
eae0bfdc PP |
413 | return -1; |
414 | } | |
415 | ||
416 | if ((*name = git__strndup(line, name_end - line)) == NULL) | |
417 | return -1; | |
418 | ||
419 | return 0; | |
420 | } | |
421 | ||
e579e0f7 | 422 | static int parse_variable(git_config_parser *reader, char **var_name, char **var_value, size_t *line_len) |
eae0bfdc PP |
423 | { |
424 | const char *value_start = NULL; | |
ac3d33df JK |
425 | char *line = NULL, *name = NULL, *value = NULL; |
426 | int quote_count, error; | |
eae0bfdc PP |
427 | bool multiline; |
428 | ||
ac3d33df JK |
429 | *var_name = NULL; |
430 | *var_value = NULL; | |
431 | ||
eae0bfdc PP |
432 | git_parse_advance_ws(&reader->ctx); |
433 | line = git__strndup(reader->ctx.line, reader->ctx.line_len); | |
ac3d33df | 434 | GIT_ERROR_CHECK_ALLOC(line); |
eae0bfdc PP |
435 | |
436 | quote_count = strip_comments(line, 0); | |
437 | ||
ac3d33df JK |
438 | if ((error = parse_name(&name, &value_start, reader, line)) < 0) |
439 | goto out; | |
eae0bfdc PP |
440 | |
441 | /* | |
442 | * Now, let's try to parse the value | |
443 | */ | |
444 | if (value_start != NULL) { | |
445 | while (git__isspace(value_start[0])) | |
446 | value_start++; | |
447 | ||
ac3d33df JK |
448 | if ((error = unescape_line(&value, &multiline, value_start, 0)) < 0) |
449 | goto out; | |
eae0bfdc PP |
450 | |
451 | if (multiline) { | |
e579e0f7 MB |
452 | git_str multi_value = GIT_STR_INIT; |
453 | git_str_attach(&multi_value, value, 0); | |
ac3d33df | 454 | value = NULL; |
eae0bfdc | 455 | |
e579e0f7 MB |
456 | if (parse_multiline_variable(reader, &multi_value, quote_count % 2, line_len) < 0 || |
457 | git_str_oom(&multi_value)) { | |
ac3d33df | 458 | error = -1; |
e579e0f7 | 459 | git_str_dispose(&multi_value); |
ac3d33df | 460 | goto out; |
eae0bfdc PP |
461 | } |
462 | ||
e579e0f7 | 463 | value = git_str_detach(&multi_value); |
eae0bfdc PP |
464 | } |
465 | } | |
466 | ||
ac3d33df JK |
467 | *var_name = name; |
468 | *var_value = value; | |
469 | name = NULL; | |
470 | value = NULL; | |
eae0bfdc | 471 | |
ac3d33df JK |
472 | out: |
473 | git__free(name); | |
474 | git__free(value); | |
eae0bfdc | 475 | git__free(line); |
ac3d33df | 476 | return error; |
eae0bfdc PP |
477 | } |
478 | ||
22a2d3d5 UG |
479 | int git_config_parser_init(git_config_parser *out, const char *path, const char *data, size_t datalen) |
480 | { | |
481 | out->path = path; | |
482 | return git_parse_ctx_init(&out->ctx, data, datalen); | |
483 | } | |
484 | ||
485 | void git_config_parser_dispose(git_config_parser *parser) | |
486 | { | |
487 | git_parse_ctx_clear(&parser->ctx); | |
488 | } | |
489 | ||
eae0bfdc PP |
490 | int git_config_parse( |
491 | git_config_parser *parser, | |
492 | git_config_parser_section_cb on_section, | |
493 | git_config_parser_variable_cb on_variable, | |
494 | git_config_parser_comment_cb on_comment, | |
495 | git_config_parser_eof_cb on_eof, | |
22a2d3d5 | 496 | void *payload) |
eae0bfdc PP |
497 | { |
498 | git_parse_ctx *ctx; | |
ac3d33df | 499 | char *current_section = NULL, *var_name = NULL, *var_value = NULL; |
eae0bfdc PP |
500 | int result = 0; |
501 | ||
502 | ctx = &parser->ctx; | |
503 | ||
504 | skip_bom(ctx); | |
505 | ||
506 | for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) { | |
ac3d33df JK |
507 | const char *line_start; |
508 | size_t line_len; | |
eae0bfdc PP |
509 | char c; |
510 | ||
ac3d33df JK |
511 | restart: |
512 | line_start = ctx->line; | |
513 | line_len = ctx->line_len; | |
514 | ||
eae0bfdc PP |
515 | /* |
516 | * Get either first non-whitespace character or, if that does | |
517 | * not exist, the first whitespace character. This is required | |
518 | * to preserve whitespaces when writing back the file. | |
519 | */ | |
520 | if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 && | |
521 | git_parse_peek(&c, ctx, 0) < 0) | |
522 | continue; | |
523 | ||
524 | switch (c) { | |
525 | case '[': /* section header, new section begins */ | |
526 | git__free(current_section); | |
527 | current_section = NULL; | |
528 | ||
ac3d33df JK |
529 | result = parse_section_header(parser, ¤t_section); |
530 | if (result < 0) | |
531 | break; | |
532 | ||
533 | git_parse_advance_chars(ctx, result); | |
534 | ||
535 | if (on_section) | |
22a2d3d5 | 536 | result = on_section(parser, current_section, line_start, line_len, payload); |
ac3d33df JK |
537 | /* |
538 | * After we've parsed the section header we may not be | |
539 | * done with the line. If there's still data in there, | |
540 | * run the next loop with the rest of the current line | |
541 | * instead of moving forward. | |
542 | */ | |
543 | ||
544 | if (!git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE)) | |
545 | goto restart; | |
546 | ||
eae0bfdc PP |
547 | break; |
548 | ||
549 | case '\n': /* comment or whitespace-only */ | |
550 | case '\r': | |
551 | case ' ': | |
552 | case '\t': | |
553 | case ';': | |
554 | case '#': | |
555 | if (on_comment) { | |
22a2d3d5 | 556 | result = on_comment(parser, line_start, line_len, payload); |
eae0bfdc PP |
557 | } |
558 | break; | |
559 | ||
560 | default: /* assume variable declaration */ | |
e579e0f7 | 561 | if ((result = parse_variable(parser, &var_name, &var_value, &line_len)) == 0 && on_variable) { |
22a2d3d5 | 562 | result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, payload); |
ac3d33df JK |
563 | git__free(var_name); |
564 | git__free(var_value); | |
eae0bfdc | 565 | } |
ac3d33df | 566 | |
eae0bfdc PP |
567 | break; |
568 | } | |
569 | ||
570 | if (result < 0) | |
571 | goto out; | |
572 | } | |
573 | ||
574 | if (on_eof) | |
22a2d3d5 | 575 | result = on_eof(parser, current_section, payload); |
eae0bfdc PP |
576 | |
577 | out: | |
578 | git__free(current_section); | |
579 | return result; | |
580 | } |