]> git.proxmox.com Git - libgit2.git/blob - src/patch_parse.c
78cd96252f8b3edc19d7a70711db19dc3ec94038
[libgit2.git] / src / patch_parse.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "patch_parse.h"
9
10 #include "git2/patch.h"
11 #include "patch.h"
12 #include "diff_parse.h"
13 #include "fs_path.h"
14
15 typedef struct {
16 git_patch base;
17
18 git_patch_parse_ctx *ctx;
19
20 /* the paths from the `diff --git` header, these will be used if this is not
21 * a rename (and rename paths are specified) or if no `+++`/`---` line specify
22 * the paths.
23 */
24 char *header_old_path, *header_new_path;
25
26 /* renamed paths are precise and are not prefixed */
27 char *rename_old_path, *rename_new_path;
28
29 /* the paths given in `---` and `+++` lines */
30 char *old_path, *new_path;
31
32 /* the prefixes from the old/new paths */
33 char *old_prefix, *new_prefix;
34 } git_patch_parsed;
35
36 static int git_parse_err(const char *fmt, ...) GIT_FORMAT_PRINTF(1, 2);
37 static int git_parse_err(const char *fmt, ...)
38 {
39 va_list ap;
40
41 va_start(ap, fmt);
42 git_error_vset(GIT_ERROR_PATCH, fmt, ap);
43 va_end(ap);
44
45 return -1;
46 }
47
48 static size_t header_path_len(git_patch_parse_ctx *ctx)
49 {
50 bool inquote = 0;
51 bool quoted = git_parse_ctx_contains_s(&ctx->parse_ctx, "\"");
52 size_t len;
53
54 for (len = quoted; len < ctx->parse_ctx.line_len; len++) {
55 if (!quoted && git__isspace(ctx->parse_ctx.line[len]))
56 break;
57 else if (quoted && !inquote && ctx->parse_ctx.line[len] == '"') {
58 len++;
59 break;
60 }
61
62 inquote = (!inquote && ctx->parse_ctx.line[len] == '\\');
63 }
64
65 return len;
66 }
67
68 static int parse_header_path_buf(git_str *path, git_patch_parse_ctx *ctx, size_t path_len)
69 {
70 int error;
71
72 if ((error = git_str_put(path, ctx->parse_ctx.line, path_len)) < 0)
73 return error;
74
75 git_parse_advance_chars(&ctx->parse_ctx, path_len);
76
77 git_str_rtrim(path);
78
79 if (path->size > 0 && path->ptr[0] == '"' &&
80 (error = git_str_unquote(path)) < 0)
81 return error;
82
83 git_fs_path_squash_slashes(path);
84
85 if (!path->size)
86 return git_parse_err("patch contains empty path at line %"PRIuZ,
87 ctx->parse_ctx.line_num);
88
89 return 0;
90 }
91
92 static int parse_header_path(char **out, git_patch_parse_ctx *ctx)
93 {
94 git_str path = GIT_STR_INIT;
95 int error;
96
97 if ((error = parse_header_path_buf(&path, ctx, header_path_len(ctx))) < 0)
98 goto out;
99 *out = git_str_detach(&path);
100
101 out:
102 git_str_dispose(&path);
103 return error;
104 }
105
106 static int parse_header_git_oldpath(
107 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
108 {
109 git_str old_path = GIT_STR_INIT;
110 int error;
111
112 if (patch->old_path) {
113 error = git_parse_err("patch contains duplicate old path at line %"PRIuZ,
114 ctx->parse_ctx.line_num);
115 goto out;
116 }
117
118 if ((error = parse_header_path_buf(&old_path, ctx, ctx->parse_ctx.line_len - 1)) < 0)
119 goto out;
120
121 patch->old_path = git_str_detach(&old_path);
122
123 out:
124 git_str_dispose(&old_path);
125 return error;
126 }
127
128 static int parse_header_git_newpath(
129 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
130 {
131 git_str new_path = GIT_STR_INIT;
132 int error;
133
134 if (patch->new_path) {
135 error = git_parse_err("patch contains duplicate new path at line %"PRIuZ,
136 ctx->parse_ctx.line_num);
137 goto out;
138 }
139
140 if ((error = parse_header_path_buf(&new_path, ctx, ctx->parse_ctx.line_len - 1)) < 0)
141 goto out;
142 patch->new_path = git_str_detach(&new_path);
143
144 out:
145 git_str_dispose(&new_path);
146 return error;
147 }
148
149 static int parse_header_mode(uint16_t *mode, git_patch_parse_ctx *ctx)
150 {
151 int64_t m;
152
153 if ((git_parse_advance_digit(&m, &ctx->parse_ctx, 8)) < 0)
154 return git_parse_err("invalid file mode at line %"PRIuZ, ctx->parse_ctx.line_num);
155
156 if (m > UINT16_MAX)
157 return -1;
158
159 *mode = (uint16_t)m;
160
161 return 0;
162 }
163
164 static int parse_header_oid(
165 git_oid *oid,
166 uint16_t *oid_len,
167 git_patch_parse_ctx *ctx)
168 {
169 size_t len;
170
171 for (len = 0; len < ctx->parse_ctx.line_len && len < GIT_OID_HEXSZ; len++) {
172 if (!git__isxdigit(ctx->parse_ctx.line[len]))
173 break;
174 }
175
176 if (len < GIT_OID_MINPREFIXLEN || len > GIT_OID_HEXSZ ||
177 git_oid_fromstrn(oid, ctx->parse_ctx.line, len) < 0)
178 return git_parse_err("invalid hex formatted object id at line %"PRIuZ,
179 ctx->parse_ctx.line_num);
180
181 git_parse_advance_chars(&ctx->parse_ctx, len);
182
183 *oid_len = (uint16_t)len;
184
185 return 0;
186 }
187
188 static int parse_header_git_index(
189 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
190 {
191 char c;
192
193 if (parse_header_oid(&patch->base.delta->old_file.id,
194 &patch->base.delta->old_file.id_abbrev, ctx) < 0 ||
195 git_parse_advance_expected_str(&ctx->parse_ctx, "..") < 0 ||
196 parse_header_oid(&patch->base.delta->new_file.id,
197 &patch->base.delta->new_file.id_abbrev, ctx) < 0)
198 return -1;
199
200 if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ' ') {
201 uint16_t mode = 0;
202
203 git_parse_advance_chars(&ctx->parse_ctx, 1);
204
205 if (parse_header_mode(&mode, ctx) < 0)
206 return -1;
207
208 if (!patch->base.delta->new_file.mode)
209 patch->base.delta->new_file.mode = mode;
210
211 if (!patch->base.delta->old_file.mode)
212 patch->base.delta->old_file.mode = mode;
213 }
214
215 return 0;
216 }
217
218 static int parse_header_git_oldmode(
219 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
220 {
221 return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
222 }
223
224 static int parse_header_git_newmode(
225 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
226 {
227 return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
228 }
229
230 static int parse_header_git_deletedfilemode(
231 git_patch_parsed *patch,
232 git_patch_parse_ctx *ctx)
233 {
234 git__free((char *)patch->base.delta->new_file.path);
235
236 patch->base.delta->new_file.path = NULL;
237 patch->base.delta->status = GIT_DELTA_DELETED;
238 patch->base.delta->nfiles = 1;
239
240 return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
241 }
242
243 static int parse_header_git_newfilemode(
244 git_patch_parsed *patch,
245 git_patch_parse_ctx *ctx)
246 {
247 git__free((char *)patch->base.delta->old_file.path);
248
249 patch->base.delta->old_file.path = NULL;
250 patch->base.delta->status = GIT_DELTA_ADDED;
251 patch->base.delta->nfiles = 1;
252
253 return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
254 }
255
256 static int parse_header_rename(
257 char **out,
258 git_patch_parse_ctx *ctx)
259 {
260 git_str path = GIT_STR_INIT;
261
262 if (parse_header_path_buf(&path, ctx, header_path_len(ctx)) < 0)
263 return -1;
264
265 /* Note: the `rename from` and `rename to` lines include the literal
266 * filename. They do *not* include the prefix. (Who needs consistency?)
267 */
268 *out = git_str_detach(&path);
269 return 0;
270 }
271
272 static int parse_header_renamefrom(
273 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
274 {
275 patch->base.delta->status = GIT_DELTA_RENAMED;
276 return parse_header_rename(&patch->rename_old_path, ctx);
277 }
278
279 static int parse_header_renameto(
280 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
281 {
282 patch->base.delta->status = GIT_DELTA_RENAMED;
283 return parse_header_rename(&patch->rename_new_path, ctx);
284 }
285
286 static int parse_header_copyfrom(
287 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
288 {
289 patch->base.delta->status = GIT_DELTA_COPIED;
290 return parse_header_rename(&patch->rename_old_path, ctx);
291 }
292
293 static int parse_header_copyto(
294 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
295 {
296 patch->base.delta->status = GIT_DELTA_COPIED;
297 return parse_header_rename(&patch->rename_new_path, ctx);
298 }
299
300 static int parse_header_percent(uint16_t *out, git_patch_parse_ctx *ctx)
301 {
302 int64_t val;
303
304 if (git_parse_advance_digit(&val, &ctx->parse_ctx, 10) < 0)
305 return -1;
306
307 if (git_parse_advance_expected_str(&ctx->parse_ctx, "%") < 0)
308 return -1;
309
310 if (val < 0 || val > 100)
311 return -1;
312
313 *out = (uint16_t)val;
314 return 0;
315 }
316
317 static int parse_header_similarity(
318 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
319 {
320 if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0)
321 return git_parse_err("invalid similarity percentage at line %"PRIuZ,
322 ctx->parse_ctx.line_num);
323
324 return 0;
325 }
326
327 static int parse_header_dissimilarity(
328 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
329 {
330 uint16_t dissimilarity;
331
332 if (parse_header_percent(&dissimilarity, ctx) < 0)
333 return git_parse_err("invalid similarity percentage at line %"PRIuZ,
334 ctx->parse_ctx.line_num);
335
336 patch->base.delta->similarity = 100 - dissimilarity;
337
338 return 0;
339 }
340
341 static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
342 {
343 if (parse_header_path(&patch->header_old_path, ctx) < 0)
344 return git_parse_err("corrupt old path in git diff header at line %"PRIuZ,
345 ctx->parse_ctx.line_num);
346
347 if (git_parse_advance_ws(&ctx->parse_ctx) < 0 ||
348 parse_header_path(&patch->header_new_path, ctx) < 0)
349 return git_parse_err("corrupt new path in git diff header at line %"PRIuZ,
350 ctx->parse_ctx.line_num);
351
352 /*
353 * We cannot expect to be able to always parse paths correctly at this
354 * point. Due to the possibility of unquoted names, whitespaces in
355 * filenames and custom prefixes we have to allow that, though, and just
356 * proceed here. We then hope for the "---" and "+++" lines to fix that
357 * for us.
358 */
359 if (!git_parse_ctx_contains(&ctx->parse_ctx, "\n", 1) &&
360 !git_parse_ctx_contains(&ctx->parse_ctx, "\r\n", 2)) {
361 git_parse_advance_chars(&ctx->parse_ctx, ctx->parse_ctx.line_len - 1);
362
363 git__free(patch->header_old_path);
364 patch->header_old_path = NULL;
365 git__free(patch->header_new_path);
366 patch->header_new_path = NULL;
367 }
368
369 return 0;
370 }
371
/*
 * States of the git patch header parser. Each state records which kind
 * of header line was consumed last; the `transitions` table lists the
 * lines that may follow.
 */
typedef enum {
	STATE_START,      /* nothing parsed yet */

	STATE_DIFF,       /* after the `diff --git` line */
	STATE_FILEMODE,   /* after `deleted file mode` / `new file mode` */
	STATE_MODE,       /* after `old mode` */
	STATE_INDEX,      /* after `index` */
	STATE_PATH,       /* after `---` */

	STATE_SIMILARITY, /* after `similarity index` / `dissimilarity index` */
	STATE_RENAME,     /* after `rename from` / `rename old` */
	STATE_COPY,       /* after `copy from` */

	STATE_END         /* the header may end here */
} parse_header_state;
387
388 typedef struct {
389 const char *str;
390 parse_header_state expected_state;
391 parse_header_state next_state;
392 int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
393 } parse_header_transition;
394
395 static const parse_header_transition transitions[] = {
396 /* Start */
397 { "diff --git " , STATE_START, STATE_DIFF, parse_header_start },
398
399 { "deleted file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_deletedfilemode },
400 { "new file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_newfilemode },
401 { "old mode " , STATE_DIFF, STATE_MODE, parse_header_git_oldmode },
402 { "new mode " , STATE_MODE, STATE_END, parse_header_git_newmode },
403
404 { "index " , STATE_FILEMODE, STATE_INDEX, parse_header_git_index },
405 { "index " , STATE_DIFF, STATE_INDEX, parse_header_git_index },
406 { "index " , STATE_END, STATE_INDEX, parse_header_git_index },
407
408 { "--- " , STATE_DIFF, STATE_PATH, parse_header_git_oldpath },
409 { "--- " , STATE_INDEX, STATE_PATH, parse_header_git_oldpath },
410 { "--- " , STATE_FILEMODE, STATE_PATH, parse_header_git_oldpath },
411 { "+++ " , STATE_PATH, STATE_END, parse_header_git_newpath },
412 { "GIT binary patch" , STATE_INDEX, STATE_END, NULL },
413 { "Binary files " , STATE_INDEX, STATE_END, NULL },
414
415 { "similarity index " , STATE_END, STATE_SIMILARITY, parse_header_similarity },
416 { "similarity index " , STATE_DIFF, STATE_SIMILARITY, parse_header_similarity },
417 { "dissimilarity index ", STATE_DIFF, STATE_SIMILARITY, parse_header_dissimilarity },
418 { "rename from " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
419 { "rename old " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
420 { "copy from " , STATE_SIMILARITY, STATE_COPY, parse_header_copyfrom },
421 { "rename to " , STATE_RENAME, STATE_END, parse_header_renameto },
422 { "rename new " , STATE_RENAME, STATE_END, parse_header_renameto },
423 { "copy to " , STATE_COPY, STATE_END, parse_header_copyto },
424
425 /* Next patch */
426 { "diff --git " , STATE_END, 0, NULL },
427 { "@@ -" , STATE_END, 0, NULL },
428 { "-- " , STATE_INDEX, 0, NULL },
429 { "-- " , STATE_END, 0, NULL },
430 };
431
432 static int parse_header_git(
433 git_patch_parsed *patch,
434 git_patch_parse_ctx *ctx)
435 {
436 size_t i;
437 int error = 0;
438 parse_header_state state = STATE_START;
439
440 /* Parse remaining header lines */
441 for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
442 bool found = false;
443
444 if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n')
445 break;
446
447 for (i = 0; i < ARRAY_SIZE(transitions); i++) {
448 const parse_header_transition *transition = &transitions[i];
449 size_t op_len = strlen(transition->str);
450
451 if (transition->expected_state != state ||
452 git__prefixcmp(ctx->parse_ctx.line, transition->str) != 0)
453 continue;
454
455 state = transition->next_state;
456
457 /* Do not advance if this is the patch separator */
458 if (transition->fn == NULL)
459 goto done;
460
461 git_parse_advance_chars(&ctx->parse_ctx, op_len);
462
463 if ((error = transition->fn(patch, ctx)) < 0)
464 goto done;
465
466 git_parse_advance_ws(&ctx->parse_ctx);
467
468 if (git_parse_advance_expected_str(&ctx->parse_ctx, "\n") < 0 ||
469 ctx->parse_ctx.line_len > 0) {
470 error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
471 goto done;
472 }
473
474 found = true;
475 break;
476 }
477
478 if (!found) {
479 error = git_parse_err("invalid patch header at line %"PRIuZ,
480 ctx->parse_ctx.line_num);
481 goto done;
482 }
483 }
484
485 if (state != STATE_END) {
486 error = git_parse_err("unexpected header line %"PRIuZ, ctx->parse_ctx.line_num);
487 goto done;
488 }
489
490 done:
491 return error;
492 }
493
494 static int parse_int(int *out, git_patch_parse_ctx *ctx)
495 {
496 int64_t num;
497
498 if (git_parse_advance_digit(&num, &ctx->parse_ctx, 10) < 0 || !git__is_int(num))
499 return -1;
500
501 *out = (int)num;
502 return 0;
503 }
504
505 static int parse_hunk_header(
506 git_patch_hunk *hunk,
507 git_patch_parse_ctx *ctx)
508 {
509 const char *header_start = ctx->parse_ctx.line;
510 char c;
511
512 hunk->hunk.old_lines = 1;
513 hunk->hunk.new_lines = 1;
514
515 if (git_parse_advance_expected_str(&ctx->parse_ctx, "@@ -") < 0 ||
516 parse_int(&hunk->hunk.old_start, ctx) < 0)
517 goto fail;
518
519 if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
520 if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
521 parse_int(&hunk->hunk.old_lines, ctx) < 0)
522 goto fail;
523 }
524
525 if (git_parse_advance_expected_str(&ctx->parse_ctx, " +") < 0 ||
526 parse_int(&hunk->hunk.new_start, ctx) < 0)
527 goto fail;
528
529 if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
530 if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
531 parse_int(&hunk->hunk.new_lines, ctx) < 0)
532 goto fail;
533 }
534
535 if (git_parse_advance_expected_str(&ctx->parse_ctx, " @@") < 0)
536 goto fail;
537
538 git_parse_advance_line(&ctx->parse_ctx);
539
540 if (!hunk->hunk.old_lines && !hunk->hunk.new_lines)
541 goto fail;
542
543 hunk->hunk.header_len = ctx->parse_ctx.line - header_start;
544 if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1))
545 return git_parse_err("oversized patch hunk header at line %"PRIuZ,
546 ctx->parse_ctx.line_num);
547
548 memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len);
549 hunk->hunk.header[hunk->hunk.header_len] = '\0';
550
551 return 0;
552
553 fail:
554 git_error_set(GIT_ERROR_PATCH, "invalid patch hunk header at line %"PRIuZ,
555 ctx->parse_ctx.line_num);
556 return -1;
557 }
558
559 static int eof_for_origin(int origin) {
560 if (origin == GIT_DIFF_LINE_ADDITION)
561 return GIT_DIFF_LINE_ADD_EOFNL;
562 if (origin == GIT_DIFF_LINE_DELETION)
563 return GIT_DIFF_LINE_DEL_EOFNL;
564 return GIT_DIFF_LINE_CONTEXT_EOFNL;
565 }
566
567 static int parse_hunk_body(
568 git_patch_parsed *patch,
569 git_patch_hunk *hunk,
570 git_patch_parse_ctx *ctx)
571 {
572 git_diff_line *line;
573 int error = 0;
574
575 int oldlines = hunk->hunk.old_lines;
576 int newlines = hunk->hunk.new_lines;
577 int last_origin = 0;
578
579 for (;
580 ctx->parse_ctx.remain_len > 1 &&
581 (oldlines || newlines) &&
582 !git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -");
583 git_parse_advance_line(&ctx->parse_ctx)) {
584
585 int old_lineno, new_lineno, origin, prefix = 1;
586 char c;
587
588 if (git__add_int_overflow(&old_lineno, hunk->hunk.old_start, hunk->hunk.old_lines) ||
589 git__sub_int_overflow(&old_lineno, old_lineno, oldlines) ||
590 git__add_int_overflow(&new_lineno, hunk->hunk.new_start, hunk->hunk.new_lines) ||
591 git__sub_int_overflow(&new_lineno, new_lineno, newlines)) {
592 error = git_parse_err("unrepresentable line count at line %"PRIuZ,
593 ctx->parse_ctx.line_num);
594 goto done;
595 }
596
597 if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n') {
598 error = git_parse_err("invalid patch instruction at line %"PRIuZ,
599 ctx->parse_ctx.line_num);
600 goto done;
601 }
602
603 git_parse_peek(&c, &ctx->parse_ctx, 0);
604
605 switch (c) {
606 case '\n':
607 prefix = 0;
608 /* fall through */
609
610 case ' ':
611 origin = GIT_DIFF_LINE_CONTEXT;
612 oldlines--;
613 newlines--;
614 break;
615
616 case '-':
617 origin = GIT_DIFF_LINE_DELETION;
618 oldlines--;
619 new_lineno = -1;
620 break;
621
622 case '+':
623 origin = GIT_DIFF_LINE_ADDITION;
624 newlines--;
625 old_lineno = -1;
626 break;
627
628 case '\\':
629 /*
630 * If there are no oldlines left, then this is probably
631 * the "\ No newline at end of file" marker. Do not
632 * verify its format, as it may be localized.
633 */
634 if (!oldlines) {
635 prefix = 0;
636 origin = eof_for_origin(last_origin);
637 old_lineno = -1;
638 new_lineno = -1;
639 break;
640 }
641 /* fall through */
642
643 default:
644 error = git_parse_err("invalid patch hunk at line %"PRIuZ, ctx->parse_ctx.line_num);
645 goto done;
646 }
647
648 line = git_array_alloc(patch->base.lines);
649 GIT_ERROR_CHECK_ALLOC(line);
650
651 memset(line, 0x0, sizeof(git_diff_line));
652
653 line->content_len = ctx->parse_ctx.line_len - prefix;
654 line->content = git__strndup(ctx->parse_ctx.line + prefix, line->content_len);
655 GIT_ERROR_CHECK_ALLOC(line->content);
656 line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
657 line->origin = origin;
658 line->num_lines = 1;
659 line->old_lineno = old_lineno;
660 line->new_lineno = new_lineno;
661
662 hunk->line_count++;
663
664 last_origin = origin;
665 }
666
667 if (oldlines || newlines) {
668 error = git_parse_err(
669 "invalid patch hunk, expected %d old lines and %d new lines",
670 hunk->hunk.old_lines, hunk->hunk.new_lines);
671 goto done;
672 }
673
674 /*
675 * Handle "\ No newline at end of file". Only expect the leading
676 * backslash, though, because the rest of the string could be
677 * localized. Because `diff` optimizes for the case where you
678 * want to apply the patch by hand.
679 */
680 if (git_parse_ctx_contains_s(&ctx->parse_ctx, "\\ ") &&
681 git_array_size(patch->base.lines) > 0) {
682
683 line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1);
684
685 if (line->content_len < 1) {
686 error = git_parse_err("last line has no trailing newline");
687 goto done;
688 }
689
690 line = git_array_alloc(patch->base.lines);
691 GIT_ERROR_CHECK_ALLOC(line);
692
693 memset(line, 0x0, sizeof(git_diff_line));
694
695 line->content_len = ctx->parse_ctx.line_len;
696 line->content = git__strndup(ctx->parse_ctx.line, line->content_len);
697 GIT_ERROR_CHECK_ALLOC(line->content);
698 line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
699 line->origin = eof_for_origin(last_origin);
700 line->num_lines = 1;
701 line->old_lineno = -1;
702 line->new_lineno = -1;
703
704 hunk->line_count++;
705
706 git_parse_advance_line(&ctx->parse_ctx);
707 }
708
709 done:
710 return error;
711 }
712
713 static int parse_patch_header(
714 git_patch_parsed *patch,
715 git_patch_parse_ctx *ctx)
716 {
717 int error = 0;
718
719 for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
720 /* This line is too short to be a patch header. */
721 if (ctx->parse_ctx.line_len < 6)
722 continue;
723
724 /* This might be a hunk header without a patch header, provide a
725 * sensible error message. */
726 if (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
727 size_t line_num = ctx->parse_ctx.line_num;
728 git_patch_hunk hunk;
729
730 /* If this cannot be parsed as a hunk header, it's just leading
731 * noise, continue.
732 */
733 if (parse_hunk_header(&hunk, ctx) < 0) {
734 git_error_clear();
735 continue;
736 }
737
738 error = git_parse_err("invalid hunk header outside patch at line %"PRIuZ,
739 line_num);
740 goto done;
741 }
742
743 /* This buffer is too short to contain a patch. */
744 if (ctx->parse_ctx.remain_len < ctx->parse_ctx.line_len + 6)
745 break;
746
747 /* A proper git patch */
748 if (git_parse_ctx_contains_s(&ctx->parse_ctx, "diff --git ")) {
749 error = parse_header_git(patch, ctx);
750 goto done;
751 }
752
753 error = 0;
754 continue;
755 }
756
757 git_error_set(GIT_ERROR_PATCH, "no patch found");
758 error = GIT_ENOTFOUND;
759
760 done:
761 return error;
762 }
763
764 static int parse_patch_binary_side(
765 git_diff_binary_file *binary,
766 git_patch_parse_ctx *ctx)
767 {
768 git_diff_binary_t type = GIT_DIFF_BINARY_NONE;
769 git_str base85 = GIT_STR_INIT, decoded = GIT_STR_INIT;
770 int64_t len;
771 int error = 0;
772
773 if (git_parse_ctx_contains_s(&ctx->parse_ctx, "literal ")) {
774 type = GIT_DIFF_BINARY_LITERAL;
775 git_parse_advance_chars(&ctx->parse_ctx, 8);
776 } else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "delta ")) {
777 type = GIT_DIFF_BINARY_DELTA;
778 git_parse_advance_chars(&ctx->parse_ctx, 6);
779 } else {
780 error = git_parse_err(
781 "unknown binary delta type at line %"PRIuZ, ctx->parse_ctx.line_num);
782 goto done;
783 }
784
785 if (git_parse_advance_digit(&len, &ctx->parse_ctx, 10) < 0 ||
786 git_parse_advance_nl(&ctx->parse_ctx) < 0 || len < 0) {
787 error = git_parse_err("invalid binary size at line %"PRIuZ, ctx->parse_ctx.line_num);
788 goto done;
789 }
790
791 while (ctx->parse_ctx.line_len) {
792 char c;
793 size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size;
794
795 git_parse_peek(&c, &ctx->parse_ctx, 0);
796
797 if (c == '\n')
798 break;
799 else if (c >= 'A' && c <= 'Z')
800 decoded_len = c - 'A' + 1;
801 else if (c >= 'a' && c <= 'z')
802 decoded_len = c - 'a' + (('z' - 'a') + 1) + 1;
803
804 if (!decoded_len) {
805 error = git_parse_err("invalid binary length at line %"PRIuZ, ctx->parse_ctx.line_num);
806 goto done;
807 }
808
809 git_parse_advance_chars(&ctx->parse_ctx, 1);
810
811 encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5;
812
813 if (!encoded_len || !ctx->parse_ctx.line_len || encoded_len > ctx->parse_ctx.line_len - 1) {
814 error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
815 goto done;
816 }
817
818 if ((error = git_str_decode_base85(
819 &decoded, ctx->parse_ctx.line, encoded_len, decoded_len)) < 0)
820 goto done;
821
822 if (decoded.size - decoded_orig != decoded_len) {
823 error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
824 goto done;
825 }
826
827 git_parse_advance_chars(&ctx->parse_ctx, encoded_len);
828
829 if (git_parse_advance_nl(&ctx->parse_ctx) < 0) {
830 error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
831 goto done;
832 }
833 }
834
835 binary->type = type;
836 binary->inflatedlen = (size_t)len;
837 binary->datalen = decoded.size;
838 binary->data = git_str_detach(&decoded);
839
840 done:
841 git_str_dispose(&base85);
842 git_str_dispose(&decoded);
843 return error;
844 }
845
846 static int parse_patch_binary(
847 git_patch_parsed *patch,
848 git_patch_parse_ctx *ctx)
849 {
850 int error;
851
852 if (git_parse_advance_expected_str(&ctx->parse_ctx, "GIT binary patch") < 0 ||
853 git_parse_advance_nl(&ctx->parse_ctx) < 0)
854 return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
855
856 /* parse old->new binary diff */
857 if ((error = parse_patch_binary_side(
858 &patch->base.binary.new_file, ctx)) < 0)
859 return error;
860
861 if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
862 return git_parse_err("corrupt git binary separator at line %"PRIuZ,
863 ctx->parse_ctx.line_num);
864
865 /* parse new->old binary diff */
866 if ((error = parse_patch_binary_side(
867 &patch->base.binary.old_file, ctx)) < 0)
868 return error;
869
870 if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
871 return git_parse_err("corrupt git binary patch separator at line %"PRIuZ,
872 ctx->parse_ctx.line_num);
873
874 patch->base.binary.contains_data = 1;
875 patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
876 return 0;
877 }
878
879 static int parse_patch_binary_nodata(
880 git_patch_parsed *patch,
881 git_patch_parse_ctx *ctx)
882 {
883 const char *old = patch->old_path ? patch->old_path : patch->header_old_path;
884 const char *new = patch->new_path ? patch->new_path : patch->header_new_path;
885
886 if (!old || !new)
887 return git_parse_err("corrupt binary data without paths at line %"PRIuZ, ctx->parse_ctx.line_num);
888
889 if (patch->base.delta->status == GIT_DELTA_ADDED)
890 old = "/dev/null";
891 else if (patch->base.delta->status == GIT_DELTA_DELETED)
892 new = "/dev/null";
893
894 if (git_parse_advance_expected_str(&ctx->parse_ctx, "Binary files ") < 0 ||
895 git_parse_advance_expected_str(&ctx->parse_ctx, old) < 0 ||
896 git_parse_advance_expected_str(&ctx->parse_ctx, " and ") < 0 ||
897 git_parse_advance_expected_str(&ctx->parse_ctx, new) < 0 ||
898 git_parse_advance_expected_str(&ctx->parse_ctx, " differ") < 0 ||
899 git_parse_advance_nl(&ctx->parse_ctx) < 0)
900 return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
901
902 patch->base.binary.contains_data = 0;
903 patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
904 return 0;
905 }
906
907 static int parse_patch_hunks(
908 git_patch_parsed *patch,
909 git_patch_parse_ctx *ctx)
910 {
911 git_patch_hunk *hunk;
912 int error = 0;
913
914 while (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
915 hunk = git_array_alloc(patch->base.hunks);
916 GIT_ERROR_CHECK_ALLOC(hunk);
917
918 memset(hunk, 0, sizeof(git_patch_hunk));
919
920 hunk->line_start = git_array_size(patch->base.lines);
921 hunk->line_count = 0;
922
923 if ((error = parse_hunk_header(hunk, ctx)) < 0 ||
924 (error = parse_hunk_body(patch, hunk, ctx)) < 0)
925 goto done;
926 }
927
928 patch->base.delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;
929
930 done:
931 return error;
932 }
933
934 static int parse_patch_body(
935 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
936 {
937 if (git_parse_ctx_contains_s(&ctx->parse_ctx, "GIT binary patch"))
938 return parse_patch_binary(patch, ctx);
939 else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "Binary files "))
940 return parse_patch_binary_nodata(patch, ctx);
941 else
942 return parse_patch_hunks(patch, ctx);
943 }
944
/*
 * Compare a path from the `diff --git` line (`one`) with the matching
 * path from a `---`/`+++` line (`two`).
 *
 * When `two_null` is set, `two` has to be `/dev/null`; otherwise both
 * names must be equal. If either name is missing there is nothing to
 * compare. `old_or_new` is only used in the error message.
 */
static int check_header_names(
	const char *one,
	const char *two,
	const char *old_or_new,
	bool two_null)
{
	if (one == NULL || two == NULL)
		return 0;

	if (two_null) {
		if (strcmp(two, "/dev/null") != 0)
			return git_parse_err("expected %s path of '/dev/null'", old_or_new);

		return 0;
	}

	if (strcmp(one, two) != 0)
		return git_parse_err("mismatched %s path names", old_or_new);

	return 0;
}
962
963 static int check_prefix(
964 char **out,
965 size_t *out_len,
966 git_patch_parsed *patch,
967 const char *path_start)
968 {
969 const char *path = path_start;
970 size_t prefix_len = patch->ctx->opts.prefix_len;
971 size_t remain_len = prefix_len;
972
973 *out = NULL;
974 *out_len = 0;
975
976 if (prefix_len == 0)
977 goto done;
978
979 /* leading slashes do not count as part of the prefix in git apply */
980 while (*path == '/')
981 path++;
982
983 while (*path && remain_len) {
984 if (*path == '/')
985 remain_len--;
986
987 path++;
988 }
989
990 if (remain_len || !*path)
991 return git_parse_err(
992 "header filename does not contain %"PRIuZ" path components",
993 prefix_len);
994
995 done:
996 *out_len = (path - path_start);
997 *out = git__strndup(path_start, *out_len);
998
999 return (*out == NULL) ? -1 : 0;
1000 }
1001
1002 static int check_filenames(git_patch_parsed *patch)
1003 {
1004 const char *prefixed_new, *prefixed_old;
1005 size_t old_prefixlen = 0, new_prefixlen = 0;
1006 bool added = (patch->base.delta->status == GIT_DELTA_ADDED);
1007 bool deleted = (patch->base.delta->status == GIT_DELTA_DELETED);
1008
1009 if (patch->old_path && !patch->new_path)
1010 return git_parse_err("missing new path");
1011
1012 if (!patch->old_path && patch->new_path)
1013 return git_parse_err("missing old path");
1014
1015 /* Ensure (non-renamed) paths match */
1016 if (check_header_names(patch->header_old_path, patch->old_path, "old", added) < 0 ||
1017 check_header_names(patch->header_new_path, patch->new_path, "new", deleted) < 0)
1018 return -1;
1019
1020 prefixed_old = (!added && patch->old_path) ? patch->old_path : patch->header_old_path;
1021 prefixed_new = (!deleted && patch->new_path) ? patch->new_path : patch->header_new_path;
1022
1023 if ((prefixed_old && check_prefix(&patch->old_prefix, &old_prefixlen, patch, prefixed_old) < 0) ||
1024 (prefixed_new && check_prefix(&patch->new_prefix, &new_prefixlen, patch, prefixed_new) < 0))
1025 return -1;
1026
1027 /* Prefer the rename filenames as they are unambiguous and unprefixed */
1028 if (patch->rename_old_path)
1029 patch->base.delta->old_file.path = patch->rename_old_path;
1030 else if (prefixed_old)
1031 patch->base.delta->old_file.path = prefixed_old + old_prefixlen;
1032 else
1033 patch->base.delta->old_file.path = NULL;
1034
1035 if (patch->rename_new_path)
1036 patch->base.delta->new_file.path = patch->rename_new_path;
1037 else if (prefixed_new)
1038 patch->base.delta->new_file.path = prefixed_new + new_prefixlen;
1039 else
1040 patch->base.delta->new_file.path = NULL;
1041
1042 if (!patch->base.delta->old_file.path &&
1043 !patch->base.delta->new_file.path)
1044 return git_parse_err("git diff header lacks old / new paths");
1045
1046 return 0;
1047 }
1048
1049 static int check_patch(git_patch_parsed *patch)
1050 {
1051 git_diff_delta *delta = patch->base.delta;
1052
1053 if (check_filenames(patch) < 0)
1054 return -1;
1055
1056 if (delta->old_file.path &&
1057 delta->status != GIT_DELTA_DELETED &&
1058 !delta->new_file.mode)
1059 delta->new_file.mode = delta->old_file.mode;
1060
1061 if (delta->status == GIT_DELTA_MODIFIED &&
1062 !(delta->flags & GIT_DIFF_FLAG_BINARY) &&
1063 delta->new_file.mode == delta->old_file.mode &&
1064 git_array_size(patch->base.hunks) == 0)
1065 return git_parse_err("patch with no hunks");
1066
1067 if (delta->status == GIT_DELTA_ADDED) {
1068 memset(&delta->old_file.id, 0x0, sizeof(git_oid));
1069 delta->old_file.id_abbrev = 0;
1070 }
1071
1072 if (delta->status == GIT_DELTA_DELETED) {
1073 memset(&delta->new_file.id, 0x0, sizeof(git_oid));
1074 delta->new_file.id_abbrev = 0;
1075 }
1076
1077 return 0;
1078 }
1079
1080 git_patch_parse_ctx *git_patch_parse_ctx_init(
1081 const char *content,
1082 size_t content_len,
1083 const git_patch_options *opts)
1084 {
1085 git_patch_parse_ctx *ctx;
1086 git_patch_options default_opts = GIT_PATCH_OPTIONS_INIT;
1087
1088 if ((ctx = git__calloc(1, sizeof(git_patch_parse_ctx))) == NULL)
1089 return NULL;
1090
1091 if ((git_parse_ctx_init(&ctx->parse_ctx, content, content_len)) < 0) {
1092 git__free(ctx);
1093 return NULL;
1094 }
1095
1096 if (opts)
1097 memcpy(&ctx->opts, opts, sizeof(git_patch_options));
1098 else
1099 memcpy(&ctx->opts, &default_opts, sizeof(git_patch_options));
1100
1101 GIT_REFCOUNT_INC(ctx);
1102 return ctx;
1103 }
1104
1105 static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
1106 {
1107 if (!ctx)
1108 return;
1109
1110 git_parse_ctx_clear(&ctx->parse_ctx);
1111 git__free(ctx);
1112 }
1113
1114 void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
1115 {
1116 GIT_REFCOUNT_DEC(ctx, patch_parse_ctx_free);
1117 }
1118
1119 int git_patch_parsed_from_diff(git_patch **out, git_diff *d, size_t idx)
1120 {
1121 git_diff_parsed *diff = (git_diff_parsed *)d;
1122 git_patch *p;
1123
1124 if ((p = git_vector_get(&diff->patches, idx)) == NULL)
1125 return -1;
1126
1127 GIT_REFCOUNT_INC(p);
1128 *out = p;
1129
1130 return 0;
1131 }
1132
1133 static void patch_parsed__free(git_patch *p)
1134 {
1135 git_patch_parsed *patch = (git_patch_parsed *)p;
1136 git_diff_line *line;
1137 size_t i;
1138
1139 if (!patch)
1140 return;
1141
1142 git_patch_parse_ctx_free(patch->ctx);
1143
1144 git__free((char *)patch->base.binary.old_file.data);
1145 git__free((char *)patch->base.binary.new_file.data);
1146 git_array_clear(patch->base.hunks);
1147 git_array_foreach(patch->base.lines, i, line)
1148 git__free((char *) line->content);
1149 git_array_clear(patch->base.lines);
1150 git__free(patch->base.delta);
1151
1152 git__free(patch->old_prefix);
1153 git__free(patch->new_prefix);
1154 git__free(patch->header_old_path);
1155 git__free(patch->header_new_path);
1156 git__free(patch->rename_old_path);
1157 git__free(patch->rename_new_path);
1158 git__free(patch->old_path);
1159 git__free(patch->new_path);
1160 git__free(patch);
1161 }
1162
1163 int git_patch_parse(
1164 git_patch **out,
1165 git_patch_parse_ctx *ctx)
1166 {
1167 git_patch_parsed *patch;
1168 size_t start, used;
1169 int error = 0;
1170
1171 GIT_ASSERT_ARG(out);
1172 GIT_ASSERT_ARG(ctx);
1173
1174 *out = NULL;
1175
1176 patch = git__calloc(1, sizeof(git_patch_parsed));
1177 GIT_ERROR_CHECK_ALLOC(patch);
1178
1179 patch->ctx = ctx;
1180 GIT_REFCOUNT_INC(patch->ctx);
1181
1182 patch->base.free_fn = patch_parsed__free;
1183
1184 patch->base.delta = git__calloc(1, sizeof(git_diff_delta));
1185 GIT_ERROR_CHECK_ALLOC(patch->base.delta);
1186
1187 patch->base.delta->status = GIT_DELTA_MODIFIED;
1188 patch->base.delta->nfiles = 2;
1189
1190 start = ctx->parse_ctx.remain_len;
1191
1192 if ((error = parse_patch_header(patch, ctx)) < 0 ||
1193 (error = parse_patch_body(patch, ctx)) < 0 ||
1194 (error = check_patch(patch)) < 0)
1195 goto done;
1196
1197 used = start - ctx->parse_ctx.remain_len;
1198 ctx->parse_ctx.remain += used;
1199
1200 patch->base.diff_opts.old_prefix = patch->old_prefix;
1201 patch->base.diff_opts.new_prefix = patch->new_prefix;
1202 patch->base.diff_opts.flags |= GIT_DIFF_SHOW_BINARY;
1203
1204 GIT_REFCOUNT_INC(&patch->base);
1205 *out = &patch->base;
1206
1207 done:
1208 if (error < 0)
1209 patch_parsed__free(&patch->base);
1210
1211 return error;
1212 }
1213
1214 int git_patch_from_buffer(
1215 git_patch **out,
1216 const char *content,
1217 size_t content_len,
1218 const git_patch_options *opts)
1219 {
1220 git_patch_parse_ctx *ctx;
1221 int error;
1222
1223 ctx = git_patch_parse_ctx_init(content, content_len, opts);
1224 GIT_ERROR_CHECK_ALLOC(ctx);
1225
1226 error = git_patch_parse(out, ctx);
1227
1228 git_patch_parse_ctx_free(ctx);
1229 return error;
1230 }
1231