]> git.proxmox.com Git - libgit2.git/blob - src/patch_parse.c
Teach `git_patch_from_diff` about parsed diffs
[libgit2.git] / src / patch_parse.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7 #include "git2/patch.h"
8 #include "patch.h"
9 #include "patch_parse.h"
10 #include "diff_parse.h"
11 #include "path.h"
12
/* Record a GITERR_PATCH error using the given format string and arguments,
 * and evaluate to -1 so that callers can write `return parse_err(...)`.
 */
#define parse_err(...) \
	( giterr_set(GITERR_PATCH, __VA_ARGS__), -1 )
15
/* A patch built by parsing diff text.  `base` is the first member; this
 * file casts between `git_patch *` and `git_patch_parsed *`.
 */
typedef struct {
	git_patch base;

	/* the parse context this patch was read from; git_patch_parse takes a
	 * reference on it and patch_parsed__free releases that reference */
	git_patch_parse_ctx *ctx;

	/* the paths from the `diff --git` header; these are used when the
	 * patch carries no rename paths and no `---`/`+++` lines specify the
	 * paths.
	 */
	char *header_old_path, *header_new_path;

	/* renamed paths are precise and are not prefixed */
	char *rename_old_path, *rename_new_path;

	/* the paths given in `---` and `+++` lines */
	char *old_path, *new_path;

	/* the prefixes stripped from the old/new paths (see check_prefix) */
	char *old_prefix, *new_prefix;
} git_patch_parsed;
36
37
38 GIT_INLINE(bool) parse_ctx_contains(
39 git_patch_parse_ctx *ctx, const char *str, size_t len)
40 {
41 return (ctx->line_len >= len && memcmp(ctx->line, str, len) == 0);
42 }
43
44 #define parse_ctx_contains_s(ctx, str) \
45 parse_ctx_contains(ctx, str, sizeof(str) - 1)
46
47 static void parse_advance_line(git_patch_parse_ctx *ctx)
48 {
49 ctx->line += ctx->line_len;
50 ctx->remain_len -= ctx->line_len;
51 ctx->line_len = git__linenlen(ctx->line, ctx->remain_len);
52 ctx->line_num++;
53 }
54
55 static void parse_advance_chars(git_patch_parse_ctx *ctx, size_t char_cnt)
56 {
57 ctx->line += char_cnt;
58 ctx->remain_len -= char_cnt;
59 ctx->line_len -= char_cnt;
60 }
61
62 static int parse_advance_expected(
63 git_patch_parse_ctx *ctx,
64 const char *expected,
65 size_t expected_len)
66 {
67 if (ctx->line_len < expected_len)
68 return -1;
69
70 if (memcmp(ctx->line, expected, expected_len) != 0)
71 return -1;
72
73 parse_advance_chars(ctx, expected_len);
74 return 0;
75 }
76
77 #define parse_advance_expected_s(ctx, str) \
78 parse_advance_expected(ctx, str, sizeof(str) - 1)
79
80 static int parse_advance_ws(git_patch_parse_ctx *ctx)
81 {
82 int ret = -1;
83
84 while (ctx->line_len > 0 &&
85 ctx->line[0] != '\n' &&
86 git__isspace(ctx->line[0])) {
87 ctx->line++;
88 ctx->line_len--;
89 ctx->remain_len--;
90 ret = 0;
91 }
92
93 return ret;
94 }
95
96 static int parse_advance_nl(git_patch_parse_ctx *ctx)
97 {
98 if (ctx->line_len != 1 || ctx->line[0] != '\n')
99 return -1;
100
101 parse_advance_line(ctx);
102 return 0;
103 }
104
105 static int header_path_len(git_patch_parse_ctx *ctx)
106 {
107 bool inquote = 0;
108 bool quoted = (ctx->line_len > 0 && ctx->line[0] == '"');
109 size_t len;
110
111 for (len = quoted; len < ctx->line_len; len++) {
112 if (!quoted && git__isspace(ctx->line[len]))
113 break;
114 else if (quoted && !inquote && ctx->line[len] == '"') {
115 len++;
116 break;
117 }
118
119 inquote = (!inquote && ctx->line[len] == '\\');
120 }
121
122 return len;
123 }
124
125 static int parse_header_path_buf(git_buf *path, git_patch_parse_ctx *ctx)
126 {
127 int path_len, error = 0;
128
129 path_len = header_path_len(ctx);
130
131 if ((error = git_buf_put(path, ctx->line, path_len)) < 0)
132 goto done;
133
134 parse_advance_chars(ctx, path_len);
135
136 git_buf_rtrim(path);
137
138 if (path->size > 0 && path->ptr[0] == '"')
139 error = git_buf_unquote(path);
140
141 if (error < 0)
142 goto done;
143
144 git_path_squash_slashes(path);
145
146 done:
147 return error;
148 }
149
150 static int parse_header_path(char **out, git_patch_parse_ctx *ctx)
151 {
152 git_buf path = GIT_BUF_INIT;
153 int error = parse_header_path_buf(&path, ctx);
154
155 *out = git_buf_detach(&path);
156
157 return error;
158 }
159
160 static int parse_header_git_oldpath(
161 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
162 {
163 return parse_header_path(&patch->old_path, ctx);
164 }
165
166 static int parse_header_git_newpath(
167 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
168 {
169 return parse_header_path(&patch->new_path, ctx);
170 }
171
172 static int parse_header_mode(uint16_t *mode, git_patch_parse_ctx *ctx)
173 {
174 const char *end;
175 int32_t m;
176 int ret;
177
178 if (ctx->line_len < 1 || !git__isdigit(ctx->line[0]))
179 return parse_err("invalid file mode at line %d", ctx->line_num);
180
181 if ((ret = git__strntol32(&m, ctx->line, ctx->line_len, &end, 8)) < 0)
182 return ret;
183
184 if (m > UINT16_MAX)
185 return -1;
186
187 *mode = (uint16_t)m;
188
189 parse_advance_chars(ctx, (end - ctx->line));
190
191 return ret;
192 }
193
194 static int parse_header_oid(
195 git_oid *oid,
196 uint16_t *oid_len,
197 git_patch_parse_ctx *ctx)
198 {
199 size_t len;
200
201 for (len = 0; len < ctx->line_len && len < GIT_OID_HEXSZ; len++) {
202 if (!git__isxdigit(ctx->line[len]))
203 break;
204 }
205
206 if (len < GIT_OID_MINPREFIXLEN || len > GIT_OID_HEXSZ ||
207 git_oid_fromstrn(oid, ctx->line, len) < 0)
208 return parse_err("invalid hex formatted object id at line %d",
209 ctx->line_num);
210
211 parse_advance_chars(ctx, len);
212
213 *oid_len = (uint16_t)len;
214
215 return 0;
216 }
217
218 static int parse_header_git_index(
219 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
220 {
221 if (parse_header_oid(&patch->base.delta->old_file.id,
222 &patch->base.delta->old_file.id_abbrev, ctx) < 0 ||
223 parse_advance_expected_s(ctx, "..") < 0 ||
224 parse_header_oid(&patch->base.delta->new_file.id,
225 &patch->base.delta->new_file.id_abbrev, ctx) < 0)
226 return -1;
227
228 if (ctx->line_len > 0 && ctx->line[0] == ' ') {
229 uint16_t mode;
230
231 parse_advance_chars(ctx, 1);
232
233 if (parse_header_mode(&mode, ctx) < 0)
234 return -1;
235
236 if (!patch->base.delta->new_file.mode)
237 patch->base.delta->new_file.mode = mode;
238
239 if (!patch->base.delta->old_file.mode)
240 patch->base.delta->old_file.mode = mode;
241 }
242
243 return 0;
244 }
245
246 static int parse_header_git_oldmode(
247 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
248 {
249 return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
250 }
251
252 static int parse_header_git_newmode(
253 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
254 {
255 return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
256 }
257
258 static int parse_header_git_deletedfilemode(
259 git_patch_parsed *patch,
260 git_patch_parse_ctx *ctx)
261 {
262 git__free((char *)patch->base.delta->old_file.path);
263
264 patch->base.delta->old_file.path = NULL;
265 patch->base.delta->status = GIT_DELTA_DELETED;
266 patch->base.delta->nfiles = 1;
267
268 return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
269 }
270
271 static int parse_header_git_newfilemode(
272 git_patch_parsed *patch,
273 git_patch_parse_ctx *ctx)
274 {
275 git__free((char *)patch->base.delta->new_file.path);
276
277 patch->base.delta->new_file.path = NULL;
278 patch->base.delta->status = GIT_DELTA_ADDED;
279 patch->base.delta->nfiles = 1;
280
281 return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
282 }
283
284 static int parse_header_rename(
285 char **out,
286 git_patch_parse_ctx *ctx)
287 {
288 git_buf path = GIT_BUF_INIT;
289
290 if (parse_header_path_buf(&path, ctx) < 0)
291 return -1;
292
293 /* Note: the `rename from` and `rename to` lines include the literal
294 * filename. They do *not* include the prefix. (Who needs consistency?)
295 */
296 *out = git_buf_detach(&path);
297 return 0;
298 }
299
300 static int parse_header_renamefrom(
301 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
302 {
303 patch->base.delta->status = GIT_DELTA_RENAMED;
304 return parse_header_rename(&patch->rename_old_path, ctx);
305 }
306
307 static int parse_header_renameto(
308 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
309 {
310 patch->base.delta->status = GIT_DELTA_RENAMED;
311 return parse_header_rename(&patch->rename_new_path, ctx);
312 }
313
314 static int parse_header_copyfrom(
315 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
316 {
317 patch->base.delta->status = GIT_DELTA_COPIED;
318 return parse_header_rename(&patch->rename_old_path, ctx);
319 }
320
321 static int parse_header_copyto(
322 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
323 {
324 patch->base.delta->status = GIT_DELTA_COPIED;
325 return parse_header_rename(&patch->rename_new_path, ctx);
326 }
327
328 static int parse_header_percent(uint16_t *out, git_patch_parse_ctx *ctx)
329 {
330 int32_t val;
331 const char *end;
332
333 if (ctx->line_len < 1 || !git__isdigit(ctx->line[0]) ||
334 git__strntol32(&val, ctx->line, ctx->line_len, &end, 10) < 0)
335 return -1;
336
337 parse_advance_chars(ctx, (end - ctx->line));
338
339 if (parse_advance_expected_s(ctx, "%") < 0)
340 return -1;
341
342 if (val > 100)
343 return -1;
344
345 *out = val;
346 return 0;
347 }
348
349 static int parse_header_similarity(
350 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
351 {
352 if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0)
353 return parse_err("invalid similarity percentage at line %d",
354 ctx->line_num);
355
356 return 0;
357 }
358
359 static int parse_header_dissimilarity(
360 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
361 {
362 uint16_t dissimilarity;
363
364 if (parse_header_percent(&dissimilarity, ctx) < 0)
365 return parse_err("invalid similarity percentage at line %d",
366 ctx->line_num);
367
368 patch->base.delta->similarity = 100 - dissimilarity;
369
370 return 0;
371 }
372
/* One recognized line of a git patch header: `str` is the literal text
 * the line starts with and `fn` parses what follows it.  A NULL `fn`
 * marks a line that ends the header (see parse_header_git).
 */
typedef struct {
	const char *str;
	int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
} header_git_op;

/* Lookup table scanned in order by parse_header_git for every line that
 * follows the `diff --git` line.
 */
static const header_git_op header_git_ops[] = {
	/* lines that terminate the header: next patch, first hunk, binary body */
	{ "diff --git ", NULL },
	{ "@@ -", NULL },
	{ "GIT binary patch", NULL },
	{ "--- ", parse_header_git_oldpath },
	{ "+++ ", parse_header_git_newpath },
	{ "index ", parse_header_git_index },
	{ "old mode ", parse_header_git_oldmode },
	{ "new mode ", parse_header_git_newmode },
	{ "deleted file mode ", parse_header_git_deletedfilemode },
	{ "new file mode ", parse_header_git_newfilemode },
	{ "rename from ", parse_header_renamefrom },
	{ "rename to ", parse_header_renameto },
	/* `rename old` / `rename new` share the `rename from` / `rename to` handlers */
	{ "rename old ", parse_header_renamefrom },
	{ "rename new ", parse_header_renameto },
	{ "copy from ", parse_header_copyfrom },
	{ "copy to ", parse_header_copyto },
	{ "similarity index ", parse_header_similarity },
	{ "dissimilarity index ", parse_header_dissimilarity },
};
398
399 static int parse_header_git(
400 git_patch_parsed *patch,
401 git_patch_parse_ctx *ctx)
402 {
403 size_t i;
404 int error = 0;
405
406 /* Parse the diff --git line */
407 if (parse_advance_expected_s(ctx, "diff --git ") < 0)
408 return parse_err("corrupt git diff header at line %d", ctx->line_num);
409
410 if (parse_header_path(&patch->header_old_path, ctx) < 0)
411 return parse_err("corrupt old path in git diff header at line %d",
412 ctx->line_num);
413
414 if (parse_advance_ws(ctx) < 0 ||
415 parse_header_path(&patch->header_new_path, ctx) < 0)
416 return parse_err("corrupt new path in git diff header at line %d",
417 ctx->line_num);
418
419 /* Parse remaining header lines */
420 for (parse_advance_line(ctx);
421 ctx->remain_len > 0;
422 parse_advance_line(ctx)) {
423
424 bool found = false;
425
426 if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n')
427 break;
428
429 for (i = 0; i < ARRAY_SIZE(header_git_ops); i++) {
430 const header_git_op *op = &header_git_ops[i];
431 size_t op_len = strlen(op->str);
432
433 if (memcmp(ctx->line, op->str, min(op_len, ctx->line_len)) != 0)
434 continue;
435
436 /* Do not advance if this is the patch separator */
437 if (op->fn == NULL)
438 goto done;
439
440 parse_advance_chars(ctx, op_len);
441
442 if ((error = op->fn(patch, ctx)) < 0)
443 goto done;
444
445 parse_advance_ws(ctx);
446 parse_advance_expected_s(ctx, "\n");
447
448 if (ctx->line_len > 0) {
449 error = parse_err("trailing data at line %d", ctx->line_num);
450 goto done;
451 }
452
453 found = true;
454 break;
455 }
456
457 if (!found) {
458 error = parse_err("invalid patch header at line %d",
459 ctx->line_num);
460 goto done;
461 }
462 }
463
464 done:
465 return error;
466 }
467
468 static int parse_number(git_off_t *out, git_patch_parse_ctx *ctx)
469 {
470 const char *end;
471 int64_t num;
472
473 if (!git__isdigit(ctx->line[0]))
474 return -1;
475
476 if (git__strntol64(&num, ctx->line, ctx->line_len, &end, 10) < 0)
477 return -1;
478
479 if (num < 0)
480 return -1;
481
482 *out = num;
483 parse_advance_chars(ctx, (end - ctx->line));
484
485 return 0;
486 }
487
488 static int parse_int(int *out, git_patch_parse_ctx *ctx)
489 {
490 git_off_t num;
491
492 if (parse_number(&num, ctx) < 0 || !git__is_int(num))
493 return -1;
494
495 *out = (int)num;
496 return 0;
497 }
498
499 static int parse_hunk_header(
500 git_patch_hunk *hunk,
501 git_patch_parse_ctx *ctx)
502 {
503 const char *header_start = ctx->line;
504
505 hunk->hunk.old_lines = 1;
506 hunk->hunk.new_lines = 1;
507
508 if (parse_advance_expected_s(ctx, "@@ -") < 0 ||
509 parse_int(&hunk->hunk.old_start, ctx) < 0)
510 goto fail;
511
512 if (ctx->line_len > 0 && ctx->line[0] == ',') {
513 if (parse_advance_expected_s(ctx, ",") < 0 ||
514 parse_int(&hunk->hunk.old_lines, ctx) < 0)
515 goto fail;
516 }
517
518 if (parse_advance_expected_s(ctx, " +") < 0 ||
519 parse_int(&hunk->hunk.new_start, ctx) < 0)
520 goto fail;
521
522 if (ctx->line_len > 0 && ctx->line[0] == ',') {
523 if (parse_advance_expected_s(ctx, ",") < 0 ||
524 parse_int(&hunk->hunk.new_lines, ctx) < 0)
525 goto fail;
526 }
527
528 if (parse_advance_expected_s(ctx, " @@") < 0)
529 goto fail;
530
531 parse_advance_line(ctx);
532
533 if (!hunk->hunk.old_lines && !hunk->hunk.new_lines)
534 goto fail;
535
536 hunk->hunk.header_len = ctx->line - header_start;
537 if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1))
538 return parse_err("oversized patch hunk header at line %d",
539 ctx->line_num);
540
541 memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len);
542 hunk->hunk.header[hunk->hunk.header_len] = '\0';
543
544 return 0;
545
546 fail:
547 giterr_set(GITERR_PATCH, "invalid patch hunk header at line %d",
548 ctx->line_num);
549 return -1;
550 }
551
552 static int parse_hunk_body(
553 git_patch_parsed *patch,
554 git_patch_hunk *hunk,
555 git_patch_parse_ctx *ctx)
556 {
557 git_diff_line *line;
558 int error = 0;
559
560 int oldlines = hunk->hunk.old_lines;
561 int newlines = hunk->hunk.new_lines;
562
563 for (;
564 ctx->remain_len > 4 && (oldlines || newlines) &&
565 memcmp(ctx->line, "@@ -", 4) != 0;
566 parse_advance_line(ctx)) {
567
568 int origin;
569 int prefix = 1;
570
571 if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n') {
572 error = parse_err("invalid patch instruction at line %d",
573 ctx->line_num);
574 goto done;
575 }
576
577 switch (ctx->line[0]) {
578 case '\n':
579 prefix = 0;
580
581 case ' ':
582 origin = GIT_DIFF_LINE_CONTEXT;
583 oldlines--;
584 newlines--;
585 break;
586
587 case '-':
588 origin = GIT_DIFF_LINE_DELETION;
589 oldlines--;
590 break;
591
592 case '+':
593 origin = GIT_DIFF_LINE_ADDITION;
594 newlines--;
595 break;
596
597 default:
598 error = parse_err("invalid patch hunk at line %d", ctx->line_num);
599 goto done;
600 }
601
602 line = git_array_alloc(patch->base.lines);
603 GITERR_CHECK_ALLOC(line);
604
605 memset(line, 0x0, sizeof(git_diff_line));
606
607 line->content = ctx->line + prefix;
608 line->content_len = ctx->line_len - prefix;
609 line->content_offset = ctx->content_len - ctx->remain_len;
610 line->origin = origin;
611
612 hunk->line_count++;
613 }
614
615 if (oldlines || newlines) {
616 error = parse_err(
617 "invalid patch hunk, expected %d old lines and %d new lines",
618 hunk->hunk.old_lines, hunk->hunk.new_lines);
619 goto done;
620 }
621
622 /* Handle "\ No newline at end of file". Only expect the leading
623 * backslash, though, because the rest of the string could be
624 * localized. Because `diff` optimizes for the case where you
625 * want to apply the patch by hand.
626 */
627 if (parse_ctx_contains_s(ctx, "\\ ") &&
628 git_array_size(patch->base.lines) > 0) {
629
630 line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1);
631
632 if (line->content_len < 1) {
633 error = parse_err("cannot trim trailing newline of empty line");
634 goto done;
635 }
636
637 line->content_len--;
638
639 parse_advance_line(ctx);
640 }
641
642 done:
643 return error;
644 }
645
646 static int parse_patch_header(
647 git_patch_parsed *patch,
648 git_patch_parse_ctx *ctx)
649 {
650 int error = 0;
651
652 for (ctx->line = ctx->remain;
653 ctx->remain_len > 0;
654 parse_advance_line(ctx)) {
655
656 /* This line is too short to be a patch header. */
657 if (ctx->line_len < 6)
658 continue;
659
660 /* This might be a hunk header without a patch header, provide a
661 * sensible error message. */
662 if (parse_ctx_contains_s(ctx, "@@ -")) {
663 size_t line_num = ctx->line_num;
664 git_patch_hunk hunk;
665
666 /* If this cannot be parsed as a hunk header, it's just leading
667 * noise, continue.
668 */
669 if (parse_hunk_header(&hunk, ctx) < 0) {
670 giterr_clear();
671 continue;
672 }
673
674 error = parse_err("invalid hunk header outside patch at line %d",
675 line_num);
676 goto done;
677 }
678
679 /* This buffer is too short to contain a patch. */
680 if (ctx->remain_len < ctx->line_len + 6)
681 break;
682
683 /* A proper git patch */
684 if (parse_ctx_contains_s(ctx, "diff --git ")) {
685 error = parse_header_git(patch, ctx);
686 goto done;
687 }
688
689 error = 0;
690 continue;
691 }
692
693 giterr_set(GITERR_PATCH, "no patch found");
694 error = GIT_ENOTFOUND;
695
696 done:
697 return error;
698 }
699
700 static int parse_patch_binary_side(
701 git_diff_binary_file *binary,
702 git_patch_parse_ctx *ctx)
703 {
704 git_diff_binary_t type = GIT_DIFF_BINARY_NONE;
705 git_buf base85 = GIT_BUF_INIT, decoded = GIT_BUF_INIT;
706 git_off_t len;
707 int error = 0;
708
709 if (parse_ctx_contains_s(ctx, "literal ")) {
710 type = GIT_DIFF_BINARY_LITERAL;
711 parse_advance_chars(ctx, 8);
712 } else if (parse_ctx_contains_s(ctx, "delta ")) {
713 type = GIT_DIFF_BINARY_DELTA;
714 parse_advance_chars(ctx, 6);
715 } else {
716 error = parse_err(
717 "unknown binary delta type at line %d", ctx->line_num);
718 goto done;
719 }
720
721 if (parse_number(&len, ctx) < 0 || parse_advance_nl(ctx) < 0 || len < 0) {
722 error = parse_err("invalid binary size at line %d", ctx->line_num);
723 goto done;
724 }
725
726 while (ctx->line_len) {
727 char c = ctx->line[0];
728 size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size;
729
730 if (c == '\n')
731 break;
732 else if (c >= 'A' && c <= 'Z')
733 decoded_len = c - 'A' + 1;
734 else if (c >= 'a' && c <= 'z')
735 decoded_len = c - 'a' + (('z' - 'a') + 1) + 1;
736
737 if (!decoded_len) {
738 error = parse_err("invalid binary length at line %d", ctx->line_num);
739 goto done;
740 }
741
742 parse_advance_chars(ctx, 1);
743
744 encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5;
745
746 if (encoded_len > ctx->line_len - 1) {
747 error = parse_err("truncated binary data at line %d", ctx->line_num);
748 goto done;
749 }
750
751 if ((error = git_buf_decode_base85(
752 &decoded, ctx->line, encoded_len, decoded_len)) < 0)
753 goto done;
754
755 if (decoded.size - decoded_orig != decoded_len) {
756 error = parse_err("truncated binary data at line %d", ctx->line_num);
757 goto done;
758 }
759
760 parse_advance_chars(ctx, encoded_len);
761
762 if (parse_advance_nl(ctx) < 0) {
763 error = parse_err("trailing data at line %d", ctx->line_num);
764 goto done;
765 }
766 }
767
768 binary->type = type;
769 binary->inflatedlen = (size_t)len;
770 binary->datalen = decoded.size;
771 binary->data = git_buf_detach(&decoded);
772
773 done:
774 git_buf_free(&base85);
775 git_buf_free(&decoded);
776 return error;
777 }
778
779 static int parse_patch_binary(
780 git_patch_parsed *patch,
781 git_patch_parse_ctx *ctx)
782 {
783 int error;
784
785 if (parse_advance_expected_s(ctx, "GIT binary patch") < 0 ||
786 parse_advance_nl(ctx) < 0)
787 return parse_err("corrupt git binary header at line %d", ctx->line_num);
788
789 /* parse old->new binary diff */
790 if ((error = parse_patch_binary_side(
791 &patch->base.binary.new_file, ctx)) < 0)
792 return error;
793
794 if (parse_advance_nl(ctx) < 0)
795 return parse_err("corrupt git binary separator at line %d",
796 ctx->line_num);
797
798 /* parse new->old binary diff */
799 if ((error = parse_patch_binary_side(
800 &patch->base.binary.old_file, ctx)) < 0)
801 return error;
802
803 if (parse_advance_nl(ctx) < 0)
804 return parse_err("corrupt git binary patch separator at line %d",
805 ctx->line_num);
806
807 patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
808 return 0;
809 }
810
811 static int parse_patch_hunks(
812 git_patch_parsed *patch,
813 git_patch_parse_ctx *ctx)
814 {
815 git_patch_hunk *hunk;
816 int error = 0;
817
818 while (parse_ctx_contains_s(ctx, "@@ -")) {
819 hunk = git_array_alloc(patch->base.hunks);
820 GITERR_CHECK_ALLOC(hunk);
821
822 memset(hunk, 0, sizeof(git_patch_hunk));
823
824 hunk->line_start = git_array_size(patch->base.lines);
825 hunk->line_count = 0;
826
827 if ((error = parse_hunk_header(hunk, ctx)) < 0 ||
828 (error = parse_hunk_body(patch, hunk, ctx)) < 0)
829 goto done;
830 }
831
832 patch->base.delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;
833
834 done:
835 return error;
836 }
837
838 static int parse_patch_body(
839 git_patch_parsed *patch, git_patch_parse_ctx *ctx)
840 {
841 if (parse_ctx_contains_s(ctx, "GIT binary patch"))
842 return parse_patch_binary(patch, ctx);
843 else
844 return parse_patch_hunks(patch, ctx);
845 }
846
/* Cross-check a path from the `diff --git` line (`one`) against the one
 * from the matching `---`/`+++` line (`two`).
 *
 * If either is missing there is nothing to compare.  When `two_null` is
 * set, `two` must be exactly "/dev/null"; otherwise the two paths must be
 * identical.  `old_or_new` only labels the error message.
 */
int check_header_names(
	const char *one,
	const char *two,
	const char *old_or_new,
	bool two_null)
{
	if (one == NULL || two == NULL)
		return 0;

	if (two_null) {
		if (strcmp(two, "/dev/null") != 0)
			return parse_err("expected %s path of '/dev/null'", old_or_new);

		return 0;
	}

	if (strcmp(one, two) != 0)
		return parse_err("mismatched %s path names", old_or_new);

	return 0;
}
864
865 static int check_prefix(
866 char **out,
867 size_t *out_len,
868 git_patch_parsed *patch,
869 const char *path_start)
870 {
871 const char *path = path_start;
872 size_t prefix_len = patch->ctx->opts.prefix_len;
873 size_t remain_len = prefix_len;
874
875 *out = NULL;
876 *out_len = 0;
877
878 if (prefix_len == 0)
879 goto done;
880
881 /* leading slashes do not count as part of the prefix in git apply */
882 while (*path == '/')
883 path++;
884
885 while (*path && remain_len) {
886 if (*path == '/')
887 remain_len--;
888
889 path++;
890 }
891
892 if (remain_len || !*path)
893 return parse_err(
894 "header filename does not contain %d path components",
895 prefix_len);
896
897 done:
898 *out_len = (path - path_start);
899 *out = git__strndup(path_start, *out_len);
900
901 return (*out == NULL) ? -1 : 0;
902 }
903
904 static int check_filenames(git_patch_parsed *patch)
905 {
906 const char *prefixed_new, *prefixed_old;
907 size_t old_prefixlen = 0, new_prefixlen = 0;
908 bool added = (patch->base.delta->status == GIT_DELTA_ADDED);
909 bool deleted = (patch->base.delta->status == GIT_DELTA_DELETED);
910
911 if (patch->old_path && !patch->new_path)
912 return parse_err("missing new path");
913
914 if (!patch->old_path && patch->new_path)
915 return parse_err("missing old path");
916
917 /* Ensure (non-renamed) paths match */
918 if (check_header_names(
919 patch->header_old_path, patch->old_path, "old", added) < 0 ||
920 check_header_names(
921 patch->header_new_path, patch->new_path, "new", deleted) < 0)
922 return -1;
923
924 prefixed_old = (!added && patch->old_path) ? patch->old_path :
925 patch->header_old_path;
926 prefixed_new = (!deleted && patch->new_path) ? patch->new_path :
927 patch->header_new_path;
928
929 if (check_prefix(
930 &patch->old_prefix, &old_prefixlen, patch, prefixed_old) < 0 ||
931 check_prefix(
932 &patch->new_prefix, &new_prefixlen, patch, prefixed_new) < 0)
933 return -1;
934
935 /* Prefer the rename filenames as they are unambiguous and unprefixed */
936 if (patch->rename_old_path)
937 patch->base.delta->old_file.path = patch->rename_old_path;
938 else
939 patch->base.delta->old_file.path = prefixed_old + old_prefixlen;
940
941 if (patch->rename_new_path)
942 patch->base.delta->new_file.path = patch->rename_new_path;
943 else
944 patch->base.delta->new_file.path = prefixed_new + new_prefixlen;
945
946 if (!patch->base.delta->old_file.path &&
947 !patch->base.delta->new_file.path)
948 return parse_err("git diff header lacks old / new paths");
949
950 return 0;
951 }
952
953 static int check_patch(git_patch_parsed *patch)
954 {
955 git_diff_delta *delta = patch->base.delta;
956
957 if (check_filenames(patch) < 0)
958 return -1;
959
960 if (delta->old_file.path &&
961 delta->status != GIT_DELTA_DELETED &&
962 !delta->new_file.mode)
963 delta->new_file.mode = delta->old_file.mode;
964
965 if (delta->status == GIT_DELTA_MODIFIED &&
966 !(delta->flags & GIT_DIFF_FLAG_BINARY) &&
967 delta->new_file.mode == delta->old_file.mode &&
968 git_array_size(patch->base.hunks) == 0)
969 return parse_err("patch with no hunks");
970
971 if (delta->status == GIT_DELTA_ADDED) {
972 memset(&delta->old_file.id, 0x0, sizeof(git_oid));
973 delta->old_file.id_abbrev = 0;
974 }
975
976 if (delta->status == GIT_DELTA_DELETED) {
977 memset(&delta->new_file.id, 0x0, sizeof(git_oid));
978 delta->new_file.id_abbrev = 0;
979 }
980
981 return 0;
982 }
983
984 git_patch_parse_ctx *git_patch_parse_ctx_init(
985 const char *content,
986 size_t content_len,
987 const git_patch_options *opts)
988 {
989 git_patch_parse_ctx *ctx;
990 git_patch_options default_opts = GIT_PATCH_OPTIONS_INIT;
991
992 if ((ctx = git__calloc(1, sizeof(git_patch_parse_ctx))) == NULL)
993 return NULL;
994
995 if (content_len) {
996 if ((ctx->content = git__malloc(content_len)) == NULL)
997 return NULL;
998
999 memcpy((char *)ctx->content, content, content_len);
1000 }
1001
1002 ctx->content_len = content_len;
1003 ctx->remain = ctx->content;
1004 ctx->remain_len = ctx->content_len;
1005
1006 if (opts)
1007 memcpy(&ctx->opts, opts, sizeof(git_patch_options));
1008 else
1009 memcpy(&ctx->opts, &default_opts, sizeof(git_patch_options));
1010
1011 GIT_REFCOUNT_INC(ctx);
1012 return ctx;
1013 }
1014
1015 static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
1016 {
1017 if (!ctx)
1018 return;
1019
1020 git__free((char *)ctx->content);
1021 git__free(ctx);
1022 }
1023
/* Release one reference to `ctx` via GIT_REFCOUNT_DEC, passing
 * patch_parse_ctx_free as the function that destroys the context.
 */
void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
{
	GIT_REFCOUNT_DEC(ctx, patch_parse_ctx_free);
}
1028
1029 int git_patch_parsed_from_diff(git_patch **out, git_diff *d, size_t idx)
1030 {
1031 git_diff_parsed *diff = (git_diff_parsed *)d;
1032 git_patch *p;
1033
1034 if ((p = git_vector_get(&diff->patches, idx)) == NULL)
1035 return -1;
1036
1037 GIT_REFCOUNT_INC(p);
1038 *out = p;
1039
1040 return 0;
1041 }
1042
1043 static void patch_parsed__free(git_patch *p)
1044 {
1045 git_patch_parsed *patch = (git_patch_parsed *)p;
1046
1047 if (!patch)
1048 return;
1049
1050 git_patch_parse_ctx_free(patch->ctx);
1051
1052 git__free((char *)patch->base.binary.old_file.data);
1053 git__free((char *)patch->base.binary.new_file.data);
1054 git_array_clear(patch->base.hunks);
1055 git_array_clear(patch->base.lines);
1056 git__free(patch->base.delta);
1057
1058 git__free(patch->old_prefix);
1059 git__free(patch->new_prefix);
1060 git__free(patch->header_old_path);
1061 git__free(patch->header_new_path);
1062 git__free(patch->rename_old_path);
1063 git__free(patch->rename_new_path);
1064 git__free(patch->old_path);
1065 git__free(patch->new_path);
1066 git__free(patch);
1067 }
1068
1069 int git_patch_parse(
1070 git_patch **out,
1071 git_patch_parse_ctx *ctx)
1072 {
1073 git_patch_parsed *patch;
1074 size_t start, used;
1075 int error = 0;
1076
1077 assert(out && ctx);
1078
1079 *out = NULL;
1080
1081 patch = git__calloc(1, sizeof(git_patch_parsed));
1082 GITERR_CHECK_ALLOC(patch);
1083
1084 patch->ctx = ctx;
1085 GIT_REFCOUNT_INC(patch->ctx);
1086
1087 patch->base.free_fn = patch_parsed__free;
1088
1089 patch->base.delta = git__calloc(1, sizeof(git_diff_delta));
1090 GITERR_CHECK_ALLOC(patch->base.delta);
1091
1092 patch->base.delta->status = GIT_DELTA_MODIFIED;
1093 patch->base.delta->nfiles = 2;
1094
1095 start = ctx->remain_len;
1096
1097 if ((error = parse_patch_header(patch, ctx)) < 0 ||
1098 (error = parse_patch_body(patch, ctx)) < 0 ||
1099 (error = check_patch(patch)) < 0)
1100 goto done;
1101
1102 used = start - ctx->remain_len;
1103 ctx->remain += used;
1104
1105 patch->base.diff_opts.old_prefix = patch->old_prefix;
1106 patch->base.diff_opts.new_prefix = patch->new_prefix;
1107 patch->base.diff_opts.flags |= GIT_DIFF_SHOW_BINARY;
1108
1109 GIT_REFCOUNT_INC(patch);
1110 *out = &patch->base;
1111
1112 done:
1113 if (error < 0)
1114 patch_parsed__free(&patch->base);
1115
1116 return error;
1117 }
1118
1119 int git_patch_from_buffer(
1120 git_patch **out,
1121 const char *content,
1122 size_t content_len,
1123 const git_patch_options *opts)
1124 {
1125 git_patch_parse_ctx *ctx;
1126 int error;
1127
1128 ctx = git_patch_parse_ctx_init(content, content_len, opts);
1129 GITERR_CHECK_ALLOC(ctx);
1130
1131 error = git_patch_parse(out, ctx);
1132
1133 git_patch_parse_ctx_free(ctx);
1134 return error;
1135 }
1136