]>
Commit | Line | Data |
---|---|---|
804d5fe9 ET |
1 | #include "git2/patch.h" |
2 | #include "patch.h" | |
3 | #include "path.h" | |
4 | ||
/*
 * Record a GITERR_PATCH error using a printf-style format and evaluate
 * to -1, so a failure can be reported and returned in one expression.
 */
#define parse_err(...) (giterr_set(GITERR_PATCH, __VA_ARGS__), -1)
7 | ||
b85bd8ce | 8 | /* TODO: remove this, just use git_patch */ |
804d5fe9 ET |
9 | typedef struct { |
10 | git_patch base; | |
804d5fe9 ET |
11 | } git_patch_parsed; |
12 | ||
/* Cursor and scratch state shared by every parsing routine in this file. */
typedef struct {
	/* The complete input buffer and its size in bytes. */
	const char *content;
	size_t content_len;

	/* Current position, bytes left in the current line, and line counter. */
	const char *line;
	size_t line_len;
	size_t line_num;

	/* Bytes between `line` and the end of `content`. */
	size_t remain;

	/* TODO: move this into the parse struct? its lifecycle is odd... */
	/* Heap-allocated paths taken from the "diff --git" line. */
	char *header_new_path;
	char *header_old_path;
} patch_parse_ctx;
27 | ||
28 | ||
29 | static void parse_advance_line(patch_parse_ctx *ctx) | |
30 | { | |
31 | ctx->line += ctx->line_len; | |
32 | ctx->remain -= ctx->line_len; | |
33 | ctx->line_len = git__linenlen(ctx->line, ctx->remain); | |
34 | ctx->line_num++; | |
35 | } | |
36 | ||
37 | static void parse_advance_chars(patch_parse_ctx *ctx, size_t char_cnt) | |
38 | { | |
39 | ctx->line += char_cnt; | |
40 | ctx->remain -= char_cnt; | |
41 | ctx->line_len -= char_cnt; | |
42 | } | |
43 | ||
44 | static int parse_advance_expected( | |
45 | patch_parse_ctx *ctx, | |
46 | const char *expected, | |
47 | size_t expected_len) | |
48 | { | |
49 | if (ctx->line_len < expected_len) | |
50 | return -1; | |
51 | ||
52 | if (memcmp(ctx->line, expected, expected_len) != 0) | |
53 | return -1; | |
54 | ||
55 | parse_advance_chars(ctx, expected_len); | |
56 | return 0; | |
57 | } | |
58 | ||
59 | static int parse_advance_ws(patch_parse_ctx *ctx) | |
60 | { | |
61 | int ret = -1; | |
62 | ||
63 | while (ctx->line_len > 0 && | |
64 | ctx->line[0] != '\n' && | |
65 | git__isspace(ctx->line[0])) { | |
66 | ctx->line++; | |
67 | ctx->line_len--; | |
68 | ctx->remain--; | |
69 | ret = 0; | |
70 | } | |
71 | ||
72 | return ret; | |
73 | } | |
74 | ||
75 | static int parse_advance_nl(patch_parse_ctx *ctx) | |
76 | { | |
77 | if (ctx->line_len != 1 || ctx->line[0] != '\n') | |
78 | return -1; | |
79 | ||
80 | parse_advance_line(ctx); | |
81 | return 0; | |
82 | } | |
83 | ||
84 | static int header_path_len(patch_parse_ctx *ctx) | |
85 | { | |
86 | bool inquote = 0; | |
87 | bool quoted = (ctx->line_len > 0 && ctx->line[0] == '"'); | |
88 | size_t len; | |
89 | ||
90 | for (len = quoted; len < ctx->line_len; len++) { | |
91 | if (!quoted && git__isspace(ctx->line[len])) | |
92 | break; | |
93 | else if (quoted && !inquote && ctx->line[len] == '"') { | |
94 | len++; | |
95 | break; | |
96 | } | |
97 | ||
98 | inquote = (!inquote && ctx->line[len] == '\\'); | |
99 | } | |
100 | ||
101 | return len; | |
102 | } | |
103 | ||
104 | static int parse_header_path_buf(git_buf *path, patch_parse_ctx *ctx) | |
105 | { | |
106 | int path_len, error = 0; | |
107 | ||
108 | path_len = header_path_len(ctx); | |
109 | ||
110 | if ((error = git_buf_put(path, ctx->line, path_len)) < 0) | |
111 | goto done; | |
112 | ||
113 | parse_advance_chars(ctx, path_len); | |
114 | ||
115 | git_buf_rtrim(path); | |
116 | ||
117 | if (path->size > 0 && path->ptr[0] == '"') | |
118 | error = git_buf_unquote(path); | |
119 | ||
120 | if (error < 0) | |
121 | goto done; | |
122 | ||
123 | git_path_squash_slashes(path); | |
124 | ||
125 | done: | |
126 | return error; | |
127 | } | |
128 | ||
129 | static int parse_header_path(char **out, patch_parse_ctx *ctx) | |
130 | { | |
131 | git_buf path = GIT_BUF_INIT; | |
132 | int error = parse_header_path_buf(&path, ctx); | |
133 | ||
134 | *out = git_buf_detach(&path); | |
135 | ||
136 | return error; | |
137 | } | |
138 | ||
139 | static int parse_header_git_oldpath( | |
140 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
141 | { | |
b85bd8ce | 142 | return parse_header_path((char **)&patch->base.delta->old_file.path, ctx); |
804d5fe9 ET |
143 | } |
144 | ||
145 | static int parse_header_git_newpath( | |
146 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
147 | { | |
b85bd8ce | 148 | return parse_header_path((char **)&patch->base.delta->new_file.path, ctx); |
804d5fe9 ET |
149 | } |
150 | ||
151 | static int parse_header_mode(uint16_t *mode, patch_parse_ctx *ctx) | |
152 | { | |
153 | const char *end; | |
154 | int32_t m; | |
155 | int ret; | |
156 | ||
157 | if (ctx->line_len < 1 || !git__isdigit(ctx->line[0])) | |
158 | return parse_err("invalid file mode at line %d", ctx->line_num); | |
159 | ||
160 | if ((ret = git__strntol32(&m, ctx->line, ctx->line_len, &end, 8)) < 0) | |
161 | return ret; | |
162 | ||
163 | if (m > UINT16_MAX) | |
164 | return -1; | |
165 | ||
166 | *mode = (uint16_t)m; | |
167 | ||
168 | parse_advance_chars(ctx, (end - ctx->line)); | |
169 | ||
170 | return ret; | |
171 | } | |
172 | ||
173 | static int parse_header_oid( | |
174 | git_oid *oid, | |
175 | size_t *oid_len, | |
176 | patch_parse_ctx *ctx) | |
177 | { | |
178 | size_t len; | |
179 | ||
180 | for (len = 0; len < ctx->line_len && len < GIT_OID_HEXSZ; len++) { | |
181 | if (!git__isxdigit(ctx->line[len])) | |
182 | break; | |
183 | } | |
184 | ||
185 | if (len < GIT_OID_MINPREFIXLEN || | |
186 | git_oid_fromstrn(oid, ctx->line, len) < 0) | |
187 | return parse_err("invalid hex formatted object id at line %d", | |
188 | ctx->line_num); | |
189 | ||
190 | parse_advance_chars(ctx, len); | |
191 | ||
192 | *oid_len = len; | |
193 | ||
194 | return 0; | |
195 | } | |
196 | ||
197 | static int parse_header_git_index( | |
198 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
199 | { | |
d68cb736 ET |
200 | if (parse_header_oid(&patch->base.delta->old_file.id, |
201 | &patch->base.delta->old_file.id_abbrev, ctx) < 0 || | |
804d5fe9 | 202 | parse_advance_expected(ctx, "..", 2) < 0 || |
d68cb736 ET |
203 | parse_header_oid(&patch->base.delta->new_file.id, |
204 | &patch->base.delta->new_file.id_abbrev, ctx) < 0) | |
804d5fe9 ET |
205 | return -1; |
206 | ||
207 | if (ctx->line_len > 0 && ctx->line[0] == ' ') { | |
208 | uint16_t mode; | |
209 | ||
210 | parse_advance_chars(ctx, 1); | |
211 | ||
212 | if (parse_header_mode(&mode, ctx) < 0) | |
213 | return -1; | |
214 | ||
215 | if (!patch->base.delta->new_file.mode) | |
216 | patch->base.delta->new_file.mode = mode; | |
217 | ||
218 | if (!patch->base.delta->old_file.mode) | |
219 | patch->base.delta->old_file.mode = mode; | |
220 | } | |
221 | ||
222 | return 0; | |
223 | } | |
224 | ||
225 | static int parse_header_git_oldmode( | |
226 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
227 | { | |
b85bd8ce | 228 | return parse_header_mode(&patch->base.delta->old_file.mode, ctx); |
804d5fe9 ET |
229 | } |
230 | ||
231 | static int parse_header_git_newmode( | |
232 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
233 | { | |
b85bd8ce | 234 | return parse_header_mode(&patch->base.delta->new_file.mode, ctx); |
804d5fe9 ET |
235 | } |
236 | ||
237 | static int parse_header_git_deletedfilemode( | |
238 | git_patch_parsed *patch, | |
239 | patch_parse_ctx *ctx) | |
240 | { | |
b85bd8ce | 241 | git__free((char *)patch->base.delta->old_file.path); |
804d5fe9 | 242 | |
b85bd8ce | 243 | patch->base.delta->old_file.path = NULL; |
804d5fe9 ET |
244 | patch->base.delta->status = GIT_DELTA_DELETED; |
245 | ||
b85bd8ce | 246 | return parse_header_mode(&patch->base.delta->old_file.mode, ctx); |
804d5fe9 ET |
247 | } |
248 | ||
249 | static int parse_header_git_newfilemode( | |
250 | git_patch_parsed *patch, | |
251 | patch_parse_ctx *ctx) | |
252 | { | |
b85bd8ce | 253 | git__free((char *)patch->base.delta->new_file.path); |
804d5fe9 | 254 | |
b85bd8ce | 255 | patch->base.delta->new_file.path = NULL; |
804d5fe9 ET |
256 | patch->base.delta->status = GIT_DELTA_ADDED; |
257 | ||
b85bd8ce | 258 | return parse_header_mode(&patch->base.delta->new_file.mode, ctx); |
804d5fe9 ET |
259 | } |
260 | ||
261 | static int parse_header_rename( | |
262 | char **out, | |
263 | char **header_path, | |
264 | patch_parse_ctx *ctx) | |
265 | { | |
266 | git_buf path = GIT_BUF_INIT; | |
267 | size_t header_path_len, prefix_len; | |
268 | ||
269 | if (*header_path == NULL) | |
270 | return parse_err("rename without proper git diff header at line %d", | |
271 | ctx->line_num); | |
272 | ||
273 | header_path_len = strlen(*header_path); | |
274 | ||
275 | if (parse_header_path_buf(&path, ctx) < 0) | |
276 | return -1; | |
277 | ||
278 | if (header_path_len < git_buf_len(&path)) | |
279 | return parse_err("rename path is invalid at line %d", ctx->line_num); | |
280 | ||
281 | /* This sanity check exists because git core uses the data in the | |
282 | * "rename from" / "rename to" lines, but it's formatted differently | |
283 | * than the other paths and lacks the normal prefix. This irregularity | |
284 | * causes us to ignore these paths (we always store the prefixed paths) | |
285 | * but instead validate that they match the suffix of the paths we parsed | |
286 | * since we would behave differently from git core if they ever differed. | |
287 | * Instead, we raise an error, rather than parsing differently. | |
288 | */ | |
289 | prefix_len = header_path_len - path.size; | |
290 | ||
291 | if (strncmp(*header_path + prefix_len, path.ptr, path.size) != 0 || | |
292 | (prefix_len > 0 && (*header_path)[prefix_len - 1] != '/')) | |
293 | return parse_err("rename path does not match header at line %d", | |
294 | ctx->line_num); | |
295 | ||
296 | *out = *header_path; | |
297 | *header_path = NULL; | |
298 | ||
299 | git_buf_free(&path); | |
300 | ||
301 | return 0; | |
302 | } | |
303 | ||
304 | static int parse_header_renamefrom( | |
305 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
306 | { | |
307 | patch->base.delta->status |= GIT_DELTA_RENAMED; | |
308 | ||
309 | return parse_header_rename( | |
b85bd8ce | 310 | (char **)&patch->base.delta->old_file.path, |
804d5fe9 ET |
311 | &ctx->header_old_path, |
312 | ctx); | |
313 | } | |
314 | ||
315 | static int parse_header_renameto( | |
316 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
317 | { | |
318 | patch->base.delta->status |= GIT_DELTA_RENAMED; | |
319 | ||
320 | return parse_header_rename( | |
b85bd8ce | 321 | (char **)&patch->base.delta->new_file.path, |
804d5fe9 ET |
322 | &ctx->header_new_path, |
323 | ctx); | |
324 | } | |
325 | ||
326 | static int parse_header_percent(uint16_t *out, patch_parse_ctx *ctx) | |
327 | { | |
328 | int32_t val; | |
329 | const char *end; | |
330 | ||
331 | if (ctx->line_len < 1 || !git__isdigit(ctx->line[0]) || | |
332 | git__strntol32(&val, ctx->line, ctx->line_len, &end, 10) < 0) | |
333 | return -1; | |
334 | ||
335 | parse_advance_chars(ctx, (end - ctx->line)); | |
336 | ||
337 | if (parse_advance_expected(ctx, "%", 1) < 0) | |
338 | return -1; | |
339 | ||
340 | if (val > 100) | |
341 | return -1; | |
342 | ||
343 | *out = val; | |
344 | return 0; | |
345 | } | |
346 | ||
347 | static int parse_header_similarity( | |
348 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
349 | { | |
350 | if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0) | |
351 | return parse_err("invalid similarity percentage at line %d", | |
352 | ctx->line_num); | |
353 | ||
354 | return 0; | |
355 | } | |
356 | ||
357 | static int parse_header_dissimilarity( | |
358 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
359 | { | |
360 | uint16_t dissimilarity; | |
361 | ||
362 | if (parse_header_percent(&dissimilarity, ctx) < 0) | |
363 | return parse_err("invalid similarity percentage at line %d", | |
364 | ctx->line_num); | |
365 | ||
366 | patch->base.delta->similarity = 100 - dissimilarity; | |
367 | ||
368 | return 0; | |
369 | } | |
370 | ||
371 | typedef struct { | |
372 | const char *str; | |
373 | int(*fn)(git_patch_parsed *, patch_parse_ctx *); | |
374 | } header_git_op; | |
375 | ||
376 | static const header_git_op header_git_ops[] = { | |
377 | { "@@ -", NULL }, | |
378 | { "GIT binary patch", NULL }, | |
379 | { "--- ", parse_header_git_oldpath }, | |
380 | { "+++ ", parse_header_git_newpath }, | |
381 | { "index ", parse_header_git_index }, | |
382 | { "old mode ", parse_header_git_oldmode }, | |
383 | { "new mode ", parse_header_git_newmode }, | |
384 | { "deleted file mode ", parse_header_git_deletedfilemode }, | |
385 | { "new file mode ", parse_header_git_newfilemode }, | |
386 | { "rename from ", parse_header_renamefrom }, | |
387 | { "rename to ", parse_header_renameto }, | |
388 | { "rename old ", parse_header_renamefrom }, | |
389 | { "rename new ", parse_header_renameto }, | |
390 | { "similarity index ", parse_header_similarity }, | |
391 | { "dissimilarity index ", parse_header_dissimilarity }, | |
392 | }; | |
393 | ||
394 | static int parse_header_git( | |
395 | git_patch_parsed *patch, | |
396 | patch_parse_ctx *ctx) | |
397 | { | |
398 | size_t i; | |
399 | int error = 0; | |
400 | ||
401 | /* Parse the diff --git line */ | |
402 | if (parse_advance_expected(ctx, "diff --git ", 11) < 0) | |
403 | return parse_err("corrupt git diff header at line %d", ctx->line_num); | |
404 | ||
405 | if (parse_header_path(&ctx->header_old_path, ctx) < 0) | |
406 | return parse_err("corrupt old path in git diff header at line %d", | |
407 | ctx->line_num); | |
408 | ||
409 | if (parse_advance_ws(ctx) < 0 || | |
410 | parse_header_path(&ctx->header_new_path, ctx) < 0) | |
411 | return parse_err("corrupt new path in git diff header at line %d", | |
412 | ctx->line_num); | |
413 | ||
414 | /* Parse remaining header lines */ | |
415 | for (parse_advance_line(ctx); ctx->remain > 0; parse_advance_line(ctx)) { | |
416 | if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n') | |
417 | break; | |
418 | ||
419 | for (i = 0; i < ARRAY_SIZE(header_git_ops); i++) { | |
420 | const header_git_op *op = &header_git_ops[i]; | |
421 | size_t op_len = strlen(op->str); | |
422 | ||
423 | if (memcmp(ctx->line, op->str, min(op_len, ctx->line_len)) != 0) | |
424 | continue; | |
425 | ||
426 | /* Do not advance if this is the patch separator */ | |
427 | if (op->fn == NULL) | |
428 | goto done; | |
429 | ||
430 | parse_advance_chars(ctx, op_len); | |
431 | ||
432 | if ((error = op->fn(patch, ctx)) < 0) | |
433 | goto done; | |
434 | ||
435 | parse_advance_ws(ctx); | |
436 | parse_advance_expected(ctx, "\n", 1); | |
437 | ||
438 | if (ctx->line_len > 0) { | |
439 | error = parse_err("trailing data at line %d", ctx->line_num); | |
440 | goto done; | |
441 | } | |
442 | ||
443 | break; | |
444 | } | |
445 | } | |
446 | ||
447 | done: | |
448 | return error; | |
449 | } | |
450 | ||
451 | static int parse_number(git_off_t *out, patch_parse_ctx *ctx) | |
452 | { | |
453 | const char *end; | |
454 | int64_t num; | |
455 | ||
456 | if (!git__isdigit(ctx->line[0])) | |
457 | return -1; | |
458 | ||
459 | if (git__strntol64(&num, ctx->line, ctx->line_len, &end, 10) < 0) | |
460 | return -1; | |
461 | ||
462 | if (num < 0) | |
463 | return -1; | |
464 | ||
465 | *out = num; | |
466 | parse_advance_chars(ctx, (end - ctx->line)); | |
467 | ||
468 | return 0; | |
469 | } | |
470 | ||
471 | static int parse_int(int *out, patch_parse_ctx *ctx) | |
472 | { | |
473 | git_off_t num; | |
474 | ||
475 | if (parse_number(&num, ctx) < 0 || !git__is_int(num)) | |
476 | return -1; | |
477 | ||
478 | *out = (int)num; | |
479 | return 0; | |
480 | } | |
481 | ||
482 | static int parse_hunk_header( | |
483 | git_patch_hunk *hunk, | |
484 | patch_parse_ctx *ctx) | |
485 | { | |
486 | const char *header_start = ctx->line; | |
487 | ||
488 | hunk->hunk.old_lines = 1; | |
489 | hunk->hunk.new_lines = 1; | |
490 | ||
491 | if (parse_advance_expected(ctx, "@@ -", 4) < 0 || | |
492 | parse_int(&hunk->hunk.old_start, ctx) < 0) | |
493 | goto fail; | |
494 | ||
495 | if (ctx->line_len > 0 && ctx->line[0] == ',') { | |
496 | if (parse_advance_expected(ctx, ",", 1) < 0 || | |
497 | parse_int(&hunk->hunk.old_lines, ctx) < 0) | |
498 | goto fail; | |
499 | } | |
500 | ||
501 | if (parse_advance_expected(ctx, " +", 2) < 0 || | |
502 | parse_int(&hunk->hunk.new_start, ctx) < 0) | |
503 | goto fail; | |
504 | ||
505 | if (ctx->line_len > 0 && ctx->line[0] == ',') { | |
506 | if (parse_advance_expected(ctx, ",", 1) < 0 || | |
507 | parse_int(&hunk->hunk.new_lines, ctx) < 0) | |
508 | goto fail; | |
509 | } | |
510 | ||
511 | if (parse_advance_expected(ctx, " @@", 3) < 0) | |
512 | goto fail; | |
513 | ||
514 | parse_advance_line(ctx); | |
515 | ||
516 | if (!hunk->hunk.old_lines && !hunk->hunk.new_lines) | |
517 | goto fail; | |
518 | ||
519 | hunk->hunk.header_len = ctx->line - header_start; | |
520 | if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1)) | |
521 | return parse_err("oversized patch hunk header at line %d", | |
522 | ctx->line_num); | |
523 | ||
524 | memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len); | |
525 | hunk->hunk.header[hunk->hunk.header_len] = '\0'; | |
526 | ||
527 | return 0; | |
528 | ||
529 | fail: | |
530 | giterr_set(GITERR_PATCH, "invalid patch hunk header at line %d", | |
531 | ctx->line_num); | |
532 | return -1; | |
533 | } | |
534 | ||
535 | static int parse_hunk_body( | |
536 | git_patch_parsed *patch, | |
537 | git_patch_hunk *hunk, | |
538 | patch_parse_ctx *ctx) | |
539 | { | |
540 | git_diff_line *line; | |
541 | int error = 0; | |
542 | ||
543 | int oldlines = hunk->hunk.old_lines; | |
544 | int newlines = hunk->hunk.new_lines; | |
545 | ||
546 | for (; | |
547 | ctx->remain > 4 && (oldlines || newlines) && | |
548 | memcmp(ctx->line, "@@ -", 4) != 0; | |
549 | parse_advance_line(ctx)) { | |
550 | ||
551 | int origin; | |
552 | int prefix = 1; | |
553 | ||
554 | if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n') { | |
555 | error = parse_err("invalid patch instruction at line %d", | |
556 | ctx->line_num); | |
557 | goto done; | |
558 | } | |
559 | ||
560 | switch (ctx->line[0]) { | |
561 | case '\n': | |
562 | prefix = 0; | |
563 | ||
564 | case ' ': | |
565 | origin = GIT_DIFF_LINE_CONTEXT; | |
566 | oldlines--; | |
567 | newlines--; | |
568 | break; | |
569 | ||
570 | case '-': | |
571 | origin = GIT_DIFF_LINE_DELETION; | |
572 | oldlines--; | |
573 | break; | |
574 | ||
575 | case '+': | |
576 | origin = GIT_DIFF_LINE_ADDITION; | |
577 | newlines--; | |
578 | break; | |
579 | ||
580 | default: | |
581 | error = parse_err("invalid patch hunk at line %d", ctx->line_num); | |
582 | goto done; | |
583 | } | |
584 | ||
585 | line = git_array_alloc(patch->base.lines); | |
586 | GITERR_CHECK_ALLOC(line); | |
587 | ||
588 | memset(line, 0x0, sizeof(git_diff_line)); | |
589 | ||
590 | line->content = ctx->line + prefix; | |
591 | line->content_len = ctx->line_len - prefix; | |
592 | line->content_offset = ctx->content_len - ctx->remain; | |
593 | line->origin = origin; | |
594 | ||
595 | hunk->line_count++; | |
596 | } | |
597 | ||
598 | if (oldlines || newlines) { | |
599 | error = parse_err( | |
600 | "invalid patch hunk, expected %d old lines and %d new lines", | |
601 | hunk->hunk.old_lines, hunk->hunk.new_lines); | |
602 | goto done; | |
603 | } | |
604 | ||
605 | /* Handle "\ No newline at end of file". Only expect the leading | |
606 | * backslash, though, because the rest of the string could be | |
607 | * localized. Because `diff` optimizes for the case where you | |
608 | * want to apply the patch by hand. | |
609 | */ | |
610 | if (ctx->line_len >= 2 && memcmp(ctx->line, "\\ ", 2) == 0 && | |
611 | git_array_size(patch->base.lines) > 0) { | |
612 | ||
613 | line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1); | |
614 | ||
615 | if (line->content_len < 1) { | |
616 | error = parse_err("cannot trim trailing newline of empty line"); | |
617 | goto done; | |
618 | } | |
619 | ||
620 | line->content_len--; | |
621 | ||
622 | parse_advance_line(ctx); | |
623 | } | |
624 | ||
625 | done: | |
626 | return error; | |
627 | } | |
628 | ||
629 | static int parsed_patch_header( | |
630 | git_patch_parsed *patch, | |
631 | patch_parse_ctx *ctx) | |
632 | { | |
633 | int error = 0; | |
634 | ||
635 | for (ctx->line = ctx->content; ctx->remain > 0; parse_advance_line(ctx)) { | |
636 | /* This line is too short to be a patch header. */ | |
637 | if (ctx->line_len < 6) | |
638 | continue; | |
639 | ||
640 | /* This might be a hunk header without a patch header, provide a | |
641 | * sensible error message. */ | |
642 | if (memcmp(ctx->line, "@@ -", 4) == 0) { | |
643 | size_t line_num = ctx->line_num; | |
644 | git_patch_hunk hunk; | |
645 | ||
646 | /* If this cannot be parsed as a hunk header, it's just leading | |
647 | * noise, continue. | |
648 | */ | |
649 | if (parse_hunk_header(&hunk, ctx) < 0) { | |
650 | giterr_clear(); | |
651 | continue; | |
652 | } | |
653 | ||
654 | error = parse_err("invalid hunk header outside patch at line %d", | |
655 | line_num); | |
656 | goto done; | |
657 | } | |
658 | ||
659 | /* This buffer is too short to contain a patch. */ | |
660 | if (ctx->remain < ctx->line_len + 6) | |
661 | break; | |
662 | ||
663 | /* A proper git patch */ | |
664 | if (ctx->line_len >= 11 && memcmp(ctx->line, "diff --git ", 11) == 0) { | |
665 | if ((error = parse_header_git(patch, ctx)) < 0) | |
666 | goto done; | |
667 | ||
668 | /* For modechange only patches, it does not include filenames; | |
669 | * instead we need to use the paths in the diff --git header. | |
670 | */ | |
b85bd8ce ET |
671 | if (!patch->base.delta->old_file.path && |
672 | !patch->base.delta->new_file.path) { | |
673 | ||
804d5fe9 ET |
674 | if (!ctx->header_old_path || !ctx->header_new_path) { |
675 | error = parse_err("git diff header lacks old / new paths"); | |
676 | goto done; | |
677 | } | |
678 | ||
b85bd8ce | 679 | patch->base.delta->old_file.path = ctx->header_old_path; |
804d5fe9 ET |
680 | ctx->header_old_path = NULL; |
681 | ||
b85bd8ce | 682 | patch->base.delta->new_file.path = ctx->header_new_path; |
804d5fe9 ET |
683 | ctx->header_new_path = NULL; |
684 | } | |
685 | ||
686 | goto done; | |
687 | } | |
688 | ||
689 | error = 0; | |
690 | continue; | |
691 | } | |
692 | ||
693 | error = parse_err("no header in patch file"); | |
694 | ||
695 | done: | |
696 | return error; | |
697 | } | |
698 | ||
699 | static int parsed_patch_binary_side( | |
700 | git_diff_binary_file *binary, | |
701 | patch_parse_ctx *ctx) | |
702 | { | |
703 | git_diff_binary_t type = GIT_DIFF_BINARY_NONE; | |
704 | git_buf base85 = GIT_BUF_INIT, decoded = GIT_BUF_INIT; | |
705 | git_off_t len; | |
706 | int error = 0; | |
707 | ||
708 | if (ctx->line_len >= 8 && memcmp(ctx->line, "literal ", 8) == 0) { | |
709 | type = GIT_DIFF_BINARY_LITERAL; | |
710 | parse_advance_chars(ctx, 8); | |
711 | } | |
712 | else if (ctx->line_len >= 6 && memcmp(ctx->line, "delta ", 6) == 0) { | |
713 | type = GIT_DIFF_BINARY_DELTA; | |
714 | parse_advance_chars(ctx, 6); | |
715 | } | |
716 | else { | |
717 | error = parse_err("unknown binary delta type at line %d", ctx->line_num); | |
718 | goto done; | |
719 | } | |
720 | ||
721 | if (parse_number(&len, ctx) < 0 || parse_advance_nl(ctx) < 0 || len < 0) { | |
722 | error = parse_err("invalid binary size at line %d", ctx->line_num); | |
723 | goto done; | |
724 | } | |
725 | ||
726 | while (ctx->line_len) { | |
727 | char c = ctx->line[0]; | |
728 | size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size; | |
729 | ||
730 | if (c == '\n') | |
731 | break; | |
732 | else if (c >= 'A' && c <= 'Z') | |
733 | decoded_len = c - 'A' + 1; | |
734 | else if (c >= 'a' && c <= 'z') | |
735 | decoded_len = c - 'a' + (('z' - 'a') + 1) + 1; | |
736 | ||
737 | if (!decoded_len) { | |
738 | error = parse_err("invalid binary length at line %d", ctx->line_num); | |
739 | goto done; | |
740 | } | |
741 | ||
742 | parse_advance_chars(ctx, 1); | |
743 | ||
744 | encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5; | |
745 | ||
746 | if (encoded_len > ctx->line_len - 1) { | |
747 | error = parse_err("truncated binary data at line %d", ctx->line_num); | |
748 | goto done; | |
749 | } | |
750 | ||
751 | if ((error = git_buf_decode_base85( | |
752 | &decoded, ctx->line, encoded_len, decoded_len)) < 0) | |
753 | goto done; | |
754 | ||
755 | if (decoded.size - decoded_orig != decoded_len) { | |
756 | error = parse_err("truncated binary data at line %d", ctx->line_num); | |
757 | goto done; | |
758 | } | |
759 | ||
760 | parse_advance_chars(ctx, encoded_len); | |
761 | ||
762 | if (parse_advance_nl(ctx) < 0) { | |
763 | error = parse_err("trailing data at line %d", ctx->line_num); | |
764 | goto done; | |
765 | } | |
766 | } | |
767 | ||
768 | binary->type = type; | |
769 | binary->inflatedlen = (size_t)len; | |
770 | binary->datalen = decoded.size; | |
771 | binary->data = git_buf_detach(&decoded); | |
772 | ||
773 | done: | |
774 | git_buf_free(&base85); | |
775 | git_buf_free(&decoded); | |
776 | return error; | |
777 | } | |
778 | ||
779 | static int parsed_patch_binary( | |
780 | git_patch_parsed *patch, | |
781 | patch_parse_ctx *ctx) | |
782 | { | |
783 | int error; | |
784 | ||
785 | if (parse_advance_expected(ctx, "GIT binary patch", 16) < 0 || | |
786 | parse_advance_nl(ctx) < 0) | |
787 | return parse_err("corrupt git binary header at line %d", ctx->line_num); | |
788 | ||
789 | /* parse old->new binary diff */ | |
790 | if ((error = parsed_patch_binary_side( | |
791 | &patch->base.binary.new_file, ctx)) < 0) | |
792 | return error; | |
793 | ||
794 | if (parse_advance_nl(ctx) < 0) | |
795 | return parse_err("corrupt git binary separator at line %d", | |
796 | ctx->line_num); | |
797 | ||
798 | /* parse new->old binary diff */ | |
799 | if ((error = parsed_patch_binary_side( | |
800 | &patch->base.binary.old_file, ctx)) < 0) | |
801 | return error; | |
802 | ||
803 | patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY; | |
804 | return 0; | |
805 | } | |
806 | ||
807 | static int parsed_patch_hunks( | |
808 | git_patch_parsed *patch, | |
809 | patch_parse_ctx *ctx) | |
810 | { | |
811 | git_patch_hunk *hunk; | |
812 | int error = 0; | |
813 | ||
814 | for (; ctx->line_len > 4 && memcmp(ctx->line, "@@ -", 4) == 0; ) { | |
815 | ||
816 | hunk = git_array_alloc(patch->base.hunks); | |
817 | GITERR_CHECK_ALLOC(hunk); | |
818 | ||
819 | memset(hunk, 0, sizeof(git_patch_hunk)); | |
820 | ||
821 | hunk->line_start = git_array_size(patch->base.lines); | |
822 | hunk->line_count = 0; | |
823 | ||
824 | if ((error = parse_hunk_header(hunk, ctx)) < 0 || | |
825 | (error = parse_hunk_body(patch, hunk, ctx)) < 0) | |
826 | goto done; | |
827 | } | |
828 | ||
829 | done: | |
830 | return error; | |
831 | } | |
832 | ||
833 | static int parsed_patch_body( | |
834 | git_patch_parsed *patch, patch_parse_ctx *ctx) | |
835 | { | |
836 | if (ctx->line_len >= 16 && memcmp(ctx->line, "GIT binary patch", 16) == 0) | |
837 | return parsed_patch_binary(patch, ctx); | |
838 | ||
839 | else if (ctx->line_len >= 4 && memcmp(ctx->line, "@@ -", 4) == 0) | |
840 | return parsed_patch_hunks(patch, ctx); | |
841 | ||
842 | return 0; | |
843 | } | |
844 | ||
845 | static int check_patch(git_patch_parsed *patch) | |
846 | { | |
b85bd8ce ET |
847 | if (!patch->base.delta->old_file.path && |
848 | patch->base.delta->status != GIT_DELTA_ADDED) | |
804d5fe9 ET |
849 | return parse_err("missing old file path"); |
850 | ||
b85bd8ce ET |
851 | if (!patch->base.delta->new_file.path && |
852 | patch->base.delta->status != GIT_DELTA_DELETED) | |
804d5fe9 ET |
853 | return parse_err("missing new file path"); |
854 | ||
b85bd8ce ET |
855 | if (patch->base.delta->old_file.path && patch->base.delta->new_file.path) { |
856 | if (!patch->base.delta->new_file.mode) | |
857 | patch->base.delta->new_file.mode = patch->base.delta->old_file.mode; | |
804d5fe9 ET |
858 | } |
859 | ||
860 | if (patch->base.delta->status == GIT_DELTA_MODIFIED && | |
861 | !(patch->base.delta->flags & GIT_DIFF_FLAG_BINARY) && | |
b85bd8ce | 862 | patch->base.delta->new_file.mode == patch->base.delta->old_file.mode && |
804d5fe9 ET |
863 | git_array_size(patch->base.hunks) == 0) |
864 | return parse_err("patch with no hunks"); | |
865 | ||
866 | return 0; | |
867 | } | |
868 | ||
804d5fe9 ET |
869 | int git_patch_from_patchfile( |
870 | git_patch **out, | |
871 | const char *content, | |
872 | size_t content_len) | |
873 | { | |
874 | patch_parse_ctx ctx = { 0 }; | |
875 | git_patch_parsed *patch; | |
876 | int error = 0; | |
877 | ||
878 | *out = NULL; | |
879 | ||
880 | patch = git__calloc(1, sizeof(git_patch_parsed)); | |
881 | GITERR_CHECK_ALLOC(patch); | |
882 | ||
e7ec327d ET |
883 | /* TODO: allow callers to specify prefix depth (eg, `-p2`) */ |
884 | patch->base.diff_opts.new_prefix = ""; | |
885 | patch->base.diff_opts.old_prefix = ""; | |
886 | ||
804d5fe9 ET |
887 | patch->base.delta = git__calloc(1, sizeof(git_diff_delta)); |
888 | patch->base.delta->status = GIT_DELTA_MODIFIED; | |
889 | ||
890 | ctx.content = content; | |
891 | ctx.content_len = content_len; | |
892 | ctx.remain = content_len; | |
893 | ||
894 | if ((error = parsed_patch_header(patch, &ctx)) < 0 || | |
895 | (error = parsed_patch_body(patch, &ctx)) < 0 || | |
896 | (error = check_patch(patch)) < 0) | |
897 | goto done; | |
898 | ||
899 | GIT_REFCOUNT_INC(patch); | |
900 | *out = &patch->base; | |
901 | ||
902 | done: | |
903 | git__free(ctx.header_old_path); | |
904 | git__free(ctx.header_new_path); | |
905 | ||
906 | return error; | |
907 | } |