]>
Commit | Line | Data |
---|---|---|
eae0bfdc PP |
1 | /* |
2 | * Copyright (C) the libgit2 contributors. All rights reserved. | |
3 | * | |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | #include "array.h" | |
8 | #include "common.h" | |
9 | #include "git2/message.h" | |
10 | ||
11 | #include <stddef.h> | |
12 | #include <string.h> | |
13 | #include <ctype.h> | |
14 | ||
/* Character that marks a line as a comment when it is the first byte of the line. */
#define COMMENT_LINE_CHAR '#'
/* Set of characters accepted as the separator between a trailer key and its value. */
#define TRAILER_SEPARATORS ":"
17 | ||
/*
 * Line prefixes that find_trailer_start() counts as trailer lines outright
 * and that flag the block as containing a recognized prefix. The list is
 * terminated by a NULL entry.
 */
static const char *const git_generated_prefixes[] = {
	"Signed-off-by: ",
	"(cherry picked from commit ",
	NULL
};
23 | ||
/*
 * Return non-zero if the line starting at str holds nothing but whitespace up
 * to its terminating newline (or the NUL terminator), zero otherwise.
 */
static int is_blank_line(const char *str)
{
	const char *s = str;

	/*
	 * The <ctype.h> classifiers require a value representable as
	 * unsigned char (or EOF). Cast so that bytes >= 0x80 are never passed
	 * as negative values on platforms where plain char is signed.
	 */
	while (*s && *s != '\n' && isspace((unsigned char)*s))
		s++;

	return !*s || *s == '\n';
}
31 | ||
/*
 * Return a pointer to the character that follows the first newline in str,
 * or to the NUL terminator of str when it contains no newline.
 */
static const char *next_line(const char *str)
{
	const char *newline = strchr(str, '\n');

	/* no newline left: the "next line" is the end of the string */
	if (newline == NULL)
		return str + strlen(str);

	return newline + 1;
}
43 | ||
/*
 * Find the start of the last line within the first len bytes of buf.
 *
 * A newline in the final position (buf[len - 1]) is treated as belonging to
 * the last line rather than as the start of a new, empty line.
 *
 * On return *out holds the offset of the first character of that line and the
 * result is true. If len is 0 there is no line at all: *out is set to 0 and
 * false is returned.
 */
static bool last_line(size_t *out, const char *buf, size_t len)
{
	size_t i;

	*out = 0;

	if (len == 0)
		return false;

	/*
	 * Look backwards for the newline that ends the previous line. The
	 * candidates are buf[len - 2] down to buf[0]; buf[len - 1] is skipped
	 * on purpose (see above). i stays one past the index being examined so
	 * that index 0 is reached without the unsigned counter wrapping.
	 */
	for (i = len - 1; i > 0; i--) {
		if (buf[i - 1] == '\n') {
			*out = i;
			return true;
		}
	}

	/* no earlier newline: the only line starts at offset 0 */
	return true;
}
73 | ||
/*
 * If the given line is of the form
 * "<token><optional whitespace><separator>..." or "<separator>...", sets *out
 * to the offset of the separator and returns true. Otherwise returns false
 * and leaves *out untouched. The optional whitespace is allowed primarily to
 * permit things like "Bug #43", where <token> is "Bug" and <separator> is "#".
 *
 * A token consists of alphanumeric characters and '-'. Whitespace is only
 * accepted after at least one other character, and once whitespace has been
 * seen only further whitespace or a separator may follow.
 *
 * The separator-starts-line case (true, *out == 0) is kept distinct from the
 * non-well-formed-line case (false) because some callers need to tell the two
 * apart.
 */
static bool find_separator(size_t *out, const char *line, const char *separators)
{
	bool whitespace_found = false;
	const char *c;

	for (c = line; *c; c++) {
		if (strchr(separators, *c)) {
			*out = (size_t)(c - line);
			return true;
		}

		/*
		 * Cast before calling into <ctype.h>: passing a negative
		 * plain char (any byte >= 0x80) is undefined behavior.
		 */
		if (!whitespace_found && (isalnum((unsigned char)*c) || *c == '-'))
			continue;

		if (c != line && (*c == ' ' || *c == '\t')) {
			whitespace_found = true;
			continue;
		}

		break;
	}

	return false;
}
106 | ||
107 | /* | |
108 | * Inspect the given string and determine the true "end" of the log message, in | |
109 | * order to find where to put a new Signed-off-by: line. Ignored are | |
110 | * trailing comment lines and blank lines. To support "git commit -s | |
111 | * --amend" on an existing commit, we also ignore "Conflicts:". To | |
112 | * support "git commit -v", we truncate at cut lines. | |
113 | * | |
114 | * Returns the number of bytes from the tail to ignore, to be fed as | |
115 | * the second parameter to append_signoff(). | |
116 | */ | |
22a2d3d5 | 117 | static size_t ignore_non_trailer(const char *buf, size_t len) |
eae0bfdc | 118 | { |
22a2d3d5 | 119 | size_t boc = 0, bol = 0; |
eae0bfdc PP |
120 | int in_old_conflicts_block = 0; |
121 | size_t cutoff = len; | |
122 | ||
123 | while (bol < cutoff) { | |
124 | const char *next_line = memchr(buf + bol, '\n', len - bol); | |
125 | ||
126 | if (!next_line) | |
127 | next_line = buf + len; | |
128 | else | |
129 | next_line++; | |
130 | ||
131 | if (buf[bol] == COMMENT_LINE_CHAR || buf[bol] == '\n') { | |
132 | /* is this the first of the run of comments? */ | |
133 | if (!boc) | |
134 | boc = bol; | |
135 | /* otherwise, it is just continuing */ | |
136 | } else if (git__prefixcmp(buf + bol, "Conflicts:\n") == 0) { | |
137 | in_old_conflicts_block = 1; | |
138 | if (!boc) | |
139 | boc = bol; | |
140 | } else if (in_old_conflicts_block && buf[bol] == '\t') { | |
141 | ; /* a pathname in the conflicts block */ | |
142 | } else if (boc) { | |
143 | /* the previous was not trailing comment */ | |
144 | boc = 0; | |
145 | in_old_conflicts_block = 0; | |
146 | } | |
147 | bol = next_line - buf; | |
148 | } | |
149 | return boc ? len - boc : len - cutoff; | |
150 | } | |
151 | ||
/*
 * Return the position of the start of the patch, or the length of str if
 * there is no patch in the message.
 *
 * The patch starts at the first line that begins with "---" followed by
 * whitespace or the end of the line/input. Requiring that boundary keeps
 * ordinary message lines that merely begin with dashes (e.g. "---->") from
 * being mistaken for the patch divider.
 */
static size_t find_patch_start(const char *str)
{
	const char *s;

	for (s = str; *s; s = next_line(s)) {
		if (git__prefixcmp(s, "---") != 0)
			continue;

		/*
		 * s[3] is safe to read: the prefix matched, so s[0..2] are
		 * dashes and s[3] is at worst the NUL terminator.
		 */
		if (s[3] == '\0' || isspace((unsigned char)s[3]))
			return (size_t)(s - str);
	}

	/* s now points at the NUL terminator */
	return (size_t)(s - str);
}
167 | ||
168 | /* | |
169 | * Return the position of the first trailer line or len if there are no | |
170 | * trailers. | |
171 | */ | |
22a2d3d5 | 172 | static size_t find_trailer_start(const char *buf, size_t len) |
eae0bfdc PP |
173 | { |
174 | const char *s; | |
22a2d3d5 UG |
175 | size_t end_of_title, l; |
176 | int only_spaces = 1; | |
eae0bfdc PP |
177 | int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0; |
178 | /* | |
179 | * Number of possible continuation lines encountered. This will be | |
180 | * reset to 0 if we encounter a trailer (since those lines are to be | |
181 | * considered continuations of that trailer), and added to | |
182 | * non_trailer_lines if we encounter a non-trailer (since those lines | |
183 | * are to be considered non-trailers). | |
184 | */ | |
185 | int possible_continuation_lines = 0; | |
186 | ||
187 | /* The first paragraph is the title and cannot be trailers */ | |
188 | for (s = buf; s < buf + len; s = next_line(s)) { | |
189 | if (s[0] == COMMENT_LINE_CHAR) | |
190 | continue; | |
191 | if (is_blank_line(s)) | |
192 | break; | |
193 | } | |
194 | end_of_title = s - buf; | |
195 | ||
196 | /* | |
197 | * Get the start of the trailers by looking starting from the end for a | |
198 | * blank line before a set of non-blank lines that (i) are all | |
199 | * trailers, or (ii) contains at least one Git-generated trailer and | |
200 | * consists of at least 25% trailers. | |
201 | */ | |
22a2d3d5 UG |
202 | l = len; |
203 | while (last_line(&l, buf, l) && l >= end_of_title) { | |
eae0bfdc PP |
204 | const char *bol = buf + l; |
205 | const char *const *p; | |
22a2d3d5 | 206 | size_t separator_pos = 0; |
eae0bfdc PP |
207 | |
208 | if (bol[0] == COMMENT_LINE_CHAR) { | |
209 | non_trailer_lines += possible_continuation_lines; | |
210 | possible_continuation_lines = 0; | |
211 | continue; | |
212 | } | |
213 | if (is_blank_line(bol)) { | |
214 | if (only_spaces) | |
215 | continue; | |
216 | non_trailer_lines += possible_continuation_lines; | |
217 | if (recognized_prefix && | |
218 | trailer_lines * 3 >= non_trailer_lines) | |
219 | return next_line(bol) - buf; | |
220 | else if (trailer_lines && !non_trailer_lines) | |
221 | return next_line(bol) - buf; | |
222 | return len; | |
223 | } | |
224 | only_spaces = 0; | |
225 | ||
226 | for (p = git_generated_prefixes; *p; p++) { | |
227 | if (git__prefixcmp(bol, *p) == 0) { | |
228 | trailer_lines++; | |
229 | possible_continuation_lines = 0; | |
230 | recognized_prefix = 1; | |
231 | goto continue_outer_loop; | |
232 | } | |
233 | } | |
234 | ||
22a2d3d5 | 235 | find_separator(&separator_pos, bol, TRAILER_SEPARATORS); |
eae0bfdc PP |
236 | if (separator_pos >= 1 && !isspace(bol[0])) { |
237 | trailer_lines++; | |
238 | possible_continuation_lines = 0; | |
239 | if (recognized_prefix) | |
240 | continue; | |
241 | } else if (isspace(bol[0])) | |
242 | possible_continuation_lines++; | |
243 | else { | |
244 | non_trailer_lines++; | |
245 | non_trailer_lines += possible_continuation_lines; | |
246 | possible_continuation_lines = 0; | |
247 | } | |
248 | continue_outer_loop: | |
249 | ; | |
250 | } | |
251 | ||
252 | return len; | |
253 | } | |
254 | ||
/*
 * Return the position of the end of the trailers: len minus the number of
 * tail bytes that ignore_non_trailer() reports as ignorable.
 */
static size_t find_trailer_end(const char *buf, size_t len)
{
	size_t ignored_tail = ignore_non_trailer(buf, len);

	return len - ignored_tail;
}
260 | ||
/*
 * Copy the trailer block of message into a freshly allocated, NUL-terminated
 * buffer.
 *
 * The block runs from find_trailer_start() to find_trailer_end(), where the
 * end is searched for in the part of the message that precedes the patch.
 *
 * On success the buffer is returned and *len receives its length, not
 * counting the terminator. Returns NULL, leaving *len untouched, if the
 * allocation fails.
 */
static char *extract_trailer_block(const char *message, size_t* len)
{
	size_t patch_start, block_start, block_end, block_len;
	char *copy;

	patch_start = find_patch_start(message);
	block_end = find_trailer_end(message, patch_start);
	block_start = find_trailer_start(message, block_end);
	block_len = block_end - block_start;

	copy = git__malloc(block_len + 1);
	if (!copy)
		return NULL;

	memcpy(copy, message + block_start, block_len);
	copy[block_len] = '\0';

	*len = block_len;
	return copy;
}
280 | ||
/* States of the trailer parser in git_message_trailers(). */
enum trailer_state {
	S_START,	/* at the first character of a candidate trailer */
	S_KEY,		/* reading the characters of the key */
	S_KEY_WS,	/* whitespace between the key and the separator */
	S_SEP_WS,	/* whitespace between the separator and the value */
	S_VALUE,	/* reading the characters of the value */
	S_VALUE_NL,	/* just past a newline inside a value */
	S_VALUE_END,	/* value complete; record the key/value pair */
	S_IGNORE,	/* skipping the rest of a line that is not a trailer */
};
291 | ||
/*
 * State-transition helpers for a parser loop that has `state` and `ptr` in
 * scope. They are bare brace blocks rather than do { } while (0) wrappers
 * because the `continue` must apply to the loop the macro is expanded in.
 */

/* Switch to state `st` and move on to the next character. */
#define NEXT(st) \
	{ \
		state = (st); \
		ptr++; \
		continue; \
	}

/* Switch to state `st` and look at the current character again. */
#define GOTO(st) \
	{ \
		state = (st); \
		continue; \
	}
294 | ||
/* git_array_t of git_message_trailer; git_message_trailers() collects parsed trailers in one. */
typedef git_array_t(git_message_trailer) git_array_trailer_t;
296 | ||
297 | int git_message_trailers(git_message_trailer_array *trailer_arr, const char *message) | |
298 | { | |
299 | enum trailer_state state = S_START; | |
300 | int rc = 0; | |
301 | char *ptr; | |
302 | char *key = NULL; | |
303 | char *value = NULL; | |
304 | git_array_trailer_t arr = GIT_ARRAY_INIT; | |
305 | ||
306 | size_t trailer_len; | |
307 | char *trailer = extract_trailer_block(message, &trailer_len); | |
22a2d3d5 UG |
308 | if (trailer == NULL) |
309 | return -1; | |
eae0bfdc PP |
310 | |
311 | for (ptr = trailer;;) { | |
312 | switch (state) { | |
313 | case S_START: { | |
314 | if (*ptr == 0) { | |
315 | goto ret; | |
316 | } | |
317 | ||
318 | key = ptr; | |
319 | GOTO(S_KEY); | |
320 | } | |
321 | case S_KEY: { | |
322 | if (*ptr == 0) { | |
323 | goto ret; | |
324 | } | |
325 | ||
326 | if (isalnum(*ptr) || *ptr == '-') { | |
ac3d33df | 327 | /* legal key character */ |
eae0bfdc PP |
328 | NEXT(S_KEY); |
329 | } | |
330 | ||
331 | if (*ptr == ' ' || *ptr == '\t') { | |
ac3d33df | 332 | /* optional whitespace before separator */ |
eae0bfdc PP |
333 | *ptr = 0; |
334 | NEXT(S_KEY_WS); | |
335 | } | |
336 | ||
337 | if (strchr(TRAILER_SEPARATORS, *ptr)) { | |
338 | *ptr = 0; | |
339 | NEXT(S_SEP_WS); | |
340 | } | |
341 | ||
ac3d33df | 342 | /* illegal character */ |
eae0bfdc PP |
343 | GOTO(S_IGNORE); |
344 | } | |
345 | case S_KEY_WS: { | |
346 | if (*ptr == 0) { | |
347 | goto ret; | |
348 | } | |
349 | ||
350 | if (*ptr == ' ' || *ptr == '\t') { | |
351 | NEXT(S_KEY_WS); | |
352 | } | |
353 | ||
354 | if (strchr(TRAILER_SEPARATORS, *ptr)) { | |
355 | NEXT(S_SEP_WS); | |
356 | } | |
357 | ||
ac3d33df | 358 | /* illegal character */ |
eae0bfdc PP |
359 | GOTO(S_IGNORE); |
360 | } | |
361 | case S_SEP_WS: { | |
362 | if (*ptr == 0) { | |
363 | goto ret; | |
364 | } | |
365 | ||
366 | if (*ptr == ' ' || *ptr == '\t') { | |
367 | NEXT(S_SEP_WS); | |
368 | } | |
369 | ||
370 | value = ptr; | |
371 | NEXT(S_VALUE); | |
372 | } | |
373 | case S_VALUE: { | |
374 | if (*ptr == 0) { | |
375 | GOTO(S_VALUE_END); | |
376 | } | |
377 | ||
378 | if (*ptr == '\n') { | |
379 | NEXT(S_VALUE_NL); | |
380 | } | |
381 | ||
382 | NEXT(S_VALUE); | |
383 | } | |
384 | case S_VALUE_NL: { | |
385 | if (*ptr == ' ') { | |
ac3d33df | 386 | /* continuation; */ |
eae0bfdc PP |
387 | NEXT(S_VALUE); |
388 | } | |
389 | ||
390 | ptr[-1] = 0; | |
391 | GOTO(S_VALUE_END); | |
392 | } | |
393 | case S_VALUE_END: { | |
394 | git_message_trailer *t = git_array_alloc(arr); | |
395 | ||
396 | t->key = key; | |
397 | t->value = value; | |
398 | ||
399 | key = NULL; | |
400 | value = NULL; | |
401 | ||
402 | GOTO(S_START); | |
403 | } | |
404 | case S_IGNORE: { | |
405 | if (*ptr == 0) { | |
406 | goto ret; | |
407 | } | |
408 | ||
409 | if (*ptr == '\n') { | |
410 | NEXT(S_START); | |
411 | } | |
412 | ||
413 | NEXT(S_IGNORE); | |
414 | } | |
415 | } | |
416 | } | |
417 | ||
418 | ret: | |
419 | trailer_arr->_trailer_block = trailer; | |
420 | trailer_arr->trailers = arr.ptr; | |
421 | trailer_arr->count = arr.size; | |
422 | ||
423 | return rc; | |
424 | } | |
425 | ||
426 | void git_message_trailer_array_free(git_message_trailer_array *arr) | |
427 | { | |
428 | git__free(arr->_trailer_block); | |
429 | git__free(arr->trailers); | |
430 | } |