]>
Commit | Line | Data |
---|---|---|
27950fa3 | 1 | /* |
359fc2d2 | 2 | * Copyright (C) the libgit2 contributors. All rights reserved. |
27950fa3 VM |
3 | * |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | ||
eae0bfdc PP |
8 | #include "common.h" |
9 | ||
114f5a6c RB |
10 | #include "git2/attr.h" |
11 | #include "git2/blob.h" | |
12 | #include "git2/index.h" | |
2a7d224f | 13 | #include "git2/sys/filter.h" |
114f5a6c | 14 | |
27950fa3 VM |
15 | #include "fileops.h" |
16 | #include "hash.h" | |
17 | #include "filter.h" | |
3658e81e | 18 | #include "buf_text.h" |
27950fa3 | 19 | #include "repository.h" |
27950fa3 | 20 | |
ac3d33df JK |
/*
 * Line-ending action resolved for a file.  check_crlf() produces the
 * UNDEFINED / BINARY / TEXT / TEXT_INPUT / AUTO values from attribute
 * strings; convert_attrs() refines them into the *_INPUT / *_CRLF forms.
 */
typedef enum {
	GIT_CRLF_UNDEFINED  = 0, /* no usable attribute value */
	GIT_CRLF_BINARY     = 1, /* attribute explicitly false: never convert */
	GIT_CRLF_TEXT       = 2, /* attribute explicitly true */
	GIT_CRLF_TEXT_INPUT = 3, /* text, LF output */
	GIT_CRLF_TEXT_CRLF  = 4, /* text, CRLF output */
	GIT_CRLF_AUTO       = 5, /* attribute value "auto" */
	GIT_CRLF_AUTO_INPUT = 6, /* auto-detected text, LF output */
	GIT_CRLF_AUTO_CRLF  = 7, /* auto-detected text, CRLF output */
} git_crlf_t;
31 | ||
/*
 * Per-file CRLF filter state: the resolved actions plus the repository
 * configuration values they were derived from.  Filled in by
 * convert_attrs() and carried as the filter payload.
 */
struct crlf_attrs {
	/* the .gitattributes setting */
	int attr_action;

	/* the core.autocrlf setting */
	int crlf_action;

	/* repository config values (GIT_CVAR_AUTO_CRLF / SAFE_CRLF / EOL) */
	int auto_crlf;
	int safe_crlf;
	int core_eol;
};
40 | ||
41 | struct crlf_filter { | |
42 | git_filter f; | |
27950fa3 VM |
43 | }; |
44 | ||
ac3d33df | 45 | static git_crlf_t check_crlf(const char *value) |
27950fa3 | 46 | { |
c63793ee | 47 | if (GIT_ATTR_TRUE(value)) |
27950fa3 | 48 | return GIT_CRLF_TEXT; |
ac3d33df | 49 | else if (GIT_ATTR_FALSE(value)) |
27950fa3 | 50 | return GIT_CRLF_BINARY; |
ac3d33df JK |
51 | else if (GIT_ATTR_UNSPECIFIED(value)) |
52 | ; | |
53 | else if (strcmp(value, "input") == 0) | |
54 | return GIT_CRLF_TEXT_INPUT; | |
55 | else if (strcmp(value, "auto") == 0) | |
27950fa3 VM |
56 | return GIT_CRLF_AUTO; |
57 | ||
ac3d33df | 58 | return GIT_CRLF_UNDEFINED; |
27950fa3 VM |
59 | } |
60 | ||
ac3d33df | 61 | static git_cvar_value check_eol(const char *value) |
27950fa3 | 62 | { |
c63793ee | 63 | if (GIT_ATTR_UNSPECIFIED(value)) |
ac3d33df JK |
64 | ; |
65 | else if (strcmp(value, "lf") == 0) | |
27950fa3 | 66 | return GIT_EOL_LF; |
ac3d33df | 67 | else if (strcmp(value, "crlf") == 0) |
27950fa3 VM |
68 | return GIT_EOL_CRLF; |
69 | ||
70 | return GIT_EOL_UNSET; | |
71 | } | |
72 | ||
570ba25c | 73 | static int has_cr_in_index(const git_filter_source *src) |
9733e80c | 74 | { |
570ba25c RB |
75 | git_repository *repo = git_filter_source_repo(src); |
76 | const char *path = git_filter_source_path(src); | |
9733e80c RB |
77 | git_index *index; |
78 | const git_index_entry *entry; | |
79 | git_blob *blob; | |
80 | const void *blobcontent; | |
81 | git_off_t blobsize; | |
82 | bool found_cr; | |
83 | ||
40cb40fa RB |
84 | if (!path) |
85 | return false; | |
86 | ||
85d54812 | 87 | if (git_repository_index__weakptr(&index, repo) < 0) { |
ac3d33df | 88 | git_error_clear(); |
9733e80c RB |
89 | return false; |
90 | } | |
91 | ||
85d54812 RB |
92 | if (!(entry = git_index_get_bypath(index, path, 0)) && |
93 | !(entry = git_index_get_bypath(index, path, 1))) | |
9733e80c RB |
94 | return false; |
95 | ||
96 | if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */ | |
97 | return true; | |
98 | ||
d541170c | 99 | if (git_blob_lookup(&blob, repo, &entry->id) < 0) |
9733e80c RB |
100 | return false; |
101 | ||
102 | blobcontent = git_blob_rawcontent(blob); | |
103 | blobsize = git_blob_rawsize(blob); | |
104 | if (!git__is_sizet(blobsize)) | |
105 | blobsize = (size_t)-1; | |
106 | ||
107 | found_cr = (blobcontent != NULL && | |
108 | blobsize > 0 && | |
109 | memchr(blobcontent, '\r', (size_t)blobsize) != NULL); | |
110 | ||
111 | git_blob_free(blob); | |
112 | return found_cr; | |
113 | } | |
114 | ||
ac3d33df | 115 | static int text_eol_is_crlf(struct crlf_attrs *ca) |
47a899ff | 116 | { |
ac3d33df JK |
117 | if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE) |
118 | return 1; | |
119 | else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT) | |
47a899ff VM |
120 | return 0; |
121 | ||
ac3d33df JK |
122 | if (ca->core_eol == GIT_EOL_CRLF) |
123 | return 1; | |
124 | if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF) | |
125 | return 1; | |
7bf87ab6 | 126 | |
ac3d33df JK |
127 | return 0; |
128 | } | |
27950fa3 | 129 | |
ac3d33df JK |
130 | static git_cvar_value output_eol(struct crlf_attrs *ca) |
131 | { | |
132 | switch (ca->crlf_action) { | |
133 | case GIT_CRLF_BINARY: | |
134 | return GIT_EOL_UNSET; | |
135 | case GIT_CRLF_TEXT_CRLF: | |
136 | return GIT_EOL_CRLF; | |
137 | case GIT_CRLF_TEXT_INPUT: | |
138 | return GIT_EOL_LF; | |
139 | case GIT_CRLF_UNDEFINED: | |
140 | case GIT_CRLF_AUTO_CRLF: | |
141 | return GIT_EOL_CRLF; | |
142 | case GIT_CRLF_AUTO_INPUT: | |
143 | return GIT_EOL_LF; | |
144 | case GIT_CRLF_TEXT: | |
145 | case GIT_CRLF_AUTO: | |
146 | return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF; | |
147 | } | |
148 | ||
149 | /* TODO: warn when available */ | |
150 | return ca->core_eol; | |
151 | } | |
152 | ||
153 | GIT_INLINE(int) check_safecrlf( | |
154 | struct crlf_attrs *ca, | |
155 | const git_filter_source *src, | |
156 | git_buf_text_stats *stats) | |
157 | { | |
158 | const char *filename = git_filter_source_path(src); | |
159 | ||
160 | if (!ca->safe_crlf) | |
161 | return 0; | |
162 | ||
163 | if (output_eol(ca) == GIT_EOL_LF) { | |
164 | /* | |
165 | * CRLFs would not be restored by checkout: | |
166 | * check if we'd remove CRLFs | |
167 | */ | |
168 | if (stats->crlf) { | |
169 | if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { | |
170 | /* TODO: issue a warning when available */ | |
171 | } else { | |
172 | if (filename && *filename) | |
173 | git_error_set( | |
174 | GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'", | |
175 | filename); | |
176 | else | |
177 | git_error_set( | |
178 | GIT_ERROR_FILTER, "CRLF would be replaced by LF"); | |
16798d08 | 179 | |
5269008c | 180 | return -1; |
5269008c | 181 | } |
7be1caf7 | 182 | } |
ac3d33df | 183 | } else if (output_eol(ca) == GIT_EOL_CRLF) { |
27950fa3 | 184 | /* |
ac3d33df JK |
185 | * CRLFs would be added by checkout: |
186 | * check if we have "naked" LFs | |
27950fa3 | 187 | */ |
ac3d33df JK |
188 | if (stats->crlf != stats->lf) { |
189 | if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { | |
190 | /* TODO: issue a warning when available */ | |
191 | } else { | |
192 | if (filename && *filename) | |
193 | git_error_set( | |
194 | GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'", | |
195 | filename); | |
196 | else | |
197 | git_error_set( | |
198 | GIT_ERROR_FILTER, "LF would be replaced by CRLF"); | |
27950fa3 | 199 | |
ac3d33df JK |
200 | return -1; |
201 | } | |
27950fa3 | 202 | } |
27950fa3 VM |
203 | } |
204 | ||
ac3d33df | 205 | return 0; |
e4bac3c4 BS |
206 | } |
207 | ||
ac3d33df JK |
208 | static int crlf_apply_to_odb( |
209 | struct crlf_attrs *ca, | |
210 | git_buf *to, | |
211 | const git_buf *from, | |
212 | const git_filter_source *src) | |
e4bac3c4 | 213 | { |
ac3d33df JK |
214 | git_buf_text_stats stats; |
215 | bool is_binary; | |
216 | int error; | |
e4bac3c4 | 217 | |
ac3d33df JK |
218 | /* Binary attribute? Empty file? Nothing to do */ |
219 | if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from)) | |
220 | return GIT_PASSTHROUGH; | |
e4bac3c4 | 221 | |
ac3d33df | 222 | is_binary = git_buf_text_gather_stats(&stats, from, false); |
146d0d08 | 223 | |
ac3d33df JK |
224 | /* Heuristics to see if we can skip the conversion. |
225 | * Straight from Core Git. | |
226 | */ | |
227 | if (ca->crlf_action == GIT_CRLF_AUTO || | |
228 | ca->crlf_action == GIT_CRLF_AUTO_INPUT || | |
229 | ca->crlf_action == GIT_CRLF_AUTO_CRLF) { | |
230 | ||
231 | if (is_binary) | |
232 | return GIT_PASSTHROUGH; | |
e4bac3c4 | 233 | |
ac3d33df JK |
234 | /* |
235 | * If the file in the index has any CR in it, do not convert. | |
236 | * This is the new safer autocrlf handling. | |
237 | */ | |
238 | if (has_cr_in_index(src)) | |
239 | return GIT_PASSTHROUGH; | |
e4bac3c4 BS |
240 | } |
241 | ||
ac3d33df JK |
242 | if ((error = check_safecrlf(ca, src, &stats)) < 0) |
243 | return error; | |
244 | ||
245 | /* If there are no CR characters to filter out, then just pass */ | |
246 | if (!stats.crlf) | |
247 | return GIT_PASSTHROUGH; | |
248 | ||
249 | /* Actually drop the carriage returns */ | |
250 | return git_buf_text_crlf_to_lf(to, from); | |
e4bac3c4 BS |
251 | } |
252 | ||
3658e81e | 253 | static int crlf_apply_to_workdir( |
ac3d33df JK |
254 | struct crlf_attrs *ca, |
255 | git_buf *to, | |
256 | const git_buf *from) | |
e4bac3c4 | 257 | { |
47e9a6cb | 258 | git_buf_text_stats stats; |
47e9a6cb | 259 | bool is_binary; |
e4bac3c4 | 260 | |
e4bac3c4 | 261 | /* Empty file? Nothing to do. */ |
ac3d33df | 262 | if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF) |
b033f3a3 | 263 | return GIT_PASSTHROUGH; |
0cf77103 | 264 | |
47e9a6cb ET |
265 | is_binary = git_buf_text_gather_stats(&stats, from, false); |
266 | ||
ac3d33df | 267 | /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */ |
47e9a6cb ET |
268 | if (stats.lf == 0 || stats.lf == stats.crlf) |
269 | return GIT_PASSTHROUGH; | |
270 | ||
271 | if (ca->crlf_action == GIT_CRLF_AUTO || | |
ac3d33df JK |
272 | ca->crlf_action == GIT_CRLF_AUTO_INPUT || |
273 | ca->crlf_action == GIT_CRLF_AUTO_CRLF) { | |
47e9a6cb ET |
274 | |
275 | /* If we have any existing CR or CRLF line endings, do nothing */ | |
ac3d33df | 276 | if (stats.cr > 0) |
47e9a6cb ET |
277 | return GIT_PASSTHROUGH; |
278 | ||
279 | /* Don't filter binary files */ | |
280 | if (is_binary) | |
281 | return GIT_PASSTHROUGH; | |
282 | } | |
283 | ||
b033f3a3 | 284 | return git_buf_text_lf_to_crlf(to, from); |
e4bac3c4 BS |
285 | } |
286 | ||
ac3d33df JK |
287 | static int convert_attrs( |
288 | struct crlf_attrs *ca, | |
289 | const char **attr_values, | |
290 | const git_filter_source *src) | |
27950fa3 | 291 | { |
27950fa3 | 292 | int error; |
85d54812 | 293 | |
ac3d33df JK |
294 | memset(ca, 0, sizeof(struct crlf_attrs)); |
295 | ||
296 | if ((error = git_repository__cvar(&ca->auto_crlf, | |
297 | git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF)) < 0 || | |
298 | (error = git_repository__cvar(&ca->safe_crlf, | |
299 | git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF)) < 0 || | |
300 | (error = git_repository__cvar(&ca->core_eol, | |
301 | git_filter_source_repo(src), GIT_CVAR_EOL)) < 0) | |
302 | return error; | |
303 | ||
304 | /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ | |
305 | if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && | |
306 | ca->safe_crlf == GIT_SAFE_CRLF_FAIL) | |
307 | ca->safe_crlf = GIT_SAFE_CRLF_WARN; | |
308 | ||
309 | if (attr_values) { | |
310 | /* load the text attribute */ | |
311 | ca->crlf_action = check_crlf(attr_values[2]); /* text */ | |
312 | ||
313 | if (ca->crlf_action == GIT_CRLF_UNDEFINED) | |
314 | ca->crlf_action = check_crlf(attr_values[0]); /* crlf */ | |
315 | ||
316 | if (ca->crlf_action != GIT_CRLF_BINARY) { | |
317 | /* load the eol attribute */ | |
318 | int eol_attr = check_eol(attr_values[1]); | |
319 | ||
320 | if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF) | |
321 | ca->crlf_action = GIT_CRLF_AUTO_INPUT; | |
322 | else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF) | |
323 | ca->crlf_action = GIT_CRLF_AUTO_CRLF; | |
324 | else if (eol_attr == GIT_EOL_LF) | |
325 | ca->crlf_action = GIT_CRLF_TEXT_INPUT; | |
326 | else if (eol_attr == GIT_EOL_CRLF) | |
327 | ca->crlf_action = GIT_CRLF_TEXT_CRLF; | |
328 | } | |
27950fa3 | 329 | |
ac3d33df | 330 | ca->attr_action = ca->crlf_action; |
974774c7 | 331 | } else { |
ac3d33df | 332 | ca->crlf_action = GIT_CRLF_UNDEFINED; |
974774c7 | 333 | } |
27950fa3 | 334 | |
ac3d33df JK |
335 | if (ca->crlf_action == GIT_CRLF_TEXT) |
336 | ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT; | |
337 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE) | |
338 | ca->crlf_action = GIT_CRLF_BINARY; | |
339 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE) | |
340 | ca->crlf_action = GIT_CRLF_AUTO_CRLF; | |
341 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT) | |
342 | ca->crlf_action = GIT_CRLF_AUTO_INPUT; | |
27950fa3 | 343 | |
ac3d33df JK |
344 | return 0; |
345 | } | |
f2c25d18 | 346 | |
ac3d33df JK |
347 | static int crlf_check( |
348 | git_filter *self, | |
349 | void **payload, /* points to NULL ptr on entry, may be set */ | |
350 | const git_filter_source *src, | |
351 | const char **attr_values) | |
352 | { | |
353 | struct crlf_attrs ca; | |
66b2626c | 354 | |
ac3d33df | 355 | GIT_UNUSED(self); |
27950fa3 | 356 | |
ac3d33df | 357 | convert_attrs(&ca, attr_values, src); |
5269008c | 358 | |
ac3d33df JK |
359 | if (ca.crlf_action == GIT_CRLF_BINARY) |
360 | return GIT_PASSTHROUGH; | |
855c66de | 361 | |
85d54812 | 362 | *payload = git__malloc(sizeof(ca)); |
ac3d33df | 363 | GIT_ERROR_CHECK_ALLOC(*payload); |
85d54812 | 364 | memcpy(*payload, &ca, sizeof(ca)); |
27950fa3 | 365 | |
85d54812 RB |
366 | return 0; |
367 | } | |
368 | ||
369 | static int crlf_apply( | |
ac3d33df JK |
370 | git_filter *self, |
371 | void **payload, /* may be read and/or set */ | |
372 | git_buf *to, | |
a9f51e43 | 373 | const git_buf *from, |
85d54812 RB |
374 | const git_filter_source *src) |
375 | { | |
40cb40fa RB |
376 | /* initialize payload in case `check` was bypassed */ |
377 | if (!*payload) { | |
378 | int error = crlf_check(self, payload, src, NULL); | |
ac3d33df | 379 | |
2129d6df | 380 | if (error < 0) |
40cb40fa RB |
381 | return error; |
382 | } | |
47a899ff | 383 | |
2a7d224f | 384 | if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) |
85d54812 RB |
385 | return crlf_apply_to_workdir(*payload, to, from); |
386 | else | |
387 | return crlf_apply_to_odb(*payload, to, from, src); | |
27950fa3 VM |
388 | } |
389 | ||
85d54812 RB |
390 | static void crlf_cleanup( |
391 | git_filter *self, | |
392 | void *payload) | |
f2d42eea | 393 | { |
85d54812 RB |
394 | GIT_UNUSED(self); |
395 | git__free(payload); | |
f2d42eea BS |
396 | } |
397 | ||
85d54812 | 398 | git_filter *git_crlf_filter_new(void) |
f2d42eea | 399 | { |
85d54812 | 400 | struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); |
dfda1cf5 JG |
401 | if (f == NULL) |
402 | return NULL; | |
974774c7 | 403 | |
85d54812 | 404 | f->f.version = GIT_FILTER_VERSION; |
974774c7 RB |
405 | f->f.attributes = "crlf eol text"; |
406 | f->f.initialize = NULL; | |
4b11f25a | 407 | f->f.shutdown = git_filter_free; |
0646634e RB |
408 | f->f.check = crlf_check; |
409 | f->f.apply = crlf_apply; | |
410 | f->f.cleanup = crlf_cleanup; | |
2a7d224f | 411 | |
85d54812 | 412 | return (git_filter *)f; |
f2d42eea | 413 | } |