]> git.proxmox.com Git - libgit2.git/blob - src/libgit2/crlf.c
New upstream version 1.5.0+ds
[libgit2.git] / src / libgit2 / crlf.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9
10 #include "git2/attr.h"
11 #include "git2/blob.h"
12 #include "git2/index.h"
13 #include "git2/sys/filter.h"
14
15 #include "buf.h"
16 #include "futils.h"
17 #include "hash.h"
18 #include "filter.h"
19 #include "repository.h"
20
/*
 * Line-ending action chosen for a path.
 *
 * check_crlf() produces UNDEFINED, BINARY, TEXT, TEXT_INPUT and AUTO
 * straight from attribute values; the remaining values are assigned by
 * convert_attrs() once the `eol` attribute and configuration have been
 * taken into account.
 */
typedef enum {
	GIT_CRLF_UNDEFINED  = 0, /* no attribute decided the action */
	GIT_CRLF_BINARY     = 1, /* never convert */
	GIT_CRLF_TEXT       = 2, /* text; EOL still to be resolved */
	GIT_CRLF_TEXT_INPUT = 3, /* text, LF output */
	GIT_CRLF_TEXT_CRLF  = 4, /* text, CRLF output */
	GIT_CRLF_AUTO       = 5, /* auto-detect; EOL still to be resolved */
	GIT_CRLF_AUTO_INPUT = 6, /* auto-detect, LF output */
	GIT_CRLF_AUTO_CRLF  = 7  /* auto-detect, CRLF output */
} git_crlf_t;
31
/*
 * Per-path line-ending settings; filled in by convert_attrs() and carried
 * as the filter payload.
 */
struct crlf_attrs {
	/*
	 * Action derived from the path's attributes only; recorded by
	 * convert_attrs() before any configuration fallback is applied.
	 */
	int attr_action;

	/* Effective action, after configuration fallbacks have been applied. */
	int crlf_action;

	/* Value looked up with GIT_CONFIGMAP_AUTO_CRLF. */
	int auto_crlf;

	/* Value looked up with GIT_CONFIGMAP_SAFE_CRLF. */
	int safe_crlf;

	/* Value looked up with GIT_CONFIGMAP_EOL. */
	int core_eol;
};
40
41 struct crlf_filter {
42 git_filter f;
43 };
44
45 static git_crlf_t check_crlf(const char *value)
46 {
47 if (GIT_ATTR_IS_TRUE(value))
48 return GIT_CRLF_TEXT;
49 else if (GIT_ATTR_IS_FALSE(value))
50 return GIT_CRLF_BINARY;
51 else if (GIT_ATTR_IS_UNSPECIFIED(value))
52 ;
53 else if (strcmp(value, "input") == 0)
54 return GIT_CRLF_TEXT_INPUT;
55 else if (strcmp(value, "auto") == 0)
56 return GIT_CRLF_AUTO;
57
58 return GIT_CRLF_UNDEFINED;
59 }
60
61 static git_configmap_value check_eol(const char *value)
62 {
63 if (GIT_ATTR_IS_UNSPECIFIED(value))
64 ;
65 else if (strcmp(value, "lf") == 0)
66 return GIT_EOL_LF;
67 else if (strcmp(value, "crlf") == 0)
68 return GIT_EOL_CRLF;
69
70 return GIT_EOL_UNSET;
71 }
72
73 static int has_cr_in_index(const git_filter_source *src)
74 {
75 git_repository *repo = git_filter_source_repo(src);
76 const char *path = git_filter_source_path(src);
77 git_index *index;
78 const git_index_entry *entry;
79 git_blob *blob;
80 const void *blobcontent;
81 git_object_size_t blobsize;
82 bool found_cr;
83
84 if (!path)
85 return false;
86
87 if (git_repository_index__weakptr(&index, repo) < 0) {
88 git_error_clear();
89 return false;
90 }
91
92 if (!(entry = git_index_get_bypath(index, path, 0)) &&
93 !(entry = git_index_get_bypath(index, path, 1)))
94 return false;
95
96 if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
97 return true;
98
99 if (git_blob_lookup(&blob, repo, &entry->id) < 0)
100 return false;
101
102 blobcontent = git_blob_rawcontent(blob);
103 blobsize = git_blob_rawsize(blob);
104 if (!git__is_sizet(blobsize))
105 blobsize = (size_t)-1;
106
107 found_cr = (blobcontent != NULL &&
108 blobsize > 0 &&
109 memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
110
111 git_blob_free(blob);
112 return found_cr;
113 }
114
115 static int text_eol_is_crlf(struct crlf_attrs *ca)
116 {
117 if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
118 return 1;
119 else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
120 return 0;
121
122 if (ca->core_eol == GIT_EOL_CRLF)
123 return 1;
124 if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
125 return 1;
126
127 return 0;
128 }
129
130 static git_configmap_value output_eol(struct crlf_attrs *ca)
131 {
132 switch (ca->crlf_action) {
133 case GIT_CRLF_BINARY:
134 return GIT_EOL_UNSET;
135 case GIT_CRLF_TEXT_CRLF:
136 return GIT_EOL_CRLF;
137 case GIT_CRLF_TEXT_INPUT:
138 return GIT_EOL_LF;
139 case GIT_CRLF_UNDEFINED:
140 case GIT_CRLF_AUTO_CRLF:
141 return GIT_EOL_CRLF;
142 case GIT_CRLF_AUTO_INPUT:
143 return GIT_EOL_LF;
144 case GIT_CRLF_TEXT:
145 case GIT_CRLF_AUTO:
146 return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
147 }
148
149 /* TODO: warn when available */
150 return ca->core_eol;
151 }
152
153 GIT_INLINE(int) check_safecrlf(
154 struct crlf_attrs *ca,
155 const git_filter_source *src,
156 git_str_text_stats *stats)
157 {
158 const char *filename = git_filter_source_path(src);
159
160 if (!ca->safe_crlf)
161 return 0;
162
163 if (output_eol(ca) == GIT_EOL_LF) {
164 /*
165 * CRLFs would not be restored by checkout:
166 * check if we'd remove CRLFs
167 */
168 if (stats->crlf) {
169 if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
170 /* TODO: issue a warning when available */
171 } else {
172 if (filename && *filename)
173 git_error_set(
174 GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'",
175 filename);
176 else
177 git_error_set(
178 GIT_ERROR_FILTER, "CRLF would be replaced by LF");
179
180 return -1;
181 }
182 }
183 } else if (output_eol(ca) == GIT_EOL_CRLF) {
184 /*
185 * CRLFs would be added by checkout:
186 * check if we have "naked" LFs
187 */
188 if (stats->crlf != stats->lf) {
189 if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
190 /* TODO: issue a warning when available */
191 } else {
192 if (filename && *filename)
193 git_error_set(
194 GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'",
195 filename);
196 else
197 git_error_set(
198 GIT_ERROR_FILTER, "LF would be replaced by CRLF");
199
200 return -1;
201 }
202 }
203 }
204
205 return 0;
206 }
207
208 static int crlf_apply_to_odb(
209 struct crlf_attrs *ca,
210 git_str *to,
211 const git_str *from,
212 const git_filter_source *src)
213 {
214 git_str_text_stats stats;
215 bool is_binary;
216 int error;
217
218 /* Binary attribute? Empty file? Nothing to do */
219 if (ca->crlf_action == GIT_CRLF_BINARY || from->size == 0)
220 return GIT_PASSTHROUGH;
221
222 is_binary = git_str_gather_text_stats(&stats, from, false);
223
224 /* Heuristics to see if we can skip the conversion.
225 * Straight from Core Git.
226 */
227 if (ca->crlf_action == GIT_CRLF_AUTO ||
228 ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
229 ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
230
231 if (is_binary)
232 return GIT_PASSTHROUGH;
233
234 /*
235 * If the file in the index has any CR in it, do not convert.
236 * This is the new safer autocrlf handling.
237 */
238 if (has_cr_in_index(src))
239 return GIT_PASSTHROUGH;
240 }
241
242 if ((error = check_safecrlf(ca, src, &stats)) < 0)
243 return error;
244
245 /* If there are no CR characters to filter out, then just pass */
246 if (!stats.crlf)
247 return GIT_PASSTHROUGH;
248
249 /* Actually drop the carriage returns */
250 return git_str_crlf_to_lf(to, from);
251 }
252
253 static int crlf_apply_to_workdir(
254 struct crlf_attrs *ca,
255 git_str *to,
256 const git_str *from)
257 {
258 git_str_text_stats stats;
259 bool is_binary;
260
261 /* Empty file? Nothing to do. */
262 if (git_str_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
263 return GIT_PASSTHROUGH;
264
265 is_binary = git_str_gather_text_stats(&stats, from, false);
266
267 /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
268 if (stats.lf == 0 || stats.lf == stats.crlf)
269 return GIT_PASSTHROUGH;
270
271 if (ca->crlf_action == GIT_CRLF_AUTO ||
272 ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
273 ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
274
275 /* If we have any existing CR or CRLF line endings, do nothing */
276 if (stats.cr > 0)
277 return GIT_PASSTHROUGH;
278
279 /* Don't filter binary files */
280 if (is_binary)
281 return GIT_PASSTHROUGH;
282 }
283
284 return git_str_lf_to_crlf(to, from);
285 }
286
287 static int convert_attrs(
288 struct crlf_attrs *ca,
289 const char **attr_values,
290 const git_filter_source *src)
291 {
292 int error;
293
294 memset(ca, 0, sizeof(struct crlf_attrs));
295
296 if ((error = git_repository__configmap_lookup(&ca->auto_crlf,
297 git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 ||
298 (error = git_repository__configmap_lookup(&ca->safe_crlf,
299 git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 ||
300 (error = git_repository__configmap_lookup(&ca->core_eol,
301 git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0)
302 return error;
303
304 /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
305 if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
306 ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
307 ca->safe_crlf = GIT_SAFE_CRLF_WARN;
308
309 if (attr_values) {
310 /* load the text attribute */
311 ca->crlf_action = check_crlf(attr_values[2]); /* text */
312
313 if (ca->crlf_action == GIT_CRLF_UNDEFINED)
314 ca->crlf_action = check_crlf(attr_values[0]); /* crlf */
315
316 if (ca->crlf_action != GIT_CRLF_BINARY) {
317 /* load the eol attribute */
318 int eol_attr = check_eol(attr_values[1]);
319
320 if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
321 ca->crlf_action = GIT_CRLF_AUTO_INPUT;
322 else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
323 ca->crlf_action = GIT_CRLF_AUTO_CRLF;
324 else if (eol_attr == GIT_EOL_LF)
325 ca->crlf_action = GIT_CRLF_TEXT_INPUT;
326 else if (eol_attr == GIT_EOL_CRLF)
327 ca->crlf_action = GIT_CRLF_TEXT_CRLF;
328 }
329
330 ca->attr_action = ca->crlf_action;
331 } else {
332 ca->crlf_action = GIT_CRLF_UNDEFINED;
333 }
334
335 if (ca->crlf_action == GIT_CRLF_TEXT)
336 ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
337 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
338 ca->crlf_action = GIT_CRLF_BINARY;
339 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
340 ca->crlf_action = GIT_CRLF_AUTO_CRLF;
341 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
342 ca->crlf_action = GIT_CRLF_AUTO_INPUT;
343
344 return 0;
345 }
346
347 static int crlf_check(
348 git_filter *self,
349 void **payload, /* points to NULL ptr on entry, may be set */
350 const git_filter_source *src,
351 const char **attr_values)
352 {
353 struct crlf_attrs ca;
354
355 GIT_UNUSED(self);
356
357 convert_attrs(&ca, attr_values, src);
358
359 if (ca.crlf_action == GIT_CRLF_BINARY)
360 return GIT_PASSTHROUGH;
361
362 *payload = git__malloc(sizeof(ca));
363 GIT_ERROR_CHECK_ALLOC(*payload);
364 memcpy(*payload, &ca, sizeof(ca));
365
366 return 0;
367 }
368
369 static int crlf_apply(
370 git_filter *self,
371 void **payload, /* may be read and/or set */
372 git_str *to,
373 const git_str *from,
374 const git_filter_source *src)
375 {
376 int error = 0;
377
378 /* initialize payload in case `check` was bypassed */
379 if (!*payload) {
380 if ((error = crlf_check(self, payload, src, NULL)) < 0)
381 return error;
382 }
383
384 if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
385 error = crlf_apply_to_workdir(*payload, to, from);
386 else
387 error = crlf_apply_to_odb(*payload, to, from, src);
388
389 return error;
390 }
391
392 static int crlf_stream(
393 git_writestream **out,
394 git_filter *self,
395 void **payload,
396 const git_filter_source *src,
397 git_writestream *next)
398 {
399 return git_filter_buffered_stream_new(out,
400 self, crlf_apply, NULL, payload, src, next);
401 }
402
403 static void crlf_cleanup(
404 git_filter *self,
405 void *payload)
406 {
407 GIT_UNUSED(self);
408 git__free(payload);
409 }
410
411 git_filter *git_crlf_filter_new(void)
412 {
413 struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
414 if (f == NULL)
415 return NULL;
416
417 f->f.version = GIT_FILTER_VERSION;
418 f->f.attributes = "crlf eol text";
419 f->f.initialize = NULL;
420 f->f.shutdown = git_filter_free;
421 f->f.check = crlf_check;
422 f->f.stream = crlf_stream;
423 f->f.cleanup = crlf_cleanup;
424
425 return (git_filter *)f;
426 }