]> git.proxmox.com Git - libgit2.git/blob - src/crlf.c
giterr_set: consistent error messages
[libgit2.git] / src / crlf.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "git2/attr.h"
9 #include "git2/blob.h"
10 #include "git2/index.h"
11 #include "git2/sys/filter.h"
12
13 #include "common.h"
14 #include "fileops.h"
15 #include "hash.h"
16 #include "filter.h"
17 #include "buf_text.h"
18 #include "repository.h"
19
/*
 * Line-ending settings resolved for a single file.  crlf_check() fills one
 * of these in and hands a heap copy to the apply step as the filter payload.
 */
struct crlf_attrs {
	int crlf_action; /* GIT_CRLF_* action derived from the text/crlf attributes */
	int eol;         /* GIT_EOL_* value derived from the eol attribute */
	int auto_crlf;   /* GIT_AUTO_CRLF_* value, loaded via GIT_CVAR_AUTO_CRLF */
	int safe_crlf;   /* GIT_SAFE_CRLF_* value, loaded via GIT_CVAR_SAFE_CRLF */
};
26
27 struct crlf_filter {
28 git_filter f;
29 };
30
31 static int check_crlf(const char *value)
32 {
33 if (GIT_ATTR_TRUE(value))
34 return GIT_CRLF_TEXT;
35
36 if (GIT_ATTR_FALSE(value))
37 return GIT_CRLF_BINARY;
38
39 if (GIT_ATTR_UNSPECIFIED(value))
40 return GIT_CRLF_GUESS;
41
42 if (strcmp(value, "input") == 0)
43 return GIT_CRLF_INPUT;
44
45 if (strcmp(value, "auto") == 0)
46 return GIT_CRLF_AUTO;
47
48 return GIT_CRLF_GUESS;
49 }
50
51 static int check_eol(const char *value)
52 {
53 if (GIT_ATTR_UNSPECIFIED(value))
54 return GIT_EOL_UNSET;
55
56 if (strcmp(value, "lf") == 0)
57 return GIT_EOL_LF;
58
59 if (strcmp(value, "crlf") == 0)
60 return GIT_EOL_CRLF;
61
62 return GIT_EOL_UNSET;
63 }
64
65 static int crlf_input_action(struct crlf_attrs *ca)
66 {
67 if (ca->crlf_action == GIT_CRLF_BINARY)
68 return GIT_CRLF_BINARY;
69
70 if (ca->eol == GIT_EOL_LF)
71 return GIT_CRLF_INPUT;
72
73 if (ca->eol == GIT_EOL_CRLF)
74 return GIT_CRLF_CRLF;
75
76 return ca->crlf_action;
77 }
78
79 static int has_cr_in_index(const git_filter_source *src)
80 {
81 git_repository *repo = git_filter_source_repo(src);
82 const char *path = git_filter_source_path(src);
83 git_index *index;
84 const git_index_entry *entry;
85 git_blob *blob;
86 const void *blobcontent;
87 git_off_t blobsize;
88 bool found_cr;
89
90 if (!path)
91 return false;
92
93 if (git_repository_index__weakptr(&index, repo) < 0) {
94 giterr_clear();
95 return false;
96 }
97
98 if (!(entry = git_index_get_bypath(index, path, 0)) &&
99 !(entry = git_index_get_bypath(index, path, 1)))
100 return false;
101
102 if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
103 return true;
104
105 if (git_blob_lookup(&blob, repo, &entry->id) < 0)
106 return false;
107
108 blobcontent = git_blob_rawcontent(blob);
109 blobsize = git_blob_rawsize(blob);
110 if (!git__is_sizet(blobsize))
111 blobsize = (size_t)-1;
112
113 found_cr = (blobcontent != NULL &&
114 blobsize > 0 &&
115 memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
116
117 git_blob_free(blob);
118 return found_cr;
119 }
120
121 static int crlf_apply_to_odb(
122 struct crlf_attrs *ca,
123 git_buf *to,
124 const git_buf *from,
125 const git_filter_source *src)
126 {
127 /* Empty file? Nothing to do */
128 if (!git_buf_len(from))
129 return 0;
130
131 /* Heuristics to see if we can skip the conversion.
132 * Straight from Core Git.
133 */
134 if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) {
135 git_buf_text_stats stats;
136
137 /* Check heuristics for binary vs text - returns true if binary */
138 if (git_buf_text_gather_stats(&stats, from, false))
139 return GIT_PASSTHROUGH;
140
141 /* If there are no CR characters to filter out, then just pass */
142 if (!stats.cr)
143 return GIT_PASSTHROUGH;
144
145 /* If safecrlf is enabled, sanity-check the result. */
146 if (stats.cr != stats.crlf || stats.lf != stats.crlf) {
147 switch (ca->safe_crlf) {
148 case GIT_SAFE_CRLF_FAIL:
149 giterr_set(
150 GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
151 git_filter_source_path(src));
152 return -1;
153 case GIT_SAFE_CRLF_WARN:
154 /* TODO: issue warning when warning API is available */;
155 break;
156 default:
157 break;
158 }
159 }
160
161 /*
162 * We're currently not going to even try to convert stuff
163 * that has bare CR characters. Does anybody do that crazy
164 * stuff?
165 */
166 if (stats.cr != stats.crlf)
167 return GIT_PASSTHROUGH;
168
169 if (ca->crlf_action == GIT_CRLF_GUESS) {
170 /*
171 * If the file in the index has any CR in it, do not convert.
172 * This is the new safer autocrlf handling.
173 */
174 if (has_cr_in_index(src))
175 return GIT_PASSTHROUGH;
176 }
177
178 if (!stats.cr)
179 return GIT_PASSTHROUGH;
180 }
181
182 /* Actually drop the carriage returns */
183 return git_buf_text_crlf_to_lf(to, from);
184 }
185
186 static const char *line_ending(struct crlf_attrs *ca)
187 {
188 switch (ca->crlf_action) {
189 case GIT_CRLF_BINARY:
190 case GIT_CRLF_INPUT:
191 return "\n";
192
193 case GIT_CRLF_CRLF:
194 return "\r\n";
195
196 case GIT_CRLF_GUESS:
197 if (ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
198 return "\n";
199 break;
200
201 case GIT_CRLF_AUTO:
202 case GIT_CRLF_TEXT:
203 break;
204
205 default:
206 goto line_ending_error;
207 }
208
209 if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
210 return "\r\n";
211 else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
212 return "\n";
213 else if (ca->eol == GIT_EOL_UNSET)
214 return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" : "\n";
215 else if (ca->eol == GIT_EOL_LF)
216 return "\n";
217 else if (ca->eol == GIT_EOL_CRLF)
218 return "\r\n";
219
220 line_ending_error:
221 giterr_set(GITERR_INVALID, "invalid input to line ending filter");
222 return NULL;
223 }
224
225 static int crlf_apply_to_workdir(
226 struct crlf_attrs *ca, git_buf *to, const git_buf *from)
227 {
228 git_buf_text_stats stats;
229 const char *workdir_ending = NULL;
230 bool is_binary;
231
232 /* Empty file? Nothing to do. */
233 if (git_buf_len(from) == 0)
234 return 0;
235
236 /* Determine proper line ending */
237 workdir_ending = line_ending(ca);
238 if (!workdir_ending)
239 return -1;
240
241 /* only LF->CRLF conversion is supported, do nothing on LF platforms */
242 if (strcmp(workdir_ending, "\r\n") != 0)
243 return GIT_PASSTHROUGH;
244
245 /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
246 is_binary = git_buf_text_gather_stats(&stats, from, false);
247
248 if (stats.lf == 0 || stats.lf == stats.crlf)
249 return GIT_PASSTHROUGH;
250
251 if (ca->crlf_action == GIT_CRLF_AUTO ||
252 ca->crlf_action == GIT_CRLF_GUESS) {
253
254 /* If we have any existing CR or CRLF line endings, do nothing */
255 if (ca->crlf_action == GIT_CRLF_GUESS &&
256 stats.cr > 0 && stats.crlf > 0)
257 return GIT_PASSTHROUGH;
258
259 /* If we have bare CR characters, do nothing */
260 if (stats.cr != stats.crlf)
261 return GIT_PASSTHROUGH;
262
263 /* Don't filter binary files */
264 if (is_binary)
265 return GIT_PASSTHROUGH;
266 }
267
268 return git_buf_text_lf_to_crlf(to, from);
269 }
270
271 static int crlf_check(
272 git_filter *self,
273 void **payload, /* points to NULL ptr on entry, may be set */
274 const git_filter_source *src,
275 const char **attr_values)
276 {
277 int error;
278 struct crlf_attrs ca;
279
280 GIT_UNUSED(self);
281
282 if (!attr_values) {
283 ca.crlf_action = GIT_CRLF_GUESS;
284 ca.eol = GIT_EOL_UNSET;
285 } else {
286 ca.crlf_action = check_crlf(attr_values[2]); /* text */
287 if (ca.crlf_action == GIT_CRLF_GUESS)
288 ca.crlf_action = check_crlf(attr_values[0]); /* clrf */
289 ca.eol = check_eol(attr_values[1]); /* eol */
290 }
291 ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
292 ca.safe_crlf = GIT_SAFE_CRLF_DEFAULT;
293
294 /*
295 * Use the core Git logic to see if we should perform CRLF for this file
296 * based on its attributes & the value of `core.autocrlf`
297 */
298 ca.crlf_action = crlf_input_action(&ca);
299
300 if (ca.crlf_action == GIT_CRLF_BINARY)
301 return GIT_PASSTHROUGH;
302
303 if (ca.crlf_action == GIT_CRLF_GUESS ||
304 ((ca.crlf_action == GIT_CRLF_AUTO || ca.crlf_action == GIT_CRLF_TEXT) &&
305 git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) {
306
307 error = git_repository__cvar(
308 &ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF);
309 if (error < 0)
310 return error;
311
312 if (ca.crlf_action == GIT_CRLF_GUESS &&
313 ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
314 return GIT_PASSTHROUGH;
315
316 if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT &&
317 git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
318 return GIT_PASSTHROUGH;
319 }
320
321 if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) {
322 error = git_repository__cvar(
323 &ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF);
324 if (error < 0)
325 return error;
326
327 /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
328 if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
329 ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
330 ca.safe_crlf = GIT_SAFE_CRLF_WARN;
331 }
332
333 *payload = git__malloc(sizeof(ca));
334 GITERR_CHECK_ALLOC(*payload);
335 memcpy(*payload, &ca, sizeof(ca));
336
337 return 0;
338 }
339
340 static int crlf_apply(
341 git_filter *self,
342 void **payload, /* may be read and/or set */
343 git_buf *to,
344 const git_buf *from,
345 const git_filter_source *src)
346 {
347 /* initialize payload in case `check` was bypassed */
348 if (!*payload) {
349 int error = crlf_check(self, payload, src, NULL);
350 if (error < 0)
351 return error;
352 }
353
354 if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
355 return crlf_apply_to_workdir(*payload, to, from);
356 else
357 return crlf_apply_to_odb(*payload, to, from, src);
358 }
359
360 static void crlf_cleanup(
361 git_filter *self,
362 void *payload)
363 {
364 GIT_UNUSED(self);
365 git__free(payload);
366 }
367
368 git_filter *git_crlf_filter_new(void)
369 {
370 struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
371 if (f == NULL)
372 return NULL;
373
374 f->f.version = GIT_FILTER_VERSION;
375 f->f.attributes = "crlf eol text";
376 f->f.initialize = NULL;
377 f->f.shutdown = git_filter_free;
378 f->f.check = crlf_check;
379 f->f.apply = crlf_apply;
380 f->f.cleanup = crlf_cleanup;
381
382 return (git_filter *)f;
383 }