]>
Commit | Line | Data |
---|---|---|
7bf87ab6 RB |
1 | /* |
2 | * Copyright (C) 2009-2012 the libgit2 contributors | |
3 | * | |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | #include "buf_text.h" | |
8 | ||
9 | int git_buf_text_puts_escaped( | |
10 | git_buf *buf, | |
11 | const char *string, | |
12 | const char *esc_chars, | |
13 | const char *esc_with) | |
14 | { | |
15 | const char *scan; | |
16 | size_t total = 0, esc_len = strlen(esc_with), count; | |
17 | ||
18 | if (!string) | |
19 | return 0; | |
20 | ||
21 | for (scan = string; *scan; ) { | |
22 | /* count run of non-escaped characters */ | |
23 | count = strcspn(scan, esc_chars); | |
24 | total += count; | |
25 | scan += count; | |
26 | /* count run of escaped characters */ | |
27 | count = strspn(scan, esc_chars); | |
28 | total += count * (esc_len + 1); | |
29 | scan += count; | |
30 | } | |
31 | ||
32 | if (git_buf_grow(buf, buf->size + total + 1) < 0) | |
33 | return -1; | |
34 | ||
35 | for (scan = string; *scan; ) { | |
36 | count = strcspn(scan, esc_chars); | |
37 | ||
38 | memmove(buf->ptr + buf->size, scan, count); | |
39 | scan += count; | |
40 | buf->size += count; | |
41 | ||
42 | for (count = strspn(scan, esc_chars); count > 0; --count) { | |
43 | /* copy escape sequence */ | |
44 | memmove(buf->ptr + buf->size, esc_with, esc_len); | |
45 | buf->size += esc_len; | |
46 | /* copy character to be escaped */ | |
47 | buf->ptr[buf->size] = *scan; | |
48 | buf->size++; | |
49 | scan++; | |
50 | } | |
51 | } | |
52 | ||
53 | buf->ptr[buf->size] = '\0'; | |
54 | ||
55 | return 0; | |
56 | } | |
57 | ||
58 | void git_buf_text_unescape(git_buf *buf) | |
59 | { | |
60 | buf->size = git__unescape(buf->ptr); | |
61 | } | |
62 | ||
63 | int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) | |
64 | { | |
65 | size_t i; | |
66 | const char *str, *pfx; | |
67 | ||
68 | git_buf_clear(buf); | |
69 | ||
70 | if (!strings || !strings->count) | |
71 | return 0; | |
72 | ||
73 | /* initialize common prefix to first string */ | |
74 | if (git_buf_sets(buf, strings->strings[0]) < 0) | |
75 | return -1; | |
76 | ||
77 | /* go through the rest of the strings, truncating to shared prefix */ | |
78 | for (i = 1; i < strings->count; ++i) { | |
79 | ||
80 | for (str = strings->strings[i], pfx = buf->ptr; | |
81 | *str && *str == *pfx; str++, pfx++) | |
82 | /* scanning */; | |
83 | ||
84 | git_buf_truncate(buf, pfx - buf->ptr); | |
85 | ||
86 | if (!buf->size) | |
87 | break; | |
88 | } | |
89 | ||
90 | return 0; | |
91 | } | |
92 | ||
93 | bool git_buf_text_is_binary(const git_buf *buf) | |
94 | { | |
95 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
96 | int printable = 0, nonprintable = 0; | |
97 | ||
98 | while (scan < end) { | |
99 | unsigned char c = *scan++; | |
100 | ||
101 | if (c > 0x1F && c < 0x7F) | |
102 | printable++; | |
103 | else if (c == '\0') | |
104 | return true; | |
105 | else if (!git__isspace(c)) | |
106 | nonprintable++; | |
107 | } | |
108 | ||
109 | return ((printable >> 7) < nonprintable); | |
110 | } | |
111 | ||
112 | int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset) | |
113 | { | |
114 | const char *ptr; | |
115 | size_t len; | |
116 | ||
9ff07c24 | 117 | *bom = GIT_BOM_NONE; |
7bf87ab6 RB |
118 | /* need at least 2 bytes after offset to look for any BOM */ |
119 | if (buf->size < offset + 2) | |
120 | return 0; | |
121 | ||
122 | ptr = buf->ptr + offset; | |
123 | len = buf->size - offset; | |
124 | ||
125 | switch (*ptr++) { | |
126 | case 0: | |
127 | if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { | |
128 | *bom = GIT_BOM_UTF32_BE; | |
129 | return 4; | |
130 | } | |
131 | break; | |
132 | case '\xEF': | |
133 | if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { | |
134 | *bom = GIT_BOM_UTF8; | |
135 | return 3; | |
136 | } | |
137 | break; | |
138 | case '\xFE': | |
139 | if (*ptr == '\xFF') { | |
140 | *bom = GIT_BOM_UTF16_BE; | |
141 | return 2; | |
142 | } | |
143 | break; | |
144 | case '\xFF': | |
145 | if (*ptr != '\xFE') | |
146 | break; | |
147 | if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { | |
148 | *bom = GIT_BOM_UTF32_LE; | |
149 | return 4; | |
150 | } else { | |
151 | *bom = GIT_BOM_UTF16_LE; | |
152 | return 2; | |
153 | } | |
154 | break; | |
155 | default: | |
156 | break; | |
157 | } | |
158 | ||
159 | return 0; | |
160 | } | |
161 | ||
162 | bool git_buf_text_gather_stats( | |
163 | git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) | |
164 | { | |
165 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
166 | int skip; | |
167 | ||
168 | memset(stats, 0, sizeof(*stats)); | |
169 | ||
170 | /* BOM detection */ | |
171 | skip = git_buf_text_detect_bom(&stats->bom, buf, 0); | |
172 | if (skip_bom) | |
173 | scan += skip; | |
174 | ||
175 | /* Ignore EOF character */ | |
176 | if (buf->size > 0 && end[-1] == '\032') | |
177 | end--; | |
178 | ||
179 | /* Counting loop */ | |
180 | while (scan < end) { | |
181 | unsigned char c = *scan++; | |
182 | ||
183 | if ((c > 0x1F && c < 0x7F) || c > 0x9f) | |
184 | stats->printable++; | |
185 | else switch (c) { | |
186 | case '\0': | |
187 | stats->nul++; | |
188 | stats->nonprintable++; | |
189 | break; | |
190 | case '\n': | |
191 | stats->lf++; | |
192 | break; | |
193 | case '\r': | |
194 | stats->cr++; | |
195 | if (scan < end && *scan == '\n') | |
196 | stats->crlf++; | |
197 | break; | |
198 | case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ | |
199 | stats->printable++; | |
200 | break; | |
201 | default: | |
202 | stats->nonprintable++; | |
203 | break; | |
204 | } | |
205 | } | |
206 | ||
207 | return (stats->nul > 0 || | |
208 | ((stats->printable >> 7) < stats->nonprintable)); | |
209 | } |