]> git.proxmox.com Git - libgit2.git/blob - src/buf_text.c
Merge pull request #1204 from arrbee/diff-blob-to-buffer
[libgit2.git] / src / buf_text.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7 #include "buf_text.h"
8
9 int git_buf_text_puts_escaped(
10 git_buf *buf,
11 const char *string,
12 const char *esc_chars,
13 const char *esc_with)
14 {
15 const char *scan;
16 size_t total = 0, esc_len = strlen(esc_with), count;
17
18 if (!string)
19 return 0;
20
21 for (scan = string; *scan; ) {
22 /* count run of non-escaped characters */
23 count = strcspn(scan, esc_chars);
24 total += count;
25 scan += count;
26 /* count run of escaped characters */
27 count = strspn(scan, esc_chars);
28 total += count * (esc_len + 1);
29 scan += count;
30 }
31
32 if (git_buf_grow(buf, buf->size + total + 1) < 0)
33 return -1;
34
35 for (scan = string; *scan; ) {
36 count = strcspn(scan, esc_chars);
37
38 memmove(buf->ptr + buf->size, scan, count);
39 scan += count;
40 buf->size += count;
41
42 for (count = strspn(scan, esc_chars); count > 0; --count) {
43 /* copy escape sequence */
44 memmove(buf->ptr + buf->size, esc_with, esc_len);
45 buf->size += esc_len;
46 /* copy character to be escaped */
47 buf->ptr[buf->size] = *scan;
48 buf->size++;
49 scan++;
50 }
51 }
52
53 buf->ptr[buf->size] = '\0';
54
55 return 0;
56 }
57
58 void git_buf_text_unescape(git_buf *buf)
59 {
60 buf->size = git__unescape(buf->ptr);
61 }
62
63 int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
64 {
65 size_t i;
66 const char *str, *pfx;
67
68 git_buf_clear(buf);
69
70 if (!strings || !strings->count)
71 return 0;
72
73 /* initialize common prefix to first string */
74 if (git_buf_sets(buf, strings->strings[0]) < 0)
75 return -1;
76
77 /* go through the rest of the strings, truncating to shared prefix */
78 for (i = 1; i < strings->count; ++i) {
79
80 for (str = strings->strings[i], pfx = buf->ptr;
81 *str && *str == *pfx; str++, pfx++)
82 /* scanning */;
83
84 git_buf_truncate(buf, pfx - buf->ptr);
85
86 if (!buf->size)
87 break;
88 }
89
90 return 0;
91 }
92
93 bool git_buf_text_is_binary(const git_buf *buf)
94 {
95 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
96 int printable = 0, nonprintable = 0;
97
98 while (scan < end) {
99 unsigned char c = *scan++;
100
101 if (c > 0x1F && c < 0x7F)
102 printable++;
103 else if (c == '\0')
104 return true;
105 else if (!git__isspace(c))
106 nonprintable++;
107 }
108
109 return ((printable >> 7) < nonprintable);
110 }
111
112 int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
113 {
114 const char *ptr;
115 size_t len;
116
117 *bom = GIT_BOM_NONE;
118 /* need at least 2 bytes after offset to look for any BOM */
119 if (buf->size < offset + 2)
120 return 0;
121
122 ptr = buf->ptr + offset;
123 len = buf->size - offset;
124
125 switch (*ptr++) {
126 case 0:
127 if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
128 *bom = GIT_BOM_UTF32_BE;
129 return 4;
130 }
131 break;
132 case '\xEF':
133 if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
134 *bom = GIT_BOM_UTF8;
135 return 3;
136 }
137 break;
138 case '\xFE':
139 if (*ptr == '\xFF') {
140 *bom = GIT_BOM_UTF16_BE;
141 return 2;
142 }
143 break;
144 case '\xFF':
145 if (*ptr != '\xFE')
146 break;
147 if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
148 *bom = GIT_BOM_UTF32_LE;
149 return 4;
150 } else {
151 *bom = GIT_BOM_UTF16_LE;
152 return 2;
153 }
154 break;
155 default:
156 break;
157 }
158
159 return 0;
160 }
161
162 bool git_buf_text_gather_stats(
163 git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
164 {
165 const char *scan = buf->ptr, *end = buf->ptr + buf->size;
166 int skip;
167
168 memset(stats, 0, sizeof(*stats));
169
170 /* BOM detection */
171 skip = git_buf_text_detect_bom(&stats->bom, buf, 0);
172 if (skip_bom)
173 scan += skip;
174
175 /* Ignore EOF character */
176 if (buf->size > 0 && end[-1] == '\032')
177 end--;
178
179 /* Counting loop */
180 while (scan < end) {
181 unsigned char c = *scan++;
182
183 if ((c > 0x1F && c < 0x7F) || c > 0x9f)
184 stats->printable++;
185 else switch (c) {
186 case '\0':
187 stats->nul++;
188 stats->nonprintable++;
189 break;
190 case '\n':
191 stats->lf++;
192 break;
193 case '\r':
194 stats->cr++;
195 if (scan < end && *scan == '\n')
196 stats->crlf++;
197 break;
198 case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
199 stats->printable++;
200 break;
201 default:
202 stats->nonprintable++;
203 break;
204 }
205 }
206
207 return (stats->nul > 0 ||
208 ((stats->printable >> 7) < stats->nonprintable));
209 }