]>
Commit | Line | Data |
---|---|---|
7bf87ab6 | 1 | /* |
359fc2d2 | 2 | * Copyright (C) the libgit2 contributors. All rights reserved. |
7bf87ab6 RB |
3 | * |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | #ifndef INCLUDE_buf_text_h__ | |
8 | #define INCLUDE_buf_text_h__ | |
9 | ||
eae0bfdc PP |
10 | #include "common.h" |
11 | ||
7bf87ab6 RB |
12 | #include "buffer.h" |
13 | ||
14 | typedef enum { /* UTF byte-order mark (BOM) types that may begin a text buffer */ | |
15 | GIT_BOM_NONE = 0, | |
16 | GIT_BOM_UTF8 = 1, | |
17 | GIT_BOM_UTF16_LE = 2, | |
18 | GIT_BOM_UTF16_BE = 3, | |
19 | GIT_BOM_UTF32_LE = 4, | |
20 | GIT_BOM_UTF32_BE = 5 | |
21 | } git_bom_t; | |
22 | ||
23 | typedef struct { /* text statistics, filled in by git_buf_text_gather_stats */ | |
24 | git_bom_t bom; /* BOM found at head of text */ | |
25 | unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */ | |
26 | unsigned int printable, nonprintable; /* printable / non-printable counts; these are just approximations! */ | |
27 | } git_buf_text_stats; | |
28 | ||
29 | /** | |
30 | * Append string to buffer, prefixing each character from `esc_chars` with | |
31 | * `esc_with` string. | |
32 | * | |
33 | * @param buf Buffer to append data to | |
34 | * @param string String to escape and append | |
35 | * @param esc_chars Characters to be escaped | |
36 | * @param esc_with String to insert in front of each found character | |
37 | * @return 0 on success, <0 on failure (probably allocation problem) | |
38 | */ | |
39 | extern int git_buf_text_puts_escaped( | |
40 | git_buf *buf, | |
41 | const char *string, | |
42 | const char *esc_chars, | |
43 | const char *esc_with); | |
44 | ||
45 | /** | |
46 | * Append string to buffer, prefixing each regex-special character with a backslash | |
47 | */ | |
48 | GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string) | |
49 | { | |
50 | return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\"); | |
51 | } | |
52 | ||
53 | /** | |
54 | * Unescape all characters in a buffer in place | |
55 | * | |
56 | * I.e. remove backslashes | |
57 | */ | |
58 | extern void git_buf_text_unescape(git_buf *buf); | |
59 | ||
3658e81e | 60 | /** |
5a76ad35 | 61 | * Replace all \r\n with \n, reading from `src` and writing the result to `tgt`. |
3658e81e | 62 | * |
8293c8f9 | 63 | * @return 0 on success, -1 on memory error |
3658e81e RB |
64 | */ |
65 | extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src); | |
66 | ||
67 | /** | |
0cf77103 | 68 | * Replace all \n with \r\n, reading from `src` and writing the result to `tgt`. Does not modify existing \r\n. |
3658e81e | 69 | * |
0cf77103 | 70 | * @return 0 on success, -1 on memory error |
3658e81e RB |
71 | */ |
72 | extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src); | |
73 | ||
7bf87ab6 RB |
74 | /** |
75 | * Fill buffer with the common prefix of an array of strings | |
76 | * | |
77 | * Buffer will be set to empty if there is no common prefix | |
78 | */ | |
79 | extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs); | |
80 | ||
81 | /** | |
82 | * Check quickly if buffer looks like it contains binary data | |
83 | * | |
84 | * @param buf Buffer to check | |
85 | * @return true if buffer looks like non-text data | |
86 | */ | |
87 | extern bool git_buf_text_is_binary(const git_buf *buf); | |
88 | ||
0d65acad RB |
89 | /** |
90 | * Check quickly if buffer contains a NUL byte | |
91 | * | |
92 | * @param buf Buffer to check | |
93 | * @return true if buffer contains a NUL byte | |
94 | */ | |
95 | extern bool git_buf_text_contains_nul(const git_buf *buf); | |
96 | ||
7bf87ab6 RB |
97 | /** |
98 | * Check if a buffer begins with a UTF BOM | |
99 | * | |
100 | * @param bom Set to the type of BOM detected or GIT_BOM_NONE | |
101 | * @param buf Buffer in which to check the first bytes for a BOM | |
7bf87ab6 RB |
102 | * @return Number of bytes of BOM data (or 0 if no BOM found) |
103 | */ | |
eae0bfdc | 104 | extern int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf); |
7bf87ab6 RB |
105 | |
106 | /** | |
107 | * Gather stats for a piece of text | |
108 | * | |
109 | * Fill the `stats` structure with counts of unreadable characters, carriage | |
110 | * returns, etc, so it can be used in heuristics. This automatically skips | |
111 | * a trailing EOF (\032 character). Also it will look for a BOM at the | |
112 | * start of the text and can be told to skip that as well. | |
113 | * | |
114 | * @param stats Structure to be filled in | |
115 | * @param buf Text to process | |
116 | * @param skip_bom Exclude leading BOM from stats if true | |
117 | * @return true if the buffer heuristically looks like binary data | |
118 | */ | |
119 | extern bool git_buf_text_gather_stats( | |
120 | git_buf_text_stats *stats, const git_buf *buf, bool skip_bom); | |
121 | ||
122 | #endif |