]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved. | |
3 | * | |
4 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED | |
5 | * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. | |
6 | * | |
7 | * Permission is hereby granted to use or copy this program | |
8 | * for any purpose, provided the above notices are retained on all copies. | |
9 | * Permission to modify the code and to distribute modified code is granted, | |
10 | * provided the above notices are retained, and a notice that the code was | |
11 | * modified is included with the above copyright notice. | |
12 | * | |
13 | * Author: Hans-J. Boehm (boehm@parc.xerox.com) | |
14 | */ | |
15 | /* Boehm, October 5, 1995 4:20 pm PDT */ | |
16 | ||
17 | /* | |
18 | * Cords are immutable character strings. A number of operations | |
19 | * on long cords are much more efficient than their strings.h counterpart. | |
20 | * In particular, concatenation takes constant time independent of the length | |
21 | * of the arguments. (Cords are represented as trees, with internal | |
22 | * nodes representing concatenation and leaves consisting of either C | |
23 | * strings or a functional description of the string.) | |
24 | * | |
25 | * The following are reasonable applications of cords. They would perform | |
26 | * unacceptably if C strings were used: | |
27 | * - A compiler that produces assembly language output by repeatedly | |
28 | * concatenating instructions onto a cord representing the output file. | |
29 | * - A text editor that converts the input file to a cord, and then | |
30 | * performs editing operations by producing a new cord representing | |
31 | * the file after echa character change (and keeping the old ones in an | |
32 | * edit history) | |
33 | * | |
34 | * For optimal performance, cords should be built by | |
35 | * concatenating short sections. | |
36 | * This interface is designed for maximum compatibility with C strings. | |
37 | * ASCII NUL characters may be embedded in cords using CORD_from_fn. | |
38 | * This is handled correctly, but CORD_to_char_star will produce a string | |
39 | * with embedded NULs when given such a cord. | |
40 | * | |
41 | * This interface is fairly big, largely for performance reasons. | |
42 | * The most basic constants and functions: | |
43 | * | |
44 | * CORD - the type of a cord; | |
45 | * CORD_EMPTY - empty cord; | |
46 | * CORD_len(cord) - length of a cord; | |
47 | * CORD_cat(cord1,cord2) - concatenation of two cords; | |
48 | * CORD_substr(cord, start, len) - substring (or subcord); | |
49 | * CORD_pos i; CORD_FOR(i, cord) { ... CORD_pos_fetch(i) ... } - | |
50 | * examine each character in a cord. CORD_pos_fetch(i) is the char. | |
51 | * CORD_fetch(int i) - Retrieve i'th character (slowly). | |
52 | * CORD_cmp(cord1, cord2) - compare two cords. | |
53 | * CORD_from_file(FILE * f) - turn a read-only file into a cord. | |
54 | * CORD_to_char_star(cord) - convert to C string. | |
55 | * (Non-NULL C constant strings are cords.) | |
56 | * CORD_printf (etc.) - cord version of printf. Use %r for cords. | |
57 | */ | |
58 | # ifndef CORD_H | |
59 | ||
60 | # define CORD_H | |
61 | # include <stddef.h> | |
62 | # include <stdio.h> | |
63 | /* Cords have type const char *. This is cheating quite a bit, and not */ | |
64 | /* 100% portable. But it means that nonempty character string */ | |
65 | /* constants may be used as cords directly, provided the string is */ | |
66 | /* never modified in place. The empty cord is represented by, and */ | |
67 | /* can be written as, 0. */ | |
68 | ||
69 | typedef const char * CORD; | |
70 | ||
71 | /* An empty cord is always represented as nil */ | |
72 | # define CORD_EMPTY 0 | |
73 | ||
74 | /* Is a nonempty cord represented as a C string? */ | |
75 | #define CORD_IS_STRING(s) (*(s) != '\0') | |
76 | ||
77 | /* Concatenate two cords. If the arguments are C strings, they may */ | |
78 | /* not be subsequently altered. */ | |
79 | CORD CORD_cat(CORD x, CORD y); | |
80 | ||
81 | /* Concatenate a cord and a C string with known length. Except for the */ | |
82 | /* empty string case, this is a special case of CORD_cat. Since the */ | |
83 | /* length is known, it can be faster. */ | |
84 | /* The string y is shared with the resulting CORD. Hence it should */ | |
85 | /* not be altered by the caller. */ | |
86 | CORD CORD_cat_char_star(CORD x, const char * y, size_t leny); | |
87 | ||
88 | /* Compute the length of a cord */ | |
89 | size_t CORD_len(CORD x); | |
90 | ||
91 | /* Cords may be represented by functions defining the ith character */ | |
92 | typedef char (* CORD_fn)(size_t i, void * client_data); | |
93 | ||
94 | /* Turn a functional description into a cord. */ | |
95 | CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len); | |
96 | ||
97 | /* Return the substring (subcord really) of x with length at most n, */ | |
98 | /* starting at position i. (The initial character has position 0.) */ | |
99 | CORD CORD_substr(CORD x, size_t i, size_t n); | |
100 | ||
101 | /* Return the argument, but rebalanced to allow more efficient */ | |
102 | /* character retrieval, substring operations, and comparisons. */ | |
103 | /* This is useful only for cords that were built using repeated */ | |
104 | /* concatenation. Guarantees log time access to the result, unless */ | |
105 | /* x was obtained through a large number of repeated substring ops */ | |
106 | /* or the embedded functional descriptions take longer to evaluate. */ | |
107 | /* May reallocate significant parts of the cord. The argument is not */ | |
108 | /* modified; only the result is balanced. */ | |
109 | CORD CORD_balance(CORD x); | |
110 | ||
111 | /* The following traverse a cord by applying a function to each */ | |
112 | /* character. This is occasionally appropriate, especially where */ | |
113 | /* speed is crucial. But, since C doesn't have nested functions, */ | |
114 | /* clients of this sort of traversal are clumsy to write. Consider */ | |
115 | /* the functions that operate on cord positions instead. */ | |
116 | ||
117 | /* Function to iteratively apply to individual characters in cord. */ | |
118 | typedef int (* CORD_iter_fn)(char c, void * client_data); | |
119 | ||
120 | /* Function to apply to substrings of a cord. Each substring is a */ | |
121 | /* a C character string, not a general cord. */ | |
122 | typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data); | |
123 | # define CORD_NO_FN ((CORD_batched_iter_fn)0) | |
124 | ||
125 | /* Apply f1 to each character in the cord, in ascending order, */ | |
126 | /* starting at position i. If */ | |
127 | /* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by */ | |
128 | /* a single call to f2. The parameter f2 is provided only to allow */ | |
129 | /* some optimization by the client. This terminates when the right */ | |
130 | /* end of this string is reached, or when f1 or f2 return != 0. In the */ | |
131 | /* latter case CORD_iter returns != 0. Otherwise it returns 0. */ | |
132 | /* The specified value of i must be < CORD_len(x). */ | |
133 | int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1, | |
134 | CORD_batched_iter_fn f2, void * client_data); | |
135 | ||
136 | /* A simpler version that starts at 0, and without f2: */ | |
137 | int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data); | |
138 | # define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd) | |
139 | ||
140 | /* Similar to CORD_iter5, but end-to-beginning. No provisions for */ | |
141 | /* CORD_batched_iter_fn. */ | |
142 | int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data); | |
143 | ||
144 | /* A simpler version that starts at the end: */ | |
145 | int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data); | |
146 | ||
147 | /* Functions that operate on cord positions. The easy way to traverse */ | |
148 | /* cords. A cord position is logically a pair consisting of a cord */ | |
149 | /* and an index into that cord. But it is much faster to retrieve a */ | |
150 | /* charcter based on a position than on an index. Unfortunately, */ | |
151 | /* positions are big (order of a few 100 bytes), so allocate them with */ | |
152 | /* caution. */ | |
153 | /* Things in cord_pos.h should be treated as opaque, except as */ | |
154 | /* described below. Also note that */ | |
155 | /* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */ | |
156 | /* definitions. The former may evaluate their argument more than once. */ | |
157 | # include "private/cord_pos.h" | |
158 | ||
159 | /* | |
160 | Visible definitions from above: | |
161 | ||
162 | typedef <OPAQUE but fairly big> CORD_pos[1]; | |
163 | ||
164 | * Extract the cord from a position: | |
165 | CORD CORD_pos_to_cord(CORD_pos p); | |
166 | ||
167 | * Extract the current index from a position: | |
168 | size_t CORD_pos_to_index(CORD_pos p); | |
169 | ||
170 | * Fetch the character located at the given position: | |
171 | char CORD_pos_fetch(CORD_pos p); | |
172 | ||
173 | * Initialize the position to refer to the given cord and index. | |
174 | * Note that this is the most expensive function on positions: | |
175 | void CORD_set_pos(CORD_pos p, CORD x, size_t i); | |
176 | ||
177 | * Advance the position to the next character. | |
178 | * P must be initialized and valid. | |
179 | * Invalidates p if past end: | |
180 | void CORD_next(CORD_pos p); | |
181 | ||
182 | * Move the position to the preceding character. | |
183 | * P must be initialized and valid. | |
184 | * Invalidates p if past beginning: | |
185 | void CORD_prev(CORD_pos p); | |
186 | ||
187 | * Is the position valid, i.e. inside the cord? | |
188 | int CORD_pos_valid(CORD_pos p); | |
189 | */ | |
190 | # define CORD_FOR(pos, cord) \ | |
191 | for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos)) | |
192 | ||
193 | ||
194 | /* An out of memory handler to call. May be supplied by client. */ | |
195 | /* Must not return. */ | |
196 | extern void (* CORD_oom_fn)(void); | |
197 | ||
198 | /* Dump the representation of x to stdout in an implementation defined */ | |
199 | /* manner. Intended for debugging only. */ | |
200 | void CORD_dump(CORD x); | |
201 | ||
202 | /* The following could easily be implemented by the client. They are */ | |
203 | /* provided in cordxtra.c for convenience. */ | |
204 | ||
205 | /* Concatenate a character to the end of a cord. */ | |
206 | CORD CORD_cat_char(CORD x, char c); | |
207 | ||
208 | /* Concatenate n cords. */ | |
209 | CORD CORD_catn(int n, /* CORD */ ...); | |
210 | ||
211 | /* Return the character in CORD_substr(x, i, 1) */ | |
212 | char CORD_fetch(CORD x, size_t i); | |
213 | ||
214 | /* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y */ | |
215 | int CORD_cmp(CORD x, CORD y); | |
216 | ||
217 | /* A generalization that takes both starting positions for the */ | |
218 | /* comparison, and a limit on the number of characters to be compared. */ | |
219 | int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len); | |
220 | ||
221 | /* Find the first occurrence of s in x at position start or later. */ | |
222 | /* Return the position of the first character of s in x, or */ | |
223 | /* CORD_NOT_FOUND if there is none. */ | |
224 | size_t CORD_str(CORD x, size_t start, CORD s); | |
225 | ||
226 | /* Return a cord consisting of i copies of (possibly NUL) c. Dangerous */ | |
227 | /* in conjunction with CORD_to_char_star. */ | |
228 | /* The resulting representation takes constant space, independent of i. */ | |
229 | CORD CORD_chars(char c, size_t i); | |
230 | # define CORD_nul(i) CORD_chars('\0', (i)) | |
231 | ||
232 | /* Turn a file into cord. The file must be seekable. Its contents */ | |
233 | /* must remain constant. The file may be accessed as an immediate */ | |
234 | /* result of this call and/or as a result of subsequent accesses to */ | |
235 | /* the cord. Short files are likely to be immediately read, but */ | |
236 | /* long files are likely to be read on demand, possibly relying on */ | |
237 | /* stdio for buffering. */ | |
238 | /* We must have exclusive access to the descriptor f, i.e. we may */ | |
239 | /* read it at any time, and expect the file pointer to be */ | |
240 | /* where we left it. Normally this should be invoked as */ | |
241 | /* CORD_from_file(fopen(...)) */ | |
242 | /* CORD_from_file arranges to close the file descriptor when it is no */ | |
243 | /* longer needed (e.g. when the result becomes inaccessible). */ | |
244 | /* The file f must be such that ftell reflects the actual character */ | |
245 | /* position in the file, i.e. the number of characters that can be */ | |
246 | /* or were read with fread. On UNIX systems this is always true. On */ | |
247 | /* MS Windows systems, f must be opened in binary mode. */ | |
248 | CORD CORD_from_file(FILE * f); | |
249 | ||
250 | /* Equivalent to the above, except that the entire file will be read */ | |
251 | /* and the file pointer will be closed immediately. */ | |
252 | /* The binary mode restriction from above does not apply. */ | |
253 | CORD CORD_from_file_eager(FILE * f); | |
254 | ||
255 | /* Equivalent to the above, except that the file will be read on demand.*/ | |
256 | /* The binary mode restriction applies. */ | |
257 | CORD CORD_from_file_lazy(FILE * f); | |
258 | ||
259 | /* Turn a cord into a C string. The result shares no structure with */ | |
260 | /* x, and is thus modifiable. */ | |
261 | char * CORD_to_char_star(CORD x); | |
262 | ||
263 | /* Turn a C string into a CORD. The C string is copied, and so may */ | |
264 | /* subsequently be modified. */ | |
265 | CORD CORD_from_char_star(const char *s); | |
266 | ||
267 | /* Identical to the above, but the result may share structure with */ | |
268 | /* the argument and is thus not modifiable. */ | |
269 | const char * CORD_to_const_char_star(CORD x); | |
270 | ||
271 | /* Write a cord to a file, starting at the current position. No */ | |
272 | /* trailing NULs are newlines are added. */ | |
273 | /* Returns EOF if a write error occurs, 1 otherwise. */ | |
274 | int CORD_put(CORD x, FILE * f); | |
275 | ||
276 | /* "Not found" result for the following two functions. */ | |
277 | # define CORD_NOT_FOUND ((size_t)(-1)) | |
278 | ||
279 | /* A vague analog of strchr. Returns the position (an integer, not */ | |
280 | /* a pointer) of the first occurrence of (char) c inside x at position */ | |
281 | /* i or later. The value i must be < CORD_len(x). */ | |
282 | size_t CORD_chr(CORD x, size_t i, int c); | |
283 | ||
284 | /* A vague analog of strrchr. Returns index of the last occurrence */ | |
285 | /* of (char) c inside x at position i or earlier. The value i */ | |
286 | /* must be < CORD_len(x). */ | |
287 | size_t CORD_rchr(CORD x, size_t i, int c); | |
288 | ||
289 | ||
290 | /* The following are also not primitive, but are implemented in */ | |
291 | /* cordprnt.c. They provide functionality similar to the ANSI C */ | |
292 | /* functions with corresponding names, but with the following */ | |
293 | /* additions and changes: */ | |
294 | /* 1. A %r conversion specification specifies a CORD argument. Field */ | |
295 | /* width, precision, etc. have the same semantics as for %s. */ | |
296 | /* (Note that %c,%C, and %S were already taken.) */ | |
297 | /* 2. The format string is represented as a CORD. */ | |
298 | /* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st */ /* argument. Unlike their ANSI C versions, there is no need to guess */ | |
299 | /* the correct buffer size. */ | |
300 | /* 4. Most of the conversions are implement through the native */ | |
301 | /* vsprintf. Hence they are usually no faster, and */ | |
302 | /* idiosyncracies of the native printf are preserved. However, */ | |
303 | /* CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied; */ | |
304 | /* the result shares the original structure. This may make them */ | |
305 | /* very efficient in some unusual applications. */ | |
306 | /* The format string is copied. */ | |
307 | /* All functions return the number of characters generated or -1 on */ | |
308 | /* error. This complies with the ANSI standard, but is inconsistent */ | |
309 | /* with some older implementations of sprintf. */ | |
310 | ||
311 | /* The implementation of these is probably less portable than the rest */ | |
312 | /* of this package. */ | |
313 | ||
314 | #ifndef CORD_NO_IO | |
315 | ||
316 | #include <stdarg.h> | |
317 | ||
318 | int CORD_sprintf(CORD * out, CORD format, ...); | |
319 | int CORD_vsprintf(CORD * out, CORD format, va_list args); | |
320 | int CORD_fprintf(FILE * f, CORD format, ...); | |
321 | int CORD_vfprintf(FILE * f, CORD format, va_list args); | |
322 | int CORD_printf(CORD format, ...); | |
323 | int CORD_vprintf(CORD format, va_list args); | |
324 | ||
325 | #endif /* CORD_NO_IO */ | |
326 | ||
327 | # endif /* CORD_H */ |