]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | ||
10 | #ifndef LLVM_ADT_TWINE_H | |
11 | #define LLVM_ADT_TWINE_H | |
12 | ||
13 | #include "llvm/ADT/StringRef.h" | |
14 | #include "llvm/Support/DataTypes.h" | |
15 | #include "llvm/Support/ErrorHandling.h" | |
16 | #include <cassert> | |
17 | #include <string> | |
18 | ||
19 | namespace llvm { | |
20 | template <typename T> | |
21 | class SmallVectorImpl; | |
22 | class StringRef; | |
23 | class raw_ostream; | |
24 | ||
25 | /// Twine - A lightweight data structure for efficiently representing the | |
26 | /// concatenation of temporary values as strings. | |
27 | /// | |
28 | /// A Twine is a kind of rope, it represents a concatenated string using a | |
29 | /// binary-tree, where the string is the preorder of the nodes. Since the | |
30 | /// Twine can be efficiently rendered into a buffer when its result is used, | |
31 | /// it avoids the cost of generating temporary values for intermediate string | |
32 | /// results -- particularly in cases when the Twine result is never | |
33 | /// required. By explicitly tracking the type of leaf nodes, we can also avoid | |
34 | /// the creation of temporary strings for conversion operations (such as | |
35 | /// appending an integer to a string). | |
36 | /// | |
37 | /// A Twine is not intended for use directly and should not be stored; its | |
38 | /// implementation relies on the ability to store pointers to temporary stack | |
39 | /// objects which may be deallocated at the end of a statement. Twines should | |
40 | /// only be accepted as const references in arguments, when an API wishes | |
41 | /// to accept possibly-concatenated strings. | |
42 | /// | |
43 | /// Twines support a special 'null' value, which always concatenates to form | |
44 | /// itself, and renders as an empty string. This can be returned from APIs to | |
45 | /// effectively nullify any concatenations performed on the result. | |
46 | /// | |
47 | /// \b Implementation | |
48 | /// | |
49 | /// Given the nature of a Twine, it is not possible for the Twine's | |
50 | /// concatenation method to construct interior nodes; the result must be | |
51 | /// represented inside the returned value. For this reason a Twine object | |
52 | /// actually holds two values, the left- and right-hand sides of a | |
53 | /// concatenation. We also have nullary Twine objects, which are effectively | |
54 | /// sentinel values that represent empty strings. | |
55 | /// | |
56 | /// Thus, a Twine can effectively have zero, one, or two children. The \see | |
57 | /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for | |
58 | /// testing the number of children. | |
59 | /// | |
60 | /// We maintain a number of invariants on Twine objects (FIXME: Why): | |
61 | /// - Nullary twines are always represented with their Kind on the left-hand | |
62 | /// side, and the Empty kind on the right-hand side. | |
63 | /// - Unary twines are always represented with the value on the left-hand | |
64 | /// side, and the Empty kind on the right-hand side. | |
65 | /// - If a Twine has another Twine as a child, that child should always be | |
66 | /// binary (otherwise it could have been folded into the parent). | |
67 | /// | |
68 | /// These invariants are checked by \see isValid(). | |
69 | /// | |
70 | /// \b Efficiency Considerations | |
71 | /// | |
72 | /// The Twine is designed to yield efficient and small code for common | |
73 | /// situations. For this reason, the concat() method is inlined so that | |
74 | /// concatenations of leaf nodes can be optimized into stores directly into a | |
75 | /// single stack allocated object. | |
76 | /// | |
77 | /// In practice, not all compilers can be trusted to optimize concat() fully, | |
78 | /// so we provide two additional methods (and accompanying operator+ | |
79 | /// overloads) to guarantee that particularly important cases (cstring plus | |
80 | /// StringRef) codegen as desired. | |
81 | class Twine { | |
82 | /// NodeKind - Represent the type of an argument. | |
85aaf69f | 83 | enum NodeKind : unsigned char { |
223e47cc LB |
84 | /// An empty string; the result of concatenating anything with it is also |
85 | /// empty. | |
86 | NullKind, | |
87 | ||
88 | /// The empty string. | |
89 | EmptyKind, | |
90 | ||
91 | /// A pointer to a Twine instance. | |
92 | TwineKind, | |
93 | ||
94 | /// A pointer to a C string instance. | |
95 | CStringKind, | |
96 | ||
97 | /// A pointer to an std::string instance. | |
98 | StdStringKind, | |
99 | ||
100 | /// A pointer to a StringRef instance. | |
101 | StringRefKind, | |
102 | ||
103 | /// A char value reinterpreted as a pointer, to render as a character. | |
104 | CharKind, | |
105 | ||
106 | /// An unsigned int value reinterpreted as a pointer, to render as an | |
107 | /// unsigned decimal integer. | |
108 | DecUIKind, | |
109 | ||
110 | /// An int value reinterpreted as a pointer, to render as a signed | |
111 | /// decimal integer. | |
112 | DecIKind, | |
113 | ||
114 | /// A pointer to an unsigned long value, to render as an unsigned decimal | |
115 | /// integer. | |
116 | DecULKind, | |
117 | ||
118 | /// A pointer to a long value, to render as a signed decimal integer. | |
119 | DecLKind, | |
120 | ||
121 | /// A pointer to an unsigned long long value, to render as an unsigned | |
122 | /// decimal integer. | |
123 | DecULLKind, | |
124 | ||
125 | /// A pointer to a long long value, to render as a signed decimal integer. | |
126 | DecLLKind, | |
127 | ||
128 | /// A pointer to a uint64_t value, to render as an unsigned hexadecimal | |
129 | /// integer. | |
130 | UHexKind | |
131 | }; | |
132 | ||
133 | union Child | |
134 | { | |
135 | const Twine *twine; | |
136 | const char *cString; | |
137 | const std::string *stdString; | |
138 | const StringRef *stringRef; | |
139 | char character; | |
140 | unsigned int decUI; | |
141 | int decI; | |
142 | const unsigned long *decUL; | |
143 | const long *decL; | |
144 | const unsigned long long *decULL; | |
145 | const long long *decLL; | |
146 | const uint64_t *uHex; | |
147 | }; | |
148 | ||
149 | private: | |
150 | /// LHS - The prefix in the concatenation, which may be uninitialized for | |
151 | /// Null or Empty kinds. | |
152 | Child LHS; | |
153 | /// RHS - The suffix in the concatenation, which may be uninitialized for | |
154 | /// Null or Empty kinds. | |
155 | Child RHS; | |
223e47cc | 156 | /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). |
85aaf69f SL |
157 | NodeKind LHSKind; |
158 | /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). | |
159 | NodeKind RHSKind; | |
223e47cc LB |
160 | |
161 | private: | |
162 | /// Construct a nullary twine; the kind must be NullKind or EmptyKind. | |
163 | explicit Twine(NodeKind Kind) | |
164 | : LHSKind(Kind), RHSKind(EmptyKind) { | |
165 | assert(isNullary() && "Invalid kind!"); | |
166 | } | |
167 | ||
168 | /// Construct a binary twine. | |
169 | explicit Twine(const Twine &_LHS, const Twine &_RHS) | |
170 | : LHSKind(TwineKind), RHSKind(TwineKind) { | |
171 | LHS.twine = &_LHS; | |
172 | RHS.twine = &_RHS; | |
173 | assert(isValid() && "Invalid twine!"); | |
174 | } | |
175 | ||
176 | /// Construct a twine from explicit values. | |
177 | explicit Twine(Child _LHS, NodeKind _LHSKind, | |
178 | Child _RHS, NodeKind _RHSKind) | |
179 | : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) { | |
180 | assert(isValid() && "Invalid twine!"); | |
181 | } | |
182 | ||
1a4d82fc JJ |
183 | /// Since the intended use of twines is as temporary objects, assignments |
184 | /// when concatenating might cause undefined behavior or stack corruptions | |
185 | Twine &operator=(const Twine &Other) LLVM_DELETED_FUNCTION; | |
186 | ||
223e47cc LB |
187 | /// isNull - Check for the null twine. |
188 | bool isNull() const { | |
189 | return getLHSKind() == NullKind; | |
190 | } | |
191 | ||
192 | /// isEmpty - Check for the empty twine. | |
193 | bool isEmpty() const { | |
194 | return getLHSKind() == EmptyKind; | |
195 | } | |
196 | ||
197 | /// isNullary - Check if this is a nullary twine (null or empty). | |
198 | bool isNullary() const { | |
199 | return isNull() || isEmpty(); | |
200 | } | |
201 | ||
202 | /// isUnary - Check if this is a unary twine. | |
203 | bool isUnary() const { | |
204 | return getRHSKind() == EmptyKind && !isNullary(); | |
205 | } | |
206 | ||
207 | /// isBinary - Check if this is a binary twine. | |
208 | bool isBinary() const { | |
209 | return getLHSKind() != NullKind && getRHSKind() != EmptyKind; | |
210 | } | |
211 | ||
212 | /// isValid - Check if this is a valid twine (satisfying the invariants on | |
213 | /// order and number of arguments). | |
214 | bool isValid() const { | |
215 | // Nullary twines always have Empty on the RHS. | |
216 | if (isNullary() && getRHSKind() != EmptyKind) | |
217 | return false; | |
218 | ||
219 | // Null should never appear on the RHS. | |
220 | if (getRHSKind() == NullKind) | |
221 | return false; | |
222 | ||
223 | // The RHS cannot be non-empty if the LHS is empty. | |
224 | if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) | |
225 | return false; | |
226 | ||
227 | // A twine child should always be binary. | |
228 | if (getLHSKind() == TwineKind && | |
229 | !LHS.twine->isBinary()) | |
230 | return false; | |
231 | if (getRHSKind() == TwineKind && | |
232 | !RHS.twine->isBinary()) | |
233 | return false; | |
234 | ||
235 | return true; | |
236 | } | |
237 | ||
238 | /// getLHSKind - Get the NodeKind of the left-hand side. | |
85aaf69f | 239 | NodeKind getLHSKind() const { return LHSKind; } |
223e47cc | 240 | |
1a4d82fc | 241 | /// getRHSKind - Get the NodeKind of the right-hand side. |
85aaf69f | 242 | NodeKind getRHSKind() const { return RHSKind; } |
223e47cc LB |
243 | |
244 | /// printOneChild - Print one child from a twine. | |
245 | void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; | |
246 | ||
247 | /// printOneChildRepr - Print the representation of one child from a twine. | |
248 | void printOneChildRepr(raw_ostream &OS, Child Ptr, | |
249 | NodeKind Kind) const; | |
250 | ||
251 | public: | |
252 | /// @name Constructors | |
253 | /// @{ | |
254 | ||
255 | /// Construct from an empty string. | |
256 | /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) { | |
257 | assert(isValid() && "Invalid twine!"); | |
258 | } | |
259 | ||
260 | /// Construct from a C string. | |
261 | /// | |
262 | /// We take care here to optimize "" into the empty twine -- this will be | |
263 | /// optimized out for string constants. This allows Twine arguments have | |
264 | /// default "" values, without introducing unnecessary string constants. | |
265 | /*implicit*/ Twine(const char *Str) | |
266 | : RHSKind(EmptyKind) { | |
267 | if (Str[0] != '\0') { | |
268 | LHS.cString = Str; | |
269 | LHSKind = CStringKind; | |
270 | } else | |
271 | LHSKind = EmptyKind; | |
272 | ||
273 | assert(isValid() && "Invalid twine!"); | |
274 | } | |
275 | ||
276 | /// Construct from an std::string. | |
277 | /*implicit*/ Twine(const std::string &Str) | |
278 | : LHSKind(StdStringKind), RHSKind(EmptyKind) { | |
279 | LHS.stdString = &Str; | |
280 | assert(isValid() && "Invalid twine!"); | |
281 | } | |
282 | ||
283 | /// Construct from a StringRef. | |
284 | /*implicit*/ Twine(const StringRef &Str) | |
285 | : LHSKind(StringRefKind), RHSKind(EmptyKind) { | |
286 | LHS.stringRef = &Str; | |
287 | assert(isValid() && "Invalid twine!"); | |
288 | } | |
289 | ||
290 | /// Construct from a char. | |
291 | explicit Twine(char Val) | |
292 | : LHSKind(CharKind), RHSKind(EmptyKind) { | |
293 | LHS.character = Val; | |
294 | } | |
295 | ||
296 | /// Construct from a signed char. | |
297 | explicit Twine(signed char Val) | |
298 | : LHSKind(CharKind), RHSKind(EmptyKind) { | |
299 | LHS.character = static_cast<char>(Val); | |
300 | } | |
301 | ||
302 | /// Construct from an unsigned char. | |
303 | explicit Twine(unsigned char Val) | |
304 | : LHSKind(CharKind), RHSKind(EmptyKind) { | |
305 | LHS.character = static_cast<char>(Val); | |
306 | } | |
307 | ||
308 | /// Construct a twine to print \p Val as an unsigned decimal integer. | |
309 | explicit Twine(unsigned Val) | |
310 | : LHSKind(DecUIKind), RHSKind(EmptyKind) { | |
311 | LHS.decUI = Val; | |
312 | } | |
313 | ||
314 | /// Construct a twine to print \p Val as a signed decimal integer. | |
315 | explicit Twine(int Val) | |
316 | : LHSKind(DecIKind), RHSKind(EmptyKind) { | |
317 | LHS.decI = Val; | |
318 | } | |
319 | ||
320 | /// Construct a twine to print \p Val as an unsigned decimal integer. | |
321 | explicit Twine(const unsigned long &Val) | |
322 | : LHSKind(DecULKind), RHSKind(EmptyKind) { | |
323 | LHS.decUL = &Val; | |
324 | } | |
325 | ||
326 | /// Construct a twine to print \p Val as a signed decimal integer. | |
327 | explicit Twine(const long &Val) | |
328 | : LHSKind(DecLKind), RHSKind(EmptyKind) { | |
329 | LHS.decL = &Val; | |
330 | } | |
331 | ||
332 | /// Construct a twine to print \p Val as an unsigned decimal integer. | |
333 | explicit Twine(const unsigned long long &Val) | |
334 | : LHSKind(DecULLKind), RHSKind(EmptyKind) { | |
335 | LHS.decULL = &Val; | |
336 | } | |
337 | ||
338 | /// Construct a twine to print \p Val as a signed decimal integer. | |
339 | explicit Twine(const long long &Val) | |
340 | : LHSKind(DecLLKind), RHSKind(EmptyKind) { | |
341 | LHS.decLL = &Val; | |
342 | } | |
343 | ||
344 | // FIXME: Unfortunately, to make sure this is as efficient as possible we | |
345 | // need extra binary constructors from particular types. We can't rely on | |
346 | // the compiler to be smart enough to fold operator+()/concat() down to the | |
347 | // right thing. Yet. | |
348 | ||
349 | /// Construct as the concatenation of a C string and a StringRef. | |
350 | /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS) | |
351 | : LHSKind(CStringKind), RHSKind(StringRefKind) { | |
352 | LHS.cString = _LHS; | |
353 | RHS.stringRef = &_RHS; | |
354 | assert(isValid() && "Invalid twine!"); | |
355 | } | |
356 | ||
357 | /// Construct as the concatenation of a StringRef and a C string. | |
358 | /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS) | |
359 | : LHSKind(StringRefKind), RHSKind(CStringKind) { | |
360 | LHS.stringRef = &_LHS; | |
361 | RHS.cString = _RHS; | |
362 | assert(isValid() && "Invalid twine!"); | |
363 | } | |
364 | ||
365 | /// Create a 'null' string, which is an empty string that always | |
366 | /// concatenates to form another empty string. | |
367 | static Twine createNull() { | |
368 | return Twine(NullKind); | |
369 | } | |
370 | ||
371 | /// @} | |
372 | /// @name Numeric Conversions | |
373 | /// @{ | |
374 | ||
375 | // Construct a twine to print \p Val as an unsigned hexadecimal integer. | |
376 | static Twine utohexstr(const uint64_t &Val) { | |
377 | Child LHS, RHS; | |
378 | LHS.uHex = &Val; | |
1a4d82fc | 379 | RHS.twine = nullptr; |
223e47cc LB |
380 | return Twine(LHS, UHexKind, RHS, EmptyKind); |
381 | } | |
382 | ||
383 | /// @} | |
384 | /// @name Predicate Operations | |
385 | /// @{ | |
386 | ||
387 | /// isTriviallyEmpty - Check if this twine is trivially empty; a false | |
388 | /// return value does not necessarily mean the twine is empty. | |
389 | bool isTriviallyEmpty() const { | |
390 | return isNullary(); | |
391 | } | |
392 | ||
393 | /// isSingleStringRef - Return true if this twine can be dynamically | |
394 | /// accessed as a single StringRef value with getSingleStringRef(). | |
395 | bool isSingleStringRef() const { | |
396 | if (getRHSKind() != EmptyKind) return false; | |
397 | ||
398 | switch (getLHSKind()) { | |
399 | case EmptyKind: | |
400 | case CStringKind: | |
401 | case StdStringKind: | |
402 | case StringRefKind: | |
403 | return true; | |
404 | default: | |
405 | return false; | |
406 | } | |
407 | } | |
408 | ||
409 | /// @} | |
410 | /// @name String Operations | |
411 | /// @{ | |
412 | ||
413 | Twine concat(const Twine &Suffix) const; | |
414 | ||
415 | /// @} | |
416 | /// @name Output & Conversion. | |
417 | /// @{ | |
418 | ||
419 | /// str - Return the twine contents as a std::string. | |
420 | std::string str() const; | |
421 | ||
422 | /// toVector - Write the concatenated string into the given SmallString or | |
423 | /// SmallVector. | |
424 | void toVector(SmallVectorImpl<char> &Out) const; | |
425 | ||
426 | /// getSingleStringRef - This returns the twine as a single StringRef. This | |
427 | /// method is only valid if isSingleStringRef() is true. | |
428 | StringRef getSingleStringRef() const { | |
429 | assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); | |
430 | switch (getLHSKind()) { | |
431 | default: llvm_unreachable("Out of sync with isSingleStringRef"); | |
432 | case EmptyKind: return StringRef(); | |
433 | case CStringKind: return StringRef(LHS.cString); | |
434 | case StdStringKind: return StringRef(*LHS.stdString); | |
435 | case StringRefKind: return *LHS.stringRef; | |
436 | } | |
437 | } | |
438 | ||
439 | /// toStringRef - This returns the twine as a single StringRef if it can be | |
440 | /// represented as such. Otherwise the twine is written into the given | |
441 | /// SmallVector and a StringRef to the SmallVector's data is returned. | |
442 | StringRef toStringRef(SmallVectorImpl<char> &Out) const; | |
443 | ||
444 | /// toNullTerminatedStringRef - This returns the twine as a single null | |
445 | /// terminated StringRef if it can be represented as such. Otherwise the | |
446 | /// twine is written into the given SmallVector and a StringRef to the | |
447 | /// SmallVector's data is returned. | |
448 | /// | |
449 | /// The returned StringRef's size does not include the null terminator. | |
450 | StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; | |
451 | ||
452 | /// Write the concatenated string represented by this twine to the | |
453 | /// stream \p OS. | |
454 | void print(raw_ostream &OS) const; | |
455 | ||
456 | /// Dump the concatenated string represented by this twine to stderr. | |
457 | void dump() const; | |
458 | ||
459 | /// Write the representation of this twine to the stream \p OS. | |
460 | void printRepr(raw_ostream &OS) const; | |
461 | ||
462 | /// Dump the representation of this twine to stderr. | |
463 | void dumpRepr() const; | |
464 | ||
465 | /// @} | |
466 | }; | |
467 | ||
468 | /// @name Twine Inline Implementations | |
469 | /// @{ | |
470 | ||
471 | inline Twine Twine::concat(const Twine &Suffix) const { | |
472 | // Concatenation with null is null. | |
473 | if (isNull() || Suffix.isNull()) | |
474 | return Twine(NullKind); | |
475 | ||
476 | // Concatenation with empty yields the other side. | |
477 | if (isEmpty()) | |
478 | return Suffix; | |
479 | if (Suffix.isEmpty()) | |
480 | return *this; | |
481 | ||
482 | // Otherwise we need to create a new node, taking care to fold in unary | |
483 | // twines. | |
484 | Child NewLHS, NewRHS; | |
485 | NewLHS.twine = this; | |
486 | NewRHS.twine = &Suffix; | |
487 | NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; | |
488 | if (isUnary()) { | |
489 | NewLHS = LHS; | |
490 | NewLHSKind = getLHSKind(); | |
491 | } | |
492 | if (Suffix.isUnary()) { | |
493 | NewRHS = Suffix.LHS; | |
494 | NewRHSKind = Suffix.getLHSKind(); | |
495 | } | |
496 | ||
497 | return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); | |
498 | } | |
499 | ||
500 | inline Twine operator+(const Twine &LHS, const Twine &RHS) { | |
501 | return LHS.concat(RHS); | |
502 | } | |
503 | ||
504 | /// Additional overload to guarantee simplified codegen; this is equivalent to | |
505 | /// concat(). | |
506 | ||
507 | inline Twine operator+(const char *LHS, const StringRef &RHS) { | |
508 | return Twine(LHS, RHS); | |
509 | } | |
510 | ||
511 | /// Additional overload to guarantee simplified codegen; this is equivalent to | |
512 | /// concat(). | |
513 | ||
514 | inline Twine operator+(const StringRef &LHS, const char *RHS) { | |
515 | return Twine(LHS, RHS); | |
516 | } | |
517 | ||
518 | inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { | |
519 | RHS.print(OS); | |
520 | return OS; | |
521 | } | |
522 | ||
523 | /// @} | |
524 | } | |
525 | ||
526 | #endif |