]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | /* |
2 | * Heap string representation. | |
3 | * | |
4 | * Strings are byte sequences ordinarily stored in extended UTF-8 format, | |
5 | * allowing values larger than the official UTF-8 range (used internally) | |
6 | * and also allowing UTF-8 encoding of surrogate pairs (CESU-8 format). | |
7 | * Strings may also be invalid UTF-8 altogether which is the case e.g. with | |
8 | * strings used as internal property names and raw buffers converted to | |
9 | * strings. In such cases the 'clen' field contains an inaccurate value. | |
10 | * | |
11 | * Ecmascript requires support for 32-bit long strings. However, since each | |
12 | * 16-bit codepoint can take 3 bytes in CESU-8, this representation can only | |
13 | * support about 1.4G codepoint long strings in extreme cases. This is not | |
14 | * really a practical issue. | |
15 | */ | |
16 | ||
17 | #ifndef DUK_HSTRING_H_INCLUDED | |
18 | #define DUK_HSTRING_H_INCLUDED | |
19 | ||
20 | /* Impose a maximum string length for now. Restricted artificially to | |
21 | * ensure adding a heap header length won't overflow size_t. The limit | |
22 | * should be synchronized with DUK_HBUFFER_MAX_BYTELEN. | |
23 | * | |
24 | * E5.1 makes provisions to support strings longer than 4G characters. | |
25 | * This limit should be eliminated on 64-bit platforms (and increased | |
26 | * closer to maximum support on 32-bit platforms). | |
27 | */ | |
28 | ||
29 | #if defined(DUK_USE_STRLEN16) /* byte length is stored in hdr.h_strextra16 in this configuration */ | |
30 | #define DUK_HSTRING_MAX_BYTELEN (0x0000ffffUL) /* 65535 bytes */ | |
31 | #else /* byte length is stored in the 'blen' field of duk_hstring */ | |
32 | #define DUK_HSTRING_MAX_BYTELEN (0x7fffffffUL) /* 2^31-1 bytes */ | |
33 | #endif | |
34 | ||
35 | /* XXX: could add flags for "is valid CESU-8" (Ecmascript compatible strings), | |
36 | * "is valid UTF-8", "is valid extended UTF-8" (internal strings are not, | |
37 | * regexp bytecode is), and "contains non-BMP characters". These are not | |
38 | * needed right now. | |
39 | */ | |
40 | ||
41 | #define DUK_HSTRING_FLAG_ASCII DUK_HEAPHDR_USER_FLAG(0) /* string is ASCII, clen == blen */ | |
42 | #define DUK_HSTRING_FLAG_ARRIDX DUK_HEAPHDR_USER_FLAG(1) /* string is a valid array index */ | |
43 | #define DUK_HSTRING_FLAG_INTERNAL DUK_HEAPHDR_USER_FLAG(2) /* string is internal (e.g. an internal property name) */ | |
44 | #define DUK_HSTRING_FLAG_RESERVED_WORD DUK_HEAPHDR_USER_FLAG(3) /* string is a reserved word (non-strict) */ | |
45 | #define DUK_HSTRING_FLAG_STRICT_RESERVED_WORD DUK_HEAPHDR_USER_FLAG(4) /* string is a reserved word (strict) */ | |
46 | #define DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS DUK_HEAPHDR_USER_FLAG(5) /* string is 'eval' or 'arguments' */ | |
47 | #define DUK_HSTRING_FLAG_EXTDATA DUK_HEAPHDR_USER_FLAG(6) /* string data is external (duk_hstring_external) */ | |
48 | ||
49 | #define DUK_HSTRING_HAS_ASCII(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) /* HAS_xxx: test one flag on the string's heap header; 'x' is a duk_hstring pointer */ | |
50 | #define DUK_HSTRING_HAS_ARRIDX(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) | |
51 | #define DUK_HSTRING_HAS_INTERNAL(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_INTERNAL) | |
52 | #define DUK_HSTRING_HAS_RESERVED_WORD(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) | |
53 | #define DUK_HSTRING_HAS_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) | |
54 | #define DUK_HSTRING_HAS_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) | |
55 | #define DUK_HSTRING_HAS_EXTDATA(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) | |
56 | ||
57 | #define DUK_HSTRING_SET_ASCII(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) /* SET_xxx: set one flag on the string's heap header */ | |
58 | #define DUK_HSTRING_SET_ARRIDX(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) | |
59 | #define DUK_HSTRING_SET_INTERNAL(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_INTERNAL) | |
60 | #define DUK_HSTRING_SET_RESERVED_WORD(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) | |
61 | #define DUK_HSTRING_SET_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) | |
62 | #define DUK_HSTRING_SET_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) | |
63 | #define DUK_HSTRING_SET_EXTDATA(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) | |
64 | ||
65 | #define DUK_HSTRING_CLEAR_ASCII(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) /* CLEAR_xxx: clear one flag on the string's heap header */ | |
66 | #define DUK_HSTRING_CLEAR_ARRIDX(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) | |
67 | #define DUK_HSTRING_CLEAR_INTERNAL(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_INTERNAL) | |
68 | #define DUK_HSTRING_CLEAR_RESERVED_WORD(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) | |
69 | #define DUK_HSTRING_CLEAR_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) | |
70 | #define DUK_HSTRING_CLEAR_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) | |
71 | #define DUK_HSTRING_CLEAR_EXTDATA(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) | |
72 | ||
73 | #if 0 /* Disabled alternative: compare byte length and character length instead of | |
74 | * testing the ASCII flag. Slightly smaller code, but the explicit flag | |
75 | * is very useful when 'clen' is dropped. */ | |
76 | #define DUK_HSTRING_IS_ASCII(x) (DUK_HSTRING_GET_BYTELEN((x)) == DUK_HSTRING_GET_CHARLEN((x))) | |
77 | #endif | |
78 | #define DUK_HSTRING_IS_ASCII(x) DUK_HSTRING_HAS_ASCII((x)) /* active variant: relies on DUK_HSTRING_FLAG_ASCII */ | |
79 | #define DUK_HSTRING_IS_EMPTY(x) (DUK_HSTRING_GET_BYTELEN((x)) == 0) /* true when the byte length is zero */ | |
80 | ||
81 | #if defined(DUK_USE_STRHASH16) /* 16-bit hash is kept in the upper 16 bits of hdr.h_flags */ | |
82 | #define DUK_HSTRING_GET_HASH(x) ((x)->hdr.h_flags >> 16) | |
83 | #define DUK_HSTRING_SET_HASH(x,v) do { /* keeps the low 16 flag bits; cast first so the shift is done in an unsigned 32-bit type whatever the type of 'v' */ \ | |
84 | (x)->hdr.h_flags = ((x)->hdr.h_flags & 0x0000ffffUL) | (((duk_uint32_t) (v)) << 16); \ | |
85 | } while (0) | |
86 | #else /* full-width hash is kept in the 'hash' field of duk_hstring */ | |
87 | #define DUK_HSTRING_GET_HASH(x) ((x)->hash) | |
88 | #define DUK_HSTRING_SET_HASH(x,v) do { \ | |
89 | (x)->hash = (v); \ | |
90 | } while (0) | |
91 | #endif | |
92 | ||
93 | #if defined(DUK_USE_STRLEN16) /* byte length is kept in the heap header (hdr.h_strextra16) */ | |
94 | #define DUK_HSTRING_GET_BYTELEN(x) ((x)->hdr.h_strextra16) | |
95 | #define DUK_HSTRING_SET_BYTELEN(x,v) do { \ | |
96 | (x)->hdr.h_strextra16 = (v); \ | |
97 | } while (0) | |
98 | #if defined(DUK_USE_HSTRING_CLEN) /* character length is stored in the 'clen16' field */ | |
99 | #define DUK_HSTRING_GET_CHARLEN(x) ((x)->clen16) | |
100 | #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ | |
101 | (x)->clen16 = (v); \ | |
102 | } while (0) | |
103 | #else /* no stored character length: the getter calls duk_hstring_get_charlen() */ | |
104 | #define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x)) | |
105 | #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ | |
106 | DUK_ASSERT(0); /* should never be called: there is no field to store into, 'x' and 'v' are not evaluated */ \ | |
107 | } while (0) | |
108 | #endif | |
109 | #else /* byte and character lengths are the 32-bit 'blen' and 'clen' fields of duk_hstring */ | |
110 | #define DUK_HSTRING_GET_BYTELEN(x) ((x)->blen) | |
111 | #define DUK_HSTRING_SET_BYTELEN(x,v) do { \ | |
112 | (x)->blen = (v); \ | |
113 | } while (0) | |
114 | #define DUK_HSTRING_GET_CHARLEN(x) ((x)->clen) | |
115 | #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ | |
116 | (x)->clen = (v); \ | |
117 | } while (0) | |
118 | #endif | |
119 | ||
120 | #if defined(DUK_USE_HSTRING_EXTDATA) /* string data may be external, so DUK_HSTRING_FLAG_EXTDATA must be checked */ | |
121 | #define DUK_HSTRING_GET_EXTDATA(x) /* 'x' must point to a duk_hstring_external */ \ | |
122 | ((x)->extdata) | |
123 | #define DUK_HSTRING_GET_DATA(x) /* external pointer if EXTDATA is set, otherwise the bytes right after the struct; evaluates 'x' more than once */ \ | |
124 | (DUK_HSTRING_HAS_EXTDATA((x)) ? \ | |
125 | DUK_HSTRING_GET_EXTDATA((const duk_hstring_external *) (x)) : ((const duk_uint8_t *) ((x) + 1))) | |
126 | #else /* no external strings: data always starts right after the duk_hstring struct */ | |
127 | #define DUK_HSTRING_GET_DATA(x) \ | |
128 | ((const duk_uint8_t *) ((x) + 1)) | |
129 | #endif | |
130 | ||
131 | #define DUK_HSTRING_GET_DATA_END(x) /* pointer one past the last data byte; uses the byte length accessor so it also works when DUK_USE_STRLEN16 removes the 'blen' field */ \ | |
132 | (DUK_HSTRING_GET_DATA((x)) + DUK_HSTRING_GET_BYTELEN((x))) | |
133 | ||
134 | /* Marker value for "no array index"; in E5 2^32-1 is not a valid array index (2^32-2 is the highest valid one). */ | |
135 | #define DUK_HSTRING_NO_ARRAY_INDEX (0xffffffffUL) | |
136 | ||
137 | /* Get the array index related to string 'h': calls the helper only when the ARRIDX | |
138 | * flag is set, otherwise yields DUK_HSTRING_NO_ARRAY_INDEX without a helper call. | |
139 | */ | |
140 | #define DUK_HSTRING_GET_ARRIDX_FAST(h) \ | |
141 | (DUK_HSTRING_HAS_ARRIDX((h)) ? duk_js_to_arrayindex_string_helper((h)) : DUK_HSTRING_NO_ARRAY_INDEX) | |
142 | ||
143 | /* Slower but more compact variant: always calls the helper, no flag test. */ | |
144 | #define DUK_HSTRING_GET_ARRIDX_SLOW(h) \ | |
145 | (duk_js_to_arrayindex_string_helper((h))) | |
146 | ||
147 | /* | |
148 | * Misc | |
149 | */ | |
150 | ||
151 | struct duk_hstring { /* fixed header of a heap string; use the DUK_HSTRING_xxx accessor macros rather than the fields, since the layout depends on config options */ | |
152 | /* Smaller heaphdr than for other objects, because strings are held | |
153 | * in the string intern table which requires no link pointers. Much of | |
154 | * the 32-bit flags field is unused by flags, so we can stuff a 16-bit | |
155 | * field in there. | |
156 | */ | |
157 | duk_heaphdr_string hdr; | |
158 | ||
159 | /* Note: we could try to stuff a partial hash (e.g. 16 bits) into the | |
160 | * shared heap header. Good hashing needs more hash bits though. | |
161 | */ | |
162 | ||
163 | /* String hash; accessed through DUK_HSTRING_GET_HASH() / DUK_HSTRING_SET_HASH(). */ | |
164 | #if defined(DUK_USE_STRHASH16) | |
165 | /* If 16-bit hash is in use, it is stuffed into the upper 16 bits of hdr.h_flags, so there is no field here. */ | |
166 | #else | |
167 | duk_uint32_t hash; | |
168 | #endif | |
169 | ||
170 | /* Length in bytes (not counting NUL term); accessed through DUK_HSTRING_GET_BYTELEN(). */ | |
171 | #if defined(DUK_USE_STRLEN16) | |
172 | /* placed in duk_heaphdr_string (hdr.h_strextra16) */ | |
173 | #else | |
174 | duk_uint32_t blen; | |
175 | #endif | |
176 | ||
177 | /* Length in codepoints (must be E5 compatible); accessed through DUK_HSTRING_GET_CHARLEN(). */ | |
178 | #if defined(DUK_USE_STRLEN16) | |
179 | #if defined(DUK_USE_HSTRING_CLEN) | |
180 | duk_uint16_t clen16; | |
181 | #else | |
182 | /* computed live by duk_hstring_get_charlen() */ | |
183 | #endif | |
184 | #else | |
185 | duk_uint32_t clen; | |
186 | #endif | |
187 | ||
188 | /* | |
189 | * String value of 'blen+1' bytes follows (+1 for NUL termination | |
190 | * convenience for C API), unless the data is external (see | |
191 | * duk_hstring_external). No alignment needs to be guaranteed for strings, | |
192 | * but fields above should guarantee alignment-by-4 (but not alignment-by-8). | |
193 | */ | |
194 | }; | |
195 | ||
196 | /* String whose data lives outside the struct. The external string struct is defined even when the feature is inactive. */ | |
197 | struct duk_hstring_external { | |
198 | duk_hstring str; /* first member: DUK_HSTRING_GET_DATA() casts a duk_hstring pointer to this struct type */ | |
199 | ||
200 | /* | |
201 | * For an external string, the NUL-terminated string data is stored | |
202 | * externally. The user must guarantee that data behind this pointer | |
203 | * doesn't change while it's used. | |
204 | */ | |
205 | ||
206 | const duk_uint8_t *extdata; /* read through DUK_HSTRING_GET_EXTDATA() */ | |
207 | }; | |
208 | ||
209 | /* | |
210 | * Prototypes | |
211 | */ | |
212 | ||
213 | DUK_INTERNAL_DECL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr, duk_hstring *h, duk_uint_t pos); /* NOTE(review): presumably returns the codepoint at character offset 'pos' of 'h'; confirm against the implementation */ | |
214 | ||
215 | #if !defined(DUK_USE_HSTRING_CLEN) /* NOTE(review): the only visible user, DUK_HSTRING_GET_CHARLEN, needs this only when DUK_USE_STRLEN16 is also defined */ | |
216 | DUK_INTERNAL_DECL duk_size_t duk_hstring_get_charlen(duk_hstring *h); /* called by DUK_HSTRING_GET_CHARLEN() when no character length field is stored */ | |
217 | #endif | |
218 | ||
219 | #endif /* DUK_HSTRING_H_INCLUDED */ |