]>
Commit | Line | Data |
---|---|---|
20effc67 TL |
1 | // |
2 | // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com), | |
3 | // Vinnie Falco (vinnie.falco@gmail.com) | |
4 | // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) | |
5 | // | |
6 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
7 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
8 | // | |
9 | // Official repository: https://github.com/boostorg/json | |
10 | // | |
11 | ||
12 | #ifndef BOOST_JSON_DETAIL_SSE2_HPP | |
13 | #define BOOST_JSON_DETAIL_SSE2_HPP | |
14 | ||
15 | #include <boost/json/detail/config.hpp> | |
16 | #include <boost/json/detail/utf8.hpp> | |
17 | #include <cstddef> | |
18 | #include <cstring> | |
19 | #ifdef BOOST_JSON_USE_SSE2 | |
20 | # include <emmintrin.h> | |
21 | # include <xmmintrin.h> | |
22 | # ifdef _MSC_VER | |
23 | # include <intrin.h> | |
24 | # endif | |
25 | #endif | |
26 | ||
27 | BOOST_JSON_NS_BEGIN | |
28 | namespace detail { | |
29 | ||
30 | #ifdef BOOST_JSON_USE_SSE2 | |
31 | ||
// Scans [p, end) for the first byte that terminates a run of plain
// string characters: a double quote, a backslash, or a byte with an
// unsigned value of 0x1F or less. Returns a pointer to that byte, or
// `end` when no such byte exists.
//
// The template parameter is not referenced in this primary template;
// no UTF-8 validation is performed here.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quotes = _mm_set1_epi8( '\x22' );
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctl_max = _mm_set1_epi8( 0x1F );

    // Vector part: examine 16 bytes per iteration.
    for(; end - p >= 16; p += 16)
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        // min(x, 0x1F) == x holds exactly when x <= 0x1F (unsigned).
        __m128i const is_ctl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctl_max ), chunk );
        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        // One bit per byte; bit i is set when byte i must stop the scan.
        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_ctl ) );
        if( mask == 0 )
            continue;

        // The lowest set bit is the offset of the first stopping byte.
#if defined(__GNUC__) || defined(__clang__)
        return p + ( __builtin_ffs( mask ) - 1 );
#else
        unsigned long index;
        _BitScanForward( &index, mask );
        return p + static_cast<int>(index);
#endif
    }

    // Scalar part: fewer than 16 bytes remain.
    for(; p != end; ++p)
    {
        unsigned char const c = *p;
        if(c < 0x20 || c == '\x22' || c == '\\')
            break;
    }

    return p;
}
81 | ||
82 | template<> | |
83 | inline | |
84 | const char* | |
85 | count_valid<false>( | |
86 | char const* p, | |
87 | const char* end) noexcept | |
88 | { | |
89 | __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"' | |
90 | __m128i const q2 = _mm_set1_epi8( '\\' ); | |
91 | __m128i const q3 = _mm_set1_epi8( 0x20 ); | |
92 | ||
93 | while(end - p >= 16) | |
94 | { | |
95 | __m128i v1 = _mm_loadu_si128( (__m128i const*)p ); | |
96 | ||
97 | __m128i v2 = _mm_cmpeq_epi8( v1, q1 ); | |
98 | __m128i v3 = _mm_cmpeq_epi8( v1, q2 ); | |
99 | __m128i v4 = _mm_cmplt_epi8( v1, q3 ); | |
100 | ||
101 | __m128i v5 = _mm_or_si128( v2, v3 ); | |
102 | __m128i v6 = _mm_or_si128( v5, v4 ); | |
103 | ||
104 | int w = _mm_movemask_epi8( v6 ); | |
105 | ||
106 | if( w != 0 ) | |
107 | { | |
108 | int m; | |
109 | #if defined(__GNUC__) || defined(__clang__) | |
110 | m = __builtin_ffs( w ) - 1; | |
111 | #else | |
112 | unsigned long index; | |
113 | _BitScanForward( &index, w ); | |
114 | m = index; | |
115 | #endif | |
116 | p += m; | |
117 | break; | |
118 | } | |
119 | ||
120 | p += 16; | |
121 | } | |
122 | ||
123 | while(p != end) | |
124 | { | |
125 | const unsigned char c = *p; | |
126 | if(c == '\x22' || c == '\\' || c < 0x20) | |
127 | break; | |
128 | if(c < 0x80) | |
129 | { | |
130 | ++p; | |
131 | continue; | |
132 | } | |
133 | // validate utf-8 | |
134 | uint16_t first = classify_utf8(c & 0x7F); | |
135 | uint8_t len = first & 0xFF; | |
136 | if(BOOST_JSON_UNLIKELY(end - p < len)) | |
137 | break; | |
138 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | |
139 | break; | |
140 | p += len; | |
141 | } | |
142 | ||
143 | return p; | |
144 | } | |
145 | ||
146 | #else | |
147 | ||
// Scans [p, end) for the first byte that terminates a run of plain
// string characters: a double quote, a backslash, or a byte with an
// unsigned value below 0x20. Returns a pointer to that byte, or `end`
// when no such byte exists.
//
// The template parameter is not referenced in this primary template;
// no UTF-8 validation is performed here.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for(; p != end; ++p)
    {
        unsigned char const c = *p;
        if(c < 0x20 || c == '\x22' || c == '\\')
            return p;
    }

    return p;
}
164 | ||
165 | template<> | |
166 | inline | |
167 | char const* | |
168 | count_valid<false>( | |
169 | char const* p, | |
170 | char const* end) noexcept | |
171 | { | |
172 | while(p != end) | |
173 | { | |
174 | const unsigned char c = *p; | |
175 | if(c == '\x22' || c == '\\' || c < 0x20) | |
176 | break; | |
177 | if(c < 0x80) | |
178 | { | |
179 | ++p; | |
180 | continue; | |
181 | } | |
182 | // validate utf-8 | |
183 | uint16_t first = classify_utf8(c & 0x7F); | |
184 | uint8_t len = first & 0xFF; | |
185 | if(BOOST_JSON_UNLIKELY(end - p < len)) | |
186 | break; | |
187 | if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first))) | |
188 | break; | |
189 | p += len; | |
190 | } | |
191 | ||
192 | return p; | |
193 | } | |
194 | ||
195 | #endif | |
196 | ||
197 | // KRYSTIAN NOTE: does not stop to validate | |
198 | // count_unescaped | |
199 | ||
200 | #ifdef BOOST_JSON_USE_SSE2 | |
201 | ||
// Counts how many leading bytes of the n-byte buffer at s can be
// copied verbatim, i.e. contain no double quote, no backslash and no
// byte with an unsigned value of 0x1F or less.
//
// Only whole 16-byte chunks are examined. If no stopping byte is found
// the result is n rounded down to a multiple of 16, so the trailing
// n % 16 bytes are never inspected and the caller has to process them.
//
// s: start of the buffer; at least n bytes must be readable.
// n: number of bytes available at s.
// Returns the number of bytes before the first stopping byte found.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const quotes = _mm_set1_epi8( '\x22' );
    __m128i const slashes = _mm_set1_epi8( '\\' );
    __m128i const ctl_max = _mm_set1_epi8( 0x1F );

    char const* const begin = s;

    for(; n >= 16; s += 16, n -= 16)
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(s) );

        // min(x, 0x1F) == x holds exactly when x <= 0x1F (unsigned).
        __m128i const is_ctl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctl_max ), chunk );
        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quotes ),
            _mm_cmpeq_epi8( chunk, slashes ) );

        // One bit per byte; bit i is set when byte i must stop the scan.
        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_ctl ) );
        if( mask == 0 )
            continue;

        // The lowest set bit is the offset of the first stopping byte.
        int offset;
#if defined(__GNUC__) || defined(__clang__)
        offset = __builtin_ffs( mask ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, mask );
        offset = static_cast<int>(index);
#endif
        s += offset;
        break;
    }

    // s never moves backwards, so the difference is non-negative.
    return static_cast<std::size_t>(s - begin);
}
248 | ||
249 | #else | |
250 | ||
// Fallback used when BOOST_JSON_USE_SSE2 is not defined: nothing is
// scanned and the count is always zero, whatever the arguments are.
inline std::size_t count_unescaped( char const*, std::size_t ) noexcept
{
    std::size_t const none = 0;
    return none;
}
259 | ||
260 | #endif | |
261 | ||
262 | // count_digits | |
263 | ||
264 | #ifdef BOOST_JSON_USE_SSE2 | |
265 | ||
// Returns the number of leading ASCII decimal digits among the 16
// bytes starting at p (a value from 0 to 16).
// All 16 bytes are loaded, so p[0]..p[15] must be readable.
inline int count_digits( char const* p ) noexcept
{
    __m128i const chunk = _mm_loadu_si128(
        reinterpret_cast<__m128i const*>(p) );

    // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the
    // signed 8-bit range. Every other byte ends up below 118 when read
    // as signed, so the comparison flags exactly the non-digits.
    __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
    __m128i const non_digit = _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

    int const mask = _mm_movemask_epi8( non_digit );
    if( mask == 0 )
        return 16;

    // The lowest set bit is the offset of the first non-digit.
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long index;
    _BitScanForward( &index, mask );
    return static_cast<int>(index);
#endif
}
294 | ||
295 | #else | |
296 | ||
// Returns the number of leading ASCII decimal digits among the 16
// bytes starting at p (a value from 0 to 16).
// Up to 16 bytes are read, so p[0]..p[15] are assumed to be readable.
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    while( n < 16 )
    {
        // Anything outside '0'..'9' wraps to a value above 9.
        unsigned char const d = static_cast<unsigned char>( p[n] - '0' );
        if( d > 9 )
            break;
        ++n;
    }

    return n;
}
310 | ||
311 | #endif | |
312 | ||
313 | // parse_unsigned | |
314 | ||
// Appends the n decimal digits at p to the running value r and returns
// the result, i.e. r * 10^n + (value of the digits).
//
// No validation is done: every byte is treated as an ASCII digit, and
// the arithmetic is plain unsigned 64-bit arithmetic with no overflow
// check.
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
{
    // Main loop: consume four digits per iteration.
    for( ; n >= 4; p += 4, n -= 4 )
    {
        // faster on clang for x86,
        // slower on gcc
#ifdef __clang__
        r = r * 10 + p[0] - '0';
        r = r * 10 + p[1] - '0';
        r = r * 10 + p[2] - '0';
        r = r * 10 + p[3] - '0';
#else
        // Load four characters at once and turn each byte from an
        // ASCII code into a digit value with a single subtraction.
        uint32_t packed;
        std::memcpy( &packed, p, 4 );
        packed -= 0x30303030;

        unsigned const b0 = packed & 0xFF;
        unsigned const b1 = (packed >> 8) & 0xFF;
        unsigned const b2 = (packed >> 16) & 0xFF;
        unsigned const b3 = (packed >> 24);

        // Which byte holds the first character depends on byte order.
#ifdef BOOST_JSON_BIG_ENDIAN
        r = (((r * 10 + b3) * 10 + b2) * 10 + b1) * 10 + b0;
#else
        r = (((r * 10 + b0) * 10 + b1) * 10 + b2) * 10 + b3;
#endif
#endif
    }

    // Tail: between zero and three digits are left.
    for( std::size_t i = 0; i < n; ++i )
    {
        r = r * 10 + p[i] - '0';
    }

    return r;
}
366 | ||
367 | // KRYSTIAN: this function is unused | |
368 | // count_leading | |
369 | ||
370 | /* | |
371 | #ifdef BOOST_JSON_USE_SSE2 | |
372 | ||
373 | // assumes p..p+15 | |
374 | inline std::size_t count_leading( char const * p, char ch ) noexcept | |
375 | { | |
376 | __m128i const q1 = _mm_set1_epi8( ch ); | |
377 | ||
378 | __m128i v = _mm_loadu_si128( (__m128i const*)p ); | |
379 | ||
380 | __m128i w = _mm_cmpeq_epi8( v, q1 ); | |
381 | ||
382 | int m = _mm_movemask_epi8( w ) ^ 0xFFFF; | |
383 | ||
384 | std::size_t n; | |
385 | ||
386 | if( m == 0 ) | |
387 | { | |
388 | n = 16; | |
389 | } | |
390 | else | |
391 | { | |
392 | #if defined(__GNUC__) || defined(__clang__) | |
393 | n = __builtin_ffs( m ) - 1; | |
394 | #else | |
395 | unsigned long index; | |
396 | _BitScanForward( &index, m ); | |
397 | n = index; | |
398 | #endif | |
399 | } | |
400 | ||
401 | return n; | |
402 | } | |
403 | ||
404 | #else | |
405 | ||
406 | // assumes p..p+15 | |
407 | inline std::size_t count_leading( char const * p, char ch ) noexcept | |
408 | { | |
409 | std::size_t n = 0; | |
410 | ||
411 | for( ; n < 16 && *p == ch; ++p, ++n ); | |
412 | ||
413 | return n; | |
414 | } | |
415 | ||
416 | #endif | |
417 | */ | |
418 | ||
419 | // count_whitespace | |
420 | ||
421 | #ifdef BOOST_JSON_USE_SSE2 | |
422 | ||
// Skips leading whitespace (space, tab, carriage return, line feed) in
// [p, end) and returns a pointer to the first byte that is not one of
// those four characters, or `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // Fast exit: empty range, or a first byte above 0x20, which cannot
    // be any of the four whitespace characters.
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
    {
        return p;
    }

    __m128i const spaces = _mm_set1_epi8( ' ' );
    __m128i const line_feeds = _mm_set1_epi8( '\n' );
    __m128i const bit_four = _mm_set1_epi8( 4 );
    __m128i const carriage_returns = _mm_set1_epi8( '\r' );

    // Vector part: examine 16 bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const space_or_lf = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, spaces ),
            _mm_cmpeq_epi8( chunk, line_feeds ) );

        // '\t' | 4 == '\r', so one comparison covers both tab and CR.
        __m128i const tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit_four ), carriage_returns );

        // Invert the 16-bit mask so that set bits mark non-whitespace.
        int const non_ws = _mm_movemask_epi8(
            _mm_or_si128( space_or_lf, tab_or_cr ) ) ^ 0xFFFF;
        if( non_ws == 0 )
            continue;

        // The lowest set bit is the offset of the first such byte.
#if defined(__GNUC__) || defined(__clang__)
        std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, non_ws );
        std::size_t const skip = index;
#endif
        return p + skip;
    }

    // Scalar part: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        char const c = *p;
        if( c != ' ' && c != '\t' && c != '\r' && c != '\n' )
            break;
    }

    return p;
}
482 | ||
483 | /* | |
484 | ||
485 | // slightly faster on msvc-14.2, slightly slower on clang-win | |
486 | ||
487 | inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept | |
488 | { | |
489 | char const * p0 = p; | |
490 | ||
491 | while( n > 0 ) | |
492 | { | |
493 | char ch = *p; | |
494 | ||
495 | if( ch == '\n' || ch == '\r' ) | |
496 | { | |
497 | ++p; | |
498 | --n; | |
499 | continue; | |
500 | } | |
501 | ||
502 | if( ch != ' ' && ch != '\t' ) | |
503 | { | |
504 | break; | |
505 | } | |
506 | ||
507 | ++p; | |
508 | --n; | |
509 | ||
510 | while( n >= 16 ) | |
511 | { | |
512 | std::size_t n2 = count_leading( p, ch ); | |
513 | ||
514 | p += n2; | |
515 | n -= n2; | |
516 | ||
517 | if( n2 < 16 ) | |
518 | { | |
519 | break; | |
520 | } | |
521 | } | |
522 | } | |
523 | ||
524 | return p - p0; | |
525 | } | |
526 | */ | |
527 | ||
528 | #else | |
529 | ||
// Skips leading whitespace (space, tab, carriage return, line feed) in
// [p, end) and returns a pointer to the first byte that is not one of
// those four characters, or `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        switch( *p )
        {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            ++p;
            continue;
        default:
            return p;
        }
    }

    return p;
}
541 | ||
542 | #endif | |
543 | ||
544 | } // detail | |
545 | BOOST_JSON_NS_END | |
546 | ||
547 | #endif |