]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/json/detail/utf8.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / boost / json / detail / utf8.hpp
CommitLineData
20effc67
TL
1//
2// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6//
7// Official repository: https://github.com/boostorg/json
8//
9
10#ifndef BOOST_JSON_DETAIL_UTF8_HPP
11#define BOOST_JSON_DETAIL_UTF8_HPP
12
13#include <cstddef>
14#include <cstring>
15#include <cstdint>
16
17BOOST_JSON_NS_BEGIN
18namespace detail {
19
/** Load the first N bytes at `p` as a little-endian 32-bit value.

    Byte 0 of the input ends up in bits 0-7 of the result, byte 1 in
    bits 8-15, and so on. Bytes of the result beyond the N that were
    read are zero.

    @tparam N Number of bytes to read from `p`; must not exceed 4.

    @param p Pointer to at least N readable bytes.

    @return The assembled value.
*/
template<int N>
std::uint32_t
load_little_endian(void const* p)
{
    // Copying more than sizeof(v) bytes would overrun the destination
    // (a negative N converts to a huge size and is rejected as well).
    static_assert(
        static_cast<std::size_t>(N) <= sizeof(std::uint32_t),
        "load_little_endian: N must be in the range [0, 4]");

    // Start from zero: memcpy only writes N bytes, so without this the
    // remaining bytes of v would be indeterminate when N < 4.
    std::uint32_t v = 0;
    std::memcpy(&v, p, N);
#ifdef BOOST_JSON_BIG_ENDIAN
    // On big-endian targets the copied bytes sit in the most significant
    // positions in reverse significance; a full byte swap restores the
    // little-endian interpretation.
    v = ((v & 0xFF000000) >> 24) |
        ((v & 0x00FF0000) >>  8) |
        ((v & 0x0000FF00) <<  8) |
        ((v & 0x000000FF) << 24);
#endif
    return v;
}
35
/** Classify the lead byte of a multi-byte UTF-8 sequence.

    Only the low 7 bits of `c` are used to index the table, so the
    result is the same whether or not the caller has already masked
    the byte with 0x7F (as `utf8_sequence::save` does). Table slot `i`
    describes the lead byte `0x80 | i`.

    @param c The lead byte, with or without its high bit.

    @return A 16-bit code: the low byte is the total length of the
    sequence in bytes and the high byte selects the constraint on the
    second byte (see the list below). Zero means the byte cannot start
    a multi-byte sequence.
*/
inline
std::uint16_t
classify_utf8(char c)
{
    // 0x000 = invalid
    // 0x102 = 2 bytes, second byte [80, BF]
    // 0x203 = 3 bytes, second byte [A0, BF]
    // 0x303 = 3 bytes, second byte [80, BF]
    // 0x403 = 3 bytes, second byte [80, 9F]
    // 0x504 = 4 bytes, second byte [90, BF]
    // 0x604 = 4 bytes, second byte [80, BF]
    // 0x704 = 4 bytes, second byte [80, 8F]
    static constexpr std::uint16_t first[128]
    {
        // 0x80..0xBF: continuation bytes, never a valid lead byte
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,

        // 0xC0..0xDF: 0xC0 and 0xC1 are rejected, the rest start 2 bytes
        0x000, 0x000, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102, 0x102,
        // 0xE0..0xEF: 3 bytes; 0xE0 and 0xED restrict the second byte
        0x203, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303, 0x303,
        0x303, 0x303, 0x303, 0x303, 0x303, 0x403, 0x303, 0x303,
        // 0xF0..0xF4: 4 bytes; 0xF5..0xFF are rejected
        0x504, 0x604, 0x604, 0x604, 0x704, 0x000, 0x000, 0x000,
        0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
    };
    // Mask to 7 bits so that a byte passed with its high bit still set
    // cannot index past the end of the 128-entry table.
    return first[static_cast<unsigned char>(c) & 0x7F];
}
70
71inline
72bool
73is_valid_utf8(const char* p, uint16_t first)
74{
75 uint32_t v;
76 switch(first >> 8)
77 {
78 default:
79 return false;
80
81 // 2 bytes, second byte [80, BF]
82 case 1:
83 v = load_little_endian<2>(p);
84 return (v & 0xC000) == 0x8000;
85
86 // 3 bytes, second byte [A0, BF]
87 case 2:
88 v = load_little_endian<3>(p);
89 std::memcpy(&v, p, 3);
90 return (v & 0xC0E000) == 0x80A000;
91
92 // 3 bytes, second byte [80, BF]
93 case 3:
94 v = load_little_endian<3>(p);
95 return (v & 0xC0C000) == 0x808000;
96
97 // 3 bytes, second byte [80, 9F]
98 case 4:
99 v = load_little_endian<3>(p);
100 return (v & 0xC0E000) == 0x808000;
101
102 // 4 bytes, second byte [90, BF]
103 case 5:
104 v = load_little_endian<4>(p);
105 return (v & 0xC0C0FF00) + 0x7F7F7000 <= 0x2F00;
106
107 // 4 bytes, second byte [80, BF]
108 case 6:
109 v = load_little_endian<4>(p);
110 return (v & 0xC0C0C000) == 0x80808000;
111
112 // 4 bytes, second byte [80, 8F]
113 case 7:
114 v = load_little_endian<4>(p);
115 return (v & 0xC0C0F000) == 0x80808000;
116 }
117}
118
119class utf8_sequence
120{
121 char seq_[4];
122 uint16_t first_;
123 uint8_t size_;
124
125public:
126 void
127 save(
128 const char* p,
129 std::size_t remain) noexcept
130 {
131 first_ = classify_utf8(*p & 0x7F);
132 if(remain >= length())
133 size_ = length();
134 else
135 size_ = static_cast<uint8_t>(remain);
136 std::memcpy(seq_, p, size_);
137 }
138
139 uint8_t
140 length() const noexcept
141 {
142 return first_ & 0xFF;
143 }
144
145 bool
146 complete() const noexcept
147 {
148 return size_ >= length();
149 }
150
151 // returns true if complete
152 bool
153 append(
154 const char* p,
155 std::size_t remain) noexcept
156 {
157 if(BOOST_JSON_UNLIKELY(needed() == 0))
158 return true;
159 if(BOOST_JSON_LIKELY(remain >= needed()))
160 {
161 std::memcpy(
162 seq_ + size_, p, needed());
163 size_ = length();
164 return true;
165 }
166 if(BOOST_JSON_LIKELY(remain > 0))
167 {
168 std::memcpy(seq_ + size_, p, remain);
169 size_ += static_cast<uint8_t>(remain);
170 }
171 return false;
172 }
173
174 const char*
175 data() const noexcept
176 {
177 return seq_;
178 }
179
180 uint8_t
181 needed() const noexcept
182 {
183 return length() - size_;
184 }
185
186 bool
187 valid() const noexcept
188 {
189 BOOST_ASSERT(size_ >= length());
190 return is_valid_utf8(seq_, first_);
191 }
192};
193
194} // detail
195BOOST_JSON_NS_END
196
197#endif