]> git.proxmox.com Git - ceph.git/blame - ceph/src/Beast/include/beast/websocket/detail/utf8_checker.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / Beast / include / beast / websocket / detail / utf8_checker.hpp
CommitLineData
7c673cae
FG
1//
2// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6//
7
8#ifndef BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_HPP
9#define BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_HPP
10
#include <boost/asio/buffer.hpp>
#include <boost/assert.hpp>
#include <beast/core/buffer_concepts.hpp>
#include <algorithm>
#include <cstdint>
#include <cstring>
16
17namespace beast {
18namespace websocket {
19namespace detail {
20
21/* This is a modified work.
22
23 Original version and license:
24 https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
25 Permission is hereby granted, free of charge, to any person obtaining
26 a copy of this software and associated documentation files (the
27 "Software"), to deal in the Software without restriction, including
28 without limitation the rights to use, copy, modify, merge, publish,
29 distribute, sublicense, and/or sell copies of the Software, and to
30 permit persons to whom the Software is furnished to do so, subject
31 to the following conditions:
32
33 The above copyright notice and this permission notice shall be included
34 in all copies or substantial portions of the Software.
35
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
38 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
39 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
40 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
41 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
42 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
43
44 Additional changes:
45 Optimized for predominantly 7-bit content, 2016
46 https://github.com/uWebSockets/uWebSockets/blob/755bd362649c06abff102f18e273c5792c51c1a0/src/WebSocketProtocol.h#L198
47 Copyright (c) 2016 Alex Hultman and contributors
48
49 This software is provided 'as-is', without any express or implied
50 warranty. In no event will the authors be held liable for any damages
51 arising from the use of this software.
52
53 Permission is granted to anyone to use this software for any purpose,
54 including commercial applications, and to alter it and redistribute it
55 freely, subject to the following restrictions:
56
57 1. The origin of this software must not be misrepresented; you must not
58 claim that you wrote the original software. If you use this software
59 in a product, an acknowledgement in the product documentation would be
60 appreciated but is not required.
61 2. Altered source versions must be plainly marked as such, and must not be
62 misrepresented as being the original software.
63 3. This notice may not be removed or altered from any source distribution.
64*/
65
/** Incremental UTF8 validator.

    Text may be handed to the validator in arbitrary pieces through
    repeated calls to `write`; the bytes of a code point that is split
    across two pieces are kept internally until the rest arrives. Once
    all text has been supplied, `finish` reports whether the input
    stopped on a code point boundary.
*/
template<typename = void>
class utf8_checker_t
{
    // Count of bytes still missing from the buffered, incomplete code point
    std::size_t need_ = 0;

    // Next free position inside `have_`
    std::uint8_t* p_ = have_;

    // Storage for the leading bytes of a code point split across writes
    std::uint8_t have_[4];

public:
    /** Validate the next piece of text.

        @param in Pointer to the first byte of the piece.

        @param size Number of bytes in the piece.

        @return `true` if the text is valid utf8 or false otherwise.
    */
    bool write(std::uint8_t const* in, std::size_t size);

    /** Validate the next piece of text, given as a buffer sequence.

        @return `true` if the text is valid utf8 or false otherwise.
    */
    template<typename ConstBufferSequence>
    bool write(ConstBufferSequence const& bs);

    /** Check that no code point was left incomplete.

        The validator is reset as a side effect.
    */
    bool finish();

    /** Discard any buffered partial code point and start over.
    */
    void reset();
};
105
106template<class _>
107void
108utf8_checker_t<_>::reset()
109{
110 need_ = 0;
111 p_ = have_;
112}
113
114template<class _>
115bool
116utf8_checker_t<_>::finish()
117{
118 auto const success = need_ == 0;
119 reset();
120 return success;
121}
122
123template<class _>
124template<class ConstBufferSequence>
125bool
126utf8_checker_t<_>::write(ConstBufferSequence const& bs)
127{
128 static_assert(is_ConstBufferSequence<ConstBufferSequence>::value,
129 "ConstBufferSequence requirements not met");
130 using boost::asio::buffer_cast;
131 using boost::asio::buffer_size;
132 for(auto const& b : bs)
133 if(! write(buffer_cast<std::uint8_t const*>(b),
134 buffer_size(b)))
135 return false;
136 return true;
137}
138
139template<class _>
140bool
141utf8_checker_t<_>::write(std::uint8_t const* in, std::size_t size)
142{
143 auto const valid =
144 [](std::uint8_t const*& in)
145 {
146 if (in[0] < 128)
147 {
148 ++in;
149 return true;
150 }
151 if ((in[0] & 0x60) == 0x40)
152 {
153 if ((in[1] & 0xc0) != 0x80)
154 return false;
155 in += 2;
156 return true;
157 }
158 if ((in[0] & 0xf0) == 0xe0)
159 {
160 if ((in[1] & 0xc0) != 0x80 ||
161 (in[2] & 0xc0) != 0x80 ||
162 (in[0] == 224 && in[1] < 160) ||
163 (in[0] == 237 && in[1] > 159))
164 return false;
165 in += 3;
166 return true;
167 }
168 if ((in[0] & 0xf8) == 0xf0)
169 {
170 if (in[0] > 244 ||
171 (in[1] & 0xc0) != 0x80 ||
172 (in[2] & 0xc0) != 0x80 ||
173 (in[3] & 0xc0) != 0x80 ||
174 (in[0] == 240 && in[1] < 144) ||
175 (in[0] == 244 && in[1] > 143))
176 return false;
177 in += 4;
178 return true;
179 }
180 return false;
181 };
182 auto const valid_have =
183 [&]()
184 {
185 if ((have_[0] & 0x60) == 0x40)
186 return have_[0] <= 223;
187 if ((have_[0] & 0xf0) == 0xe0)
188 {
189 if (p_ - have_ > 1 &&
190 ((have_[1] & 0xc0) != 0x80 ||
191 (have_[0] == 224 && have_[1] < 160) ||
192 (have_[0] == 237 && have_[1] > 159)))
193 return false;
194 return true;
195 }
196 if ((have_[0] & 0xf8) == 0xf0)
197 {
198 auto const size = p_ - have_;
199 if (size > 2 && (have_[2] & 0xc0) != 0x80)
200 return false;
201 if (size > 1 &&
202 ((have_[1] & 0xc0) != 0x80 ||
203 (have_[0] == 240 && have_[1] < 144) ||
204 (have_[0] == 244 && have_[1] > 143)))
205 return false;
206 }
207 return true;
208 };
209 auto const needed =
210 [](std::uint8_t const in)
211 {
212 if (in < 128)
213 return 1;
214 if (in < 194)
215 return 0;
216 if (in < 224)
217 return 2;
218 if (in < 240)
219 return 3;
220 if (in < 245)
221 return 4;
222 return 0;
223 };
224
225 auto const end = in + size;
226 if (need_ > 0)
227 {
228 auto n = (std::min)(size, need_);
229 size -= n;
230 need_ -= n;
231 while(n--)
232 *p_++ = *in++;
233 if(need_ > 0)
234 {
235 BOOST_ASSERT(in == end);
236 return valid_have();
237 }
238 std::uint8_t const* p = &have_[0];
239 if (! valid(p))
240 return false;
241 p_ = have_;
242 }
243
244 auto last = in + size - 7;
245 while(in < last)
246 {
247#if BEAST_WEBSOCKET_NO_UNALIGNED_READ
248 auto constexpr align = sizeof(std::size_t) - 1;
249 auto constexpr mask = static_cast<
250 std::size_t>(0x8080808080808080 &
251 ~std::size_t{0});
252 if(
253 ((reinterpret_cast<
254 std::uintptr_t>(in) & align) == 0) &&
255 (*reinterpret_cast<
256 std::size_t const*>(in) & mask) == 0)
257 in += sizeof(std::size_t);
258 else if(! valid(in))
259 return false;
260#else
261 auto constexpr mask = static_cast<
262 std::size_t>(0x8080808080808080 &
263 ~std::size_t{0});
264 if(
265 (*reinterpret_cast<
266 std::size_t const*>(in) & mask) == 0)
267 in += sizeof(std::size_t);
268 else if(! valid(in))
269 return false;
270#endif
271 }
272 last += 4;
273 while(in < last)
274 if(! valid(in))
275 return false;
276
277 for(;;)
278 {
279 auto n = end - in;
280 if(! n)
281 break;
282 auto const need = needed(*in);
283 if (need == 0)
284 return false;
285 if(need <= n)
286 {
287 if(! valid(in))
288 return false;
289 }
290 else
291 {
292 need_ = need - n;
293 while(n--)
294 *p_++ = *in++;
295 return valid_have();
296 }
297 }
298 return true;
299}
300
301using utf8_checker = utf8_checker_t<>;
302
303template<class = void>
304bool
305check_utf8(char const* p, std::size_t n)
306{
307 utf8_checker c;
308 if(! c.write(reinterpret_cast<const uint8_t*>(p), n))
309 return false;
310 return c.finish();
311}
312
313} // detail
314} // websocket
315} // beast
316
317#endif