1 // Tencent is pleased to support the open source community by making RapidJSON available.
3 // (C) Copyright IBM Corporation 2021
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
8 // http://opensource.org/licenses/MIT
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
15 #ifndef RAPIDJSON_URI_H_
16 #define RAPIDJSON_URI_H_
18 #include "internal/strfunc.h"
20 #if defined(__clang__)
22 RAPIDJSON_DIAG_OFF(c
++98-compat
)
23 #elif defined(_MSC_VER)
24 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
27 RAPIDJSON_NAMESPACE_BEGIN
29 ///////////////////////////////////////////////////////////////////////////////
32 template <typename ValueType
, typename Allocator
=CrtAllocator
>
// Character type used for every URI string in this class; taken from ValueType::Ch.
35 typedef typename
ValueType::Ch Ch
;
36 #if RAPIDJSON_HAS_STDSTRING
// std::basic_string over Ch; only declared when RAPIDJSON_HAS_STDSTRING is enabled.
37 typedef std::basic_string
<Ch
> String
;
//! Construct an empty URI.
// All part pointers and ownAllocator_ are value-initialised (null); only allocator_ is set.
// \param allocator Optional allocator to use; defaults to 0 (none supplied).
41 GenericUri(Allocator
* allocator
= 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
//! Construct from a character buffer with an explicit length.
// \param uri       Pointer to the URI characters.
// \param len       Number of characters in uri.
// \param allocator Optional allocator to use; defaults to 0 (none supplied).
44 GenericUri(const Ch
* uri
, SizeType len
, Allocator
* allocator
= 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
//! Construct from a null-terminated string.
// The length handed to Parse is measured with internal::StrLen, so uri must be terminated.
// \param uri       Null-terminated URI string.
// \param allocator Optional allocator to use; defaults to 0 (none supplied).
48 GenericUri(const Ch
* uri
, Allocator
* allocator
= 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
49 Parse(uri
, internal::StrLen
<Ch
>(uri
));
52 // Use with specializations of GenericValue
// Construct from any object T that provides `template Get<const Ch*>()`; the returned
// pointer is measured with internal::StrLen and then parsed.
// \param uri       Object holding the URI string.
// \param allocator Optional allocator to use; defaults to 0 (none supplied).
53 template<typename T
> GenericUri(const T
& uri
, Allocator
* allocator
= 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
54 const Ch
* u
= uri
.template Get
<const Ch
*>(); // TypeHelper from document.h
55 Parse(u
, internal::StrLen
<Ch
>(u
));
58 #if RAPIDJSON_HAS_STDSTRING
//! Construct from a String (std::basic_string<Ch>).
// The length is taken with internal::StrLen on c_str() rather than from the string's own
// size, so parsing stops at the first embedded null character.
// \param uri       String holding the URI.
// \param allocator Optional allocator to use; defaults to 0 (none supplied).
59 GenericUri(const String
& uri
, Allocator
* allocator
= 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
60 Parse(uri
.c_str(), internal::StrLen
<Ch
>(uri
.c_str()));
//! Copy constructor.
// The initialiser list leaves every pointer null, including allocator_, so the new
// object does not start out sharing rhs's allocator.
// \param rhs URI to copy from.
65 GenericUri(const GenericUri
& rhs
) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() {
//! Copy constructor with an explicit allocator.
// \param rhs       URI to copy from.
// \param allocator Allocator stored in allocator_ for the new object.
70 GenericUri(const GenericUri
& rhs
, Allocator
* allocator
) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator
), ownAllocator_() {
// Destroy the allocator this object created for itself. ownAllocator_ stays null when the
// caller supplied an allocator, so a user-supplied allocator is never deleted here.
77 RAPIDJSON_DELETE(ownAllocator_
);
80 //! Assignment operator
// Allocates storage sized from rhs's full URI length and copies each part across.
// Every CopyPart call returns the address where the following part begins, so the parts
// end up back-to-back in the order scheme, auth, path, query, frag, base, uri.
// \param rhs URI to copy from.
81 GenericUri
& operator=(const GenericUri
& rhs
) {
83 // Do not delete ownAllocator
85 Allocate(rhs
.GetStringLength());
86 auth_
= CopyPart(scheme_
, rhs
.scheme_
, rhs
.GetSchemeStringLength());
87 path_
= CopyPart(auth_
, rhs
.auth_
, rhs
.GetAuthStringLength());
88 query_
= CopyPart(path_
, rhs
.path_
, rhs
.GetPathStringLength());
89 frag_
= CopyPart(query_
, rhs
.query_
, rhs
.GetQueryStringLength());
90 base_
= CopyPart(frag_
, rhs
.frag_
, rhs
.GetFragStringLength());
91 uri_
= CopyPart(base_
, rhs
.base_
, rhs
.GetBaseStringLength());
// The return value is not needed for the last part.
92 CopyPart(uri_
, rhs
.uri_
, rhs
.GetStringLength());
98 // Use with specializations of GenericValue
// Store this URI's full string into uri by calling uri.template Set<const Ch*>().
// \param uri       Destination object; must provide `template Set<const Ch*>(const Ch*, Allocator&)`.
// \param allocator Allocator forwarded to Set.
99 template<typename T
> void Get(T
& uri
, Allocator
& allocator
) {
100 uri
.template Set
<const Ch
*>(this->GetString(), allocator
); // TypeHelper from document.h
103 const Ch
* GetString() const { return uri_
; }
104 SizeType
GetStringLength() const { return uri_
== 0 ? 0 : internal::StrLen
<Ch
>(uri_
); }
105 const Ch
* GetBaseString() const { return base_
; }
106 SizeType
GetBaseStringLength() const { return base_
== 0 ? 0 : internal::StrLen
<Ch
>(base_
); }
107 const Ch
* GetSchemeString() const { return scheme_
; }
108 SizeType
GetSchemeStringLength() const { return scheme_
== 0 ? 0 : internal::StrLen
<Ch
>(scheme_
); }
109 const Ch
* GetAuthString() const { return auth_
; }
110 SizeType
GetAuthStringLength() const { return auth_
== 0 ? 0 : internal::StrLen
<Ch
>(auth_
); }
111 const Ch
* GetPathString() const { return path_
; }
112 SizeType
GetPathStringLength() const { return path_
== 0 ? 0 : internal::StrLen
<Ch
>(path_
); }
113 const Ch
* GetQueryString() const { return query_
; }
114 SizeType
GetQueryStringLength() const { return query_
== 0 ? 0 : internal::StrLen
<Ch
>(query_
); }
115 const Ch
* GetFragString() const { return frag_
; }
116 SizeType
GetFragStringLength() const { return frag_
== 0 ? 0 : internal::StrLen
<Ch
>(frag_
); }
118 #if RAPIDJSON_HAS_STDSTRING
119 static String
Get(const GenericUri
& uri
) { return String(uri
.GetString(), uri
.GetStringLength()); }
120 static String
GetBase(const GenericUri
& uri
) { return String(uri
.GetBaseString(), uri
.GetBaseStringLength()); }
121 static String
GetScheme(const GenericUri
& uri
) { return String(uri
.GetSchemeString(), uri
.GetSchemeStringLength()); }
122 static String
GetAuth(const GenericUri
& uri
) { return String(uri
.GetAuthString(), uri
.GetAuthStringLength()); }
123 static String
GetPath(const GenericUri
& uri
) { return String(uri
.GetPathString(), uri
.GetPathStringLength()); }
124 static String
GetQuery(const GenericUri
& uri
) { return String(uri
.GetQueryString(), uri
.GetQueryStringLength()); }
125 static String
GetFrag(const GenericUri
& uri
) { return String(uri
.GetFragString(), uri
.GetFragStringLength()); }
128 //! Equality operators
// Two URIs are equal when Match(rhs, true) succeeds.
// \param rhs URI to compare with.
129 bool operator==(const GenericUri
& rhs
) const {
130 return Match(rhs
, true);
// Inequality: the negation of Match(rhs, true).
// \param rhs URI to compare with.
133 bool operator!=(const GenericUri
& rhs
) const {
134 return !Match(rhs
, true);
// Compare this URI with another by comparing two strings, s1 and s2.
// \param uri  URI to compare with.
// \param full Defaults to true. NOTE(review): presumably selects the complete URI versus
//             the base (no fragment) as the strings compared — confirm against the body.
// \return true when the two strings are the same pointer (including both null) or
//         compare equal with internal::StrCmp; false when exactly one of them is null.
137 bool Match(const GenericUri
& uri
, bool full
= true) const {
// Identical pointers (this includes two null pointers) match without comparing characters.
147 if (s1
== s2
) return true;
// Only one side is null here, so the URIs differ.
148 if (s1
== 0 || s2
== 0) return false;
149 return internal::StrCmp
<Ch
>(s1
, s2
) == 0;
152 //! Resolve this URI against another (base) URI in accordance with URI resolution rules.
153 // See https://tools.ietf.org/html/rfc3986
154 // Use for resolving an id or $ref with an in-scope id.
155 // Returns a new GenericUri for the resolved URI.
// \param baseuri   URI supplying whichever leading parts (scheme, auth, path, query) this URI lacks.
// \param allocator Stored in the result's allocator_ before its storage is allocated; defaults to 0.
// Parts are written into the result in the order scheme, auth, path, query, frag; each
// CopyPart call returns the address at which the following part starts.
156 GenericUri
Resolve(const GenericUri
& baseuri
, Allocator
* allocator
= 0) {
158 resuri
.allocator_
= allocator
;
159 // Ensure enough space for combining paths
160 resuri
.Allocate(GetStringLength() + baseuri
.GetStringLength() + 1); // + 1 for joining slash
// This URI carries its own scheme: nothing is taken from the base.
162 if (!(GetSchemeStringLength() == 0)) {
163 // Use all of this URI
164 resuri
.auth_
= CopyPart(resuri
.scheme_
, scheme_
, GetSchemeStringLength());
165 resuri
.path_
= CopyPart(resuri
.auth_
, auth_
, GetAuthStringLength());
166 resuri
.query_
= CopyPart(resuri
.path_
, path_
, GetPathStringLength());
167 resuri
.frag_
= CopyPart(resuri
.query_
, query_
, GetQueryStringLength());
168 resuri
.RemoveDotSegments();
170 // Use the base scheme
171 resuri
.auth_
= CopyPart(resuri
.scheme_
, baseuri
.scheme_
, baseuri
.GetSchemeStringLength());
// No scheme, but this URI has an authority: only the scheme comes from the base.
172 if (!(GetAuthStringLength() == 0)) {
173 // Use this auth, path, query
174 resuri
.path_
= CopyPart(resuri
.auth_
, auth_
, GetAuthStringLength());
175 resuri
.query_
= CopyPart(resuri
.path_
, path_
, GetPathStringLength());
176 resuri
.frag_
= CopyPart(resuri
.query_
, query_
, GetQueryStringLength());
177 resuri
.RemoveDotSegments();
// The base authority is copied into the result.
180 resuri
.path_
= CopyPart(resuri
.auth_
, baseuri
.auth_
, baseuri
.GetAuthStringLength());
// An empty path on this URI means the base path is copied.
181 if (GetPathStringLength() == 0) {
183 resuri
.query_
= CopyPart(resuri
.path_
, baseuri
.path_
, baseuri
.GetPathStringLength());
184 if (GetQueryStringLength() == 0) {
185 // Use the base query
186 resuri
.frag_
= CopyPart(resuri
.query_
, baseuri
.query_
, baseuri
.GetQueryStringLength());
189 resuri
.frag_
= CopyPart(resuri
.query_
, query_
, GetQueryStringLength());
192 if (path_
[0] == '/') {
193 // Absolute path - use all of this path
194 resuri
.query_
= CopyPart(resuri
.path_
, path_
, GetPathStringLength());
195 resuri
.RemoveDotSegments();
197 // Relative path - append this path to base path after base path's last slash
// A base with an authority but an empty path gets a '/' written at position pos of the result path.
199 if (!(baseuri
.GetAuthStringLength() == 0) && baseuri
.GetPathStringLength() == 0) {
200 resuri
.path_
[pos
] = '/';
// Scan backwards from the end of the base path for its last '/'.
203 size_t lastslashpos
= baseuri
.GetPathStringLength();
204 while (lastslashpos
> 0) {
205 if (baseuri
.path_
[lastslashpos
- 1] == '/') break;
// Keep the base path up to and including that slash (lastslashpos characters).
208 std::memcpy(&resuri
.path_
[pos
], baseuri
.path_
, lastslashpos
* sizeof(Ch
));
// This URI's relative path is written at position pos of the result path.
210 resuri
.query_
= CopyPart(&resuri
.path_
[pos
], path_
, GetPathStringLength());
211 resuri
.RemoveDotSegments();
214 resuri
.frag_
= CopyPart(resuri
.query_
, query_
, GetQueryStringLength());
218 // Always use this frag
219 resuri
.base_
= CopyPart(resuri
.frag_
, frag_
, GetFragStringLength());
221 // Re-constitute base_ and uri_
// uri_ is placed after base_ plus one extra character position.
223 resuri
.uri_
= resuri
.base_
+ resuri
.GetBaseStringLength() + 1;
228 //! Get the allocator of this GenericUri.
229 Allocator
& GetAllocator() { return *allocator_
; }
232 // Allocate memory for a URI
233 // Returns total amount allocated
// \param len Length in characters of the URI the block must be able to hold.
234 std::size_t Allocate(std::size_t len
) {
235 // Create own allocator if user did not supply.
// The new allocator is recorded in ownAllocator_ as well as allocator_.
237 ownAllocator_
= allocator_
= RAPIDJSON_NEW(Allocator
)();
239 // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated.
240 // Order: scheme, auth, path, query, frag, base, uri
241 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
// Size is in bytes. Presumably 3 * len covers the five parts together, the base and the
// full URI (each at most len characters), and 7 is one terminator per string — confirm.
242 size_t total
= (3 * len
+ 7) * sizeof(Ch
);
// scheme_ receives the start of the block returned by Malloc.
243 scheme_
= static_cast<Ch
*>(allocator_
->Malloc(total
));
266 // Free memory for a URI
// scheme_ holds the pointer returned by Malloc for the single block containing every
// part, so freeing it releases the storage of all of them.
269 Allocator::Free(scheme_
);
274 // Parse a URI into constituent scheme, authority, path, query, & fragment parts
275 // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per
276 // https://tools.ietf.org/html/rfc3986
// \param uri Characters to parse.
// \param len Number of characters of uri to consider.
// Each part is copied into its own null-terminated slot; the slot pointers are derived one
// after another from the previous part's pointer and length.
277 void Parse(const Ch
* uri
, std::size_t len
) {
// start marks where the part being examined begins; pos1 and pos2 are scan positions.
278 std::size_t start
= 0, pos1
= 0, pos2
= 0;
281 // Look for scheme ([^:/?#]+):)?
284 if (uri
[pos1
] == ':') break;
289 if (uri
[pos2
] == '/') break;
290 if (uri
[pos2
] == '?') break;
291 if (uri
[pos2
] == '#') break;
296 std::memcpy(scheme_
, &uri
[start
], pos1
* sizeof(Ch
));
297 scheme_
[pos1
] = '\0';
302 // Look for auth (//([^/?#]*))?
303 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
304 auth_
= scheme_
+ GetSchemeStringLength();
// NOTE(review): len is std::size_t, so `len - 1` wraps around when len == 0 and this guard
// no longer stops uri[start] from being read — confirm an empty, unterminated buffer cannot reach here.
307 if (start
< len
- 1 && uri
[start
] == '/' && uri
[start
+ 1] == '/') {
310 if (uri
[pos2
] == '/') break;
311 if (uri
[pos2
] == '?') break;
312 if (uri
[pos2
] == '#') break;
315 std::memcpy(auth_
, &uri
[start
], (pos2
- start
) * sizeof(Ch
));
316 auth_
[pos2
- start
] = '\0';
319 // Look for path ([^?#]*)
320 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
321 path_
= auth_
+ GetAuthStringLength();
327 if (uri
[pos2
] == '?') break;
328 if (uri
[pos2
] == '#') break;
332 std::memcpy(path_
, &uri
[start
], (pos2
- start
) * sizeof(Ch
));
333 path_
[pos2
- start
] = '\0';
335 RemoveDotSegments(); // absolute path - normalize
339 // Look for query (\?([^#]*))?
340 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
341 query_
= path_
+ GetPathStringLength();
344 if (start
< len
&& uri
[start
] == '?') {
347 if (uri
[pos2
] == '#') break;
351 std::memcpy(query_
, &uri
[start
], (pos2
- start
) * sizeof(Ch
));
352 query_
[pos2
- start
] = '\0';
356 // Look for fragment (#(.*))?
357 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
358 frag_
= query_
+ GetQueryStringLength();
// The fragment is everything from '#' to the end of the input, so no scan is needed.
361 if (start
< len
&& uri
[start
] == '#') {
362 std::memcpy(frag_
, &uri
[start
], (len
- start
) * sizeof(Ch
));
363 frag_
[len
- start
] = '\0';
366 // Re-constitute base_ and uri_
// The extra + 1 skips one character position after the preceding string.
367 base_
= frag_
+ GetFragStringLength() + 1;
369 uri_
= base_
+ GetBaseStringLength() + 1;
// Concatenate scheme, auth, path and query (no fragment) at the position `next` points to,
// advancing `next` past each part as it is copied.
376 std::memcpy(next
, scheme_
, GetSchemeStringLength() * sizeof(Ch
));
377 next
+= GetSchemeStringLength();
378 std::memcpy(next
, auth_
, GetAuthStringLength() * sizeof(Ch
));
379 next
+= GetAuthStringLength();
380 std::memcpy(next
, path_
, GetPathStringLength() * sizeof(Ch
));
381 next
+= GetPathStringLength();
382 std::memcpy(next
, query_
, GetQueryStringLength() * sizeof(Ch
));
383 next
+= GetQueryStringLength();
// Concatenate the base string followed by the fragment at the position `next` points to,
// advancing `next` past each piece as it is copied.
390 std::memcpy(next
, base_
, GetBaseStringLength() * sizeof(Ch
));
391 next
+= GetBaseStringLength();
392 std::memcpy(next
, frag_
, GetFragStringLength() * sizeof(Ch
));
393 next
+= GetFragStringLength();
397 // Copy a part from one GenericUri to another
398 // Return the pointer to the next part to be copied to
// \param to   Destination; must not be null (asserted).
// \param from Source; must not be null (asserted).
// \param len  Number of characters to copy.
399 Ch
* CopyPart(Ch
* to
, Ch
* from
, std::size_t len
) {
400 RAPIDJSON_ASSERT(to
!= 0);
401 RAPIDJSON_ASSERT(from
!= 0);
402 std::memcpy(to
, from
, len
* sizeof(Ch
));
// The next part starts one character position beyond the copied characters.
404 Ch
* next
= to
+ len
+ 1;
408 // Remove . and .. segments from the path_ member.
409 // https://tools.ietf.org/html/rfc3986
410 // This is done in place as we are only removing segments.
// pathpos reads through the original path while newpos writes the normalised path into
// the same buffer; the asserts below check that newpos never gets ahead of pathpos.
411 void RemoveDotSegments() {
412 std::size_t pathlen
= GetPathStringLength();
413 std::size_t pathpos
= 0; // Position in path_
414 std::size_t newpos
= 0; // Position in new path_
416 // Loop through each segment in original path_
417 while (pathpos
< pathlen
) {
418 // Get next segment, bounded by '/' or end
// slashpos is the offset from pathpos at which the segment ends.
420 while ((pathpos
+ slashpos
) < pathlen
) {
421 if (path_
[pathpos
+ slashpos
] == '/') break;
424 // Check for .. and . segments
425 if (slashpos
== 2 && path_
[pathpos
] == '.' && path_
[pathpos
+ 1] == '.') {
426 // Backup a .. segment in the new path_
427 // We expect to find a previously added slash at the end or nothing
428 RAPIDJSON_ASSERT(newpos
== 0 || path_
[newpos
- 1] == '/');
429 size_t lastslashpos
= newpos
;
430 // Make sure we don't go beyond the start segment
431 if (lastslashpos
> 1) {
432 // Find the next to last slash and back up to it
434 while (lastslashpos
> 0) {
435 if (path_
[lastslashpos
- 1] == '/') break;
438 // Set the new path_ position
439 newpos
= lastslashpos
;
441 } else if (slashpos
== 1 && path_
[pathpos
] == '.') {
442 // Discard . segment, leaves new path_ unchanged
444 // Move any other kind of segment to the new path_
445 RAPIDJSON_ASSERT(newpos
<= pathpos
);
// memmove is used because source and destination lie in the same buffer and may overlap.
446 std::memmove(&path_
[newpos
], &path_
[pathpos
], slashpos
* sizeof(Ch
));
448 // Add slash if not at end
449 if ((pathpos
+ slashpos
) < pathlen
) {
454 // Move to next segment
// The + 1 steps over the '/' that ended the segment.
455 pathpos
+= slashpos
+ 1;
// Terminate the normalised path at its new length.
457 path_
[newpos
] = '\0';
// Pointers to the stored strings. All of them point into one allocated block laid out in
// the order scheme, auth, path, query, frag, base, uri; scheme_ is the start of that block.
460 Ch
* uri_
; // Everything
461 Ch
* base_
; // Everything except fragment
462 Ch
* scheme_
; // Includes the :
463 Ch
* auth_
; // Includes the //
464 Ch
* path_
; // Absolute if starts with /
465 Ch
* query_
; // Includes the ?
466 Ch
* frag_
; // Includes the #
468 Allocator
* allocator_
; //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
469 Allocator
* ownAllocator_
; //!< Allocator owned by this Uri.
472 //! GenericUri for Value (UTF-8, default allocator).
// The Allocator template argument is left at its default, CrtAllocator.
473 typedef GenericUri
<Value
> Uri
;
475 RAPIDJSON_NAMESPACE_END
477 #if defined(__clang__)
481 #endif // RAPIDJSON_URI_H_