1 // Copyright 2013-2016 The rust-url developers.
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
9 //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 //! as used by HTML forms.
//! Converts between a string (such as a URL’s query string)
//! and a sequence of (name, value) pairs.
16 extern crate percent_encoding
;
20 use percent_encoding
::{percent_decode, percent_encode_byte}
;
21 use query_encoding
::decode_utf8_lossy
;
22 use std
::borrow
::{Borrow, Cow}
;
27 pub use query_encoding
::EncodingOverride
;
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
32 /// Use `parse(input.as_bytes())` to parse a `&str` string.
34 /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
35 /// converted to `[("#first", "%try%")]`.
37 pub fn parse(input
: &[u8]) -> Parse
{
40 /// The return type of `parse()`.
41 #[derive(Copy, Clone)]
42 pub struct Parse
<'a
> {
46 impl<'a
> Iterator
for Parse
<'a
> {
47 type Item
= (Cow
<'a
, str>, Cow
<'a
, str>);
49 fn next(&mut self) -> Option
<Self::Item
> {
51 if self.input
.is_empty() {
54 let mut split2
= self.input
.splitn(2, |&b
| b
== b'
&'
);
55 let sequence
= split2
.next().unwrap();
56 self.input
= split2
.next().unwrap_or(&[][..]);
57 if sequence
.is_empty() {
60 let mut split2
= sequence
.splitn(2, |&b
| b
== b'
='
);
61 let name
= split2
.next().unwrap();
62 let value
= split2
.next().unwrap_or(&[][..]);
63 return Some((decode(name
), decode(value
)));
68 fn decode(input
: &[u8]) -> Cow
<str> {
69 let replaced
= replace_plus(input
);
70 decode_utf8_lossy(match percent_decode(&replaced
).into() {
71 Cow
::Owned(vec
) => Cow
::Owned(vec
),
72 Cow
::Borrowed(_
) => replaced
,
76 /// Replace b'+' with b' '
77 fn replace_plus(input
: &[u8]) -> Cow
<[u8]> {
78 match input
.iter().position(|&b
| b
== b'
+'
) {
79 None
=> Cow
::Borrowed(input
),
80 Some(first_position
) => {
81 let mut replaced
= input
.to_owned();
82 replaced
[first_position
] = b' '
;
83 for byte
in &mut replaced
[first_position
+ 1..] {
94 /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
95 pub fn into_owned(self) -> ParseIntoOwned
<'a
> {
96 ParseIntoOwned { inner: self }
100 /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
101 pub struct ParseIntoOwned
<'a
> {
105 impl<'a
> Iterator
for ParseIntoOwned
<'a
> {
106 type Item
= (String
, String
);
108 fn next(&mut self) -> Option
<Self::Item
> {
111 .map(|(k
, v
)| (k
.into_owned(), v
.into_owned()))
115 /// The [`application/x-www-form-urlencoded` byte serializer](
116 /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
118 /// Return an iterator of `&str` slices.
119 pub fn byte_serialize(input
: &[u8]) -> ByteSerialize
{
120 ByteSerialize { bytes: input }
123 /// Return value of `byte_serialize()`.
125 pub struct ByteSerialize
<'a
> {
/// Report whether `byte` belongs to the set that the byte serializer treats
/// as "unchanged".
///
/// Returns `true` for ASCII letters (`A`-`Z`, `a`-`z`), ASCII digits (`0`-`9`)
/// and the four punctuation bytes `*`, `-`, `.` and `_`; returns `false` for
/// every other byte value.
fn byte_serialized_unchanged(byte: u8) -> bool {
    // `is_ascii_alphanumeric` covers exactly 0-9, A-Z and a-z, replacing the
    // three hand-written ranges.
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
133 impl<'a
> Iterator
for ByteSerialize
<'a
> {
136 fn next(&mut self) -> Option
<&'a
str> {
137 if let Some((&first
, tail
)) = self.bytes
.split_first() {
138 if !byte_serialized_unchanged(first
) {
140 return Some(if first
== b' '
{
143 percent_encode_byte(first
)
146 let position
= tail
.iter().position(|&b
| !byte_serialized_unchanged(b
));
147 let (unchanged_slice
, remaining
) = match position
{
148 // 1 for first_byte + i unchanged in tail
149 Some(i
) => self.bytes
.split_at(1 + i
),
150 None
=> (self.bytes
, &[][..]),
152 self.bytes
= remaining
;
153 Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }
)
159 fn size_hint(&self) -> (usize, Option
<usize>) {
160 if self.bytes
.is_empty() {
163 (1, Some(self.bytes
.len()))
168 /// The [`application/x-www-form-urlencoded` serializer](
169 /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
170 pub struct Serializer
<'a
, T
: Target
> {
172 start_position
: usize,
173 encoding
: EncodingOverride
<'a
>,
177 fn as_mut_string(&mut self) -> &mut String
;
178 fn finish(self) -> Self::Finished
;
182 impl Target
for String
{
183 fn as_mut_string(&mut self) -> &mut String
{
186 fn finish(self) -> Self {
189 type Finished
= Self;
192 impl<'a
> Target
for &'a
mut String
{
193 fn as_mut_string(&mut self) -> &mut String
{
196 fn finish(self) -> Self {
199 type Finished
= Self;
202 impl<'a
, T
: Target
> Serializer
<'a
, T
> {
203 /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
205 /// If the target is non-empty,
206 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
207 pub fn new(target
: T
) -> Self {
208 Self::for_suffix(target
, 0)
211 /// Create a new `application/x-www-form-urlencoded` serializer
212 /// for a suffix of the given target.
214 /// If that suffix is non-empty,
215 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
216 pub fn for_suffix(mut target
: T
, start_position
: usize) -> Self {
217 &target
.as_mut_string()[start_position
..]; // Panic if out of bounds
219 target
: Some(target
),
225 /// Remove any existing name/value pair.
227 /// Panics if called after `.finish()`.
228 pub fn clear(&mut self) -> &mut Self {
229 string(&mut self.target
).truncate(self.start_position
);
233 /// Set the character encoding to be used for names and values before percent-encoding.
234 pub fn encoding_override(&mut self, new
: EncodingOverride
<'a
>) -> &mut Self {
239 /// Serialize and append a name/value pair.
241 /// Panics if called after `.finish()`.
242 pub fn append_pair(&mut self, name
: &str, value
: &str) -> &mut Self {
244 string(&mut self.target
),
253 /// Serialize and append a number of name/value pairs.
255 /// This simply calls `append_pair` repeatedly.
256 /// This can be more convenient, so the user doesn’t need to introduce a block
257 /// to limit the scope of `Serializer`’s borrow of its string.
259 /// Panics if called after `.finish()`.
260 pub fn extend_pairs
<I
, K
, V
>(&mut self, iter
: I
) -> &mut Self
263 I
::Item
: Borrow
<(K
, V
)>,
268 let string
= string(&mut self.target
);
270 let &(ref k
, ref v
) = pair
.borrow();
283 /// If this serializer was constructed with a string, take and return that string.
286 /// use form_urlencoded;
287 /// let encoded: String = form_urlencoded::Serializer::new(String::new())
288 /// .append_pair("foo", "bar & baz")
289 /// .append_pair("saison", "Été+hiver")
291 /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
294 /// Panics if called more than once.
295 pub fn finish(&mut self) -> T
::Finished
{
298 .expect("url::form_urlencoded::Serializer double finish")
303 fn append_separator_if_needed(string
: &mut String
, start_position
: usize) {
304 if string
.len() > start_position
{
309 fn string
<T
: Target
>(target
: &mut Option
<T
>) -> &mut String
{
312 .expect("url::form_urlencoded::Serializer finished")
318 start_position
: usize,
319 encoding
: EncodingOverride
,
323 append_separator_if_needed(string
, start_position
);
324 append_encoded(name
, string
, encoding
);
326 append_encoded(value
, string
, encoding
);
329 fn append_encoded(s
: &str, string
: &mut String
, encoding
: EncodingOverride
) {
330 string
.extend(byte_serialize(&query_encoding
::encode(encoding
, s
.into())))