1 // Copyright 2018 Google LLC
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 //! Link label parsing and matching.
25 use crate::scanners
::{is_ascii_whitespace, scan_eol}
;
26 use crate::strings
::CowStr
;
28 pub enum ReferenceLabel
<'a
> {
33 pub type LinkLabel
<'a
> = UniCase
<CowStr
<'a
>>;
35 /// Assumes the opening bracket has already been scanned.
36 /// The line break handler determines what happens when a linebreak
37 /// is found. It is passed the bytes following the line break and
38 /// either returns `Some(k)`, where `k` is the number of bytes to skip,
39 /// or `None` to abort parsing the label.
40 /// Returns the number of bytes read (including closing bracket) and label on success.
41 pub(crate) fn scan_link_label_rest
<'t
>(
43 linebreak_handler
: &dyn Fn(&[u8]) -> Option
<usize>,
44 ) -> Option
<(usize, CowStr
<'t
>)> {
45 let bytes
= text
.as_bytes();
47 let mut only_white_space
= true;
48 let mut codepoints
= 0;
49 // no worries, doesn't allocate until we push things onto it
50 let mut label
= String
::new();
54 if codepoints
>= 1000 {
57 match *bytes
.get(ix
)?
{
63 only_white_space
= false;
65 b
if is_ascii_whitespace(b
) => {
66 // normalize labels by collapsing whitespaces, including linebreaks
67 let mut whitespaces
= 0;
68 let mut linebreaks
= 0;
69 let whitespace_start
= ix
;
71 while ix
< bytes
.len() && is_ascii_whitespace(bytes
[ix
]) {
72 if let Some(eol_bytes
) = scan_eol(&bytes
[ix
..]) {
78 ix
+= linebreak_handler(&bytes
[ix
..])?
;
79 whitespaces
+= 2; // indicate that we need to replace
81 whitespaces
+= if bytes
[ix
] == b' ' { 1 }
else { 2 }
;
86 label
.push_str(&text
[mark
..whitespace_start
]);
89 codepoints
+= ix
- whitespace_start
;
95 only_white_space
= false;
97 if b
& 0b1000_0000 != 0 {
104 if only_white_space
{
107 let cow
= if mark
== 0 {
110 label
.push_str(&text
[mark
..ix
]);
119 use super::scan_link_label_rest
;
122 fn whitespace_normalization() {
123 let input
= "«\t\tBlurry Eyes\t\t»][blurry_eyes]";
124 let expected_output
= "« Blurry Eyes »"; // regular spaces!
126 let (_bytes
, normalized_label
) = scan_link_label_rest(input
, &|_
| None
).unwrap();
127 assert_eq
!(expected_output
, normalized_label
.as_ref());
131 fn return_carriage_linefeed_ok() {
132 let input
= "hello\r\nworld\r\n]";
133 assert
!(scan_link_label_rest(input
, &|_
| Some(0)).is_some());