]>
Commit | Line | Data |
---|---|---|
abe05a73 XL |
1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. This file may not be copied, modified, or distributed | |
8 | // except according to those terms. | |
9 | ||
10 | //! 3.3.4 - 3.3.6. Resolve implicit levels and types. | |
11 | ||
12 | use std::cmp::max; | |
13 | ||
14 | use super::char_data::BidiClass; | |
15 | use super::prepare::{IsolatingRunSequence, LevelRun, not_removed_by_x9, removed_by_x9}; | |
16 | use super::level::Level; | |
17 | ||
18 | use BidiClass::*; | |
19 | ||
20 | /// 3.3.4 Resolving Weak Types | |
21 | /// | |
22 | /// <http://www.unicode.org/reports/tr9/#Resolving_Weak_Types> | |
23 | #[cfg_attr(feature = "flame_it", flame)] | |
24 | pub fn resolve_weak(sequence: &IsolatingRunSequence, processing_classes: &mut [BidiClass]) { | |
25 | // FIXME (#8): This function applies steps W1-W6 in a single pass. This can produce | |
26 | // incorrect results in cases where a "later" rule changes the value of `prev_class` seen | |
27 | // by an "earlier" rule. We should either split this into separate passes, or preserve | |
28 | // extra state so each rule can see the correct previous class. | |
29 | ||
30 | // FIXME: Also, this could be the cause of increased failure for using longer-UTF-8 chars in | |
31 | // conformance tests, like BidiTest:69635 (AL ET EN) | |
32 | ||
33 | let mut prev_class = sequence.sos; | |
34 | let mut last_strong_is_al = false; | |
35 | let mut et_run_indices = Vec::new(); // for W5 | |
36 | ||
37 | // Like sequence.runs.iter().flat_map(Clone::clone), but make indices itself clonable. | |
38 | fn id(x: LevelRun) -> LevelRun { | |
39 | x | |
40 | } | |
41 | let mut indices = sequence.runs.iter().cloned().flat_map( | |
42 | id as fn(LevelRun) -> LevelRun, | |
43 | ); | |
44 | ||
45 | while let Some(i) = indices.next() { | |
46 | match processing_classes[i] { | |
47 | // <http://www.unicode.org/reports/tr9/#W1> | |
48 | NSM => { | |
49 | processing_classes[i] = match prev_class { | |
50 | RLI | LRI | FSI | PDI => ON, | |
51 | _ => prev_class, | |
52 | }; | |
53 | } | |
54 | EN => { | |
55 | if last_strong_is_al { | |
56 | // W2. If previous strong char was AL, change EN to AN. | |
57 | processing_classes[i] = AN; | |
58 | } else { | |
59 | // W5. If a run of ETs is adjacent to an EN, change the ETs to EN. | |
60 | for j in &et_run_indices { | |
61 | processing_classes[*j] = EN; | |
62 | } | |
63 | et_run_indices.clear(); | |
64 | } | |
65 | } | |
66 | // <http://www.unicode.org/reports/tr9/#W3> | |
67 | AL => processing_classes[i] = R, | |
68 | ||
69 | // <http://www.unicode.org/reports/tr9/#W4> | |
70 | ES | CS => { | |
71 | let next_class = indices | |
72 | .clone() | |
73 | .map(|j| processing_classes[j]) | |
74 | .find(not_removed_by_x9) | |
75 | .unwrap_or(sequence.eos); | |
76 | processing_classes[i] = match (prev_class, processing_classes[i], next_class) { | |
77 | (EN, ES, EN) | (EN, CS, EN) => EN, | |
78 | (AN, CS, AN) => AN, | |
79 | (_, _, _) => ON, | |
80 | } | |
81 | } | |
82 | // <http://www.unicode.org/reports/tr9/#W5> | |
83 | ET => { | |
84 | match prev_class { | |
85 | EN => processing_classes[i] = EN, | |
86 | _ => et_run_indices.push(i), // In case this is followed by an EN. | |
87 | } | |
88 | } | |
89 | class => { | |
90 | if removed_by_x9(class) { | |
91 | continue; | |
92 | } | |
93 | } | |
94 | } | |
95 | ||
96 | prev_class = processing_classes[i]; | |
97 | match prev_class { | |
98 | L | R => { | |
99 | last_strong_is_al = false; | |
100 | } | |
101 | AL => { | |
102 | last_strong_is_al = true; | |
103 | } | |
104 | _ => {} | |
105 | } | |
106 | if prev_class != ET { | |
107 | // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. | |
108 | for j in &et_run_indices { | |
109 | processing_classes[*j] = ON; | |
110 | } | |
111 | et_run_indices.clear(); | |
112 | } | |
113 | } | |
114 | ||
115 | // W7. If the previous strong char was L, change EN to L. | |
116 | let mut last_strong_is_l = sequence.sos == L; | |
117 | for run in &sequence.runs { | |
118 | for i in run.clone() { | |
119 | match processing_classes[i] { | |
120 | EN if last_strong_is_l => { | |
121 | processing_classes[i] = L; | |
122 | } | |
123 | L => { | |
124 | last_strong_is_l = true; | |
125 | } | |
126 | R | AL => { | |
127 | last_strong_is_l = false; | |
128 | } | |
129 | _ => {} | |
130 | } | |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | /// 3.3.5 Resolving Neutral Types | |
136 | /// | |
137 | /// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types> | |
138 | #[cfg_attr(feature = "flame_it", flame)] | |
139 | pub fn resolve_neutral( | |
140 | sequence: &IsolatingRunSequence, | |
141 | levels: &[Level], | |
142 | processing_classes: &mut [BidiClass], | |
143 | ) { | |
144 | let e: BidiClass = levels[sequence.runs[0].start].bidi_class(); | |
145 | let mut indices = sequence.runs.iter().flat_map(Clone::clone); | |
146 | let mut prev_class = sequence.sos; | |
147 | ||
148 | while let Some(mut i) = indices.next() { | |
149 | // N0. Process bracket pairs. | |
150 | // TODO | |
151 | ||
152 | // Process sequences of NI characters. | |
153 | let mut ni_run = Vec::new(); | |
154 | if is_NI(processing_classes[i]) { | |
155 | // Consume a run of consecutive NI characters. | |
156 | ni_run.push(i); | |
157 | let mut next_class; | |
158 | loop { | |
159 | match indices.next() { | |
160 | Some(j) => { | |
161 | i = j; | |
162 | if removed_by_x9(processing_classes[i]) { | |
163 | continue; | |
164 | } | |
165 | next_class = processing_classes[j]; | |
166 | if is_NI(next_class) { | |
167 | ni_run.push(i); | |
168 | } else { | |
169 | break; | |
170 | } | |
171 | } | |
172 | None => { | |
173 | next_class = sequence.eos; | |
174 | break; | |
175 | } | |
176 | }; | |
177 | } | |
178 | ||
179 | // N1-N2. | |
180 | // | |
181 | // <http://www.unicode.org/reports/tr9/#N1> | |
182 | // <http://www.unicode.org/reports/tr9/#N2> | |
183 | let new_class = match (prev_class, next_class) { | |
184 | (L, L) => L, | |
185 | (R, R) | (R, AN) | (R, EN) | (AN, R) | (AN, AN) | (AN, EN) | (EN, R) | | |
186 | (EN, AN) | (EN, EN) => R, | |
187 | (_, _) => e, | |
188 | }; | |
189 | for j in &ni_run { | |
190 | processing_classes[*j] = new_class; | |
191 | } | |
192 | ni_run.clear(); | |
193 | } | |
194 | prev_class = processing_classes[i]; | |
195 | } | |
196 | } | |
197 | ||
198 | /// 3.3.6 Resolving Implicit Levels | |
199 | /// | |
200 | /// Returns the maximum embedding level in the paragraph. | |
201 | /// | |
202 | /// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels> | |
203 | #[cfg_attr(feature = "flame_it", flame)] | |
204 | pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level { | |
205 | let mut max_level = Level::ltr(); | |
206 | ||
207 | assert_eq!(original_classes.len(), levels.len()); | |
208 | for i in 0..levels.len() { | |
209 | match (levels[i].is_rtl(), original_classes[i]) { | |
210 | (false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"), | |
211 | (false, R) | (true, L) | (true, EN) | (true, AN) => { | |
212 | levels[i].raise(1).expect("Level number error") | |
213 | } | |
214 | (_, _) => {} | |
215 | } | |
216 | max_level = max(max_level, levels[i]); | |
217 | } | |
218 | ||
219 | max_level | |
220 | } | |
221 | ||
222 | /// Neutral or Isolate formatting character (B, S, WS, ON, FSI, LRI, RLI, PDI) | |
223 | /// | |
224 | /// <http://www.unicode.org/reports/tr9/#NI> | |
225 | #[allow(non_snake_case)] | |
226 | fn is_NI(class: BidiClass) -> bool { | |
227 | matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) | |
228 | } |