]> git.proxmox.com Git - rustc.git/blob - vendor/unicode-bidi/src/prepare.rs
New upstream version 1.63.0+dfsg1
[rustc.git] / vendor / unicode-bidi / src / prepare.rs
1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 //! 3.3.3 Preparations for Implicit Processing
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
13
14 use alloc::vec::Vec;
15 use core::cmp::max;
16 use core::ops::Range;
17
18 use super::level::Level;
19 use super::BidiClass::{self, *};
20
21 /// A maximal substring of characters with the same embedding level.
22 ///
23 /// Represented as a range of byte indices.
24 pub type LevelRun = Range<usize>;
25
26 /// Output of `isolating_run_sequences` (steps X9-X10)
27 #[derive(Debug, PartialEq)]
28 pub struct IsolatingRunSequence {
29 pub runs: Vec<LevelRun>,
30 pub sos: BidiClass, // Start-of-sequence type.
31 pub eos: BidiClass, // End-of-sequence type.
32 }
33
34 /// Compute the set of isolating run sequences.
35 ///
36 /// An isolating run sequence is a maximal sequence of level runs such that for all level runs
37 /// except the last one in the sequence, the last character of the run is an isolate initiator
38 /// whose matching PDI is the first character of the next level run in the sequence.
39 ///
40 /// Note: This function does *not* return the sequences in order by their first characters.
41 #[cfg_attr(feature = "flame_it", flamer::flame)]
42 pub fn isolating_run_sequences(
43 para_level: Level,
44 original_classes: &[BidiClass],
45 levels: &[Level],
46 ) -> Vec<IsolatingRunSequence> {
47 let runs = level_runs(levels, original_classes);
48
49 // Compute the set of isolating run sequences.
50 // <http://www.unicode.org/reports/tr9/#BD13>
51 let mut sequences = Vec::with_capacity(runs.len());
52
53 // When we encounter an isolate initiator, we push the current sequence onto the
54 // stack so we can resume it after the matching PDI.
55 let mut stack = vec![Vec::new()];
56
57 for run in runs {
58 assert!(run.len() > 0);
59 assert!(!stack.is_empty());
60
61 let start_class = original_classes[run.start];
62 let end_class = original_classes[run.end - 1];
63
64 let mut sequence = if start_class == PDI && stack.len() > 1 {
65 // Continue a previous sequence interrupted by an isolate.
66 stack.pop().unwrap()
67 } else {
68 // Start a new sequence.
69 Vec::new()
70 };
71
72 sequence.push(run);
73
74 if let RLI | LRI | FSI = end_class {
75 // Resume this sequence after the isolate.
76 stack.push(sequence);
77 } else {
78 // This sequence is finished.
79 sequences.push(sequence);
80 }
81 }
82 // Pop any remaning sequences off the stack.
83 sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
84
85 // Determine the `sos` and `eos` class for each sequence.
86 // <http://www.unicode.org/reports/tr9/#X10>
87 sequences
88 .into_iter()
89 .map(|sequence: Vec<LevelRun>| {
90 assert!(!sequence.is_empty());
91
92 let start_of_seq = sequence[0].start;
93 let end_of_seq = sequence[sequence.len() - 1].end;
94 let seq_level = levels[start_of_seq];
95
96 #[cfg(test)]
97 for run in sequence.clone() {
98 for idx in run {
99 if not_removed_by_x9(&original_classes[idx]) {
100 assert_eq!(seq_level, levels[idx]);
101 }
102 }
103 }
104
105 // Get the level of the last non-removed char before the runs.
106 let pred_level = match original_classes[..start_of_seq]
107 .iter()
108 .rposition(not_removed_by_x9)
109 {
110 Some(idx) => levels[idx],
111 None => para_level,
112 };
113
114 // Get the level of the next non-removed char after the runs.
115 let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] {
116 para_level
117 } else {
118 match original_classes[end_of_seq..]
119 .iter()
120 .position(not_removed_by_x9)
121 {
122 Some(idx) => levels[end_of_seq + idx],
123 None => para_level,
124 }
125 };
126
127 IsolatingRunSequence {
128 runs: sequence,
129 sos: max(seq_level, pred_level).bidi_class(),
130 eos: max(seq_level, succ_level).bidi_class(),
131 }
132 })
133 .collect()
134 }
135
136 /// Finds the level runs in a paragraph.
137 ///
138 /// <http://www.unicode.org/reports/tr9/#BD7>
139 fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
140 assert_eq!(levels.len(), original_classes.len());
141
142 let mut runs = Vec::new();
143 if levels.is_empty() {
144 return runs;
145 }
146
147 let mut current_run_level = levels[0];
148 let mut current_run_start = 0;
149 for i in 1..levels.len() {
150 if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
151 // End the last run and start a new one.
152 runs.push(current_run_start..i);
153 current_run_level = levels[i];
154 current_run_start = i;
155 }
156 }
157 runs.push(current_run_start..levels.len());
158
159 runs
160 }
161
162 /// Should this character be ignored in steps after X9?
163 ///
164 /// <http://www.unicode.org/reports/tr9/#X9>
165 pub fn removed_by_x9(class: BidiClass) -> bool {
166 match class {
167 RLE | LRE | RLO | LRO | PDF | BN => true,
168 _ => false,
169 }
170 }
171
172 // For use as a predicate for `position` / `rposition`
173 pub fn not_removed_by_x9(class: &BidiClass) -> bool {
174 !removed_by_x9(*class)
175 }
176
177 #[cfg(test)]
178 mod tests {
179 use super::*;
180
181 #[test]
182 fn test_level_runs() {
183 assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);
184 assert_eq!(
185 level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]),
186 &[0..3, 3..5, 5..6, 6..8]
187 );
188 }
189
190 // From <http://www.unicode.org/reports/tr9/#BD13>
191 #[rustfmt::skip]
192 #[test]
193 fn test_isolating_run_sequences() {
194
195 // == Example 1 ==
196 // text1·RLE·text2·PDF·RLE·text3·PDF·text4
197 // index 0 1 2 3 4 5 6 7
198 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
199 let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
200 let para_level = Level::ltr();
201 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
202 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
203 assert_eq!(
204 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
205 vec![vec![0..2], vec![2..7], vec![7..8]]
206 );
207
208 // == Example 2 ==
209 // text1·RLI·text2·PDI·RLI·text3·PDI·text4
210 // index 0 1 2 3 4 5 6 7
211 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
212 let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
213 let para_level = Level::ltr();
214 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
215 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
216 assert_eq!(
217 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
218 vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
219 );
220
221 // == Example 3 ==
222 // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
223 // index 0 1 2 3 4 5 6 7 8 9 10 11 12
224 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
225 let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
226 let para_level = Level::ltr();
227 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
228 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
229 assert_eq!(
230 sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
231 vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
232 );
233 }
234
235 // From <http://www.unicode.org/reports/tr9/#X10>
236 #[rustfmt::skip]
237 #[test]
238 fn test_isolating_run_sequences_sos_and_eos() {
239
240 // == Example 1 ==
241 // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
242 // index 0 1 2 3 4 5 6 7 8 9 10 11
243 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
244 let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
245 let para_level = Level::ltr();
246 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
247 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
248
249 // text1
250 assert_eq!(
251 &sequences[0],
252 &IsolatingRunSequence {
253 runs: vec![0..2],
254 sos: L,
255 eos: R,
256 }
257 );
258
259 // text2
260 assert_eq!(
261 &sequences[1],
262 &IsolatingRunSequence {
263 runs: vec![2..4],
264 sos: R,
265 eos: L,
266 }
267 );
268
269 // text3
270 assert_eq!(
271 &sequences[2],
272 &IsolatingRunSequence {
273 runs: vec![4..6],
274 sos: L,
275 eos: L,
276 }
277 );
278
279 // text4 text5
280 assert_eq!(
281 &sequences[3],
282 &IsolatingRunSequence {
283 runs: vec![6..11],
284 sos: L,
285 eos: R,
286 }
287 );
288
289 // text6
290 assert_eq!(
291 &sequences[4],
292 &IsolatingRunSequence {
293 runs: vec![11..12],
294 sos: R,
295 eos: L,
296 }
297 );
298
299 // == Example 2 ==
300 // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
301 // index 0 1 2 3 4 5 6 7 8 9 10 11
302 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
303 let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
304 let para_level = Level::ltr();
305 let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
306 sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
307
308 // text1·RLI·PDI·RLI·PDI·text6
309 assert_eq!(
310 &sequences[0],
311 &IsolatingRunSequence {
312 runs: vec![0..2, 7..9, 10..12],
313 sos: L,
314 eos: L,
315 }
316 );
317
318 // text2·LRI·PDI·text4
319 assert_eq!(
320 &sequences[1],
321 &IsolatingRunSequence {
322 runs: vec![2..4, 5..7],
323 sos: R,
324 eos: R,
325 }
326 );
327
328 // text3
329 assert_eq!(
330 &sequences[2],
331 &IsolatingRunSequence {
332 runs: vec![4..5],
333 sos: L,
334 eos: L,
335 }
336 );
337
338 // text5
339 assert_eq!(
340 &sequences[3],
341 &IsolatingRunSequence {
342 runs: vec![9..10],
343 sos: R,
344 eos: R,
345 }
346 );
347 }
348
349 #[test]
350 fn test_removed_by_x9() {
351 let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
352 let not_classes = &[L, RLI, AL, LRI, PDI];
353 for x in rem_classes {
354 assert_eq!(removed_by_x9(*x), true);
355 }
356 for x in not_classes {
357 assert_eq!(removed_by_x9(*x), false);
358 }
359 }
360
361 #[test]
362 fn test_not_removed_by_x9() {
363 let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
364 for x in non_x9_classes {
365 assert_eq!(not_removed_by_x9(&x), true);
366 }
367 }
368 }