]>
git.proxmox.com Git - rustc.git/blob - vendor/unicode-bidi/src/prepare.rs
1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
10 //! 3.3.3 Preparations for Implicit Processing
12 //! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
18 use super::level
::Level
;
19 use super::BidiClass
::{self, *}
;
/// A maximal substring of characters with the same embedding level.
///
/// Represented as a range of byte indices (`start..end`, end exclusive).
pub type LevelRun = Range<usize>;
26 /// Output of `isolating_run_sequences` (steps X9-X10)
27 #[derive(Debug, PartialEq)]
28 pub struct IsolatingRunSequence
{
29 pub runs
: Vec
<LevelRun
>,
30 pub sos
: BidiClass
, // Start-of-sequence type.
31 pub eos
: BidiClass
, // End-of-sequence type.
34 /// Compute the set of isolating run sequences.
36 /// An isolating run sequence is a maximal sequence of level runs such that for all level runs
37 /// except the last one in the sequence, the last character of the run is an isolate initiator
38 /// whose matching PDI is the first character of the next level run in the sequence.
40 /// Note: This function does *not* return the sequences in order by their first characters.
///
/// Each returned sequence also carries its `sos` and `eos` classes (rule X10).
41 #[cfg_attr(feature = "flame_it", flamer::flame)]
42 pub fn isolating_run_sequences(
44 original_classes
: &[BidiClass
],
46 ) -> Vec
<IsolatingRunSequence
> {
// Split the paragraph into level runs first; sequences are assembled from whole runs.
47 let runs
= level_runs(levels
, original_classes
);
49 // Compute the set of isolating run sequences.
50 // <http://www.unicode.org/reports/tr9/#BD13>
// There can never be more sequences than level runs, so this capacity is an upper bound.
51 let mut sequences
= Vec
::with_capacity(runs
.len());
53 // When we encounter an isolate initiator, we push the current sequence onto the
54 // stack so we can resume it after the matching PDI.
55 let mut stack
= vec
![Vec
::new()];
58 assert
!(run
.len() > 0);
59 assert
!(!stack
.is_empty());
// Only the first and last class of a run decide how it links to other runs.
61 let start_class
= original_classes
[run
.start
];
62 let end_class
= original_classes
[run
.end
- 1];
64 let mut sequence
= if start_class
== PDI
&& stack
.len() > 1 {
65 // Continue a previous sequence interrupted by an isolate.
68 // Start a new sequence.
74 if let RLI
| LRI
| FSI
= end_class
{
75 // Resume this sequence after the isolate.
78 // This sequence is finished.
79 sequences
.push(sequence
);
82 // Pop any remaining sequences off the stack.
// Empty entries are filtered out so every emitted sequence has at least one run.
83 sequences
.extend(stack
.into_iter().rev().filter(|seq
| !seq
.is_empty()));
85 // Determine the `sos` and `eos` class for each sequence.
86 // <http://www.unicode.org/reports/tr9/#X10>
89 .map(|sequence
: Vec
<LevelRun
>| {
90 assert
!(!sequence
.is_empty());
// The sequence spans from the start of its first run to the end of its last run.
92 let start_of_seq
= sequence
[0].start
;
93 let end_of_seq
= sequence
[sequence
.len() - 1].end
;
94 let seq_level
= levels
[start_of_seq
];
// Sanity check: every character in the sequence that is not removed by X9 must sit at
// the sequence's level.
97 for run
in sequence
.clone() {
99 if not_removed_by_x9(&original_classes
[idx
]) {
100 assert_eq
!(seq_level
, levels
[idx
]);
105 // Get the level of the last non-removed char before the runs.
106 let pred_level
= match original_classes
[..start_of_seq
]
108 .rposition(not_removed_by_x9
)
110 Some(idx
) => levels
[idx
],
114 // Get the level of the next non-removed char after the runs.
115 let succ_level
= if let RLI
| LRI
| FSI
= original_classes
[end_of_seq
- 1] {
// `position` is relative to the sub-slice, hence the `end_of_seq +` offset below.
118 match original_classes
[end_of_seq
..]
120 .position(not_removed_by_x9
)
122 Some(idx
) => levels
[end_of_seq
+ idx
],
// `sos` / `eos` come from the higher of the sequence's own level and the neighbouring
// level on that side, converted to a class via `bidi_class()`.
127 IsolatingRunSequence
{
129 sos
: max(seq_level
, pred_level
).bidi_class(),
130 eos
: max(seq_level
, succ_level
).bidi_class(),
136 /// Finds the level runs in a paragraph.
138 /// <http://www.unicode.org/reports/tr9/#BD7>
139 fn level_runs(levels
: &[Level
], original_classes
: &[BidiClass
]) -> Vec
<LevelRun
> {
140 assert_eq
!(levels
.len(), original_classes
.len());
142 let mut runs
= Vec
::new();
143 if levels
.is_empty() {
147 let mut current_run_level
= levels
[0];
148 let mut current_run_start
= 0;
149 for i
in 1..levels
.len() {
150 if !removed_by_x9(original_classes
[i
]) && levels
[i
] != current_run_level
{
151 // End the last run and start a new one.
152 runs
.push(current_run_start
..i
);
153 current_run_level
= levels
[i
];
154 current_run_start
= i
;
157 runs
.push(current_run_start
..levels
.len());
162 /// Should this character be ignored in steps after X9?
164 /// <http://www.unicode.org/reports/tr9/#X9>
165 pub fn removed_by_x9(class
: BidiClass
) -> bool
{
167 RLE
| LRE
| RLO
| LRO
| PDF
| BN
=> true,
172 // For use as a predicate for `position` / `rposition`
173 pub fn not_removed_by_x9(class
: &BidiClass
) -> bool
{
174 !removed_by_x9(*class
)
/// Checks `level_runs` on an empty paragraph and on a paragraph with several level changes.
#[test]
fn test_level_runs() {
    // An empty paragraph has no level runs.
    assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]);
    // With no characters removed by X9, a new run starts at every level change.
    assert_eq!(
        level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]),
        &[0..3, 3..5, 5..6, 6..8]
    );
}
// From <http://www.unicode.org/reports/tr9/#BD13>
//
// Each case sorts the result by its first run before comparing, because
// `isolating_run_sequences` does not return sequences in order of their first characters.
#[test]
fn test_isolating_run_sequences() {
    // Embeddings only: every level run is its own sequence.
    // text1·RLE·text2·PDF·RLE·text3·PDF·text4
    // index 0 1 2 3 4 5 6 7
    let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
    let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
    let para_level = Level::ltr();
    let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
    sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
    assert_eq!(
        sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
        vec![vec![0..2], vec![2..7], vec![7..8]]
    );

    // Isolates: the outer text is stitched together across each isolate.
    // text1·RLI·text2·PDI·RLI·text3·PDI·text4
    // index 0 1 2 3 4 5 6 7
    let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
    let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
    let para_level = Level::ltr();
    let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
    sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
    assert_eq!(
        sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
        vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
    );

    // Nested isolates with an embedding inside the inner isolate.
    // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
    // index 0 1 2 3 4 5 6 7 8 9 10 11 12
    let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
    let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
    let para_level = Level::ltr();
    let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
    sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
    assert_eq!(
        sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
        vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
    );
}
235 // From <http://www.unicode.org/reports/tr9/#X10>
// Exercises `isolating_run_sequences` on two inputs (one using embeddings, one using
// isolates) and compares the results against expected `IsolatingRunSequence` values.
238 fn test_isolating_run_sequences_sos_and_eos() {
241 // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
242 // index 0 1 2 3 4 5 6 7 8 9 10 11
243 let classes
= &[L
, RLE
, L
, LRE
, L
, PDF
, L
, PDF
, RLE
, L
, PDF
, L
];
244 let levels
= &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
245 let para_level
= Level
::ltr();
246 let mut sequences
= isolating_run_sequences(para_level
, classes
, &Level
::vec(levels
));
// Sort by first run: the function does not return sequences in first-character order.
247 sequences
.sort_by(|a
, b
| a
.runs
[0].clone().cmp(b
.runs
[0].clone()));
252 &IsolatingRunSequence
{
262 &IsolatingRunSequence
{
272 &IsolatingRunSequence
{
282 &IsolatingRunSequence
{
292 &IsolatingRunSequence
{
300 // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
301 // index 0 1 2 3 4 5 6 7 8 9 10 11
302 let classes
= &[L
, RLI
, L
, LRI
, L
, PDI
, L
, PDI
, RLI
, L
, PDI
, L
];
303 let levels
= &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
304 let para_level
= Level
::ltr();
305 let mut sequences
= isolating_run_sequences(para_level
, classes
, &Level
::vec(levels
));
// Sort by first run: the function does not return sequences in first-character order.
306 sequences
.sort_by(|a
, b
| a
.runs
[0].clone().cmp(b
.runs
[0].clone()));
308 // text1·RLI·PDI·RLI·PDI·text6
311 &IsolatingRunSequence
{
312 runs
: vec
![0..2, 7..9, 10..12],
318 // text2·LRI·PDI·text4
321 &IsolatingRunSequence
{
322 runs
: vec
![2..4, 5..7],
331 &IsolatingRunSequence
{
341 &IsolatingRunSequence
{
/// Checks `removed_by_x9` against classes that X9 removes and a sample of classes it keeps.
#[test]
fn test_removed_by_x9() {
    let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
    let not_classes = &[L, RLI, AL, LRI, PDI];
    for x in rem_classes {
        assert!(removed_by_x9(*x), "{:?} should be removed by X9", x);
    }
    for x in not_classes {
        assert!(!removed_by_x9(*x), "{:?} should not be removed by X9", x);
    }
}
362 fn test_not_removed_by_x9() {
363 let non_x9_classes
= &[L
, R
, AL
, EN
, ES
, ET
, AN
, CS
, NSM
, B
, S
, WS
, ON
, LRI
, RLI
, FSI
, PDI
];
364 for x
in non_x9_classes
{
365 assert_eq
!(not_removed_by_x9(&x
), true);