]>
Commit | Line | Data |
---|---|---|
abe05a73 XL |
1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. This file may not be copied, modified, or distributed | |
8 | // except according to those terms. | |
9 | ||
10 | //! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed | |
11 | //! right-to-left and left-to-right text. It is written in safe Rust, compatible with the | |
12 | //! current stable release. | |
13 | //! | |
14 | //! ## Example | |
15 | //! | |
16 | //! ```rust | |
923072b8 | 17 | //! # #[cfg(feature = "hardcoded-data")] { |
abe05a73 XL |
18 | //! use unicode_bidi::BidiInfo; |
19 | //! | |
20 | //! // This example text is defined using `concat!` because some browsers | |
21 | //! // and text editors have trouble displaying bidi strings. | |
22 | //! let text = concat![ | |
23 | //! "א", | |
24 | //! "ב", | |
25 | //! "ג", | |
26 | //! "a", | |
27 | //! "b", | |
28 | //! "c", | |
29 | //! ]; | |
30 | //! | |
31 | //! // Resolve embedding levels within the text. Pass `None` to detect the | |
32 | //! // paragraph level automatically. | |
33 | //! let bidi_info = BidiInfo::new(&text, None); | |
34 | //! | |
35 | //! // This paragraph has embedding level 1 because its first strong character is RTL. | |
36 | //! assert_eq!(bidi_info.paragraphs.len(), 1); | |
37 | //! let para = &bidi_info.paragraphs[0]; | |
38 | //! assert_eq!(para.level.number(), 1); | |
39 | //! assert_eq!(para.level.is_rtl(), true); | |
40 | //! | |
41 | //! // Re-ordering is done after wrapping each paragraph into a sequence of | |
42 | //! // lines. For this example, I'll just use a single line that spans the | |
43 | //! // entire paragraph. | |
44 | //! let line = para.range.clone(); | |
45 | //! | |
46 | //! let display = bidi_info.reorder_line(para, line); | |
47 | //! assert_eq!(display, concat![ | |
48 | //! "a", | |
49 | //! "b", | |
50 | //! "c", | |
51 | //! "ג", | |
52 | //! "ב", | |
53 | //! "א", | |
54 | //! ]); | |
923072b8 | 55 | //! # } // feature = "hardcoded-data" |
abe05a73 XL |
56 | //! ``` |
57 | //! | |
94222f64 XL |
58 | //! # Features |
59 | //! | |
60 | //! - `std`: Enabled by default, but can be disabled to make `unicode_bidi` | |
61 | //! `#![no_std]` + `alloc` compatible. | |
923072b8 | 62 | //! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs. |
94222f64 XL |
63 | //! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`] |
64 | //! implementations to relevant types. | |
65 | //! | |
abe05a73 XL |
66 | //! [tr9]: <http://www.unicode.org/reports/tr9/> |
67 | ||
68 | #![forbid(unsafe_code)] | |
94222f64 XL |
69 | #![no_std] |
70 | // We need to link to std to make doc tests work on older Rust versions | |
3c0e092e | 71 | #[cfg(feature = "std")] |
94222f64 XL |
72 | extern crate std; |
73 | #[macro_use] | |
74 | extern crate alloc; | |
75 | ||
923072b8 | 76 | pub mod data_source; |
abe05a73 XL |
77 | pub mod deprecated; |
78 | pub mod format_chars; | |
79 | pub mod level; | |
80 | ||
81 | mod char_data; | |
82 | mod explicit; | |
83 | mod implicit; | |
84 | mod prepare; | |
85 | ||
923072b8 FG |
86 | pub use crate::char_data::{BidiClass, UNICODE_VERSION}; |
87 | pub use crate::data_source::BidiDataSource; | |
cdc7bbd5 XL |
88 | pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; |
89 | pub use crate::prepare::LevelRun; | |
abe05a73 | 90 | |
923072b8 FG |
91 | #[cfg(feature = "hardcoded-data")] |
92 | pub use crate::char_data::{bidi_class, HardcodedBidiData}; | |
93 | ||
94222f64 | 94 | use alloc::borrow::Cow; |
94222f64 | 95 | use alloc::string::String; |
923072b8 | 96 | use alloc::vec::Vec; |
94222f64 XL |
97 | use core::cmp::{max, min}; |
98 | use core::iter::repeat; | |
99 | use core::ops::Range; | |
abe05a73 | 100 | |
cdc7bbd5 | 101 | use crate::format_chars as chars; |
923072b8 FG |
102 | use crate::BidiClass::*; |
103 | ||
/// Overall direction of a paragraph, as reported by `Paragraph::direction`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Direction {
    /// Every resolved level in the paragraph is left-to-right.
    Ltr,
    /// No resolved level in the paragraph is left-to-right.
    Rtl,
    /// The paragraph contains both left-to-right and right-to-left levels.
    Mixed,
}
abe05a73 XL |
110 | |
111 | /// Bidi information about a single paragraph | |
112 | #[derive(Debug, PartialEq)] | |
113 | pub struct ParagraphInfo { | |
114 | /// The paragraphs boundaries within the text, as byte indices. | |
115 | /// | |
116 | /// TODO: Shrink this to only include the starting index? | |
117 | pub range: Range<usize>, | |
118 | ||
119 | /// The paragraph embedding level. | |
120 | /// | |
121 | /// <http://www.unicode.org/reports/tr9/#BD4> | |
122 | pub level: Level, | |
123 | } | |
124 | ||
923072b8 FG |
125 | impl ParagraphInfo { |
126 | /// Gets the length of the paragraph in the source text. | |
127 | pub fn len(&self) -> usize { | |
128 | self.range.end - self.range.start | |
129 | } | |
130 | } | |
131 | ||
abe05a73 XL |
132 | /// Initial bidi information of the text. |
133 | /// | |
134 | /// Contains the text paragraphs and `BidiClass` of its characters. | |
135 | #[derive(PartialEq, Debug)] | |
136 | pub struct InitialInfo<'text> { | |
137 | /// The text | |
138 | pub text: &'text str, | |
139 | ||
140 | /// The BidiClass of the character at each byte in the text. | |
141 | /// If a character is multiple bytes, its class will appear multiple times in the vector. | |
142 | pub original_classes: Vec<BidiClass>, | |
143 | ||
144 | /// The boundaries and level of each paragraph within the text. | |
145 | pub paragraphs: Vec<ParagraphInfo>, | |
146 | } | |
147 | ||
148 | impl<'text> InitialInfo<'text> { | |
149 | /// Find the paragraphs and BidiClasses in a string of text. | |
150 | /// | |
151 | /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level> | |
152 | /// | |
153 | /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong | |
154 | /// character is found before the matching PDI. If no strong character is found, the class will | |
155 | /// remain FSI, and it's up to later stages to treat these as LRI when needed. | |
923072b8 FG |
156 | /// |
157 | /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this. | |
cdc7bbd5 | 158 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
923072b8 | 159 | #[cfg(feature = "hardcoded-data")] |
cdc7bbd5 | 160 | pub fn new(text: &str, default_para_level: Option<Level>) -> InitialInfo<'_> { |
923072b8 FG |
161 | Self::new_with_data_source(&HardcodedBidiData, text, default_para_level) |
162 | } | |
163 | ||
164 | /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`] | |
165 | /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`] | |
166 | /// instead (enabled with tbe default `hardcoded-data` Cargo feature) | |
167 | /// | |
168 | /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level> | |
169 | /// | |
170 | /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong | |
171 | /// character is found before the matching PDI. If no strong character is found, the class will | |
172 | /// remain FSI, and it's up to later stages to treat these as LRI when needed. | |
173 | #[cfg_attr(feature = "flame_it", flamer::flame)] | |
174 | pub fn new_with_data_source<'a, D: BidiDataSource>( | |
175 | data_source: &D, | |
176 | text: &'a str, | |
177 | default_para_level: Option<Level>, | |
178 | ) -> InitialInfo<'a> { | |
abe05a73 XL |
179 | let mut original_classes = Vec::with_capacity(text.len()); |
180 | ||
181 | // The stack contains the starting byte index for each nested isolate we're inside. | |
182 | let mut isolate_stack = Vec::new(); | |
183 | let mut paragraphs = Vec::new(); | |
184 | ||
185 | let mut para_start = 0; | |
186 | let mut para_level = default_para_level; | |
187 | ||
923072b8 FG |
188 | #[cfg(feature = "flame_it")] |
189 | flame::start("InitialInfo::new(): iter text.char_indices()"); | |
abe05a73 XL |
190 | |
191 | for (i, c) in text.char_indices() { | |
923072b8 | 192 | let class = data_source.bidi_class(c); |
abe05a73 | 193 | |
923072b8 FG |
194 | #[cfg(feature = "flame_it")] |
195 | flame::start("original_classes.extend()"); | |
abe05a73 XL |
196 | |
197 | original_classes.extend(repeat(class).take(c.len_utf8())); | |
198 | ||
923072b8 FG |
199 | #[cfg(feature = "flame_it")] |
200 | flame::end("original_classes.extend()"); | |
abe05a73 XL |
201 | |
202 | match class { | |
abe05a73 XL |
203 | B => { |
204 | // P1. Split the text into separate paragraphs. The paragraph separator is kept | |
205 | // with the previous paragraph. | |
206 | let para_end = i + c.len_utf8(); | |
207 | paragraphs.push(ParagraphInfo { | |
208 | range: para_start..para_end, | |
209 | // P3. If no character is found in p2, set the paragraph level to zero. | |
210 | level: para_level.unwrap_or(LTR_LEVEL), | |
211 | }); | |
212 | // Reset state for the start of the next paragraph. | |
213 | para_start = para_end; | |
214 | // TODO: Support defaulting to direction of previous paragraph | |
215 | // | |
216 | // <http://www.unicode.org/reports/tr9/#HL1> | |
217 | para_level = default_para_level; | |
218 | isolate_stack.clear(); | |
219 | } | |
220 | ||
221 | L | R | AL => { | |
222 | match isolate_stack.last() { | |
223 | Some(&start) => { | |
224 | if original_classes[start] == FSI { | |
225 | // X5c. If the first strong character between FSI and its matching | |
226 | // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI. | |
227 | for j in 0..chars::FSI.len_utf8() { | |
228 | original_classes[start + j] = | |
229 | if class == L { LRI } else { RLI }; | |
230 | } | |
231 | } | |
232 | } | |
233 | ||
234 | None => { | |
235 | if para_level.is_none() { | |
236 | // P2. Find the first character of type L, AL, or R, while skipping | |
237 | // any characters between an isolate initiator and its matching | |
238 | // PDI. | |
239 | para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL }); | |
240 | } | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
245 | RLI | LRI | FSI => { | |
246 | isolate_stack.push(i); | |
247 | } | |
248 | ||
249 | PDI => { | |
250 | isolate_stack.pop(); | |
251 | } | |
252 | ||
253 | _ => {} | |
254 | } | |
255 | } | |
256 | if para_start < text.len() { | |
257 | paragraphs.push(ParagraphInfo { | |
258 | range: para_start..text.len(), | |
259 | level: para_level.unwrap_or(LTR_LEVEL), | |
260 | }); | |
261 | } | |
262 | assert_eq!(original_classes.len(), text.len()); | |
263 | ||
923072b8 FG |
264 | #[cfg(feature = "flame_it")] |
265 | flame::end("InitialInfo::new(): iter text.char_indices()"); | |
abe05a73 XL |
266 | |
267 | InitialInfo { | |
268 | text, | |
269 | original_classes, | |
270 | paragraphs, | |
271 | } | |
272 | } | |
273 | } | |
274 | ||
275 | /// Bidi information of the text. | |
276 | /// | |
277 | /// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a | |
278 | /// character is multiple bytes wide, then its class and level will appear multiple times in these | |
279 | /// vectors. | |
280 | // TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T> | |
281 | #[derive(Debug, PartialEq)] | |
282 | pub struct BidiInfo<'text> { | |
283 | /// The text | |
284 | pub text: &'text str, | |
285 | ||
286 | /// The BidiClass of the character at each byte in the text. | |
287 | pub original_classes: Vec<BidiClass>, | |
288 | ||
289 | /// The directional embedding level of each byte in the text. | |
290 | pub levels: Vec<Level>, | |
291 | ||
292 | /// The boundaries and paragraph embedding level of each paragraph within the text. | |
293 | /// | |
294 | /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs? | |
295 | /// Or just don't include the first paragraph, which always starts at 0? | |
296 | pub paragraphs: Vec<ParagraphInfo>, | |
297 | } | |
298 | ||
299 | impl<'text> BidiInfo<'text> { | |
300 | /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph. | |
301 | /// | |
923072b8 FG |
302 | /// |
303 | /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this. | |
304 | /// | |
abe05a73 XL |
305 | /// TODO: In early steps, check for special cases that allow later steps to be skipped. like |
306 | /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison. | |
307 | /// | |
308 | /// TODO: Support auto-RTL base direction | |
cdc7bbd5 | 309 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
923072b8 | 310 | #[cfg(feature = "hardcoded-data")] |
cdc7bbd5 | 311 | pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo<'_> { |
923072b8 FG |
312 | Self::new_with_data_source(&HardcodedBidiData, text, default_para_level) |
313 | } | |
314 | ||
315 | /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`] | |
316 | /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`] | |
317 | /// instead (enabled with tbe default `hardcoded-data` Cargo feature). | |
318 | /// | |
319 | /// TODO: In early steps, check for special cases that allow later steps to be skipped. like | |
320 | /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison. | |
321 | /// | |
322 | /// TODO: Support auto-RTL base direction | |
323 | #[cfg_attr(feature = "flame_it", flamer::flame)] | |
324 | pub fn new_with_data_source<'a, D: BidiDataSource>( | |
325 | data_source: &D, | |
326 | text: &'a str, | |
327 | default_para_level: Option<Level>, | |
328 | ) -> BidiInfo<'a> { | |
abe05a73 XL |
329 | let InitialInfo { |
330 | original_classes, | |
331 | paragraphs, | |
332 | .. | |
923072b8 | 333 | } = InitialInfo::new_with_data_source(data_source, text, default_para_level); |
abe05a73 XL |
334 | |
335 | let mut levels = Vec::<Level>::with_capacity(text.len()); | |
336 | let mut processing_classes = original_classes.clone(); | |
337 | ||
338 | for para in ¶graphs { | |
339 | let text = &text[para.range.clone()]; | |
340 | let original_classes = &original_classes[para.range.clone()]; | |
341 | let processing_classes = &mut processing_classes[para.range.clone()]; | |
342 | ||
343 | let new_len = levels.len() + para.range.len(); | |
344 | levels.resize(new_len, para.level); | |
345 | let levels = &mut levels[para.range.clone()]; | |
346 | ||
347 | explicit::compute( | |
348 | text, | |
349 | para.level, | |
350 | original_classes, | |
351 | levels, | |
352 | processing_classes, | |
353 | ); | |
354 | ||
355 | let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); | |
356 | for sequence in &sequences { | |
357 | implicit::resolve_weak(sequence, processing_classes); | |
358 | implicit::resolve_neutral(sequence, levels, processing_classes); | |
359 | } | |
360 | implicit::resolve_levels(processing_classes, levels); | |
361 | ||
362 | assign_levels_to_removed_chars(para.level, original_classes, levels); | |
363 | } | |
364 | ||
365 | BidiInfo { | |
366 | text, | |
367 | original_classes, | |
368 | paragraphs, | |
369 | levels, | |
370 | } | |
371 | } | |
372 | ||
373 | /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` | |
374 | /// per *byte*. | |
cdc7bbd5 | 375 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
abe05a73 | 376 | pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> { |
cdc7bbd5 | 377 | let (levels, _) = self.visual_runs(para, line); |
abe05a73 XL |
378 | levels |
379 | } | |
380 | ||
381 | /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` | |
382 | /// per *character*. | |
cdc7bbd5 | 383 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
abe05a73 XL |
384 | pub fn reordered_levels_per_char( |
385 | &self, | |
386 | para: &ParagraphInfo, | |
387 | line: Range<usize>, | |
388 | ) -> Vec<Level> { | |
389 | let levels = self.reordered_levels(para, line); | |
390 | self.text.char_indices().map(|(i, _)| levels[i]).collect() | |
391 | } | |
392 | ||
abe05a73 | 393 | /// Re-order a line based on resolved levels and return the line in display order. |
cdc7bbd5 | 394 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
abe05a73 XL |
395 | pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> { |
396 | let (levels, runs) = self.visual_runs(para, line.clone()); | |
397 | ||
398 | // If all isolating run sequences are LTR, no reordering is needed | |
399 | if runs.iter().all(|run| levels[run.start].is_ltr()) { | |
cdc7bbd5 | 400 | return self.text[line].into(); |
abe05a73 XL |
401 | } |
402 | ||
403 | let mut result = String::with_capacity(line.len()); | |
404 | for run in runs { | |
405 | if levels[run.start].is_rtl() { | |
406 | result.extend(self.text[run].chars().rev()); | |
407 | } else { | |
408 | result.push_str(&self.text[run]); | |
409 | } | |
410 | } | |
411 | result.into() | |
412 | } | |
413 | ||
414 | /// Find the level runs within a line and return them in visual order. | |
415 | /// | |
416 | /// `line` is a range of bytes indices within `levels`. | |
417 | /// | |
418 | /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels> | |
cdc7bbd5 | 419 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
abe05a73 XL |
420 | pub fn visual_runs( |
421 | &self, | |
422 | para: &ParagraphInfo, | |
423 | line: Range<usize>, | |
424 | ) -> (Vec<Level>, Vec<LevelRun>) { | |
425 | assert!(line.start <= self.levels.len()); | |
426 | assert!(line.end <= self.levels.len()); | |
427 | ||
428 | let mut levels = self.levels.clone(); | |
cdc7bbd5 XL |
429 | let line_classes = &self.original_classes[line.clone()]; |
430 | let line_levels = &mut levels[line.clone()]; | |
abe05a73 XL |
431 | |
432 | // Reset some whitespace chars to paragraph level. | |
433 | // <http://www.unicode.org/reports/tr9/#L1> | |
434 | let line_str: &str = &self.text[line.clone()]; | |
435 | let mut reset_from: Option<usize> = Some(0); | |
436 | let mut reset_to: Option<usize> = None; | |
437 | for (i, c) in line_str.char_indices() { | |
cdc7bbd5 | 438 | match line_classes[i] { |
abe05a73 XL |
439 | // Ignored by X9 |
440 | RLE | LRE | RLO | LRO | PDF | BN => {} | |
441 | // Segment separator, Paragraph separator | |
442 | B | S => { | |
443 | assert_eq!(reset_to, None); | |
444 | reset_to = Some(i + c.len_utf8()); | |
445 | if reset_from == None { | |
446 | reset_from = Some(i); | |
447 | } | |
448 | } | |
449 | // Whitespace, isolate formatting | |
450 | WS | FSI | LRI | RLI | PDI => { | |
451 | if reset_from == None { | |
452 | reset_from = Some(i); | |
453 | } | |
454 | } | |
455 | _ => { | |
456 | reset_from = None; | |
457 | } | |
458 | } | |
459 | if let (Some(from), Some(to)) = (reset_from, reset_to) { | |
cdc7bbd5 XL |
460 | for level in &mut line_levels[from..to] { |
461 | *level = para.level; | |
abe05a73 XL |
462 | } |
463 | reset_from = None; | |
464 | reset_to = None; | |
465 | } | |
466 | } | |
467 | if let Some(from) = reset_from { | |
cdc7bbd5 XL |
468 | for level in &mut line_levels[from..] { |
469 | *level = para.level; | |
abe05a73 XL |
470 | } |
471 | } | |
472 | ||
473 | // Find consecutive level runs. | |
474 | let mut runs = Vec::new(); | |
475 | let mut start = line.start; | |
476 | let mut run_level = levels[start]; | |
477 | let mut min_level = run_level; | |
478 | let mut max_level = run_level; | |
479 | ||
480 | for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) { | |
481 | if new_level != run_level { | |
482 | // End of the previous run, start of a new one. | |
483 | runs.push(start..i); | |
484 | start = i; | |
485 | run_level = new_level; | |
486 | min_level = min(run_level, min_level); | |
487 | max_level = max(run_level, max_level); | |
488 | } | |
489 | } | |
490 | runs.push(start..line.end); | |
491 | ||
492 | let run_count = runs.len(); | |
493 | ||
494 | // Re-order the odd runs. | |
495 | // <http://www.unicode.org/reports/tr9/#L2> | |
496 | ||
497 | // Stop at the lowest *odd* level. | |
498 | min_level = min_level.new_lowest_ge_rtl().expect("Level error"); | |
499 | ||
500 | while max_level >= min_level { | |
501 | // Look for the start of a sequence of consecutive runs of max_level or higher. | |
502 | let mut seq_start = 0; | |
503 | while seq_start < run_count { | |
504 | if self.levels[runs[seq_start].start] < max_level { | |
505 | seq_start += 1; | |
506 | continue; | |
507 | } | |
508 | ||
509 | // Found the start of a sequence. Now find the end. | |
510 | let mut seq_end = seq_start + 1; | |
511 | while seq_end < run_count { | |
512 | if self.levels[runs[seq_end].start] < max_level { | |
513 | break; | |
514 | } | |
515 | seq_end += 1; | |
516 | } | |
517 | ||
518 | // Reverse the runs within this sequence. | |
519 | runs[seq_start..seq_end].reverse(); | |
520 | ||
521 | seq_start = seq_end; | |
522 | } | |
923072b8 FG |
523 | max_level |
524 | .lower(1) | |
525 | .expect("Lowering embedding level below zero"); | |
abe05a73 XL |
526 | } |
527 | ||
528 | (levels, runs) | |
529 | } | |
530 | ||
531 | /// If processed text has any computed RTL levels | |
532 | /// | |
533 | /// This information is usually used to skip re-ordering of text when no RTL level is present | |
534 | #[inline] | |
535 | pub fn has_rtl(&self) -> bool { | |
536 | level::has_rtl(&self.levels) | |
537 | } | |
538 | } | |
539 | ||
923072b8 FG |
540 | /// Contains a reference of `BidiInfo` and one of its `paragraphs`. |
541 | /// And it supports all operation in the `Paragraph` that needs also its | |
542 | /// `BidiInfo` such as `direction`. | |
543 | #[derive(Debug)] | |
544 | pub struct Paragraph<'a, 'text> { | |
545 | pub info: &'a BidiInfo<'text>, | |
546 | pub para: &'a ParagraphInfo, | |
547 | } | |
548 | ||
549 | impl<'a, 'text> Paragraph<'a, 'text> { | |
550 | pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> { | |
551 | Paragraph { info, para } | |
552 | } | |
553 | ||
554 | /// Returns if the paragraph is Left direction, right direction or mixed. | |
555 | pub fn direction(&self) -> Direction { | |
556 | let mut ltr = false; | |
557 | let mut rtl = false; | |
558 | for i in self.para.range.clone() { | |
559 | if self.info.levels[i].is_ltr() { | |
560 | ltr = true; | |
561 | } | |
562 | ||
563 | if self.info.levels[i].is_rtl() { | |
564 | rtl = true; | |
565 | } | |
566 | } | |
567 | ||
568 | if ltr && rtl { | |
569 | return Direction::Mixed; | |
570 | } | |
571 | ||
572 | if ltr { | |
573 | return Direction::Ltr; | |
574 | } | |
575 | ||
576 | Direction::Rtl | |
577 | } | |
578 | ||
579 | /// Returns the `Level` of a certain character in the paragraph. | |
580 | pub fn level_at(&self, pos: usize) -> Level { | |
581 | let actual_position = self.para.range.start + pos; | |
582 | self.info.levels[actual_position] | |
583 | } | |
584 | } | |
585 | ||
abe05a73 XL |
586 | /// Assign levels to characters removed by rule X9. |
587 | /// | |
588 | /// The levels assigned to these characters are not specified by the algorithm. This function | |
589 | /// assigns each one the level of the previous character, to avoid breaking level runs. | |
cdc7bbd5 | 590 | #[cfg_attr(feature = "flame_it", flamer::flame)] |
abe05a73 XL |
591 | fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) { |
592 | for i in 0..levels.len() { | |
593 | if prepare::removed_by_x9(classes[i]) { | |
594 | levels[i] = if i > 0 { levels[i - 1] } else { para_level }; | |
595 | } | |
596 | } | |
597 | } | |
598 | ||
abe05a73 | 599 | #[cfg(test)] |
923072b8 | 600 | #[cfg(feature = "hardcoded-data")] |
abe05a73 XL |
601 | mod tests { |
602 | use super::*; | |
603 | ||
604 | #[test] | |
605 | fn test_initial_text_info() { | |
606 | let text = "a1"; | |
607 | assert_eq!( | |
608 | InitialInfo::new(text, None), | |
609 | InitialInfo { | |
610 | text, | |
611 | original_classes: vec![L, EN], | |
923072b8 FG |
612 | paragraphs: vec![ParagraphInfo { |
613 | range: 0..2, | |
614 | level: LTR_LEVEL, | |
615 | },], | |
abe05a73 XL |
616 | } |
617 | ); | |
618 | ||
619 | let text = "غ א"; | |
620 | assert_eq!( | |
621 | InitialInfo::new(text, None), | |
622 | InitialInfo { | |
623 | text, | |
624 | original_classes: vec![AL, AL, WS, R, R], | |
923072b8 FG |
625 | paragraphs: vec![ParagraphInfo { |
626 | range: 0..5, | |
627 | level: RTL_LEVEL, | |
628 | },], | |
abe05a73 XL |
629 | } |
630 | ); | |
631 | ||
632 | let text = "a\u{2029}b"; | |
633 | assert_eq!( | |
634 | InitialInfo::new(text, None), | |
635 | InitialInfo { | |
636 | text, | |
637 | original_classes: vec![L, B, B, B, L], | |
638 | paragraphs: vec![ | |
639 | ParagraphInfo { | |
640 | range: 0..4, | |
641 | level: LTR_LEVEL, | |
642 | }, | |
643 | ParagraphInfo { | |
644 | range: 4..5, | |
645 | level: LTR_LEVEL, | |
646 | }, | |
647 | ], | |
648 | } | |
649 | ); | |
650 | ||
651 | let text = format!("{}א{}a", chars::FSI, chars::PDI); | |
652 | assert_eq!( | |
653 | InitialInfo::new(&text, None), | |
654 | InitialInfo { | |
655 | text: &text, | |
656 | original_classes: vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L], | |
923072b8 FG |
657 | paragraphs: vec![ParagraphInfo { |
658 | range: 0..9, | |
659 | level: LTR_LEVEL, | |
660 | },], | |
abe05a73 XL |
661 | } |
662 | ); | |
663 | } | |
664 | ||
665 | #[test] | |
923072b8 | 666 | #[cfg(feature = "hardcoded-data")] |
abe05a73 XL |
667 | fn test_process_text() { |
668 | let text = "abc123"; | |
669 | assert_eq!( | |
670 | BidiInfo::new(text, Some(LTR_LEVEL)), | |
671 | BidiInfo { | |
672 | text, | |
673 | levels: Level::vec(&[0, 0, 0, 0, 0, 0]), | |
674 | original_classes: vec![L, L, L, EN, EN, EN], | |
923072b8 FG |
675 | paragraphs: vec![ParagraphInfo { |
676 | range: 0..6, | |
677 | level: LTR_LEVEL, | |
678 | },], | |
abe05a73 XL |
679 | } |
680 | ); | |
681 | ||
682 | let text = "abc אבג"; | |
683 | assert_eq!( | |
684 | BidiInfo::new(text, Some(LTR_LEVEL)), | |
685 | BidiInfo { | |
686 | text, | |
687 | levels: Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]), | |
688 | original_classes: vec![L, L, L, WS, R, R, R, R, R, R], | |
923072b8 FG |
689 | paragraphs: vec![ParagraphInfo { |
690 | range: 0..10, | |
691 | level: LTR_LEVEL, | |
692 | },], | |
abe05a73 XL |
693 | } |
694 | ); | |
695 | assert_eq!( | |
696 | BidiInfo::new(text, Some(RTL_LEVEL)), | |
697 | BidiInfo { | |
698 | text, | |
699 | levels: Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]), | |
700 | original_classes: vec![L, L, L, WS, R, R, R, R, R, R], | |
923072b8 FG |
701 | paragraphs: vec![ParagraphInfo { |
702 | range: 0..10, | |
703 | level: RTL_LEVEL, | |
704 | },], | |
abe05a73 XL |
705 | } |
706 | ); | |
707 | ||
708 | let text = "אבג abc"; | |
709 | assert_eq!( | |
710 | BidiInfo::new(text, Some(LTR_LEVEL)), | |
711 | BidiInfo { | |
712 | text, | |
713 | levels: Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]), | |
714 | original_classes: vec![R, R, R, R, R, R, WS, L, L, L], | |
923072b8 FG |
715 | paragraphs: vec![ParagraphInfo { |
716 | range: 0..10, | |
717 | level: LTR_LEVEL, | |
718 | },], | |
abe05a73 XL |
719 | } |
720 | ); | |
721 | assert_eq!( | |
722 | BidiInfo::new(text, None), | |
723 | BidiInfo { | |
724 | text, | |
725 | levels: Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]), | |
726 | original_classes: vec![R, R, R, R, R, R, WS, L, L, L], | |
923072b8 FG |
727 | paragraphs: vec![ParagraphInfo { |
728 | range: 0..10, | |
729 | level: RTL_LEVEL, | |
730 | },], | |
abe05a73 XL |
731 | } |
732 | ); | |
733 | ||
734 | let text = "غ2ظ א2ג"; | |
735 | assert_eq!( | |
736 | BidiInfo::new(text, Some(LTR_LEVEL)), | |
737 | BidiInfo { | |
738 | text, | |
739 | levels: Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]), | |
740 | original_classes: vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R], | |
923072b8 FG |
741 | paragraphs: vec![ParagraphInfo { |
742 | range: 0..11, | |
743 | level: LTR_LEVEL, | |
744 | },], | |
abe05a73 XL |
745 | } |
746 | ); | |
747 | ||
748 | let text = "a א.\nג"; | |
749 | assert_eq!( | |
750 | BidiInfo::new(text, None), | |
751 | BidiInfo { | |
752 | text, | |
753 | original_classes: vec![L, WS, R, R, CS, B, R, R], | |
754 | levels: Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]), | |
755 | paragraphs: vec![ | |
756 | ParagraphInfo { | |
757 | range: 0..6, | |
758 | level: LTR_LEVEL, | |
759 | }, | |
760 | ParagraphInfo { | |
761 | range: 6..8, | |
762 | level: RTL_LEVEL, | |
763 | }, | |
764 | ], | |
765 | } | |
766 | ); | |
767 | ||
cdc7bbd5 | 768 | // BidiTest:69635 (AL ET EN) |
abe05a73 XL |
769 | let bidi_info = BidiInfo::new("\u{060B}\u{20CF}\u{06F9}", None); |
770 | assert_eq!(bidi_info.original_classes, vec![AL, AL, ET, ET, ET, EN, EN]); | |
771 | } | |
772 | ||
773 | #[test] | |
923072b8 | 774 | #[cfg(feature = "hardcoded-data")] |
abe05a73 XL |
775 | fn test_bidi_info_has_rtl() { |
776 | // ASCII only | |
777 | assert_eq!(BidiInfo::new("123", None).has_rtl(), false); | |
778 | assert_eq!(BidiInfo::new("123", Some(LTR_LEVEL)).has_rtl(), false); | |
779 | assert_eq!(BidiInfo::new("123", Some(RTL_LEVEL)).has_rtl(), false); | |
780 | assert_eq!(BidiInfo::new("abc", None).has_rtl(), false); | |
781 | assert_eq!(BidiInfo::new("abc", Some(LTR_LEVEL)).has_rtl(), false); | |
782 | assert_eq!(BidiInfo::new("abc", Some(RTL_LEVEL)).has_rtl(), false); | |
783 | assert_eq!(BidiInfo::new("abc 123", None).has_rtl(), false); | |
784 | assert_eq!(BidiInfo::new("abc\n123", None).has_rtl(), false); | |
785 | ||
786 | // With Hebrew | |
787 | assert_eq!(BidiInfo::new("אבּג", None).has_rtl(), true); | |
788 | assert_eq!(BidiInfo::new("אבּג", Some(LTR_LEVEL)).has_rtl(), true); | |
789 | assert_eq!(BidiInfo::new("אבּג", Some(RTL_LEVEL)).has_rtl(), true); | |
790 | assert_eq!(BidiInfo::new("abc אבּג", None).has_rtl(), true); | |
791 | assert_eq!(BidiInfo::new("abc\nאבּג", None).has_rtl(), true); | |
792 | assert_eq!(BidiInfo::new("אבּג abc", None).has_rtl(), true); | |
793 | assert_eq!(BidiInfo::new("אבּג\nabc", None).has_rtl(), true); | |
794 | assert_eq!(BidiInfo::new("אבּג 123", None).has_rtl(), true); | |
795 | assert_eq!(BidiInfo::new("אבּג\n123", None).has_rtl(), true); | |
796 | } | |
797 | ||
923072b8 | 798 | #[cfg(feature = "hardcoded-data")] |
cdc7bbd5 | 799 | fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> { |
abe05a73 XL |
800 | let bidi_info = BidiInfo::new(text, None); |
801 | bidi_info | |
802 | .paragraphs | |
803 | .iter() | |
804 | .map(|para| bidi_info.reorder_line(para, para.range.clone())) | |
805 | .collect() | |
806 | } | |
807 | ||
/// Checks `reorder_paras` against a table of inputs and the expected visual
/// ordering of each resulting paragraph.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reorder_line() {
    // Each entry is (input text, expected reordered paragraphs).
    let cases: Vec<(&str, Vec<&str>)> = vec![
        // Bidi_Class: L L L B L L L B L L L
        ("abc\ndef\nghi", vec!["abc\n", "def\n", "ghi"]),
        // Bidi_Class: L L EN B L L EN B L L EN
        ("ab1\nde2\ngh3", vec!["ab1\n", "de2\n", "gh3"]),
        // Bidi_Class: L L L B AL AL AL
        ("abc\nابج", vec!["abc\n", "جبا"]),
        // Bidi_Class: AL AL AL B L L L
        ("ابج\nabc", vec!["\nجبا", "abc"]),
        ("1.-2", vec!["1.-2"]),
        ("1-.2", vec!["1-.2"]),
        ("abc אבג", vec!["abc גבא"]),
        // Numbers being weak LTR characters, cannot reorder strong RTL
        ("123 אבג", vec!["גבא 123"]),
        ("abc\u{202A}def", vec!["abc\u{202A}def"]),
        (
            "abc\u{202A}def\u{202C}ghi",
            vec!["abc\u{202A}def\u{202C}ghi"],
        ),
        (
            "abc\u{2066}def\u{2069}ghi",
            vec!["abc\u{2066}def\u{2069}ghi"],
        ),
        // Testing for RLE Character
        (
            "\u{202B}abc אבג\u{202C}",
            vec!["\u{202B}\u{202C}גבא abc"],
        ),
        // Testing neutral characters
        ("אבג? אבג", vec!["גבא ?גבא"]),
        // Testing neutral characters with special case
        ("A אבג?", vec!["A גבא?"]),
        // Testing neutral characters with Implicit RTL Marker
        ("A אבג?\u{200F}", vec!["A \u{200F}?גבא"]),
        ("אבג abc", vec!["abc גבא"]),
        (
            "abc\u{2067}.-\u{2069}ghi",
            vec!["abc\u{2067}-.\u{2069}ghi"],
        ),
        (
            "Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!",
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"],
        ),
        // With mirrorable characters in RTL run
        ("א(ב)ג.", vec![".ג)ב(א"]),
        // With mirrorable characters on level boundary
        ("אב(גד[&ef].)gh", vec!["ef].)gh&[דג(בא"]),
    ];

    for (input, expected) in cases {
        // Include the input in the failure message so the failing row is identifiable.
        assert_eq!(reorder_paras(input), expected, "input: {:?}", input);
    }
}
879 | ||
880 | fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> { | |
881 | let bidi_info = BidiInfo::new(text, None); | |
882 | bidi_info | |
883 | .paragraphs | |
884 | .iter() | |
885 | .map(|para| bidi_info.reordered_levels(para, para.range.clone())) | |
886 | .collect() | |
887 | } | |
888 | ||
889 | fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> { | |
890 | let bidi_info = BidiInfo::new(text, None); | |
891 | bidi_info | |
892 | .paragraphs | |
893 | .iter() | |
923072b8 | 894 | .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone())) |
abe05a73 XL |
895 | .collect() |
896 | } | |
897 | ||
/// Checks the levels reported by `reordered_levels` and
/// `reordered_levels_per_char` for a few hand-picked inputs.
#[test]
#[cfg(feature = "hardcoded-data")]
fn test_reordered_levels() {
    // BidiTest:946 (LRI PDI)
    // NOTE(review): U+2067 is RLI, whereas the label above says LRI — confirm
    // which one the referenced BidiTest case intends.
    let isolate_pair = "\u{2067}\u{2069}";
    let first_levels = reordered_levels_for_paras(isolate_pair);
    let per_char_levels = reordered_levels_per_char_for_paras(isolate_pair);
    // The first result has six entries for this two-character input, the
    // per-char result has two.
    assert_eq!(first_levels, vec![Level::vec(&[0, 0, 0, 0, 0, 0])]);
    assert_eq!(per_char_levels, vec![Level::vec(&[0, 0])]);

    // Reorder only the sub-range 3..7 of the paragraph.
    let text = "aa טֶ";
    let info = BidiInfo::new(text, None);
    let line_levels = info.reordered_levels(&info.paragraphs[0], 3..7);
    assert_eq!(line_levels, Level::vec(&[0, 0, 0, 1, 1, 1, 1]));

    /* TODO
    /// BidiTest:69635 (AL ET EN)
    let text = "\u{060B}\u{20CF}\u{06F9}";
    assert_eq!(
        reordered_levels_for_paras(text),
        vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])]
    );
    assert_eq!(
        reordered_levels_per_char_for_paras(text),
        vec![Level::vec(&[1, 1, 2])]
    );
    */

    /* TODO
    // BidiTest:291284 (AN RLI PDF R)
    assert_eq!(
        reordered_levels_per_char_for_paras("\u{0605}\u{2067}\u{202C}\u{0590}"),
        vec![&["2", "0", "x", "1"]]
    );
    */
}
abe05a73 | 940 | |
923072b8 FG |
/// Checks the byte lengths reported for paragraphs, for a single paragraph
/// and for two paragraphs joined by a newline.
#[test]
fn test_paragraph_info_len() {
    let first = "hello world";
    let second = "How are you";

    // A single run of text yields one paragraph covering the whole input.
    let single = BidiInfo::new(first, None);
    assert_eq!(single.paragraphs.len(), 1);
    assert_eq!(single.paragraphs[0].len(), first.len());

    let joined = format!("{}\n{}", first, second);
    let both = BidiInfo::new(&joined, None);
    assert_eq!(both.paragraphs.len(), 2);

    // The first paragraph includes the paragraph separator.
    // TODO: investigate if the paragraph separator character
    // should not be part of any paragraph.
    let lengths: Vec<usize> = both.paragraphs.iter().map(|para| para.len()).collect();
    assert_eq!(lengths, vec![first.len() + 1, second.len()]);
}
959 | ||
/// Checks `Paragraph::direction` for an LTR-only, an RTL-only and a mixed
/// paragraph.
#[test]
fn test_direction() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";

    // Three paragraphs: LTR only, RTL only, then LTR immediately followed by RTL.
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    let info = BidiInfo::new(&combined, None);
    assert_eq!(info.paragraphs.len(), 3);

    let expected = vec![Direction::Ltr, Direction::Rtl, Direction::Mixed];
    for (para_info, wanted) in info.paragraphs.iter().zip(expected) {
        let paragraph = Paragraph::new(&info, para_info);
        assert_eq!(paragraph.direction(), wanted);
    }
}
974 | ||
/// Checks direction handling for empty text and for text consisting only of
/// a paragraph separator, with and without an explicit default level.
#[test]
fn test_edge_cases_direction() {
    // No paragraphs for empty text.
    let empty_info = BidiInfo::new("", Some(RTL_LEVEL));
    assert_eq!(empty_info.paragraphs.len(), 0);

    // For a lone paragraph separator, the expected direction follows the
    // supplied default level; with no default given, LTR is expected.
    let cases = vec![
        (None, Direction::Ltr),
        (Some(LTR_LEVEL), Direction::Ltr),
        (Some(RTL_LEVEL), Direction::Rtl),
    ];
    for (default_level, wanted) in cases {
        let info = BidiInfo::new("\n", default_level);
        assert_eq!(info.paragraphs.len(), 1);
        let paragraph = Paragraph::new(&info, &info.paragraphs[0]);
        assert_eq!(paragraph.direction(), wanted);
    }
}
1004 | ||
/// Checks `Paragraph::level_at` on LTR, RTL and mixed paragraphs, together
/// with the level count and start offset seen through the mixed paragraph.
#[test]
fn test_level_at() {
    let ltr_text = "hello world";
    let rtl_text = "أهلا بكم";
    let combined = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
    let info = BidiInfo::new(&combined, None);
    assert_eq!(info.paragraphs.len(), 3);

    let ltr_para = Paragraph::new(&info, &info.paragraphs[0]);
    let rtl_para = Paragraph::new(&info, &info.paragraphs[1]);
    let mixed_para = Paragraph::new(&info, &info.paragraphs[2]);

    // The first position of each paragraph.
    assert_eq!(ltr_para.level_at(0), LTR_LEVEL);
    assert_eq!(rtl_para.level_at(0), RTL_LEVEL);
    assert_eq!(mixed_para.level_at(0), LTR_LEVEL);

    // Total level count and the mixed paragraph's start offset.
    assert_eq!(mixed_para.info.levels.len(), 54);
    assert_eq!(mixed_para.para.range.start, 28);

    // Just past the LTR prefix of the mixed paragraph, the level is RTL.
    assert_eq!(mixed_para.level_at(ltr_text.len()), RTL_LEVEL);
}
1024 | } | |
abe05a73 XL |
1025 | |
/// Serialization tests, compiled only for test builds with the `serde`
/// feature enabled.
#[cfg(all(feature = "serde", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// Checks the serde token stream produced for the resolved levels of a
    /// short mixed-direction string.
    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let levels = BidiInfo::new(text, None).levels;
        assert_eq!(text.len(), 10);
        assert_eq!(levels.len(), 10);

        // Expected raw level values, in order: four at 0 followed by six at 1.
        let raw_levels: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        // Every level is expected as a `Level` newtype wrapping a `u8`,
        // all inside a single sequence.
        let mut expected = vec![Token::Seq { len: Some(10) }];
        for &raw in raw_levels.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(raw));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}