1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
10 //! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed
11 //! right-to-left and left-to-right text. It is written in safe Rust, compatible with the
12 //! current stable release.
17 //! # #[cfg(feature = "hardcoded-data")] {
18 //! use unicode_bidi::BidiInfo;
20 //! // This example text is defined using `concat!` because some browsers
21 //! // and text editors have trouble displaying bidi strings.
22 //! let text = concat![
31 //! // Resolve embedding levels within the text. Pass `None` to detect the
32 //! // paragraph level automatically.
33 //! let bidi_info = BidiInfo::new(&text, None);
35 //! // This paragraph has embedding level 1 because its first strong character is RTL.
36 //! assert_eq!(bidi_info.paragraphs.len(), 1);
37 //! let para = &bidi_info.paragraphs[0];
38 //! assert_eq!(para.level.number(), 1);
39 //! assert_eq!(para.level.is_rtl(), true);
41 //! // Re-ordering is done after wrapping each paragraph into a sequence of
42 //! // lines. For this example, I'll just use a single line that spans the
43 //! // entire paragraph.
44 //! let line = para.range.clone();
46 //! let display = bidi_info.reorder_line(para, line);
47 //! assert_eq!(display, concat![
55 //! # } // feature = "hardcoded-data"
60 //! - `std`: Enabled by default, but can be disabled to make `unicode_bidi`
61 //! `#![no_std]` + `alloc` compatible.
62 //! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs.
63 //! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`]
64 //! implementations to relevant types.
66 //! [tr9]: <http://www.unicode.org/reports/tr9/>
68 #![forbid(unsafe_code)]
70 // We need to link to std to make doc tests work on older Rust versions
71 #[cfg(feature = "std")]
86 pub use crate::char_data
::{BidiClass, UNICODE_VERSION}
;
87 pub use crate::data_source
::BidiDataSource
;
88 pub use crate::level
::{Level, LTR_LEVEL, RTL_LEVEL}
;
89 pub use crate::prepare
::LevelRun
;
91 #[cfg(feature = "hardcoded-data")]
92 pub use crate::char_data
::{bidi_class, HardcodedBidiData}
;
94 use alloc
::borrow
::Cow
;
95 use alloc
::string
::String
;
97 use core
::cmp
::{max, min}
;
98 use core
::iter
::repeat
;
101 use crate::format_chars
as chars
;
102 use crate::BidiClass
::*;
104 #[derive(PartialEq, Debug)]
111 /// Bidi information about a single paragraph
112 #[derive(Debug, PartialEq)]
113 pub struct ParagraphInfo
{
114 /// The paragraph's boundaries within the text, as byte indices.
116 /// TODO: Shrink this to only include the starting index?
117 pub range
: Range
<usize>,
119 /// The paragraph embedding level.
121 /// <http://www.unicode.org/reports/tr9/#BD4>
126 /// Gets the length of the paragraph in the source text.
/// This is `range.end - range.start`; `range` holds byte indices, so the length is in bytes.
127 pub fn len(&self) -> usize {
128 self.range
.end
- self.range
.start
132 /// Initial bidi information of the text.
134 /// Contains the text paragraphs and `BidiClass` of its characters.
135 #[derive(PartialEq, Debug)]
136 pub struct InitialInfo
<'text
> {
138 pub text
: &'text
str,
140 /// The BidiClass of the character at each byte in the text.
141 /// If a character is multiple bytes, its class will appear multiple times in the vector.
142 pub original_classes
: Vec
<BidiClass
>,
144 /// The boundaries and level of each paragraph within the text.
145 pub paragraphs
: Vec
<ParagraphInfo
>,
148 impl<'text
> InitialInfo
<'text
> {
149 /// Find the paragraphs and BidiClasses in a string of text.
151 /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
153 /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
154 /// character is found before the matching PDI. If no strong character is found, the class will
155 /// remain FSI, and it's up to later stages to treat these as LRI when needed.
157 /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
158 #[cfg_attr(feature = "flame_it", flamer::flame)]
159 #[cfg(feature = "hardcoded-data")]
160 pub fn new(text
: &str, default_para_level
: Option
<Level
>) -> InitialInfo
<'_
> {
161 Self::new_with_data_source(&HardcodedBidiData
, text
, default_para_level
)
164 /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
165 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
166 /// instead (enabled with the default `hardcoded-data` Cargo feature).
168 /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
170 /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
171 /// character is found before the matching PDI. If no strong character is found, the class will
172 /// remain FSI, and it's up to later stages to treat these as LRI when needed.
173 #[cfg_attr(feature = "flame_it", flamer::flame)]
174 pub fn new_with_data_source
<'a
, D
: BidiDataSource
>(
177 default_para_level
: Option
<Level
>,
178 ) -> InitialInfo
<'a
> {
179 let mut original_classes
= Vec
::with_capacity(text
.len());
181 // The stack contains the starting byte index for each nested isolate we're inside.
182 let mut isolate_stack
= Vec
::new();
183 let mut paragraphs
= Vec
::new();
185 let mut para_start
= 0;
186 let mut para_level
= default_para_level
;
188 #[cfg(feature = "flame_it")]
189 flame
::start("InitialInfo::new(): iter text.char_indices()");
191 for (i
, c
) in text
.char_indices() {
192 let class
= data_source
.bidi_class(c
);
194 #[cfg(feature = "flame_it")]
195 flame
::start("original_classes.extend()");
197 original_classes
.extend(repeat(class
).take(c
.len_utf8()));
199 #[cfg(feature = "flame_it")]
200 flame
::end("original_classes.extend()");
204 // P1. Split the text into separate paragraphs. The paragraph separator is kept
205 // with the previous paragraph.
206 let para_end
= i
+ c
.len_utf8();
207 paragraphs
.push(ParagraphInfo
{
208 range
: para_start
..para_end
,
209 // P3. If no character is found in p2, set the paragraph level to zero.
210 level
: para_level
.unwrap_or(LTR_LEVEL
),
212 // Reset state for the start of the next paragraph.
213 para_start
= para_end
;
214 // TODO: Support defaulting to direction of previous paragraph
216 // <http://www.unicode.org/reports/tr9/#HL1>
217 para_level
= default_para_level
;
218 isolate_stack
.clear();
222 match isolate_stack
.last() {
224 if original_classes
[start
] == FSI
{
225 // X5c. If the first strong character between FSI and its matching
226 // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
227 for j
in 0..chars
::FSI
.len_utf8() {
228 original_classes
[start
+ j
] =
229 if class
== L { LRI }
else { RLI }
;
235 if para_level
.is_none() {
236 // P2. Find the first character of type L, AL, or R, while skipping
237 // any characters between an isolate initiator and its matching
239 para_level
= Some(if class
!= L { RTL_LEVEL }
else { LTR_LEVEL }
);
246 isolate_stack
.push(i
);
256 if para_start
< text
.len() {
257 paragraphs
.push(ParagraphInfo
{
258 range
: para_start
..text
.len(),
259 level
: para_level
.unwrap_or(LTR_LEVEL
),
262 assert_eq
!(original_classes
.len(), text
.len());
264 #[cfg(feature = "flame_it")]
265 flame
::end("InitialInfo::new(): iter text.char_indices()");
275 /// Bidi information of the text.
277 /// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
278 /// character is multiple bytes wide, then its class and level will appear multiple times in these
280 // TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
281 #[derive(Debug, PartialEq)]
282 pub struct BidiInfo
<'text
> {
284 pub text
: &'text
str,
286 /// The BidiClass of the character at each byte in the text.
287 pub original_classes
: Vec
<BidiClass
>,
289 /// The directional embedding level of each byte in the text.
290 pub levels
: Vec
<Level
>,
292 /// The boundaries and paragraph embedding level of each paragraph within the text.
294 /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
295 /// Or just don't include the first paragraph, which always starts at 0?
296 pub paragraphs
: Vec
<ParagraphInfo
>,
299 impl<'text
> BidiInfo
<'text
> {
300 /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
303 /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
305 /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
306 /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
308 /// TODO: Support auto-RTL base direction
309 #[cfg_attr(feature = "flame_it", flamer::flame)]
310 #[cfg(feature = "hardcoded-data")]
311 pub fn new(text
: &str, default_para_level
: Option
<Level
>) -> BidiInfo
<'_
> {
312 Self::new_with_data_source(&HardcodedBidiData
, text
, default_para_level
)
315 /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
316 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
317 /// instead (enabled with the default `hardcoded-data` Cargo feature).
319 /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
320 /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
322 /// TODO: Support auto-RTL base direction
323 #[cfg_attr(feature = "flame_it", flamer::flame)]
324 pub fn new_with_data_source
<'a
, D
: BidiDataSource
>(
327 default_para_level
: Option
<Level
>,
333 } = InitialInfo
::new_with_data_source(data_source
, text
, default_para_level
);
335 let mut levels
= Vec
::<Level
>::with_capacity(text
.len());
336 let mut processing_classes
= original_classes
.clone();
338 for para
in ¶graphs
{
339 let text
= &text
[para
.range
.clone()];
340 let original_classes
= &original_classes
[para
.range
.clone()];
341 let processing_classes
= &mut processing_classes
[para
.range
.clone()];
343 let new_len
= levels
.len() + para
.range
.len();
344 levels
.resize(new_len
, para
.level
);
345 let levels
= &mut levels
[para
.range
.clone()];
355 let sequences
= prepare
::isolating_run_sequences(para
.level
, original_classes
, levels
);
356 for sequence
in &sequences
{
357 implicit
::resolve_weak(sequence
, processing_classes
);
358 implicit
::resolve_neutral(sequence
, levels
, processing_classes
);
360 implicit
::resolve_levels(processing_classes
, levels
);
362 assign_levels_to_removed_chars(para
.level
, original_classes
, levels
);
373 /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
375 #[cfg_attr(feature = "flame_it", flamer::flame)]
376 pub fn reordered_levels(&self, para
: &ParagraphInfo
, line
: Range
<usize>) -> Vec
<Level
> {
377 let (levels
, _
) = self.visual_runs(para
, line
);
381 /// Re-order a line based on resolved levels and return only the embedding levels, one `Level`
383 #[cfg_attr(feature = "flame_it", flamer::flame)]
384 pub fn reordered_levels_per_char(
386 para
: &ParagraphInfo
,
389 let levels
= self.reordered_levels(para
, line
);
390 self.text
.char_indices().map(|(i
, _
)| levels
[i
]).collect()
393 /// Re-order a line based on resolved levels and return the line in display order.
394 #[cfg_attr(feature = "flame_it", flamer::flame)]
395 pub fn reorder_line(&self, para
: &ParagraphInfo
, line
: Range
<usize>) -> Cow
<'text
, str> {
396 let (levels
, runs
) = self.visual_runs(para
, line
.clone());
398 // If all isolating run sequences are LTR, no reordering is needed
399 if runs
.iter().all(|run
| levels
[run
.start
].is_ltr()) {
400 return self.text
[line
].into();
403 let mut result
= String
::with_capacity(line
.len());
405 if levels
[run
.start
].is_rtl() {
406 result
.extend(self.text
[run
].chars().rev());
408 result
.push_str(&self.text
[run
]);
414 /// Find the level runs within a line and return them in visual order.
416 /// `line` is a range of byte indices within `levels`.
418 /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
419 #[cfg_attr(feature = "flame_it", flamer::flame)]
422 para
: &ParagraphInfo
,
424 ) -> (Vec
<Level
>, Vec
<LevelRun
>) {
425 assert
!(line
.start
<= self.levels
.len());
426 assert
!(line
.end
<= self.levels
.len());
428 let mut levels
= self.levels
.clone();
429 let line_classes
= &self.original_classes
[line
.clone()];
430 let line_levels
= &mut levels
[line
.clone()];
432 // Reset some whitespace chars to paragraph level.
433 // <http://www.unicode.org/reports/tr9/#L1>
434 let line_str
: &str = &self.text
[line
.clone()];
435 let mut reset_from
: Option
<usize> = Some(0);
436 let mut reset_to
: Option
<usize> = None
;
437 for (i
, c
) in line_str
.char_indices() {
438 match line_classes
[i
] {
440 RLE
| LRE
| RLO
| LRO
| PDF
| BN
=> {}
441 // Segment separator, Paragraph separator
443 assert_eq
!(reset_to
, None
);
444 reset_to
= Some(i
+ c
.len_utf8());
445 if reset_from
== None
{
446 reset_from
= Some(i
);
449 // Whitespace, isolate formatting
450 WS
| FSI
| LRI
| RLI
| PDI
=> {
451 if reset_from
== None
{
452 reset_from
= Some(i
);
459 if let (Some(from
), Some(to
)) = (reset_from
, reset_to
) {
460 for level
in &mut line_levels
[from
..to
] {
467 if let Some(from
) = reset_from
{
468 for level
in &mut line_levels
[from
..] {
473 // Find consecutive level runs.
474 let mut runs
= Vec
::new();
475 let mut start
= line
.start
;
476 let mut run_level
= levels
[start
];
477 let mut min_level
= run_level
;
478 let mut max_level
= run_level
;
480 for (i
, &new_level
) in levels
.iter().enumerate().take(line
.end
).skip(start
+ 1) {
481 if new_level
!= run_level
{
482 // End of the previous run, start of a new one.
485 run_level
= new_level
;
486 min_level
= min(run_level
, min_level
);
487 max_level
= max(run_level
, max_level
);
490 runs
.push(start
..line
.end
);
492 let run_count
= runs
.len();
494 // Re-order the odd runs.
495 // <http://www.unicode.org/reports/tr9/#L2>
497 // Stop at the lowest *odd* level.
498 min_level
= min_level
.new_lowest_ge_rtl().expect("Level error");
500 while max_level
>= min_level
{
501 // Look for the start of a sequence of consecutive runs of max_level or higher.
502 let mut seq_start
= 0;
503 while seq_start
< run_count
{
504 if self.levels
[runs
[seq_start
].start
] < max_level
{
509 // Found the start of a sequence. Now find the end.
510 let mut seq_end
= seq_start
+ 1;
511 while seq_end
< run_count
{
512 if self.levels
[runs
[seq_end
].start
] < max_level
{
518 // Reverse the runs within this sequence.
519 runs
[seq_start
..seq_end
].reverse();
525 .expect("Lowering embedding level below zero");
531 /// Returns whether the processed text has any computed RTL levels.
533 /// This information is usually used to skip re-ordering of text when no RTL level is present.
/// The check is delegated to `level::has_rtl` over this `BidiInfo`'s `levels`.
535 pub fn has_rtl(&self) -> bool
{
536 level
::has_rtl(&self.levels
)
540 /// Contains a reference to a `BidiInfo` and one of its `paragraphs`.
541 /// It supports the paragraph operations that also need the
542 /// `BidiInfo`, such as `direction`.
544 pub struct Paragraph
<'a
, 'text
> {
545 pub info
: &'a BidiInfo
<'text
>,
546 pub para
: &'a ParagraphInfo
,
549 impl<'a
, 'text
> Paragraph
<'a
, 'text
> {
/// Creates a `Paragraph` that pairs `info` with `para`.
/// Both references are stored as given; nothing here checks that `para` is one of
/// `info.paragraphs` (presumably the caller's responsibility).
550 pub fn new(info
: &'a BidiInfo
<'text
>, para
: &'a ParagraphInfo
) -> Paragraph
<'a
, 'text
> {
551 Paragraph { info, para }
554 /// Returns whether the paragraph is left-to-right, right-to-left, or mixed.
555 pub fn direction(&self) -> Direction
{
558 for i
in self.para
.range
.clone() {
559 if self.info
.levels
[i
].is_ltr() {
563 if self.info
.levels
[i
].is_rtl() {
569 return Direction
::Mixed
;
573 return Direction
::Ltr
;
579 /// Returns the `Level` of a certain character in the paragraph.
/// `pos` is an offset from the start of the paragraph (`para.range.start`), and the result is
/// read from `info.levels` at that absolute index.
/// Panics if the resulting index is outside `info.levels`.
580 pub fn level_at(&self, pos
: usize) -> Level
{
// Translate the paragraph-relative offset into an index into the whole text's levels.
581 let actual_position
= self.para
.range
.start
+ pos
;
582 self.info
.levels
[actual_position
]
586 /// Assign levels to characters removed by rule X9.
588 /// The levels assigned to these characters are not specified by the algorithm. This function
589 /// assigns each one the level of the previous character, to avoid breaking level runs.
/// A removed character at index 0 has no previous character, so it gets `para_level` instead.
/// `levels` is updated in place, so a run of consecutive removed characters all inherit the
/// level of the character preceding the run.
590 #[cfg_attr(feature = "flame_it", flamer::flame)]
591 fn assign_levels_to_removed_chars(para_level
: Level
, classes
: &[BidiClass
], levels
: &mut [Level
]) {
592 for i
in 0..levels
.len() {
593 if prepare
::removed_by_x9(classes
[i
]) {
// Copy the (possibly just-assigned) level of the preceding entry.
594 levels
[i
] = if i
> 0 { levels[i - 1] }
else { para_level }
;
600 #[cfg(feature = "hardcoded-data")]
605 fn test_initial_text_info() {
608 InitialInfo
::new(text
, None
),
611 original_classes
: vec
![L
, EN
],
612 paragraphs
: vec
![ParagraphInfo
{
621 InitialInfo
::new(text
, None
),
624 original_classes
: vec
![AL
, AL
, WS
, R
, R
],
625 paragraphs
: vec
![ParagraphInfo
{
632 let text
= "a\u{2029}b";
634 InitialInfo
::new(text
, None
),
637 original_classes
: vec
![L
, B
, B
, B
, L
],
651 let text
= format
!("{}א{}a", chars
::FSI
, chars
::PDI
);
653 InitialInfo
::new(&text
, None
),
656 original_classes
: vec
![RLI
, RLI
, RLI
, R
, R
, PDI
, PDI
, PDI
, L
],
657 paragraphs
: vec
![ParagraphInfo
{
666 #[cfg(feature = "hardcoded-data")]
667 fn test_process_text() {
670 BidiInfo
::new(text
, Some(LTR_LEVEL
)),
673 levels
: Level
::vec(&[0, 0, 0, 0, 0, 0]),
674 original_classes
: vec
![L
, L
, L
, EN
, EN
, EN
],
675 paragraphs
: vec
![ParagraphInfo
{
682 let text
= "abc אבג";
684 BidiInfo
::new(text
, Some(LTR_LEVEL
)),
687 levels
: Level
::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
688 original_classes
: vec
![L
, L
, L
, WS
, R
, R
, R
, R
, R
, R
],
689 paragraphs
: vec
![ParagraphInfo
{
696 BidiInfo
::new(text
, Some(RTL_LEVEL
)),
699 levels
: Level
::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
700 original_classes
: vec
![L
, L
, L
, WS
, R
, R
, R
, R
, R
, R
],
701 paragraphs
: vec
![ParagraphInfo
{
708 let text
= "אבג abc";
710 BidiInfo
::new(text
, Some(LTR_LEVEL
)),
713 levels
: Level
::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
714 original_classes
: vec
![R
, R
, R
, R
, R
, R
, WS
, L
, L
, L
],
715 paragraphs
: vec
![ParagraphInfo
{
722 BidiInfo
::new(text
, None
),
725 levels
: Level
::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
726 original_classes
: vec
![R
, R
, R
, R
, R
, R
, WS
, L
, L
, L
],
727 paragraphs
: vec
![ParagraphInfo
{
734 let text
= "غ2ظ א2ג";
736 BidiInfo
::new(text
, Some(LTR_LEVEL
)),
739 levels
: Level
::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
740 original_classes
: vec
![AL
, AL
, EN
, AL
, AL
, WS
, R
, R
, EN
, R
, R
],
741 paragraphs
: vec
![ParagraphInfo
{
748 let text
= "a א.\nג";
750 BidiInfo
::new(text
, None
),
753 original_classes
: vec
![L
, WS
, R
, R
, CS
, B
, R
, R
],
754 levels
: Level
::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
768 // BidiTest:69635 (AL ET EN)
769 let bidi_info
= BidiInfo
::new("\u{060B}\u{20CF}\u{06F9}", None
);
770 assert_eq
!(bidi_info
.original_classes
, vec
![AL
, AL
, ET
, ET
, ET
, EN
, EN
]);
774 #[cfg(feature = "hardcoded-data")]
775 fn test_bidi_info_has_rtl() {
777 assert_eq
!(BidiInfo
::new("123", None
).has_rtl(), false);
778 assert_eq
!(BidiInfo
::new("123", Some(LTR_LEVEL
)).has_rtl(), false);
779 assert_eq
!(BidiInfo
::new("123", Some(RTL_LEVEL
)).has_rtl(), false);
780 assert_eq
!(BidiInfo
::new("abc", None
).has_rtl(), false);
781 assert_eq
!(BidiInfo
::new("abc", Some(LTR_LEVEL
)).has_rtl(), false);
782 assert_eq
!(BidiInfo
::new("abc", Some(RTL_LEVEL
)).has_rtl(), false);
783 assert_eq
!(BidiInfo
::new("abc 123", None
).has_rtl(), false);
784 assert_eq
!(BidiInfo
::new("abc\n123", None
).has_rtl(), false);
787 assert_eq
!(BidiInfo
::new("אבּג", None
).has_rtl(), true);
788 assert_eq
!(BidiInfo
::new("אבּג", Some(LTR_LEVEL
)).has_rtl(), true);
789 assert_eq
!(BidiInfo
::new("אבּג", Some(RTL_LEVEL
)).has_rtl(), true);
790 assert_eq
!(BidiInfo
::new("abc אבּג", None
).has_rtl(), true);
791 assert_eq
!(BidiInfo
::new("abc\nאבּג", None
).has_rtl(), true);
792 assert_eq
!(BidiInfo
::new("אבּג abc", None
).has_rtl(), true);
793 assert_eq
!(BidiInfo
::new("אבּג\nabc", None
).has_rtl(), true);
794 assert_eq
!(BidiInfo
::new("אבּג 123", None
).has_rtl(), true);
795 assert_eq
!(BidiInfo
::new("אבּג\n123", None
).has_rtl(), true);
798 #[cfg(feature = "hardcoded-data")]
799 fn reorder_paras(text
: &str) -> Vec
<Cow
<'_
, str>> {
800 let bidi_info
= BidiInfo
::new(text
, None
);
804 .map(|para
| bidi_info
.reorder_line(para
, para
.range
.clone()))
809 #[cfg(feature = "hardcoded-data")]
810 fn test_reorder_line() {
811 // Bidi_Class: L L L B L L L B L L L
813 reorder_paras("abc\ndef\nghi"),
814 vec
!["abc\n", "def\n", "ghi"]
817 // Bidi_Class: L L EN B L L EN B L L EN
819 reorder_paras("ab1\nde2\ngh3"),
820 vec
!["ab1\n", "de2\n", "gh3"]
823 // Bidi_Class: L L L B AL AL AL
824 assert_eq
!(reorder_paras("abc\nابج"), vec
!["abc\n", "جبا"]);
826 // Bidi_Class: AL AL AL B L L L
827 assert_eq
!(reorder_paras("ابج\nabc"), vec
!["\nجبا", "abc"]);
829 assert_eq
!(reorder_paras("1.-2"), vec
!["1.-2"]);
830 assert_eq
!(reorder_paras("1-.2"), vec
!["1-.2"]);
831 assert_eq
!(reorder_paras("abc אבג"), vec
!["abc גבא"]);
833 // Numbers being weak LTR characters, cannot reorder strong RTL
834 assert_eq
!(reorder_paras("123 אבג"), vec
!["גבא 123"]);
836 assert_eq
!(reorder_paras("abc\u{202A}def"), vec
!["abc\u{202A}def"]);
839 reorder_paras("abc\u{202A}def\u{202C}ghi"),
840 vec
!["abc\u{202A}def\u{202C}ghi"]
844 reorder_paras("abc\u{2066}def\u{2069}ghi"),
845 vec
!["abc\u{2066}def\u{2069}ghi"]
848 // Testing for RLE Character
850 reorder_paras("\u{202B}abc אבג\u{202C}"),
851 vec
!["\u{202B}\u{202C}גבא abc"]
854 // Testing neutral characters
855 assert_eq
!(reorder_paras("אבג? אבג"), vec
!["גבא ?גבא"]);
857 // Testing neutral characters with special case
858 assert_eq
!(reorder_paras("A אבג?"), vec
!["A גבא?"]);
860 // Testing neutral characters with Implicit RTL Marker
861 assert_eq
!(reorder_paras("A אבג?\u{200F}"), vec
!["A \u{200F}?גבא"]);
862 assert_eq
!(reorder_paras("אבג abc"), vec
!["abc גבא"]);
864 reorder_paras("abc\u{2067}.-\u{2069}ghi"),
865 vec
!["abc\u{2067}-.\u{2069}ghi"]
869 reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
870 vec
!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"]
873 // With mirrorable characters in RTL run
874 assert_eq
!(reorder_paras("א(ב)ג."), vec
![".ג)ב(א"]);
876 // With mirrorable characters on level boundary
877 assert_eq
!(reorder_paras("אב(גד[&ef].)gh"), vec
!["ef].)gh&[דג(בא"]);
880 fn reordered_levels_for_paras(text
: &str) -> Vec
<Vec
<Level
>> {
881 let bidi_info
= BidiInfo
::new(text
, None
);
885 .map(|para
| bidi_info
.reordered_levels(para
, para
.range
.clone()))
889 fn reordered_levels_per_char_for_paras(text
: &str) -> Vec
<Vec
<Level
>> {
890 let bidi_info
= BidiInfo
::new(text
, None
);
894 .map(|para
| bidi_info
.reordered_levels_per_char(para
, para
.range
.clone()))
899 #[cfg(feature = "hardcoded-data")]
900 fn test_reordered_levels() {
901 // BidiTest:946 (LRI PDI)
902 let text
= "\u{2067}\u{2069}";
904 reordered_levels_for_paras(text
),
905 vec
![Level
::vec(&[0, 0, 0, 0, 0, 0])]
908 reordered_levels_per_char_for_paras(text
),
909 vec
![Level
::vec(&[0, 0])]
913 let bidi_info
= BidiInfo
::new(text
, None
);
915 bidi_info
.reordered_levels(&bidi_info
.paragraphs
[0], 3..7),
916 Level
::vec(&[0, 0, 0, 1, 1, 1, 1]),
920 /// BidiTest:69635 (AL ET EN)
921 let text = "\u{060B}\u{20CF}\u{06F9}";
923 reordered_levels_for_paras(text),
924 vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])]
927 reordered_levels_per_char_for_paras(text),
928 vec![Level::vec(&[1, 1, 2])]
933 // BidiTest:291284 (AN RLI PDF R)
935 reordered_levels_per_char_for_paras("\u{0605}\u{2067}\u{202C}\u{0590}"),
936 vec![&["2", "0", "x", "1"]]
942 fn test_paragraph_info_len() {
943 let text
= "hello world";
944 let bidi_info
= BidiInfo
::new(text
, None
);
945 assert_eq
!(bidi_info
.paragraphs
.len(), 1);
946 assert_eq
!(bidi_info
.paragraphs
[0].len(), text
.len());
948 let text2
= "How are you";
949 let whole_text
= format
!("{}\n{}", text
, text2
);
950 let bidi_info
= BidiInfo
::new(&whole_text
, None
);
951 assert_eq
!(bidi_info
.paragraphs
.len(), 2);
953 // The first paragraph includes the paragraph separator.
954 // TODO: investigate if the paragraph separator character
955 // should not be part of any paragraph.
956 assert_eq
!(bidi_info
.paragraphs
[0].len(), text
.len() + 1);
957 assert_eq
!(bidi_info
.paragraphs
[1].len(), text2
.len());
961 fn test_direction() {
962 let ltr_text
= "hello world";
963 let rtl_text
= "أهلا بكم";
964 let all_paragraphs
= format
!("{}\n{}\n{}{}", ltr_text
, rtl_text
, ltr_text
, rtl_text
);
965 let bidi_info
= BidiInfo
::new(&all_paragraphs
, None
);
966 assert_eq
!(bidi_info
.paragraphs
.len(), 3);
967 let p_ltr
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[0]);
968 let p_rtl
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[1]);
969 let p_mixed
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[2]);
970 assert_eq
!(p_ltr
.direction(), Direction
::Ltr
);
971 assert_eq
!(p_rtl
.direction(), Direction
::Rtl
);
972 assert_eq
!(p_mixed
.direction(), Direction
::Mixed
);
976 fn test_edge_cases_direction() {
977 // No paragraphs for empty text.
979 let bidi_info
= BidiInfo
::new(empty
, Option
::from(RTL_LEVEL
));
980 assert_eq
!(bidi_info
.paragraphs
.len(), 0);
981 // The paragraph separator will take the value of the default direction
982 // which is left to right.
984 let bidi_info
= BidiInfo
::new(empty
, None
);
985 assert_eq
!(bidi_info
.paragraphs
.len(), 1);
986 let p
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[0]);
987 assert_eq
!(p
.direction(), Direction
::Ltr
);
988 // The paragraph separator will take the value of the given initial direction
989 // which is left to right.
991 let bidi_info
= BidiInfo
::new(empty
, Option
::from(LTR_LEVEL
));
992 assert_eq
!(bidi_info
.paragraphs
.len(), 1);
993 let p
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[0]);
994 assert_eq
!(p
.direction(), Direction
::Ltr
);
996 // The paragraph separator will take the value of the given initial direction
997 // which is right to left.
999 let bidi_info
= BidiInfo
::new(empty
, Option
::from(RTL_LEVEL
));
1000 assert_eq
!(bidi_info
.paragraphs
.len(), 1);
1001 let p
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[0]);
1002 assert_eq
!(p
.direction(), Direction
::Rtl
);
1006 fn test_level_at() {
1007 let ltr_text
= "hello world";
1008 let rtl_text
= "أهلا بكم";
1009 let all_paragraphs
= format
!("{}\n{}\n{}{}", ltr_text
, rtl_text
, ltr_text
, rtl_text
);
1010 let bidi_info
= BidiInfo
::new(&all_paragraphs
, None
);
1011 assert_eq
!(bidi_info
.paragraphs
.len(), 3);
1013 let p_ltr
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[0]);
1014 let p_rtl
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[1]);
1015 let p_mixed
= Paragraph
::new(&bidi_info
, &bidi_info
.paragraphs
[2]);
1017 assert_eq
!(p_ltr
.level_at(0), LTR_LEVEL
);
1018 assert_eq
!(p_rtl
.level_at(0), RTL_LEVEL
);
1019 assert_eq
!(p_mixed
.level_at(0), LTR_LEVEL
);
1020 assert_eq
!(p_mixed
.info
.levels
.len(), 54);
1021 assert_eq
!(p_mixed
.para
.range
.start
, 28);
1022 assert_eq
!(p_mixed
.level_at(ltr_text
.len()), RTL_LEVEL
);
1026 #[cfg(all(feature = "serde", test))]
1029 use serde_test
::{assert_tokens, Token}
;
1033 let text
= "abc אבג";
1034 let bidi_info
= BidiInfo
::new(text
, None
);
1035 let levels
= bidi_info
.levels
;
1036 assert_eq
!(text
.as_bytes().len(), 10);
1037 assert_eq
!(levels
.len(), 10);
1041 Token
::Seq { len: Some(10) }
,
1042 Token
::NewtypeStruct { name: "Level" }
,
1044 Token
::NewtypeStruct { name: "Level" }
,
1046 Token
::NewtypeStruct { name: "Level" }
,
1048 Token
::NewtypeStruct { name: "Level" }
,
1050 Token
::NewtypeStruct { name: "Level" }
,
1052 Token
::NewtypeStruct { name: "Level" }
,
1054 Token
::NewtypeStruct { name: "Level" }
,
1056 Token
::NewtypeStruct { name: "Level" }
,
1058 Token
::NewtypeStruct { name: "Level" }
,
1060 Token
::NewtypeStruct { name: "Level" }
,