]>
Commit | Line | Data |
---|---|---|
abe05a73 XL |
1 | // Copyright 2015 The Servo Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. This file may not be copied, modified, or distributed | |
8 | // except according to those terms. | |
9 | ||
10 | //! Accessor for `Bidi_Class` property from Unicode Character Database (UCD) | |
11 | ||
12 | mod tables; | |
13 | ||
14 | pub use self::tables::{BidiClass, UNICODE_VERSION}; | |
923072b8 | 15 | #[cfg(feature = "hardcoded-data")] |
94222f64 | 16 | use core::char; |
923072b8 FG |
17 | #[cfg(feature = "hardcoded-data")] |
18 | use core::cmp::Ordering::{Equal, Greater, Less}; | |
abe05a73 | 19 | |
923072b8 | 20 | #[cfg(feature = "hardcoded-data")] |
abe05a73 | 21 | use self::tables::bidi_class_table; |
cdc7bbd5 | 22 | use crate::BidiClass::*; |
923072b8 FG |
23 | #[cfg(feature = "hardcoded-data")] |
24 | use crate::BidiDataSource; | |
25 | ||
/// Hardcoded Bidi data that ships with the unicode-bidi crate.
///
/// This is a field-less marker type: it holds no state of its own, so values are free to
/// create, copy and compare by debug output. It implements `BidiDataSource` by looking
/// characters up in the table compiled into this module.
///
/// This can be enabled with the default `hardcoded-data` Cargo feature.
#[cfg(feature = "hardcoded-data")]
#[derive(Clone, Copy, Debug, Default)]
pub struct HardcodedBidiData;
31 | ||
#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `BidiClass` of `c` according to the table compiled into this crate.
    fn bidi_class(&self, c: char) -> BidiClass {
        // This names the free function `bidi_class` defined in this module, not this
        // method, so the call is plain delegation rather than recursion. The free
        // function performs the lookup in `bidi_class_table`.
        bidi_class(c)
    }
}
abe05a73 XL |
38 | |
/// Look up the `BidiClass` of a single char in the hardcoded `bidi_class_table`.
///
/// The lookup is delegated to `bsearch_range_value_table`, which reports `L` for any
/// char that no table entry covers.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}
44 | ||
45 | pub fn is_rtl(bidi_class: BidiClass) -> bool { | |
46 | match bidi_class { | |
47 | RLE | RLO | RLI => true, | |
48 | _ => false, | |
49 | } | |
50 | } | |
51 | ||
/// Binary-search the range table `r` for the entry whose inclusive `(lo, hi)` range
/// contains `c`, and return the class stored alongside that range.
///
/// Returns `L` when no entry contains `c`.
///
/// NOTE(review): like any `binary_search_by` call, this presumes `r` is ordered
/// consistently with the comparator (ascending, non-overlapping ranges) — confirm against
/// the generated `tables` module.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    let lookup = r.binary_search_by(|&(lo, hi, _)| match (lo <= c, c <= hi) {
        // `c` lies inside this entry's inclusive range.
        (true, true) => Equal,
        // This entry ends before `c`, so the match (if any) is further right.
        (_, false) => Less,
        // This entry starts after `c`, so the match (if any) is further left.
        (false, true) => Greater,
    });

    // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
    // for Bidi_Class have the value Left_To_Right (L)."
    lookup.map(|idx| r[idx].2).unwrap_or(L)
}
72 | ||
// Spot checks of `bidi_class` against the hardcoded table, so they only build when the
// `hardcoded-data` feature is enabled.
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    /// Checks a handful of code points in the ASCII range.
    #[test]
    fn test_ascii() {
        assert_eq!(bidi_class('\u{0000}'), BN);
        assert_eq!(bidi_class('\u{0040}'), ON);
        assert_eq!(bidi_class('\u{0041}'), L);
        assert_eq!(bidi_class('\u{0062}'), L);
        assert_eq!(bidi_class('\u{007F}'), BN);
    }

    /// Checks code points below U+10000, including the edges of several ranges.
    #[test]
    fn test_bmp() {
        // Hebrew
        assert_eq!(bidi_class('\u{0590}'), R);
        assert_eq!(bidi_class('\u{05D0}'), R);
        assert_eq!(bidi_class('\u{05D1}'), R);
        assert_eq!(bidi_class('\u{05FF}'), R);

        // Arabic
        assert_eq!(bidi_class('\u{0600}'), AN);
        assert_eq!(bidi_class('\u{0627}'), AL);
        assert_eq!(bidi_class('\u{07BF}'), AL);

        // Default R + Arabic Extras
        assert_eq!(bidi_class('\u{07C0}'), R);
        assert_eq!(bidi_class('\u{085F}'), R);
        assert_eq!(bidi_class('\u{0860}'), AL);
        assert_eq!(bidi_class('\u{0870}'), AL);
        assert_eq!(bidi_class('\u{089F}'), NSM);
        assert_eq!(bidi_class('\u{08A0}'), AL);
        assert_eq!(bidi_class('\u{08FF}'), NSM);

        // Default ET
        assert_eq!(bidi_class('\u{20A0}'), ET);
        assert_eq!(bidi_class('\u{20CF}'), ET);

        // Arabic Presentation Forms
        assert_eq!(bidi_class('\u{FB1D}'), R);
        assert_eq!(bidi_class('\u{FB4F}'), R);
        assert_eq!(bidi_class('\u{FB50}'), AL);
        assert_eq!(bidi_class('\u{FDCF}'), ON);
        assert_eq!(bidi_class('\u{FDF0}'), AL);
        assert_eq!(bidi_class('\u{FDFF}'), ON);
        assert_eq!(bidi_class('\u{FE70}'), AL);
        assert_eq!(bidi_class('\u{FEFE}'), AL);
        assert_eq!(bidi_class('\u{FEFF}'), BN);

        // noncharacters
        assert_eq!(bidi_class('\u{FDD0}'), L);
        assert_eq!(bidi_class('\u{FDD1}'), L);
        assert_eq!(bidi_class('\u{FDEE}'), L);
        assert_eq!(bidi_class('\u{FDEF}'), L);
        assert_eq!(bidi_class('\u{FFFE}'), L);
        assert_eq!(bidi_class('\u{FFFF}'), L);
    }

    /// Checks code points in the U+10000..U+1FFFF range.
    #[test]
    fn test_smp() {
        // Default AL + R
        assert_eq!(bidi_class('\u{10800}'), R);
        assert_eq!(bidi_class('\u{10FFF}'), R);
        assert_eq!(bidi_class('\u{1E800}'), R);
        assert_eq!(bidi_class('\u{1EDFF}'), R);
        assert_eq!(bidi_class('\u{1EE00}'), AL);
        assert_eq!(bidi_class('\u{1EEFF}'), AL);
        assert_eq!(bidi_class('\u{1EF00}'), R);
        assert_eq!(bidi_class('\u{1EFFF}'), R);
    }

    /// Checks that the first code point of each of planes 3 through 10 is `L`.
    #[test]
    fn test_unassigned_planes() {
        assert_eq!(bidi_class('\u{30000}'), L);
        assert_eq!(bidi_class('\u{40000}'), L);
        assert_eq!(bidi_class('\u{50000}'), L);
        assert_eq!(bidi_class('\u{60000}'), L);
        assert_eq!(bidi_class('\u{70000}'), L);
        assert_eq!(bidi_class('\u{80000}'), L);
        assert_eq!(bidi_class('\u{90000}'), L);
        assert_eq!(bidi_class('\u{a0000}'), L);
    }
}