]>
Commit | Line | Data |
---|---|---|
dfeec247 XL |
1 | // Copyright 2019 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Lookups of unicode properties using minimal perfect hashing. | |
12 | ||
f035d41b XL |
13 | use crate::perfect_hash::mph_lookup; |
14 | use crate::tables::*; | |
dfeec247 XL |
15 | |
16 | /// Look up the canonical combining class for a codepoint. | |
f035d41b | 17 | /// |
dfeec247 XL |
18 | /// The value returned is as defined in the Unicode Character Database. |
19 | pub fn canonical_combining_class(c: char) -> u8 { | |
f035d41b XL |
20 | mph_lookup( |
21 | c.into(), | |
22 | CANONICAL_COMBINING_CLASS_SALT, | |
23 | CANONICAL_COMBINING_CLASS_KV, | |
24 | u8_lookup_fk, | |
25 | u8_lookup_fv, | |
26 | 0, | |
27 | ) | |
dfeec247 XL |
28 | } |
29 | ||
30 | pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> { | |
31 | if c1 < '\u{10000}' && c2 < '\u{10000}' { | |
f035d41b XL |
32 | mph_lookup( |
33 | (c1 as u32) << 16 | (c2 as u32), | |
34 | COMPOSITION_TABLE_SALT, | |
35 | COMPOSITION_TABLE_KV, | |
36 | pair_lookup_fk, | |
37 | pair_lookup_fv_opt, | |
38 | None, | |
39 | ) | |
dfeec247 XL |
40 | } else { |
41 | composition_table_astral(c1, c2) | |
42 | } | |
43 | } | |
44 | ||
45 | pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> { | |
f035d41b XL |
46 | mph_lookup( |
47 | c.into(), | |
48 | CANONICAL_DECOMPOSED_SALT, | |
49 | CANONICAL_DECOMPOSED_KV, | |
50 | pair_lookup_fk, | |
51 | pair_lookup_fv_opt, | |
52 | None, | |
53 | ) | |
dfeec247 XL |
54 | } |
55 | ||
56 | pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> { | |
f035d41b XL |
57 | mph_lookup( |
58 | c.into(), | |
59 | COMPATIBILITY_DECOMPOSED_SALT, | |
60 | COMPATIBILITY_DECOMPOSED_KV, | |
61 | pair_lookup_fk, | |
62 | pair_lookup_fv_opt, | |
63 | None, | |
64 | ) | |
dfeec247 XL |
65 | } |
66 | ||
6a06907d XL |
67 | pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [char]> { |
68 | mph_lookup( | |
69 | c.into(), | |
70 | CJK_COMPAT_VARIANTS_DECOMPOSED_SALT, | |
71 | CJK_COMPAT_VARIANTS_DECOMPOSED_KV, | |
72 | pair_lookup_fk, | |
73 | pair_lookup_fv_opt, | |
74 | None, | |
75 | ) | |
76 | } | |
77 | ||
dfeec247 XL |
78 | /// Return whether the given character is a combining mark (`General_Category=Mark`) |
79 | pub fn is_combining_mark(c: char) -> bool { | |
f035d41b XL |
80 | mph_lookup( |
81 | c.into(), | |
82 | COMBINING_MARK_SALT, | |
83 | COMBINING_MARK_KV, | |
84 | bool_lookup_fk, | |
85 | bool_lookup_fv, | |
86 | false, | |
87 | ) | |
dfeec247 XL |
88 | } |
89 | ||
90 | pub fn stream_safe_trailing_nonstarters(c: char) -> usize { | |
f035d41b XL |
91 | mph_lookup( |
92 | c.into(), | |
93 | TRAILING_NONSTARTERS_SALT, | |
94 | TRAILING_NONSTARTERS_KV, | |
95 | u8_lookup_fk, | |
96 | u8_lookup_fv, | |
97 | 0, | |
98 | ) as usize | |
dfeec247 XL |
99 | } |
100 | ||
/// Returns the 24-bit key from a `u32` that packs a 24-bit key (upper bits)
/// together with an 8-bit value (lowest byte).
#[inline]
fn u8_lookup_fk(kv: u32) -> u32 {
    // Width of the value field occupying the low end of the packed word.
    const VALUE_BITS: u32 = 8;
    kv >> VALUE_BITS
}
106 | ||
/// Returns the 8-bit value from a `u32` that packs a 24-bit key (upper bits)
/// together with an 8-bit value (lowest byte).
#[inline]
fn u8_lookup_fv(kv: u32) -> u8 {
    // The value lives in the least significant byte of the packed word.
    kv.to_le_bytes()[0]
}
112 | ||
/// Returns the key of a boolean-table entry.
///
/// Boolean tables carry no packed value, so the whole entry is the key.
#[inline]
fn bool_lookup_fk(kv: u32) -> u32 {
    let key = kv;
    key
}
118 | ||
/// Returns the value of a boolean-table entry.
///
/// The entry itself is ignored: the result is `true` for every input.
#[inline]
fn bool_lookup_fv(_kv: u32) -> bool {
    const ENTRY_VALUE: bool = true;
    ENTRY_VALUE
}
124 | ||
/// Returns the key (first element) of a `(key, value)` table entry,
/// discarding the value.
#[inline]
fn pair_lookup_fk<T>(kv: (u32, T)) -> u32 {
    let (key, _) = kv;
    key
}
130 | ||
/// Returns the value (second element) of a `(key, value)` table entry,
/// wrapped in `Some`; the key is discarded.
#[inline]
fn pair_lookup_fv_opt<T>(kv: (u32, T)) -> Option<T> {
    let (_, value) = kv;
    Some(value)
}
135 | } |