]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Functions for computing canonical and compatible decompositions for Unicode characters. | |
12 | ||
13 | use core::cmp::Ordering::{Equal, Less, Greater}; | |
14 | use core::ops::FnMut; | |
15 | use core::option::Option; | |
16 | use core::option::Option::{Some, None}; | |
17 | use core::slice::SliceExt; | |
18 | use core::result::Result::{Ok, Err}; | |
19 | use tables::normalization::{canonical_table, compatibility_table, composition_table}; | |
20 | ||
// Looks up `c` in `r`, a table of `(key, data)` pairs, by binary search on the
// key, and returns the data slice stored next to the matching key.
//
// Returns `None` when no entry has `c` as its key. The search is a binary
// search, so `r` has to be sorted by key in ascending order.
fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
    // The comparator reports how each table key is ordered relative to `c`.
    let found = r.binary_search_by(|&(key, _)| {
        if key < c {
            Less
        } else if key == c {
            Equal
        } else {
            Greater
        }
    });

    match found {
        Err(_) => None,
        Ok(pos) => {
            let (_, data) = r[pos];
            Some(data)
        }
    }
}
34 | ||
35 | /// Compute canonical Unicode decomposition for character | |
36 | pub fn decompose_canonical<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, false); } | |
37 | ||
38 | /// Compute canonical or compatible Unicode decomposition for character | |
39 | pub fn decompose_compatible<F>(c: char, mut i: F) where F: FnMut(char) { d(c, &mut i, true); } | |
40 | ||
41 | // FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F` | |
42 | fn d<F>(c: char, i: &mut F, k: bool) where F: FnMut(char) { | |
43 | // 7-bit ASCII never decomposes | |
44 | if c <= '\x7f' { (*i)(c); return; } | |
45 | ||
46 | // Perform decomposition for Hangul | |
47 | if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) { | |
48 | decompose_hangul(c, i); | |
49 | return; | |
50 | } | |
51 | ||
52 | // First check the canonical decompositions | |
53 | match bsearch_table(c, canonical_table) { | |
54 | Some(canon) => { | |
85aaf69f | 55 | for x in canon { |
1a4d82fc JJ |
56 | d(*x, i, k); |
57 | } | |
58 | return; | |
59 | } | |
60 | None => () | |
61 | } | |
62 | ||
63 | // Bottom out if we're not doing compat. | |
64 | if !k { (*i)(c); return; } | |
65 | ||
66 | // Then check the compatibility decompositions | |
67 | match bsearch_table(c, compatibility_table) { | |
68 | Some(compat) => { | |
85aaf69f | 69 | for x in compat { |
1a4d82fc JJ |
70 | d(*x, i, k); |
71 | } | |
72 | return; | |
73 | } | |
74 | None => () | |
75 | } | |
76 | ||
77 | // Finally bottom out. | |
78 | (*i)(c); | |
79 | } | |
80 | ||
81 | pub fn compose(a: char, b: char) -> Option<char> { | |
82 | compose_hangul(a, b).or_else(|| { | |
83 | match bsearch_table(a, composition_table) { | |
84 | None => None, | |
85 | Some(candidates) => { | |
86 | match candidates.binary_search_by(|&(val, _)| { | |
87 | if b == val { Equal } | |
88 | else if val < b { Less } | |
89 | else { Greater } | |
90 | }) { | |
91 | Ok(idx) => { | |
92 | let (_, result) = candidates[idx]; | |
93 | Some(result) | |
94 | } | |
95 | Err(_) => None | |
96 | } | |
97 | } | |
98 | } | |
99 | }) | |
100 | } | |
101 | ||
// Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior

// Sizes of the three index ranges that make up a syllable: leading consonant,
// vowel, and trailing consonant (trailing index 0 means "no trailing
// consonant", so it is never emitted on decomposition).
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;
// Number of syllables that share one leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
// Total number of precomposed syllables, starting at `S_BASE`.
const S_COUNT: u32 = L_COUNT * N_COUNT;

// First code point of the precomposed syllable range.
const S_BASE: u32 = 0xAC00;
// Code points that the leading and vowel indices are added to.
const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
// Code point that the trailing index is added to. Since index 0 is reserved
// for "no trailing consonant", `T_BASE` itself is never produced.
const T_BASE: u32 = 0x11A7;
112 | ||
113 | // FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F` | |
114 | // Decompose a precomposed Hangul syllable | |
115 | #[inline(always)] | |
116 | fn decompose_hangul<F>(s: char, f: &mut F) where F: FnMut(char) { | |
117 | use core::mem::transmute; | |
118 | ||
119 | let si = s as u32 - S_BASE; | |
120 | ||
121 | let li = si / N_COUNT; | |
122 | unsafe { | |
123 | (*f)(transmute(L_BASE + li)); | |
124 | ||
125 | let vi = (si % N_COUNT) / T_COUNT; | |
126 | (*f)(transmute(V_BASE + vi)); | |
127 | ||
128 | let ti = si % T_COUNT; | |
129 | if ti > 0 { | |
130 | (*f)(transmute(T_BASE + ti)); | |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | // Compose a pair of Hangul Jamo | |
136 | #[inline(always)] | |
137 | fn compose_hangul(a: char, b: char) -> Option<char> { | |
138 | use core::mem::transmute; | |
139 | let l = a as u32; | |
140 | let v = b as u32; | |
141 | // Compose an LPart and a VPart | |
142 | if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) { | |
143 | let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT; | |
144 | return unsafe { Some(transmute(r)) }; | |
145 | } | |
146 | // Compose an LVPart and a TPart | |
147 | if S_BASE <= l && l <= (S_BASE+S_COUNT-T_COUNT) && T_BASE <= v && v < (T_BASE+T_COUNT) { | |
148 | let r = l + (v - T_BASE); | |
149 | return unsafe { Some(transmute(r)) }; | |
150 | } | |
151 | None | |
152 | } |