2 canonical_combining_class
, canonical_fully_decomposed
, compatibility_fully_decomposed
,
3 stream_safe_trailing_nonstarters
,
5 use crate::normalize
::{hangul_decomposition_length, is_hangul_syllable}
;
6 use crate::tables
::stream_safe_leading_nonstarters
;
/// Upper bound on the number of consecutive non-starters (counted in NFKD)
/// that `StreamSafe` lets through; a run that would exceed this gets a
/// Combining Grapheme Joiner inserted first.
8 pub(crate) const MAX_NONSTARTERS
: usize = 30;
/// U+034F COMBINING GRAPHEME JOINER, the character `StreamSafe` emits to
/// break up an over-long run of non-starters.
9 const COMBINING_GRAPHEME_JOINER
: char = '
\u{034F}'
;
11 /// UAX15-D4: This iterator keeps track of how many non-starters there have been
12 /// since the last starter in *NFKD* and will emit a Combining Grapheme Joiner
13 /// (U+034F) if the count exceeds 30.
14 pub struct StreamSafe
<I
> {
16 nonstarter_count
: usize,
20 impl<I
> StreamSafe
<I
> {
21 pub(crate) fn new(iter
: I
) -> Self {
30 impl<I
: Iterator
<Item
= char>> Iterator
for StreamSafe
<I
> {
34 fn next(&mut self) -> Option
<char> {
35 let next_ch
= match self.buffer
.take().or_else(|| self.iter
.next()) {
39 let d
= classify_nonstarters(next_ch
);
40 if self.nonstarter_count
+ d
.leading_nonstarters
> MAX_NONSTARTERS
{
41 // Since we're emitting a CGJ, the suffix of the emitted string in NFKD has no trailing
42 // nonstarters, so we can reset the counter to zero. Put `next_ch` back into the
43 // iterator (via `self.buffer`), and we'll reclassify it next iteration.
44 self.nonstarter_count
= 0;
45 self.buffer
= Some(next_ch
);
46 return Some(COMBINING_GRAPHEME_JOINER
);
49 // Is the character all nonstarters in NFKD? If so, increment our counter of contiguous
50 // nonstarters in NFKD.
51 if d
.leading_nonstarters
== d
.decomposition_len
{
52 self.nonstarter_count
+= d
.decomposition_len
;
54 // Otherwise, reset the counter to the decomposition's number of trailing nonstarters.
56 self.nonstarter_count
= d
.trailing_nonstarters
;
63 pub(crate) struct Decomposition
{
64 pub(crate) leading_nonstarters
: usize,
65 pub(crate) trailing_nonstarters
: usize,
66 pub(crate) decomposition_len
: usize,
70 pub(crate) fn classify_nonstarters(c
: char) -> Decomposition
{
71 // As usual, fast path for ASCII (which is always a starter)
73 return Decomposition
{
74 leading_nonstarters
: 0,
75 trailing_nonstarters
: 0,
79 // Next, special case Hangul, since it's not handled by our tables.
80 if is_hangul_syllable(c
) {
81 return Decomposition
{
82 leading_nonstarters
: 0,
83 trailing_nonstarters
: 0,
84 decomposition_len
: hangul_decomposition_length(c
),
87 let decomp
= compatibility_fully_decomposed(c
).or_else(|| canonical_fully_decomposed(c
));
89 Some(decomp
) => Decomposition
{
90 leading_nonstarters
: stream_safe_leading_nonstarters(c
),
91 trailing_nonstarters
: stream_safe_trailing_nonstarters(c
),
92 decomposition_len
: decomp
.len(),
95 let is_nonstarter
= canonical_combining_class(c
) != 0;
96 let nonstarter
= if is_nonstarter { 1 }
else { 0 }
;
98 leading_nonstarters
: nonstarter
,
99 trailing_nonstarters
: nonstarter
,
100 decomposition_len
: 1,
108 use super::{classify_nonstarters, StreamSafe}
;
109 use crate::lookups
::canonical_combining_class
;
110 use crate::normalize
::decompose_compatible
;
112 #[cfg(not(feature = "std"))]
113 use crate::no_std_prelude
::*;
117 fn stream_safe(s
: &str) -> String
{
118 StreamSafe
::new(s
.chars()).collect()
123 let technically_okay
= "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}ngerzone";
124 assert_eq
!(stream_safe(technically_okay
), technically_okay
);
126 let too_much
= "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}ngerzone";
127 let fixed_it
= "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{034f}\u{032e}ngerzone";
128 assert_eq
!(stream_safe(too_much
), fixed_it
);
130 let woah_nelly
= "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{032e}ngerzone";
131 let its_cool
= "Da\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{031d}\u{034f}\u{032e}\u{0300}\u{0301}\u{0302}\u{0303}\u{0304}\u{0305}\u{0306}\u{0307}\u{0308}\u{0309}\u{030a}\u{030b}\u{030c}\u{030d}\u{030e}\u{030f}\u{0310}\u{0311}\u{0312}\u{0313}\u{0314}\u{0315}\u{0316}\u{0317}\u{0318}\u{0319}\u{031a}\u{031b}\u{031c}\u{034f}\u{031d}\u{032e}ngerzone";
132 assert_eq
!(stream_safe(woah_nelly
), its_cool
);
136 fn test_all_nonstarters() {
137 let s
= "\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}\u{0300}";
138 let expected
= "\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{034F}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}\u{300}";
139 assert_eq
!(stream_safe(s
), expected
);
143 fn test_classify_nonstarters() {
144 // Highest character in the `compat_fully_decomp` table is 2FA1D
145 for ch
in 0..0x2FA1E {
146 let ch
= match char::from_u32(ch
) {
150 let c
= classify_nonstarters(ch
);
151 let mut s
= Vec
::new();
152 decompose_compatible(ch
, |c
| s
.push(c
));
154 assert_eq
!(s
.len(), c
.decomposition_len
);
158 .take_while(|&c
| canonical_combining_class(*c
) != 0)
163 .take_while(|&c
| canonical_combining_class(*c
) != 0)
166 assert_eq
!(num_leading
, c
.leading_nonstarters
);
167 assert_eq
!(num_trailing
, c
.trailing_nonstarters
);