1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 use tables
::grapheme
::GraphemeCat
;
15 /// External iterator for grapheme clusters and byte offsets.
17 /// This struct is created by the [`grapheme_indices`] method on the [`UnicodeSegmentation`]
18 /// trait. See its documentation for more.
20 /// [`grapheme_indices`]: trait.UnicodeSegmentation.html#tymethod.grapheme_indices
21 /// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
23 pub struct GraphemeIndices
<'a
> {
28 impl<'a
> GraphemeIndices
<'a
> {
30 /// View the underlying data (the part yet to be iterated) as a slice of the original string.
33 /// # use unicode_segmentation::UnicodeSegmentation;
34 /// let mut iter = "abc".grapheme_indices(true);
35 /// assert_eq!(iter.as_str(), "abc");
37 /// assert_eq!(iter.as_str(), "bc");
40 /// assert_eq!(iter.as_str(), "");
42 pub fn as_str(&self) -> &'a
str {
47 impl<'a
> Iterator
for GraphemeIndices
<'a
> {
48 type Item
= (usize, &'a
str);
51 fn next(&mut self) -> Option
<(usize, &'a
str)> {
52 self.iter
.next().map(|s
| (s
.as_ptr() as usize - self.start_offset
, s
))
56 fn size_hint(&self) -> (usize, Option
<usize>) {
61 impl<'a
> DoubleEndedIterator
for GraphemeIndices
<'a
> {
63 fn next_back(&mut self) -> Option
<(usize, &'a
str)> {
64 self.iter
.next_back().map(|s
| (s
.as_ptr() as usize - self.start_offset
, s
))
68 /// External iterator for a string's
69 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
71 /// This struct is created by the [`graphemes`] method on the [`UnicodeSegmentation`] trait. See its
72 /// documentation for more.
74 /// [`graphemes`]: trait.UnicodeSegmentation.html#tymethod.graphemes
75 /// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
77 pub struct Graphemes
<'a
> {
79 cursor
: GraphemeCursor
,
80 cursor_back
: GraphemeCursor
,
83 impl<'a
> Graphemes
<'a
> {
85 /// View the underlying data (the part yet to be iterated) as a slice of the original string.
88 /// # use unicode_segmentation::UnicodeSegmentation;
89 /// let mut iter = "abc".graphemes(true);
90 /// assert_eq!(iter.as_str(), "abc");
92 /// assert_eq!(iter.as_str(), "bc");
95 /// assert_eq!(iter.as_str(), "");
97 pub fn as_str(&self) -> &'a
str {
98 &self.string
[self.cursor
.cur_cursor()..self.cursor_back
.cur_cursor()]
102 impl<'a
> Iterator
for Graphemes
<'a
> {
106 fn size_hint(&self) -> (usize, Option
<usize>) {
107 let slen
= self.cursor_back
.cur_cursor() - self.cursor
.cur_cursor();
108 (cmp
::min(slen
, 1), Some(slen
))
112 fn next(&mut self) -> Option
<&'a
str> {
113 let start
= self.cursor
.cur_cursor();
114 if start
== self.cursor_back
.cur_cursor() {
117 let next
= self.cursor
.next_boundary(self.string
, 0).unwrap().unwrap();
118 Some(&self.string
[start
..next
])
122 impl<'a
> DoubleEndedIterator
for Graphemes
<'a
> {
124 fn next_back(&mut self) -> Option
<&'a
str> {
125 let end
= self.cursor_back
.cur_cursor();
126 if end
== self.cursor
.cur_cursor() {
129 let prev
= self.cursor_back
.prev_boundary(self.string
, 0).unwrap().unwrap();
130 Some(&self.string
[prev
..end
])
135 pub fn new_graphemes
<'b
>(s
: &'b
str, is_extended
: bool
) -> Graphemes
<'b
> {
139 cursor
: GraphemeCursor
::new(0, len
, is_extended
),
140 cursor_back
: GraphemeCursor
::new(len
, len
, is_extended
),
145 pub fn new_grapheme_indices
<'b
>(s
: &'b
str, is_extended
: bool
) -> GraphemeIndices
<'b
> {
146 GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) }
// maybe unify with PairResult?
// An enum describing information about a potential boundary.
#[derive(PartialEq, Eq, Clone)]
enum GraphemeState {
    // No information is known.
    Unknown,
    // It is known to not be a boundary.
    NotBreak,
    // It is known to be a boundary.
    Break,
    // The codepoint after is a Regional Indicator Symbol, so a boundary iff
    // it is preceded by an even number of RIS codepoints. (GB12, GB13)
    Regional,
    // The codepoint after is Extended_Pictographic,
    // so whether it's a boundary depends on pre-context according to GB11.
    Emoji,
}
167 /// Cursor-based segmenter for grapheme clusters.
169 pub struct GraphemeCursor
{
170 // Current cursor position.
172 // Total length of the string.
174 // A config flag indicating whether this cursor computes legacy or extended
175 // grapheme cluster boundaries (enables GB9a and GB9b if set).
177 // Information about the potential boundary at `offset`
178 state
: GraphemeState
,
179 // Category of codepoint immediately preceding cursor, if known.
180 cat_before
: Option
<GraphemeCat
>,
181 // Category of codepoint immediately after cursor, if known.
182 cat_after
: Option
<GraphemeCat
>,
183 // If set, at least one more codepoint immediately preceding this offset
184 // is needed to resolve whether there's a boundary at `offset`.
185 pre_context_offset
: Option
<usize>,
186 // The number of RIS codepoints preceding `offset`. If `pre_context_offset`
187 // is set, then counts the number of RIS between that and `offset`, otherwise
188 // is an accurate count relative to the string.
189 ris_count
: Option
<usize>,
190 // Set if a call to `prev_boundary` or `next_boundary` was suspended due
191 // to needing more input.
193 // Cached grapheme category and associated scalar value range.
194 grapheme_cat_cache
: (u32, u32, GraphemeCat
),
/// An error return indicating that not enough content was available in the
/// provided chunk to satisfy the query, and that more content must be provided.
#[derive(PartialEq, Eq, Debug)]
pub enum GraphemeIncomplete {
    /// More pre-context is needed. The caller should call `provide_context`
    /// with a chunk ending at the offset given, then retry the query. This
    /// will only be returned if the `chunk_start` parameter is nonzero.
    PreContext(usize),

    /// When requesting `prev_boundary`, the cursor is moving past the beginning
    /// of the current chunk, so the chunk before that is requested. This will
    /// only be returned if the `chunk_start` parameter is nonzero.
    PrevChunk,

    /// When requesting `next_boundary`, the cursor is moving past the end of the
    /// current chunk, so the chunk after that is requested. This will only be
    /// returned if the chunk ends before the `len` parameter provided on
    /// creation of the cursor.
    NextChunk, // requesting chunk following the one given

    /// An error returned when the chunk given does not contain the cursor position.
    InvalidOffset,
}
// An enum describing the result from lookup of a pair of categories.
#[derive(PartialEq, Eq)]
enum PairResult {
    NotBreak, // definitely not a break
    Break,    // definitely a break
    Extended, // a break iff not in extended mode
    Regional, // a break if preceded by an even number of RIS
    Emoji,    // a break if preceded by emoji base and (Extend)*
}
231 fn check_pair(before
: GraphemeCat
, after
: GraphemeCat
) -> PairResult
{
232 use tables
::grapheme
::GraphemeCat
::*;
233 use self::PairResult
::*;
234 match (before
, after
) {
235 (GC_CR
, GC_LF
) => NotBreak
, // GB3
236 (GC_Control
, _
) => Break
, // GB4
237 (GC_CR
, _
) => Break
, // GB4
238 (GC_LF
, _
) => Break
, // GB4
239 (_
, GC_Control
) => Break
, // GB5
240 (_
, GC_CR
) => Break
, // GB5
241 (_
, GC_LF
) => Break
, // GB5
242 (GC_L
, GC_L
) => NotBreak
, // GB6
243 (GC_L
, GC_V
) => NotBreak
, // GB6
244 (GC_L
, GC_LV
) => NotBreak
, // GB6
245 (GC_L
, GC_LVT
) => NotBreak
, // GB6
246 (GC_LV
, GC_V
) => NotBreak
, // GB7
247 (GC_LV
, GC_T
) => NotBreak
, // GB7
248 (GC_V
, GC_V
) => NotBreak
, // GB7
249 (GC_V
, GC_T
) => NotBreak
, // GB7
250 (GC_LVT
, GC_T
) => NotBreak
, // GB8
251 (GC_T
, GC_T
) => NotBreak
, // GB8
252 (_
, GC_Extend
) => NotBreak
, // GB9
253 (_
, GC_ZWJ
) => NotBreak
, // GB9
254 (_
, GC_SpacingMark
) => Extended
, // GB9a
255 (GC_Prepend
, _
) => Extended
, // GB9b
256 (GC_ZWJ
, GC_Extended_Pictographic
) => Emoji
, // GB11
257 (GC_Regional_Indicator
, GC_Regional_Indicator
) => Regional
, // GB12, GB13
258 (_
, _
) => Break
, // GB999
262 impl GraphemeCursor
{
263 /// Create a new cursor. The string and initial offset are given at creation
264 /// time, but the contents of the string are not. The `is_extended` parameter
265 /// controls whether extended grapheme clusters are selected.
267 /// The `offset` parameter must be on a codepoint boundary.
270 /// # use unicode_segmentation::GraphemeCursor;
271 /// let s = "हिन्दी";
272 /// let mut legacy = GraphemeCursor::new(0, s.len(), false);
273 /// assert_eq!(legacy.next_boundary(s, 0), Ok(Some("ह".len())));
274 /// let mut extended = GraphemeCursor::new(0, s.len(), true);
275 /// assert_eq!(extended.next_boundary(s, 0), Ok(Some("हि".len())));
277 pub fn new(offset
: usize, len
: usize, is_extended
: bool
) -> GraphemeCursor
{
278 let state
= if offset
== 0 || offset
== len
{
281 GraphemeState
::Unknown
287 is_extended
: is_extended
,
290 pre_context_offset
: None
,
293 grapheme_cat_cache
: (0, 0, GraphemeCat
::GC_Control
),
297 fn grapheme_category(&mut self, ch
: char) -> GraphemeCat
{
298 use tables
::grapheme
as gr
;
299 use tables
::grapheme
::GraphemeCat
::*;
302 // Special-case optimization for ascii, except U+007F. This
303 // improves performance even for many primarily non-ascii texts,
304 // due to use of punctuation and white space characters from the
308 } else if ch
== '
\n'
{
310 } else if ch
== '
\r'
{
316 // If this char isn't within the cached range, update the cache to the
317 // range that includes it.
318 if (ch
as u32) < self.grapheme_cat_cache
.0 || (ch
as u32) > self.grapheme_cat_cache
.1 {
319 self.grapheme_cat_cache
= gr
::grapheme_category(ch
);
321 self.grapheme_cat_cache
.2
325 // Not sure I'm gonna keep this, the advantage over new() seems thin.
327 /// Set the cursor to a new location in the same string.
330 /// # use unicode_segmentation::GraphemeCursor;
332 /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
333 /// assert_eq!(cursor.cur_cursor(), 0);
334 /// cursor.set_cursor(2);
335 /// assert_eq!(cursor.cur_cursor(), 2);
337 pub fn set_cursor(&mut self, offset
: usize) {
338 if offset
!= self.offset
{
339 self.offset
= offset
;
340 self.state
= if offset
== 0 || offset
== self.len
{
343 GraphemeState
::Unknown
345 // reset state derived from text around cursor
346 self.cat_before
= None
;
347 self.cat_after
= None
;
348 self.ris_count
= None
;
353 /// The current offset of the cursor. Equal to the last value provided to
354 /// `new()` or `set_cursor()`, or returned from `next_boundary()` or
355 /// `prev_boundary()`.
358 /// # use unicode_segmentation::GraphemeCursor;
359 /// // Two flags (🇷🇸🇮🇴), each flag is two RIS codepoints, each RIS is 4 bytes.
360 /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
361 /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
362 /// assert_eq!(cursor.cur_cursor(), 4);
363 /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
364 /// assert_eq!(cursor.cur_cursor(), 8);
366 pub fn cur_cursor(&self) -> usize {
370 /// Provide additional pre-context when it is needed to decide a boundary.
371 /// The end of the chunk must coincide with the value given in the
372 /// `GraphemeIncomplete::PreContext` request.
375 /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
376 /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
377 /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
378 /// // Not enough pre-context to decide if there's a boundary between the two flags.
379 /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(8)));
380 /// // Provide one more Regional Indicator Symbol of pre-context
381 /// cursor.provide_context(&flags[4..8], 4);
382 /// // Still not enough context to decide.
383 /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(4)));
384 /// // Provide additional requested context.
385 /// cursor.provide_context(&flags[0..4], 0);
386 /// // That's enough to decide (it always is when context goes to the start of the string)
387 /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
389 pub fn provide_context(&mut self, chunk
: &str, chunk_start
: usize) {
390 use tables
::grapheme
as gr
;
391 assert
!(chunk_start
+ chunk
.len() == self.pre_context_offset
.unwrap());
392 self.pre_context_offset
= None
;
393 if self.is_extended
&& chunk_start
+ chunk
.len() == self.offset
{
394 let ch
= chunk
.chars().rev().next().unwrap();
395 if self.grapheme_category(ch
) == gr
::GC_Prepend
{
396 self.decide(false); // GB9b
401 GraphemeState
::Regional
=> self.handle_regional(chunk
, chunk_start
),
402 GraphemeState
::Emoji
=> self.handle_emoji(chunk
, chunk_start
),
403 _
=> if self.cat_before
.is_none() && self.offset
== chunk
.len() + chunk_start
{
404 let ch
= chunk
.chars().rev().next().unwrap();
405 self.cat_before
= Some(self.grapheme_category(ch
));
410 fn decide(&mut self, is_break
: bool
) {
411 self.state
= if is_break
{
414 GraphemeState
::NotBreak
418 fn decision(&mut self, is_break
: bool
) -> Result
<bool
, GraphemeIncomplete
> {
419 self.decide(is_break
);
423 fn is_boundary_result(&self) -> Result
<bool
, GraphemeIncomplete
> {
424 if self.state
== GraphemeState
::Break
{
426 } else if self.state
== GraphemeState
::NotBreak
{
428 } else if let Some(pre_context_offset
) = self.pre_context_offset
{
429 Err(GraphemeIncomplete
::PreContext(pre_context_offset
))
431 unreachable
!("inconsistent state");
435 fn handle_regional(&mut self, chunk
: &str, chunk_start
: usize) {
436 use tables
::grapheme
as gr
;
437 let mut ris_count
= self.ris_count
.unwrap_or(0);
438 for ch
in chunk
.chars().rev() {
439 if self.grapheme_category(ch
) != gr
::GC_Regional_Indicator
{
440 self.ris_count
= Some(ris_count
);
441 self.decide((ris_count
% 2) == 0);
446 self.ris_count
= Some(ris_count
);
447 if chunk_start
== 0 {
448 self.decide((ris_count
% 2) == 0);
451 self.pre_context_offset
= Some(chunk_start
);
452 self.state
= GraphemeState
::Regional
;
455 fn handle_emoji(&mut self, chunk
: &str, chunk_start
: usize) {
456 use tables
::grapheme
as gr
;
457 let mut iter
= chunk
.chars().rev();
458 if let Some(ch
) = iter
.next() {
459 if self.grapheme_category(ch
) != gr
::GC_ZWJ
{
465 match self.grapheme_category(ch
) {
467 gr
::GC_Extended_Pictographic
=> {
477 if chunk_start
== 0 {
481 self.pre_context_offset
= Some(chunk_start
);
482 self.state
= GraphemeState
::Emoji
;
485 /// Determine whether the current cursor location is a grapheme cluster boundary.
486 /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
487 /// the length of `chunk` is not equal to `len` on creation, then this method
488 /// may return `GraphemeIncomplete::PreContext`. The caller should then
489 /// call `provide_context` with the requested chunk, then retry calling this
492 /// For partial chunks, if the cursor is not at the beginning or end of the
493 /// string, the chunk should contain at least the codepoint following the cursor.
494 /// If the string is nonempty, the chunk must be nonempty.
496 /// All calls should have consistent chunk contents (ie, if a chunk provides
497 /// content for a given slice, all further chunks covering that slice must have
498 /// the same content for it).
501 /// # use unicode_segmentation::GraphemeCursor;
502 /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
503 /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
504 /// assert_eq!(cursor.is_boundary(flags, 0), Ok(true));
505 /// cursor.set_cursor(12);
506 /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
508 pub fn is_boundary(&mut self, chunk
: &str, chunk_start
: usize) -> Result
<bool
, GraphemeIncomplete
> {
509 use tables
::grapheme
as gr
;
510 if self.state
== GraphemeState
::Break
{
513 if self.state
== GraphemeState
::NotBreak
{
516 if self.offset
< chunk_start
|| self.offset
>= chunk_start
+ chunk
.len() {
517 if self.offset
> chunk_start
+ chunk
.len() || self.cat_after
.is_none() {
518 return Err(GraphemeIncomplete
::InvalidOffset
)
521 if let Some(pre_context_offset
) = self.pre_context_offset
{
522 return Err(GraphemeIncomplete
::PreContext(pre_context_offset
));
524 let offset_in_chunk
= self.offset
- chunk_start
;
525 if self.cat_after
.is_none() {
526 let ch
= chunk
[offset_in_chunk
..].chars().next().unwrap();
527 self.cat_after
= Some(self.grapheme_category(ch
));
529 if self.offset
== chunk_start
{
530 let mut need_pre_context
= true;
531 match self.cat_after
.unwrap() {
532 gr
::GC_Regional_Indicator
=> self.state
= GraphemeState
::Regional
,
533 gr
::GC_Extended_Pictographic
=> self.state
= GraphemeState
::Emoji
,
534 _
=> need_pre_context
= self.cat_before
.is_none(),
536 if need_pre_context
{
537 self.pre_context_offset
= Some(chunk_start
);
538 return Err(GraphemeIncomplete
::PreContext(chunk_start
));
541 if self.cat_before
.is_none() {
542 let ch
= chunk
[..offset_in_chunk
].chars().rev().next().unwrap();
543 self.cat_before
= Some(self.grapheme_category(ch
));
545 match check_pair(self.cat_before
.unwrap(), self.cat_after
.unwrap()) {
546 PairResult
::NotBreak
=> return self.decision(false),
547 PairResult
::Break
=> return self.decision(true),
548 PairResult
::Extended
=> {
549 let is_extended
= self.is_extended
;
550 return self.decision(!is_extended
);
552 PairResult
::Regional
=> {
553 if let Some(ris_count
) = self.ris_count
{
554 return self.decision((ris_count
% 2) == 0);
556 self.handle_regional(&chunk
[..offset_in_chunk
], chunk_start
);
557 self.is_boundary_result()
559 PairResult
::Emoji
=> {
560 self.handle_emoji(&chunk
[..offset_in_chunk
], chunk_start
);
561 self.is_boundary_result()
566 /// Find the next boundary after the current cursor position. Only a part of
567 /// the string need be supplied. If the chunk is incomplete, then this
568 /// method might return `GraphemeIncomplete::PreContext` or
569 /// `GraphemeIncomplete::NextChunk`. In the former case, the caller should
570 /// call `provide_context` with the requested chunk, then retry. In the
571 /// latter case, the caller should provide the chunk following the one
572 /// given, then retry.
574 /// See `is_boundary` for expectations on the provided chunk.
577 /// # use unicode_segmentation::GraphemeCursor;
578 /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
579 /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
580 /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
581 /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(16)));
582 /// assert_eq!(cursor.next_boundary(flags, 0), Ok(None));
585 /// And an example that uses partial strings:
588 /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
590 /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
591 /// assert_eq!(cursor.next_boundary(&s[..2], 0), Ok(Some(1)));
592 /// assert_eq!(cursor.next_boundary(&s[..2], 0), Err(GraphemeIncomplete::NextChunk));
593 /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(2)));
594 /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(3)));
595 /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
596 /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
598 pub fn next_boundary(&mut self, chunk
: &str, chunk_start
: usize) -> Result
<Option
<usize>, GraphemeIncomplete
> {
599 if self.offset
== self.len
{
602 let mut iter
= chunk
[self.offset
- chunk_start
..].chars();
603 let mut ch
= iter
.next().unwrap();
606 if self.cat_after
.is_none() {
607 self.cat_after
= Some(self.grapheme_category(ch
));
610 self.offset
+= ch
.len_utf8();
611 self.state
= GraphemeState
::Unknown
;
612 self.cat_before
= self.cat_after
.take();
613 if self.cat_before
.is_none() {
614 self.cat_before
= Some(self.grapheme_category(ch
));
616 if self.cat_before
.unwrap() == GraphemeCat
::GC_Regional_Indicator
{
617 self.ris_count
= self.ris_count
.map(|c
| c
+ 1);
619 self.ris_count
= Some(0);
621 if let Some(next_ch
) = iter
.next() {
623 self.cat_after
= Some(self.grapheme_category(ch
));
624 } else if self.offset
== self.len
{
627 self.resuming
= true;
628 return Err(GraphemeIncomplete
::NextChunk
);
631 self.resuming
= true;
632 if self.is_boundary(chunk
, chunk_start
)?
{
633 self.resuming
= false;
634 return Ok(Some(self.offset
));
636 self.resuming
= false;
640 /// Find the previous boundary after the current cursor position. Only a part
641 /// of the string need be supplied. If the chunk is incomplete, then this
642 /// method might return `GraphemeIncomplete::PreContext` or
643 /// `GraphemeIncomplete::PrevChunk`. In the former case, the caller should
644 /// call `provide_context` with the requested chunk, then retry. In the
645 /// latter case, the caller should provide the chunk preceding the one
646 /// given, then retry.
648 /// See `is_boundary` for expectations on the provided chunk.
651 /// # use unicode_segmentation::GraphemeCursor;
652 /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
653 /// let mut cursor = GraphemeCursor::new(12, flags.len(), false);
654 /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(8)));
655 /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(0)));
656 /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(None));
659 /// And an example that uses partial strings (note the exact return is not
660 /// guaranteed, and may be `PrevChunk` or `PreContext` arbitrarily):
663 /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
665 /// let mut cursor = GraphemeCursor::new(4, s.len(), false);
666 /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Ok(Some(3)));
667 /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Err(GraphemeIncomplete::PrevChunk));
668 /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(2)));
669 /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(1)));
670 /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
671 /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
673 pub fn prev_boundary(&mut self, chunk
: &str, chunk_start
: usize) -> Result
<Option
<usize>, GraphemeIncomplete
> {
674 if self.offset
== 0 {
677 if self.offset
== chunk_start
{
678 return Err(GraphemeIncomplete
::PrevChunk
);
680 let mut iter
= chunk
[..self.offset
- chunk_start
].chars().rev();
681 let mut ch
= iter
.next().unwrap();
683 if self.offset
== chunk_start
{
684 self.resuming
= true;
685 return Err(GraphemeIncomplete
::PrevChunk
);
688 self.cat_before
= Some(self.grapheme_category(ch
));
690 self.offset
-= ch
.len_utf8();
691 self.cat_after
= self.cat_before
.take();
692 self.state
= GraphemeState
::Unknown
;
693 if let Some(ris_count
) = self.ris_count
{
694 self.ris_count
= if ris_count
> 0 { Some(ris_count - 1) }
else { None }
;
696 if let Some(prev_ch
) = iter
.next() {
698 self.cat_before
= Some(self.grapheme_category(ch
));
699 } else if self.offset
== 0 {
702 self.resuming
= true;
703 self.cat_after
= Some(self.grapheme_category(ch
));
704 return Err(GraphemeIncomplete
::PrevChunk
);
707 self.resuming
= true;
708 if self.is_boundary(chunk
, chunk_start
)?
{
709 self.resuming
= false;
710 return Ok(Some(self.offset
));
712 self.resuming
= false;
// Three flags in a row: the boundary after the first pair can only be
// confirmed once the chunk before the cursor's chunk has been supplied.
#[test]
fn test_grapheme_cursor_ris_precontext() {
    let flags = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
    let mut cursor = GraphemeCursor::new(8, flags.len(), true);
    let tail = &flags[4..];
    assert_eq!(
        cursor.is_boundary(tail, 4),
        Err(GraphemeIncomplete::PreContext(4))
    );
    cursor.provide_context(&flags[..4], 0);
    assert_eq!(cursor.is_boundary(tail, 4), Ok(true));
}
// A cursor sitting exactly at the start of its chunk must ask for
// pre-context before it can rule out a boundary.
#[test]
fn test_grapheme_cursor_chunk_start_require_precontext() {
    // NOTE(review): the original literal is not visible in this listing.
    // CR LF is a two-byte string with no boundary at offset 1, which is what
    // the assertions below require — confirm against the upstream test.
    let s = "\r\n";
    let mut c = GraphemeCursor::new(1, s.len(), true);
    assert_eq!(
        c.is_boundary(&s[1..], 1),
        Err(GraphemeIncomplete::PreContext(1))
    );
    c.provide_context(&s[..1], 0);
    assert_eq!(c.is_boundary(&s[1..], 1), Ok(false));
}
// Stepping backwards out of a chunk requests the previous chunk, and the
// retry then reports the boundary at the seam between the two chunks.
#[test]
fn test_grapheme_cursor_prev_boundary() {
    // NOTE(review): the original literal is not visible in this listing; any
    // ASCII string of at least three letters satisfies the assertions below.
    let s = "abcd";
    let mut c = GraphemeCursor::new(3, s.len(), true);
    assert_eq!(
        c.prev_boundary(&s[2..], 2),
        Err(GraphemeIncomplete::PrevChunk)
    );
    assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2)));
}
// A cursor already at the start of its chunk requests the previous chunk
// immediately, and the retry steps back one cluster inside that chunk.
#[test]
fn test_grapheme_cursor_prev_boundary_chunk_start() {
    // NOTE(review): the original literal is not visible in this listing; any
    // ASCII string of at least two letters satisfies the assertions below.
    let s = "abcd";
    let mut c = GraphemeCursor::new(2, s.len(), true);
    assert_eq!(
        c.prev_boundary(&s[2..], 2),
        Err(GraphemeIncomplete::PrevChunk)
    );
    assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1)));
}