New upstream version 1.41.1+dfsg1

[rustc.git] / vendor / unicode-segmentation / src / lib.rs
diff --git a/vendor/unicode-segmentation/src/lib.rs b/vendor/unicode-segmentation/src/lib.rs

index 6f903c051803d39702435c802886865ea344b419..fce3c52efe3dfd3ec6fe6cbab975a90433c24e81 100644 (file)
--- a/vendor/unicode-segmentation/src/lib.rs
+++ b/vendor/unicode-segmentation/src/lib.rs
@@ -8,7 +8,7 @@
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
  
-//! Iterators which split strings on Grapheme Cluster or Word boundaries, according
+//! Iterators which split strings on Grapheme Cluster, Word or Sentence boundaries, according
  //! to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
  //!
  //! ```rust
@@ -29,7 +29,7 @@
  //!
  //!     let s = "The quick (\"brown\")  fox";
  //!     let w = s.split_word_bounds().collect::<Vec<&str>>();
-//!     let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"];
+//!     let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", "  ", "fox"];
  //!     assert_eq!(w, b);
  //! }
  //! ```
@@ -46,7 +46,7 @@
  //!
  //! ```toml
  //! [dependencies]
-//! unicode-segmentation = "1.1.0"
+//! unicode-segmentation = "1.3.0"
  //! ```
  
  #![deny(missing_docs, unsafe_code)]
@@ -67,10 +67,12 @@ pub use grapheme::{Graphemes, GraphemeIndices};
  pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
  pub use tables::UNICODE_VERSION;
  pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
+pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
  
  mod grapheme;
  mod tables;
  mod word;
+mod sentence;
  
  #[cfg(test)]
  mod test;
@@ -154,7 +156,7 @@ pub trait UnicodeSegmentation {
      /// ```
      /// # use self::unicode_segmentation::UnicodeSegmentation;
      /// let swu1 = "The quick (\"brown\")  fox".split_word_bounds().collect::<Vec<&str>>();
-    /// let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"];
+    /// let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", "  ", "fox"];
      ///
      /// assert_eq!(&swu1[..], b);
      /// ```
@@ -174,6 +176,27 @@ pub trait UnicodeSegmentation {
      /// assert_eq!(&swi1[..], b);
      /// ```
      fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>;
+
+    /// Returns an iterator over substrings of `self` separated on
+    /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
+    ///
+    /// Here, "sentences" are just those substrings which, after splitting on
+    /// UAX#29 sentence boundaries, contain any alphanumeric characters. That is, the
+    /// substring must contain at least one character with the
+    /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
+    /// property, or with
+    /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
+    fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>;
+
+    /// Returns an iterator over substrings of `self` separated on
+    /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
+    ///
+    /// The concatenation of the substrings returned by this function is just the original string.
+    fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>;
+
+    /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries,
+    /// and their offsets. See `split_sentence_bounds()` for more information.
+    fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>;
  }
  
  impl UnicodeSegmentation for str {
@@ -201,4 +224,19 @@ impl UnicodeSegmentation for str {
      fn split_word_bound_indices(&self) -> UWordBoundIndices {
          word::new_word_bound_indices(self)
      }
+
+    #[inline]
+    fn unicode_sentences(&self) -> UnicodeSentences {
+        sentence::new_unicode_sentences(self)
+    }
+
+    #[inline]
+    fn split_sentence_bounds(&self) -> USentenceBounds {
+        sentence::new_sentence_bounds(self)
+    }
+
+    #[inline]
+    fn split_sentence_bound_indices(&self) -> USentenceBoundIndices {
+        sentence::new_sentence_bound_indices(self)
+    }
  }