Imported Upstream version 1.6.0+dfsg1

[rustc.git] / src / libstd / primitive_docs.rs
diff --git a/src/libstd/primitive_docs.rs b/src/libstd/primitive_docs.rs

index 7d62d477a0a52acdd96e90d49e79dbf2dac4081e..afeb4231aba48136ae454e7059703ef59afd5468 100644 (file)
--- a/src/libstd/primitive_docs.rs
+++ b/src/libstd/primitive_docs.rs
@@ -16,21 +16,82 @@ mod prim_bool { }
  
  #[doc(primitive = "char")]
  //
-/// A Unicode scalar value.
+/// A character type.
  ///
-/// A `char` represents a
-/// *[Unicode scalar
-/// value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can
-/// contain any Unicode code point except high-surrogate and low-surrogate code
-/// points.
+/// The `char` type represents a single character. More specifically, since
+/// 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
+/// scalar value]', which is similar to, but not the same as, a '[Unicode code
+/// point]'.
  ///
-/// As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
-/// (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
-/// however the converse is not always true due to the above range limits
-/// and, as such, should be performed via the `from_u32` function.
+/// [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
+/// [Unicode code point]: http://www.unicode.org/glossary/#code_point
  ///
-/// *[See also the `std::char` module](char/index.html).*
+/// This documentation describes a number of methods and trait implementations on the
+/// `char` type. For technical reasons, there is additional, separate
+/// documentation in [the `std::char` module](char/index.html) as well.
  ///
+/// # Representation
+///
+/// `char` is always four bytes in size. This is a different representation than
+/// a given character would have as part of a [`String`], for example:
+///
+/// ```
+/// let v = vec!['h', 'e', 'l', 'l', 'o'];
+///
+/// // five elements times four bytes for each element
+/// assert_eq!(20, v.len() * std::mem::size_of::<char>());
+///
+/// let s = String::from("hello");
+///
+/// // five elements times one byte per element
+/// assert_eq!(5, s.len() * std::mem::size_of::<u8>());
+/// ```
+///
+/// [`String`]: string/struct.String.html
+///
+/// As always, remember that a human intuition for 'character' may not map to
+/// Unicode's definitions. For example, emoji symbols such as '❤️' are more than
+/// one byte; ❤️ in particular is six:
+///
+/// ```
+/// let s = String::from("❤️");
+///
+/// // six bytes times one byte for each element
+/// assert_eq!(6, s.len() * std::mem::size_of::<u8>());
+/// ```
+///
+/// This also means it won't fit into a `char`, and so trying to create a
+/// literal with `let heart = '❤️';` gives an error:
+///
+/// ```text
+/// error: character literal may only contain one codepoint: '❤
+/// let heart = '❤️';
+///             ^~
+/// ```
+///
+/// Another implication of this is that if you want to do per-`char`acter
+/// processing, it can end up using a lot more memory:
+///
+/// ```
+/// let s = String::from("love: ❤️");
+/// let v: Vec<char> = s.chars().collect();
+///
+/// assert_eq!(12, s.len() * std::mem::size_of::<u8>());
+/// assert_eq!(32, v.len() * std::mem::size_of::<char>());
+/// ```
+///
+/// Or may give you results you may not expect:
+///
+/// ```
+/// let s = String::from("❤️");
+///
+/// let mut iter = s.chars();
+///
+/// // we get two chars out of a single ❤️
+/// assert_eq!(Some('\u{2764}'), iter.next());
+/// assert_eq!(Some('\u{fe0f}'), iter.next());
+/// assert_eq!(None, iter.next());
+/// ```
  mod prim_char { }
  
  #[doc(primitive = "unit")]
@@ -153,8 +214,8 @@ mod prim_pointer { }
  
  #[doc(primitive = "array")]
  //
-/// A fixed-size array, denoted `[T; N]`, for the element type, `T`, and
-/// the non-negative compile time constant size, `N`.
+/// A fixed-size array, denoted `[T; N]`, for the element type, `T`, and the
+/// non-negative compile time constant size, `N`.
  ///
  /// Arrays values are created either with an explicit expression that lists
  /// each element: `[x, y, z]` or a repeat expression: `[x; N]`. The repeat
@@ -162,18 +223,20 @@ mod prim_pointer { }
  ///
  /// The type `[T; N]` is `Copy` if `T: Copy`.
  ///
-/// Arrays of sizes from 0 to 32 (inclusive) implement the following traits
-/// if the element type allows it:
+/// Arrays of sizes from 0 to 32 (inclusive) implement the following traits if
+/// the element type allows it:
  ///
-/// - `Clone`
+/// - `Clone` (only if `T: Copy`)
  /// - `Debug`
  /// - `IntoIterator` (implemented for `&[T; N]` and `&mut [T; N]`)
  /// - `PartialEq`, `PartialOrd`, `Ord`, `Eq`
  /// - `Hash`
  /// - `AsRef`, `AsMut`
+/// - `Borrow`, `BorrowMut`
+/// - `Default`
  ///
-/// Arrays dereference to [slices (`[T]`)][slice], so their methods can be called
-/// on arrays.
+/// Arrays coerce to [slices (`[T]`)][slice], so their methods can be called on
+/// arrays.
  ///
  /// [slice]: primitive.slice.html
  ///
@@ -230,44 +293,64 @@ mod prim_slice { }
  
  #[doc(primitive = "str")]
  //
-/// Unicode string slices.
+/// String slices.
+///
+/// The `str` type, also called a 'string slice', is the most primitive string
+/// type. It is usually seen in its borrowed form, `&str`. It is also the type
+/// of string literals, `&'static str`.
+///
+/// Strings slices are always valid UTF-8.
  ///
-/// Rust's `str` type is one of the core primitive types of the language. `&str`
-/// is the borrowed string type. This type of string can only be created from
-/// other strings, unless it is a `&'static str` (see below). It is not possible
-/// to move out of borrowed strings because they are owned elsewhere.
+/// This documentation describes a number of methods and trait implementations
+/// on the `str` type. For technical reasons, there is additional, separate
+/// documentation in [the `std::str` module](str/index.html) as well.
  ///
  /// # Examples
  ///
-/// Here's some code that uses a `&str`:
+/// String literals are string slices:
  ///
  /// ```
-/// let s = "Hello, world.";
+/// let hello = "Hello, world!";
+///
+/// // with an explicit type annotation
+/// let hello: &'static str = "Hello, world!";
  /// ```
  ///
-/// This `&str` is a `&'static str`, which is the type of string literals.
-/// They're `'static` because literals are available for the entire lifetime of
-/// the program.
+/// They are `'static` because they're stored directly in the final binary, and
+/// so will be valid for the `'static` duration.
  ///
-/// You can get a non-`'static` `&str` by taking a slice of a `String`:
+/// # Representation
+///
+/// A `&str` is made up of two components: a pointer to some bytes, and a
+/// length. You can look at these with the [`.as_ptr()`] and [`len()`] methods:
  ///
  /// ```
-/// let some_string = "Hello, world.".to_string();
-/// let s = &some_string;
-/// ```
+/// use std::slice;
+/// use std::str;
  ///
-/// # Representation
+/// let story = "Once upon a time...";
  ///
-/// Rust's string type, `str`, is a sequence of Unicode scalar values encoded as
-/// a stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
-/// guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
-/// not null-terminated and can thus contain null bytes.
+/// let ptr = story.as_ptr();
+/// let len = story.len();
  ///
-/// The actual representation of `str`s have direct mappings to slices: `&str`
-/// is the same as `&[u8]`.
+/// // story has thirteen bytes
+/// assert_eq!(19, len);
  ///
-/// *[See also the `std::str` module](str/index.html).*
+/// // We can re-build a str out of ptr and len. This is all unsafe becuase
+/// // we are responsible for making sure the two components are valid:
+/// let s = unsafe {
+///     // First, we build a &[u8]...
+///     let slice = slice::from_raw_parts(ptr, len);
+///
+///     // ... and then convert that slice into a string slice
+///     str::from_utf8(slice)
+/// };
+///
+/// assert_eq!(s, Ok(story));
+/// ```
  ///
+/// [`.as_ptr()`]: #method.as_ptr
+/// [`len()`]: #method.len
  mod prim_str { }
  
  #[doc(primitive = "tuple")]