]>
Commit | Line | Data |
---|---|---|
2c00a5a8 XL |
1 | # Strings |
2 | ||
3 | There are two types of strings in Rust: `String` and `&str`. | |
4 | ||
5 | A `String` is stored as a vector of bytes (`Vec<u8>`), but guaranteed to | |
6 | always be a valid UTF-8 sequence. `String` is heap-allocated, growable and not | |
7 | null-terminated. | |
8 | ||
9 | `&str` is a slice (`&[u8]`) that always points to a valid UTF-8 sequence, and | |
10 | can be used to view into a `String`, just like `&[T]` is a view into `Vec<T>`. | |
11 | ||
12 | ```rust,editable | |
13 | fn main() { | |
14 | // (all the type annotations are superfluous) | |
15 | // A reference to a string allocated in read only memory | |
16 | let pangram: &'static str = "the quick brown fox jumps over the lazy dog"; | |
17 | println!("Pangram: {}", pangram); | |
18 | ||
19 | // Iterate over words in reverse, no new string is allocated | |
20 | println!("Words in reverse"); | |
21 | for word in pangram.split_whitespace().rev() { | |
22 | println!("> {}", word); | |
23 | } | |
24 | ||
25 | // Copy chars into a vector, sort and remove duplicates | |
26 | let mut chars: Vec<char> = pangram.chars().collect(); | |
27 | chars.sort(); | |
28 | chars.dedup(); | |
29 | ||
30 | // Create an empty and growable `String` | |
31 | let mut string = String::new(); | |
32 | for c in chars { | |
33 | // Insert a char at the end of string | |
34 | string.push(c); | |
35 | // Insert a string at the end of string | |
36 | string.push_str(", "); | |
37 | } | |
38 | ||
39 | // The trimmed string is a slice to the original string, hence no new | |
40 | // allocation is performed | |
41 | let chars_to_trim: &[char] = &[' ', ',']; | |
42 | let trimmed_str: &str = string.trim_matches(chars_to_trim); | |
43 | println!("Used characters: {}", trimmed_str); | |
44 | ||
45 | // Heap allocate a string | |
46 | let alice = String::from("I like dogs"); | |
47 | // Allocate new memory and store the modified string there | |
48 | let bob: String = alice.replace("dog", "cat"); | |
49 | ||
50 | println!("Alice says: {}", alice); | |
51 | println!("Bob says: {}", bob); | |
52 | } | |
53 | ``` | |
54 | ||
55 | More `str`/`String` methods can be found under the | |
56 | [std::str][str] and | |
57 | [std::string][string] | |
58 | modules. | |
59 | ||
60 | ## Literals and escapes | |
61 | ||
62 | There are multiple ways to write string literals with special characters in them. | |
63 | All result in a similar `&str` so it's best to use the form that is the most | |
64 | convenient to write. Similarly there are multiple ways to write byte string literals, | |
65 | which all result in `&[u8; N]`. | |
66 | ||
67 | Generally special characters are escaped with a backslash character: `\`. | |
68 | This way you can add any character to your string, even unprintable ones | |
69 | and ones that you don't know how to type. If you want a literal backslash, | |
70 | escape it with another one: `\\` | |
71 | ||
72 | String or character literal delimiters occurring within a literal must be escaped: `"\""`, `'\''`. | |
73 | ||
74 | ```rust,editable | |
75 | fn main() { | |
76 | // You can use escapes to write bytes by their hexadecimal values... | |
77 | let byte_escape = "I'm writing \x52\x75\x73\x74!"; | |
78 | println!("What are you doing\x3F (\\x3F means ?) {}", byte_escape); | |
79 | ||
80 | // ...or Unicode code points. | |
81 | let unicode_codepoint = "\u{211D}"; | |
82 | let character_name = "\"DOUBLE-STRUCK CAPITAL R\""; | |
83 | ||
84 | println!("Unicode character {} (U+211D) is called {}", | |
85 | unicode_codepoint, character_name ); | |
86 | ||
87 | ||
88 | let long_string = "String literals | |
89 | can span multiple lines. | |
90 | The linebreak and indentation here ->\ | |
91 | <- can be escaped too!"; | |
92 | println!("{}", long_string); | |
93 | } | |
94 | ``` | |
95 | ||
96 | Sometimes there are just too many characters that need to be escaped or it's just | |
97 | much more convenient to write a string out as-is. This is where raw string literals come into play. | |
98 | ||
99 | ```rust, editable | |
100 | fn main() { | |
101 | let raw_str = r"Escapes don't work here: \x3F \u{211D}"; | |
102 | println!("{}", raw_str); | |
103 | ||
104 | // If you need quotes in a raw string, add a pair of #s | |
105 | let quotes = r#"And then I said: "There is no escape!""#; | |
106 | println!("{}", quotes); | |
107 | ||
108 | // If you need "# in your string, just use more #s in the delimiter. | |
109 | // There is no limit for the number of #s you can use. | |
110 | let longer_delimiter = r###"A string with "# in it. And even "##!"###; | |
111 | println!("{}", longer_delimiter); | |
112 | } | |
113 | ``` | |
114 | ||
416331ca | 115 | Want a string that's not UTF-8? (Remember, `str` and `String` must be valid UTF-8). |
2c00a5a8 XL |
116 | Or maybe you want an array of bytes that's mostly text? Byte strings to the rescue! |
117 | ||
118 | ```rust, editable | |
119 | use std::str; | |
120 | ||
121 | fn main() { | |
416331ca XL |
122 | // Note that this is not actually a `&str` |
123 | let bytestring: &[u8; 21] = b"this is a byte string"; | |
2c00a5a8 | 124 | |
416331ca XL |
125 | // Byte arrays don't have the `Display` trait, so printing them is a bit limited |
126 | println!("A byte string: {:?}", bytestring); | |
2c00a5a8 | 127 | |
416331ca | 128 | // Byte strings can have byte escapes... |
2c00a5a8 XL |
129 | let escaped = b"\x52\x75\x73\x74 as bytes"; |
130 | // ...but no unicode escapes | |
131 | // let escaped = b"\u{211D} is not allowed"; | |
132 | println!("Some escaped bytes: {:?}", escaped); | |
133 | ||
134 | ||
416331ca | 135 | // Raw byte strings work just like raw strings |
2c00a5a8 XL |
136 | let raw_bytestring = br"\u{211D} is not escaped here"; |
137 | println!("{:?}", raw_bytestring); | |
138 | ||
416331ca | 139 | // Converting a byte array to `str` can fail |
2c00a5a8 XL |
140 | if let Ok(my_str) = str::from_utf8(raw_bytestring) { |
141 | println!("And the same as text: '{}'", my_str); | |
142 | } | |
143 | ||
416331ca | 144 | let _quotes = br#"You can also use "fancier" formatting, \ |
2c00a5a8 XL |
145 | like with normal raw strings"#; |
146 | ||
416331ca | 147 | // Byte strings don't have to be UTF-8 |
f9f354fc | 148 | let shift_jis = b"\x82\xe6\x82\xa8\x82\xb1\x82\xbb"; // "ようこそ" in SHIFT-JIS |
2c00a5a8 | 149 | |
416331ca | 150 | // But then they can't always be converted to `str` |
2c00a5a8 XL |
151 | match str::from_utf8(shift_jis) { |
152 | Ok(my_str) => println!("Conversion successful: '{}'", my_str), | |
153 | Err(e) => println!("Conversion failed: {:?}", e), | |
154 | }; | |
155 | } | |
156 | ``` | |
157 | ||
158 | For conversions between character encodings check out the [encoding][encoding-crate] crate. | |
159 | ||
160 | A more detailed listing of the ways to write string literals and escape characters | |
161 | is given in the ['Tokens' chapter][tokens] of the Rust Reference. | |
162 | ||
163 | [str]: https://doc.rust-lang.org/std/str/ | |
164 | [string]: https://doc.rust-lang.org/std/string/ | |
165 | [tokens]: https://doc.rust-lang.org/reference/tokens.html | |
166 | [encoding-crate]: https://crates.io/crates/encoding |