]>
Commit | Line | Data |
---|---|---|
8bb4bdeb XL |
1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | #[cfg(feature = "bench")] | |
12 | use std::iter; | |
13 | #[cfg(feature = "bench")] | |
14 | use test::{self, Bencher}; | |
15 | #[cfg(feature = "bench")] | |
2b03887a | 16 | use super::{UnicodeWidthChar, UnicodeWidthStr}; |
8bb4bdeb XL |
17 | |
18 | use std::prelude::v1::*; | |
19 | ||
/// Benchmarks `UnicodeWidthChar::width`, called in its fully qualified form,
/// on every character of a 4096-character string consisting only of `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn cargo(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // Route the result through `black_box` so the lookup is not optimized away.
            test::black_box(UnicodeWidthChar::width(ch));
        }
    });
}
31 | ||
/// Benchmarks the method-call form `c.width()` on every character of a
/// 4096-character string consisting only of `'a'`.
// NOTE(review): `#[allow(deprecated)]` presumably dates from when `char::width`
// was a deprecated std method this benchmark compared against — confirm whether
// it is still needed.
#[cfg(feature = "bench")]
#[bench]
#[allow(deprecated)]
fn stdlib(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // Route the result through `black_box` so the lookup is not optimized away.
            test::black_box(ch.width());
        }
    });
}
44 | ||
/// Benchmarks `simple_width_if` on every character of a 4096-character string
/// consisting only of `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn simple_if(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // Route the result through `black_box` so the call is not optimized away.
            test::black_box(simple_width_if(ch));
        }
    });
}
56 | ||
/// Benchmarks `simple_width_match` on every character of a 4096-character
/// string consisting only of `'a'`.
#[cfg(feature = "bench")]
#[bench]
fn simple_match(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // Route the result through `black_box` so the call is not optimized away.
            test::black_box(simple_width_match(ch));
        }
    });
}
68 | ||
/// Width lookup with an `if`-based fast path for code points below 127.
///
/// Returns `Some(0)` for NUL, `None` for the other code points up to 31,
/// `Some(1)` for code points 32 through 126, and defers to
/// `UnicodeWidthChar::width` for everything from 127 upwards.
#[cfg(feature = "bench")]
#[inline]
fn simple_width_if(c: char) -> Option<usize> {
    let code = c as u32;

    // Anything outside the fast-path range goes to the full lookup.
    if code >= 127 {
        return UnicodeWidthChar::width(c);
    }

    if code > 31 {
        Some(1)
    } else if code == 0 {
        Some(0)
    } else {
        None
    }
}
85 | ||
/// Width lookup with a `match`-based fast path for code points below 0x7f.
///
/// Returns `Some(0)` for NUL, `None` for 0x01 through 0x1f, `Some(1)` for
/// 0x20 through 0x7e, and defers to `UnicodeWidthChar::width` otherwise.
#[cfg(feature = "bench")]
#[inline]
fn simple_width_match(c: char) -> Option<usize> {
    match c as u32 {
        0 => Some(0),
        0x01..=0x1f => None,
        0x20..=0x7e => Some(1),
        _ => UnicodeWidthChar::width(c),
    }
}
2b03887a FG |
/// Benchmarks `UnicodeWidthStr::width` over the contents of `bench_data/enwik8`.
///
/// To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
///
/// If the file cannot be read, the benchmark falls back to an empty string
/// instead of failing, so it then measures the empty-input case only.
#[cfg(all(feature = "bench", not(feature = "no_std")))]
#[bench]
fn enwik8(b: &mut Bencher) {
    let corpus = std::fs::read_to_string("bench_data/enwik8").unwrap_or_default();
    let text = corpus.as_str();
    b.iter(|| test::black_box(UnicodeWidthStr::width(text)));
}
/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/jawiki-20220501-pages-articles-multistream-index.txt`.
///
/// To benchmark, download & extract
/// `jawiki-20220501-pages-articles-multistream-index.txt` from
/// https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
///
/// If the file cannot be read, the benchmark falls back to an empty string
/// instead of failing, so it then measures the empty-input case only.
#[cfg(all(feature = "bench", not(feature = "no_std")))]
#[bench]
fn jawiki(b: &mut Bencher) {
    let corpus =
        std::fs::read_to_string("bench_data/jawiki-20220501-pages-articles-multistream-index.txt")
            .unwrap_or_default();
    let text = corpus.as_str();
    b.iter(|| test::black_box(UnicodeWidthStr::width(text)));
}
8bb4bdeb XL |
/// Checks `UnicodeWidthStr::width` and `width_cjk` on whole strings: fullwidth
/// letters, control characters, the empty string, and subscript digits whose
/// width differs between the two methods.
#[test]
fn test_str() {
    use super::UnicodeWidthStr;

    // Fullwidth Latin letters h, e, l, l, o (U+FF48 U+FF45 U+FF4C U+FF4C U+FF4F),
    // two columns each. Written as escapes so the characters cannot be silently
    // normalized to ASCII "hello", whose width would be 5 rather than 10.
    let fullwidth_hello = "\u{FF48}\u{FF45}\u{FF4C}\u{FF4C}\u{FF4F}";
    assert_eq!(UnicodeWidthStr::width(fullwidth_hello), 10);
    assert_eq!(fullwidth_hello.width_cjk(), 10);

    // NUL and other control characters contribute nothing to a string's width.
    assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
    assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);

    assert_eq!(UnicodeWidthStr::width(""), 0);
    assert_eq!("".width_cjk(), 0);

    // Subscript digits one to four: one column each normally, two in CJK mode.
    assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
}
126 | ||
e1599b0c XL |
/// Checks the emoji widths used as the example in the README.
#[test]
fn test_emoji() {
    // Example from the README.
    use super::UnicodeWidthStr;

    assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman
    assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope

    // Woman scientist is the sequence WOMAN + ZERO WIDTH JOINER + MICROSCOPE.
    // The joiner is invisible and easily lost when the file is copied or
    // rendered, so the sequence is spelled out with escapes.
    assert_eq!(UnicodeWidthStr::width("\u{1F469}\u{200D}\u{1F52C}"), 4); // Woman scientist
}
136 | ||
8bb4bdeb XL |
/// Checks `UnicodeWidthChar::width` and `width_cjk` on single characters:
/// a fullwidth letter, NUL, a control character, and a subscript digit whose
/// width differs between the two methods.
#[test]
fn test_char() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H, written as an escape so it cannot
    // be silently normalized to ASCII 'h', whose width would be 1 rather than 2.
    assert_eq!(UnicodeWidthChar::width('\u{FF48}'), Some(2));
    assert_eq!('\u{FF48}'.width_cjk(), Some(2));

    // NUL has width zero; other control characters have no width at all.
    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));
    assert_eq!(UnicodeWidthChar::width('\x01'), None);
    assert_eq!('\x01'.width_cjk(), None);

    // U+2081 SUBSCRIPT ONE: one column normally, two in CJK mode.
    assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
    assert_eq!('\u{2081}'.width_cjk(), Some(2));
}
152 | ||
/// Checks `UnicodeWidthChar::width` and `width_cjk` on a wider selection of
/// single characters, one pair of assertions per character.
#[test]
fn test_char2() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    // NUL.
    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));

    // Line feed, a control character.
    assert_eq!(UnicodeWidthChar::width('\x0A'), None);
    assert_eq!('\x0A'.width_cjk(), None);

    // Plain ASCII letter.
    assert_eq!(UnicodeWidthChar::width('w'), Some(1));
    assert_eq!('w'.width_cjk(), Some(1));

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H, written as an escape so it cannot
    // be silently normalized to ASCII 'h', whose width would be 1 rather than 2.
    assert_eq!(UnicodeWidthChar::width('\u{FF48}'), Some(2));
    assert_eq!('\u{FF48}'.width_cjk(), Some(2));

    // U+00AD SOFT HYPHEN.
    assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(1));
    assert_eq!('\u{AD}'.width_cjk(), Some(1));

    // U+1160 HANGUL JUNGSEONG FILLER.
    assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0));
    assert_eq!('\u{1160}'.width_cjk(), Some(0));

    // U+00A1 INVERTED EXCLAMATION MARK: one column normally, two in CJK mode.
    assert_eq!(UnicodeWidthChar::width('\u{a1}'), Some(1));
    assert_eq!('\u{a1}'.width_cjk(), Some(2));

    // U+0300 COMBINING GRAVE ACCENT.
    assert_eq!(UnicodeWidthChar::width('\u{300}'), Some(0));
    assert_eq!('\u{300}'.width_cjk(), Some(0));
}
3dfed10e XL |
183 | |
/// Checks that U+1F971 is reported as two columns wide.
// NOTE(review): the test name suggests this code point was chosen as a
// Unicode 12 addition — confirm.
#[test]
fn unicode_12() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    let sample = '\u{1F971}';
    assert_eq!(UnicodeWidthChar::width(sample), Some(2));
}