1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
5 //! Zero-copy vector abstractions for arbitrary types, backed by byte slices.
7 //! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in
8 //! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with
11 //! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing
14 //! This crate has four main types:
16 //! - [`ZeroVec<'a, T>`] (and [`ZeroSlice<T>`](ZeroSlice)) for fixed-width types like `u32`
17 //! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice<T>`](VarZeroSlice)) for variable-width types like `str`
18 //! - [`ZeroMap<'a, K, V>`] to map from `K` to `V`
19 //! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V`
21 //! The first two are intended as close-to-drop-in replacements for `Vec<T>` in Serde structs. The third and fourth are
22 //! intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply
23 //! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`].
25 //! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like
26 //! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization
27 //! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing
28 //! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from,
29 //! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information)
30 //! on deserialization.
32 //! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate
33 //! works under the hood.
37 //! This crate has several optional Cargo features:
38 //! - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde)
39 //! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful
40 //! in situations involving a lot of zero-copy deserialization.
41 //! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and
42 //! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and
43 //! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type.
44 //! - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`.
46 //! [`ZeroVec<'a, T>`]: ZeroVec
47 //! [`VarZeroVec<'a, T>`]: VarZeroVec
48 //! [`ZeroMap<'a, K, V>`]: ZeroMap
49 //! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d
50 //! [`Cow<'a, T>`]: alloc::borrow::Cow
54 //! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode:
57 //! # #[cfg(feature = "serde")] {
58 //! use zerovec::{VarZeroVec, ZeroVec};
60 //! // This example requires the "serde" feature
61 //! #[derive(serde::Serialize, serde::Deserialize)]
62 //! pub struct DataStruct<'data> {
64 //! nums: ZeroVec<'data, u32>,
66 //! chars: ZeroVec<'data, char>,
68 //! strs: VarZeroVec<'data, str>,
71 //! let data = DataStruct {
72 //! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]),
73 //! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']),
74 //! strs: VarZeroVec::from(&["hello", "world"]),
76 //! let bincode_bytes =
77 //! bincode::serialize(&data).expect("Serialization should be successful");
78 //! assert_eq!(bincode_bytes.len(), 67);
80 //! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes)
81 //! .expect("Deserialization should be successful");
82 //! assert_eq!(deserialized.nums.first(), Some(211));
83 //! assert_eq!(deserialized.chars.get(1), Some('冇'));
84 //! assert_eq!(deserialized.strs.get(1), Some("world"));
85 //! // The deserialization will not have allocated anything
86 //! assert!(!deserialized.nums.is_owned());
87 //! # } // feature = "serde"
90 //! Use custom types inside of ZeroVec:
93 //! # #[cfg(all(feature = "serde", feature = "derive"))] {
94 //! use zerovec::{ZeroVec, VarZeroVec, ZeroMap};
95 //! use std::borrow::Cow;
96 //! use zerovec::ule::encode_varule_to_box;
98 //! // custom fixed-size ULE type for ZeroVec
99 //! #[zerovec::make_ule(DateULE)]
100 //! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
107 //! // custom variable sized VarULE type for VarZeroVec
108 //! #[zerovec::make_varule(PersonULE)]
109 //! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE
110 //! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
111 //! struct Person<'a> {
113 //! favorite_character: char,
115 //! name: Cow<'a, str>,
118 //! #[derive(serde::Serialize, serde::Deserialize)]
119 //! struct Data<'a> {
121 //! important_dates: ZeroVec<'a, Date>,
122 //! // note: VarZeroVec always must reference the ULE type directly
124 //! important_people: VarZeroVec<'a, PersonULE>,
126 //! birthdays_to_people: ZeroMap<'a, Date, PersonULE>
130 //! let person1 = Person {
131 //! birthday: Date { y: 1990, m: 9, d: 7},
132 //! favorite_character: 'π',
133 //! name: Cow::from("Kate")
135 //! let person2 = Person {
136 //! birthday: Date { y: 1960, m: 5, d: 25},
137 //! favorite_character: '冇',
138 //! name: Cow::from("Jesse")
141 //! let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]);
142 //! let important_people = VarZeroVec::from(&[&person1, &person2]);
143 //! let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new();
144 //! // `.insert_var_v()` is slightly more convenient than `.insert()` for custom ULE types
145 //! birthdays_to_people.insert_var_v(&person1.birthday, &person1);
146 //! birthdays_to_people.insert_var_v(&person2.birthday, &person2);
148 //! let data = Data { important_dates, important_people, birthdays_to_people };
150 //! let bincode_bytes = bincode::serialize(&data)
151 //! .expect("Serialization should be successful");
152 //! assert_eq!(bincode_bytes.len(), 168);
154 //! let deserialized: Data = bincode::deserialize(&bincode_bytes)
155 //! .expect("Deserialization should be successful");
157 //! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943);
158 //! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse");
159 //! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate");
160 //! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate");
162 //! # } // feature = serde and derive
167 //! `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations
168 //! while minimizing performance regressions for common vector operations.
170 //! Benchmark results on x86_64:
172 //! | Operation | `Vec<T>` | `zerovec` |
174 //! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns |
175 //! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns |
176 //! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns |
177 //! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs |
178 //! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns |
179 //! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns |
181 //! \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.*
183 //! | Operation | `HashMap<K,V>` | `LiteMap<K,V>` | `ZeroMap<K,V>` |
184 //! |---|---|---|---|
185 //! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns |
186 //! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms |
187 //! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns |
188 //! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns |
190 //! Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`.
192 //! The benches used to generate the above table can be found in the `benches` directory in the project repository.
193 //! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type
194 //! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`.
196 // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
197 #![cfg_attr(not(any(test, feature = "std")), no_std)]
201 clippy
::indexing_slicing
,
205 clippy
::exhaustive_structs
,
206 clippy
::exhaustive_enums
,
207 missing_debug_implementations
,
210 // this crate does a lot of nuanced lifetime manipulation, being explicit
212 #![allow(clippy::needless_lifetimes)]
218 #[cfg(feature = "hashmap")]
227 // This must be after `mod zerovec` for some impls on `ZeroSlice<RawBytesULE>`
228 // to show up in the right spot in the docs
231 #[cfg(feature = "yoke")]
235 pub use crate::error
::ZeroVecError
;
236 #[cfg(feature = "hashmap")]
237 pub use crate::hashmap
::ZeroHashMap
;
238 pub use crate::map
::map
::ZeroMap
;
239 pub use crate::map2d
::map
::ZeroMap2d
;
240 pub use crate::varzerovec
::{slice::VarZeroSlice, vec::VarZeroVec}
;
241 pub use crate::zerovec
::{ZeroSlice, ZeroVec}
;
243 pub(crate) use flexzerovec
::chunk_to_usize
;
246 pub mod __zerovec_internal_reexport
{
247 pub use zerofrom
::ZeroFrom
;
249 pub use alloc
::boxed
;
251 #[cfg(feature = "serde")]
256 //! This module contains additional utility types and traits for working with
257 //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose
260 //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`]
261 //! that can be used when you wish to guarantee that the map data is always borrowed, leading to
262 //! relaxed lifetime constraints.
264 //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used
265 //! within a map type. [`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the
266 //! internal workings of the map types, and should typically not be used or implemented by
267 //! users of this crate.
269 pub use crate::map
::ZeroMap
;
270 pub use crate::map
::ZeroMapBorrowed
;
273 pub use crate::map2d
::ZeroMap2d
;
274 pub use crate::map2d
::ZeroMap2dBorrowed
;
276 pub use crate::map
::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}
;
278 pub use crate::map2d
::ZeroMap2dCursor
;
282 //! This module contains additional utility types for working with
283 //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose
286 //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types
287 //! for use behind references and in custom ULE types.
289 //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing
290 //! direct manipulation of the backing buffer.
293 pub use crate::zerovec
::{ZeroSlice, ZeroVec}
;
296 pub use crate::varzerovec
::{VarZeroSlice, VarZeroVec}
;
298 pub use crate::varzerovec
::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned}
;
300 pub use crate::flexzerovec
::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned}
;
303 // Proc macro reexports
305 // These exist so that our docs can use intra-doc links.
306 // Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
309 /// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type
311 /// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]`
312 /// and all explicit discriminants.
314 /// The type must be [`Copy`], [`PartialEq`], and [`Eq`].
316 /// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type:
318 /// - [`Ord`] and [`PartialOrd`]
321 /// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
322 /// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
324 /// The following traits are available to derive, but not automatic:
328 /// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`.
330 /// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist.
332 /// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>`
333 /// method on the main type that allows one to construct the value from a u8. If this method is desired
334 /// to be more public, it should be wrapped.
336 /// [`ULE`]: ule::ULE
337 /// [`AsULE`]: ule::AsULE
338 /// [`ZeroMapKV`]: maps::ZeroMapKV
343 /// use zerovec::ZeroVec;
345 /// #[zerovec::make_ule(DateULE)]
353 /// serde::Serialize,
354 /// serde::Deserialize,
362 /// #[derive(serde::Serialize, serde::Deserialize)]
363 /// struct Dates<'a> {
365 /// dates: ZeroVec<'a, Date>,
368 /// let dates = Dates {
369 /// dates: ZeroVec::alloc_from_slice(&[
388 /// let bincode_bytes =
389 /// bincode::serialize(&dates).expect("Serialization should be successful");
391 /// // Will deserialize without allocations
392 /// let deserialized: Dates = bincode::deserialize(&bincode_bytes)
393 /// .expect("Deserialization should be successful");
395 /// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970);
396 /// assert_eq!(deserialized.dates.get(2).unwrap().d, 13);
398 #[cfg(feature = "derive")]
399 pub use zerovec_derive
::make_ule
;
401 /// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`]
402 /// implementations for this type
404 /// This can be attached to structs containing only [`AsULE`] types with the last fields being
405 /// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. If there is more than one such field, it will be represented
406 /// using [`MultiFieldsULE`](crate::ule::MultiFieldsULE) and getters will be generated. Other VarULE fields will be detected if they are
407 /// tagged with `#[zerovec::varule(NameOfVarULETy)]`.
409 /// The type must be [`PartialEq`] and [`Eq`].
411 /// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with
412 /// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`]
413 /// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below
414 /// for more details).
416 /// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type:
418 /// - [`Ord`] and [`PartialOrd`]
421 /// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
422 /// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
424 /// The following traits are available to derive, but not automatic:
427 /// - [`Serialize`](serde::Serialize)
428 /// - [`Deserialize`](serde::Deserialize)
430 /// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`.
432 /// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist.
434 /// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on
435 /// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]`.
437 /// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self`
438 /// for convenience. This allows for a little more flexibility encoding slices.
440 /// [`EncodeAsVarULE`]: ule::EncodeAsVarULE
441 /// [`VarULE`]: ule::VarULE
442 /// [`ULE`]: ule::ULE
443 /// [`AsULE`]: ule::AsULE
444 /// [`ZeroMapKV`]: maps::ZeroMapKV
449 /// use std::borrow::Cow;
450 /// use zerofrom::ZeroFrom;
451 /// use zerovec::ule::encode_varule_to_box;
452 /// use zerovec::{VarZeroVec, ZeroMap, ZeroVec};
454 /// // custom fixed-size ULE type for ZeroVec
455 /// #[zerovec::make_ule(DateULE)]
456 /// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
463 /// // custom variable sized VarULE type for VarZeroVec
464 /// #[zerovec::make_varule(PersonULE)]
465 /// #[zerovec::derive(Serialize, Deserialize)]
466 /// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
467 /// struct Person<'a> {
469 /// favorite_character: char,
471 /// name: Cow<'a, str>,
474 /// #[derive(serde::Serialize, serde::Deserialize)]
475 /// struct Data<'a> {
476 /// // note: VarZeroVec always must reference the ULE type directly
478 /// important_people: VarZeroVec<'a, PersonULE>,
481 /// let person1 = Person {
487 /// favorite_character: 'π',
488 /// name: Cow::from("Kate"),
490 /// let person2 = Person {
496 /// favorite_character: '冇',
497 /// name: Cow::from("Jesse"),
500 /// let important_people = VarZeroVec::from(&[person1, person2]);
501 /// let data = Data { important_people };
503 /// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful");
505 /// // Will deserialize without allocations
506 /// let deserialized: Data =
507 /// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful");
509 /// assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse");
510 /// assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate");
512 /// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom
513 /// // to recoup Person values in a zero-copy way
514 /// let person_converted: Person =
515 /// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap());
516 /// assert_eq!(person_converted.name, "Jesse");
517 /// assert_eq!(person_converted.birthday.y, 1960);
519 #[cfg(feature = "derive")]
520 pub use zerovec_derive
::make_varule
;
525 use core
::mem
::size_of
;
527 /// Checks that the size of the type is one of the given sizes.
528 /// The size might differ across Rust versions or channels.
529 macro_rules
! check_size_of
{
530 ($sizes
:pat
, $
type:path
) => {
532 matches
!(size_of
::<$
type>(), $sizes
),
533 concat
!(stringify
!($
type), " is of size {}"),
541 check_size_of
!(24, ZeroVec
<u8>);
542 check_size_of
!(24, ZeroVec
<u32>);
543 check_size_of
!(32 | 24, VarZeroVec
<[u8]>);
544 check_size_of
!(32 | 24, VarZeroVec
<str>);
545 check_size_of
!(48, ZeroMap
<u32, u32>);
546 check_size_of
!(56 | 48, ZeroMap
<u32, str>);
547 check_size_of
!(56 | 48, ZeroMap
<str, u32>);
548 check_size_of
!(64 | 48, ZeroMap
<str, str>);
549 check_size_of
!(120 | 96, ZeroMap2d
<str, str, str>);
550 check_size_of
!(32 | 24, vecs
::FlexZeroVec
);
552 check_size_of
!(32, Option
<ZeroVec
<u8>>);
553 check_size_of
!(32, Option
<VarZeroVec
<str>>);
554 check_size_of
!(64 | 56, Option
<ZeroMap
<str, str>>);
555 check_size_of
!(120 | 104, Option
<ZeroMap2d
<str, str, str>>);
556 check_size_of
!(32, Option
<vecs
::FlexZeroVec
>);