1 use std
::cmp
::Ordering
;
5 use std
::marker
::PhantomData
;
9 use std
::str::Utf8Error
;
11 use crate::raw
::RawYarn
;
12 use crate::Utf8Chunks
;
18 /// An optimized, possibly heap-allocated string type.
20 /// This is the core data structure of `byteyarn`. It is a string that can be
21 /// borrowed, boxed, or inlined. Generally, you'll want to use the [`Yarn`]
22 /// or [`ByteYarn`] type aliases directly, instead.
24 /// The lifetime `'a` is the shortest lifetime this yarn can borrow for; often,
25 /// this will be `'static`.
27 /// See the [crate documentation](crate) for general information.
29 pub struct YarnBox
<'a
, Buf
= [u8]>
31 Buf
: crate::Buf
+ ?Sized
,
34 _ph
: PhantomData
<&'a Buf
>,
37 impl<'a
, Buf
> YarnBox
<'a
, Buf
>
39 Buf
: crate::Buf
+ ?Sized
,
41 /// Returns a reference to an empty yarn of any lifetime.
44 /// # use byteyarn::*;
45 /// let empty: &Yarn = Yarn::empty();
46 /// assert_eq!(empty, "");
49 /// This will also be found by the `Default` impl for `&YarnBox`.
50 pub fn empty
<'b
>() -> &'b
Self {
52 // SAFETY: YarnBox is a transparent wrapper over RawYarn; even though
53 // YarnBox has a destructor, this is fine, because this lifetime is 'static
54 // and will thus never run a destructor.
55 mem
::transmute
::<&'b RawYarn
, &'b
Self>(RawYarn
::empty())
59 /// Returns a yarn pointing to the given slice, without copying.
62 /// # use byteyarn::*;
63 /// let foo = Yarn::new("Byzantium");
64 /// assert_eq!(foo.len(), 9);
66 pub const fn new(buf
: &'a Buf
) -> Self {
67 YarnRef
::new(buf
).to_box()
70 /// Returns a new yarn containing the contents of the given slice.
71 /// This function will always return an inlined string, or `None` if the
72 /// given buffer is too big.
74 /// Note that the maximum inlined size is architecture-dependent.
77 /// # use byteyarn::*;
78 /// let smol = Yarn::inlined("smol");
79 /// assert_eq!(smol.unwrap(), "smol");
81 /// let big = Yarn::inlined("biiiiiiiiiiiiiiig");
82 /// assert!(big.is_none());
84 pub const fn inlined(buf
: &Buf
) -> Option
<Self> {
85 match YarnRef
::inlined(buf
) {
86 Some(y
) => Some(y
.to_box()),
91 /// Returns a new yarn that aliases the contents of this yarn.
93 /// In effect, this is like `Copy`ing out of `*self`, by shortening the
94 /// lifetime of the yarn.
97 /// # use byteyarn::*;
98 /// /// Joins two yarns with "and", but re-uses the buffer if one of them is
100 /// fn and<'a>(a: Option<&'a YarnBox<str>>, b: Option<&'a YarnBox<str>>) -> YarnBox<'a, str> {
102 /// (Some(a), Some(b)) => yarn!("{a} and {b}"),
103 /// (Some(a), None) => a.aliased(),
104 /// (None, Some(b)) => b.aliased(),
105 /// (None, None) => Yarn::default(),
109 /// assert_eq!(and(Some(&yarn!("apples")), Some(&yarn!("oranges"))), "apples and oranges");
110 /// assert_eq!(and(Some(&yarn!("apples")), None), "apples");
111 /// assert_eq!(and(None, None), "");
114 /// This function will be found by `From` impls from `&YarnBox`.
116 /// Note also that unlike `YarnBox::new(y.as_ref())`, this will ensure the
117 /// yarn remembers that it's a static string.
120 /// # use byteyarn::*;
123 /// let lit = Yarn::from_static("nice long static string constant");
125 /// // Immortalizing the aliased yarn does not require a new heap allocation.
126 /// assert!(ptr::eq(lit.aliased().immortalize().as_slice(), lit.as_slice()));
128 /// // We forgot this yarn was static, so immortalization requires a copy.
129 /// assert!(!ptr::eq(YarnBox::<str>::new(&lit).immortalize().as_slice(), lit.as_slice()));
131 pub const fn aliased(&self) -> YarnBox
<Buf
> {
132 // NOTE: going through YarnRef will ensure we preserve static-ness.
133 self.as_ref().to_box()
136 /// Returns a yarn containing a single UTF-8-encoded Unicode scalar.
137 /// This function does not allocate: every `char` fits in an inlined yarn.
140 /// # use byteyarn::*;
141 /// let a = Yarn::from_char('a');
142 /// assert_eq!(a, "a");
144 pub const fn from_char(c
: char) -> Self {
145 YarnRef
::<Buf
>::from_char(c
).to_box()
148 /// Returns a yarn by taking ownership of an allocation.
151 /// # use byteyarn::*;
152 /// let str = String::from("big string box").into_boxed_str();
153 /// let yarn = Yarn::from_boxed_str(str);
154 /// assert_eq!(yarn, "big string box");
156 pub fn from_boxed_str(string
: Box
<str>) -> Self {
157 let raw
= RawYarn
::from_heap(string
.into());
159 // SAFETY: both [u8] and str can be safely constructed from a str. We have
160 // unique ownership of raw's allocation because from_heap guarantees it.
165 /// Returns a yarn by taking ownership of an allocation.
168 /// # use byteyarn::*;
169 /// let str = String::from("big string box");
170 /// let yarn = Yarn::from_string(str);
171 /// assert_eq!(yarn, "big string box");
173 pub fn from_string(string
: String
) -> Self {
174 Self::from_boxed_str(string
.into())
177 /// Checks whether this yarn is empty.
180 /// # use byteyarn::*;
181 /// assert!(yarn!("").is_empty());
182 /// assert!(!yarn!("xyz").is_empty());
184 pub const fn is_empty(&self) -> bool
{
185 self.as_ref().is_empty()
188 /// Returns the length of this yarn, in bytes.
191 /// # use byteyarn::*;
192 /// assert_eq!(yarn!("").len(), 0);
193 /// assert_eq!(yarn!("42").len(), 2);
194 /// assert_eq!(yarn!("猫").len(), 3);
195 /// assert_eq!(yarn!("🐈⬛").len(), 10);
197 /// assert_eq!(ByteYarn::new(b"").len(), 0);
198 /// assert_eq!(ByteYarn::new(b"xyz").len(), 3);
199 /// assert_eq!(ByteYarn::new(&[1, 2, 3]).len(), 3);
201 pub const fn len(&self) -> usize {
205 /// Converts this yarn into a slice.
208 /// # use byteyarn::*;
209 /// let yarn = yarn!("jellybeans");
210 /// let s: &str = yarn.as_slice();
211 /// assert_eq!(s, "jellybeans");
213 /// let yarn = ByteYarn::new(b"jellybeans");
214 /// let s: &[u8] = yarn.as_slice();
215 /// assert_eq!(s, b"jellybeans");
217 pub const fn as_slice(&self) -> &Buf
{
219 // SAFETY: converting back to buf from raw is ok here because this is
220 // evidently a round-trip.
221 YarnRef
::raw2buf(self.as_bytes())
225 /// Converts this owning yarn into a reference yarn.
228 /// # use byteyarn::*;
229 /// let yarn = yarn!("jellybeans");
230 /// let ry = yarn.as_ref();
231 /// assert_eq!(ry, "jellybeans");
233 pub const fn as_ref(&self) -> YarnRef
<Buf
> {
234 if let Some(inl
) = YarnRef
::inlined(self.as_slice()) {
238 let raw
= match self.raw
.on_heap() {
240 // SAFETY: The returned YarnRef will prevent self from being used
241 // until this raw yarn goes away, because it borrows self.
242 RawYarn
::alias_slice(self.as_bytes())
248 // SAFETY: The lifetime of the output is shorter than that of
249 // the input, so raw is valid for a yarn reference. Even in the case
250 // that self.on_heap, the aliased slice will not outlive the &self of
252 YarnRef
::from_raw(raw
)
256 /// Converts this owning yarn into a reference yarn, with the same lifetime
259 /// Note that if this yarn is on the heap, this function will return `None`.
262 /// # use byteyarn::*;
263 /// let yarn = yarn!("lots and lots of jellybeans");
264 /// assert_eq!(yarn.to_ref().unwrap(), "lots and lots of jellybeans");
266 /// let boxed = Yarn::from_string(String::from("lots and lots of jellybeans"));
267 /// assert!(boxed.to_ref().is_none());
269 pub const fn to_ref(&self) -> Option
<YarnRef
<'a
, Buf
>> {
270 if self.raw
.on_heap() {
275 // SAFETY: The lifetime of the output is equal than that of
276 // the input, so raw is valid for a yarn reference. We have excluded the
277 // on_heap case above.
278 Some(YarnRef
::from_raw(self.raw
))
282 /// Converts this yarn into a byte slice.
284 /// # use byteyarn::*;
285 /// assert_eq!(yarn!("").as_bytes(), b"");
286 /// assert_eq!(yarn!("猫").as_bytes(), b"\xE7\x8C\xAB");
288 /// assert_eq!(ByteYarn::new(b"xyz").as_bytes(), b"xyz");
289 /// assert_eq!(ByteYarn::new(&[1, 2, 3]).as_bytes(), [1, 2, 3]);
291 pub const fn as_bytes(&self) -> &[u8] {
295 /// Converts this yarn into a boxed slice, potentially by copying it.
298 /// # use byteyarn::*;
299 /// let boxed = yarn!("jellybeans").into_boxed_bytes();
300 /// assert_eq!(&boxed[..], b"jellybeans");
302 pub fn into_boxed_bytes(self) -> Box
<[u8]> {
303 let mut raw
= self.into_raw();
305 return raw
.as_slice().into();
309 // SAFETY: raw is guaranteed to be on the heap, so this slice is on the
310 // heap with the correct layout; because we called into_raw(), this
311 // reference is uniquely owned.
312 Box
::from_raw(raw
.as_mut_slice())
316 /// Converts this yarn into a vector, potentially by copying it.
319 /// # use byteyarn::*;
320 /// let mut vec = ByteYarn::new(b"jellybeans").into_vec();
321 /// vec.extend_from_slice(b" & KNUCKLES");
322 /// let yarn = ByteYarn::from_vec(vec);
324 /// assert_eq!(yarn, b"jellybeans & KNUCKLES");
326 pub fn into_vec(self) -> Vec
<u8> {
327 self.into_boxed_bytes().into()
330 /// Converts this yarn into a byte yarn.
331 pub const fn into_bytes(self) -> YarnBox
<'a
, [u8]> {
333 // SAFETY: The lifetimes are the same, and [u8] is constructible from
334 // either a [u8] or str, so this is just weakening the user-facing type.
335 YarnBox
::from_raw(self.into_raw())
339 /// Extends the lifetime of this yarn if this yarn is dynamically known to
340 /// point to immortal memory.
342 /// If it doesn't, the contents are copied into a fresh heap allocation.
345 /// # use byteyarn::*;
346 /// let bytes = Vec::from(*b"crunchcrunchcrunch");
347 /// let yarn = YarnBox::new(&*bytes);
349 /// let immortal: ByteYarn = yarn.immortalize();
350 /// drop(bytes); // Show that yarn continues to exist despite `bytes` going
353 /// assert_eq!(immortal, b"crunchcrunchcrunch");
355 pub fn immortalize(self) -> YarnBox
<'
static, Buf
> {
356 if self.raw
.is_immortal() {
358 // SAFETY: We just validated that this raw is in fact suitable for use
359 // with 'static lifetime, and all this cast is doing is extending the
361 return YarnBox
::from_raw(self.into_raw());
365 let raw
= RawYarn
::copy_slice(self.as_bytes());
367 // SAFETY: RawYarn::copy_slice always returns an immortal, uniquely-owned
369 YarnBox
::from_raw(raw
)
373 /// Returns a yarn consisting of the concatenation of the given slices.
375 /// Does not allocate if the resulting concatenation can be inlined.
378 /// # use byteyarn::*;
379 /// let yarn = Yarn::concat(&["foo", "bar", "baz"]);
380 /// assert_eq!(yarn, "foobarbaz");
382 pub fn concat(bufs
: &[impl AsRef
<Buf
>]) -> Self {
385 .map(|b
| YarnRef
::buf2raw(b
.as_ref()).len())
387 let iter
= bufs
.iter().map(|b
| YarnRef
::buf2raw(b
.as_ref()));
389 unsafe { Self::from_raw(RawYarn::concat(total_len, iter)) }
392 /// Tries to inline this yarn, if it's small enough.
394 /// This operation has no directly visible side effects, and is only intended
395 /// to provide a way to relieve memory pressure. In general, you should not
396 /// have to call this function directly.
397 pub fn inline_in_place(&mut self) {
398 if let Some(inlined
) = Self::inlined(self.as_slice()) {
403 /// Leaks any heap allocation associated with this yarn.
405 /// The allocation is tagged as "static", so upcasting via
406 /// [`Yarn::immortalize()`] will not need to reallocate.
407 pub fn leak(&mut self) {
408 if !self.raw
.on_heap() {
413 // SAFETY: We have unique ownership of this yarn, and we know it's HEAP,
414 // so updating the tag from HEAP to STATIC will not change anything
415 // except to make it immutable and to inhibit the destructor.
416 self.raw
= RawYarn
::from_ptr_len_tag(
417 self.as_bytes().as_ptr(),
424 /// Returns an iterator over the UTF-8 (or otherwise) chunks in this string.
426 /// This iterator is also used for the `Debug` and `Display` formatter
430 /// # use byteyarn::*;
431 /// let yarn = ByteYarn::new(b"abc\xFF\xFE\xFF\xF0\x9F\x90\x88\xE2\x80\x8D\xE2\xAC\x9B!");
432 /// let chunks = yarn.utf8_chunks().collect::<Vec<_>>();
433 /// assert_eq!(chunks, [
435 /// Err(&[0xff][..]),
436 /// Err(&[0xfe][..]),
437 /// Err(&[0xff][..]),
441 /// assert_eq!(format!("{yarn:?}"), r#""abc\xFF\xFE\xFF🐈\u{200d}⬛!""#);
442 /// assert_eq!(format!("{yarn}"), "abc���🐈⬛!");
444 pub fn utf8_chunks(&self) -> Utf8Chunks
{
445 Utf8Chunks
::new(self.as_bytes())
448 /// Returns a new yarn wrapping the given raw yarn.
452 /// If `raw` is aliased, its lifetime must not be shorter than 'a.
454 /// If `raw` is heap-allocated, no other yarn must be holding it.
455 pub(crate) const unsafe fn from_raw(raw
: RawYarn
) -> Self {
462 /// Consumes self, inhibits the destructor, and returns the raw yarn.
463 pub(crate) const fn into_raw(self) -> RawYarn
{
470 impl<Buf
> YarnBox
<'
static, Buf
>
472 Buf
: crate::Buf
+ ?Sized
,
474 /// Returns a yarn pointing to the given slice, without copying. This function
475 /// has the benefit of creating a yarn that remembers that it came from a
476 /// static string, meaning that it can be dynamically upcast back to a
477 /// `'static` lifetime.
479 /// This function will *not* be found by `From` impls.
480 pub const fn from_static(buf
: &'
static Buf
) -> Self {
481 YarnRef
::from_static(buf
).to_box()
485 impl<'a
> YarnBox
<'a
, [u8]> {
486 /// Returns a yarn containing a single byte, without allocating.
489 /// # use byteyarn::*;
490 /// let a = ByteYarn::from_byte(0x20);
491 /// assert_eq!(a, b" ");
493 pub const fn from_byte(c
: u8) -> Self {
494 YarnRef
::from_byte(c
).to_box()
497 /// Returns a yarn by taking ownership of the given allocation.
500 /// # use byteyarn::*;
501 /// let str = Box::new([0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]);
502 /// let yarn = ByteYarn::from_boxed_bytes(str);
503 /// assert_eq!(yarn, "🐈⬛".as_bytes());
505 pub fn from_boxed_bytes(bytes
: Box
<[u8]>) -> Self {
506 let raw
= RawYarn
::from_heap(bytes
);
507 unsafe { Self::from_raw(raw) }
510 /// Returns a yarn by taking ownership of the given allocation.
513 /// # use byteyarn::*;
514 /// let str = vec![0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b];
515 /// let yarn = ByteYarn::from_vec(str);
516 /// assert_eq!(yarn, "🐈⬛".as_bytes());
518 pub fn from_vec(bytes
: Vec
<u8>) -> Self {
519 Self::from_boxed_bytes(bytes
.into_boxed_slice())
522 /// Tries to convert this yarn into a UTF-8 yarn via [`str::from_utf8()`].
525 /// # use byteyarn::*;
526 /// let yarn = ByteYarn::new(&[0xf0, 0x9f, 0x90, 0x88, 0xe2, 0x80, 0x8d, 0xe2, 0xac, 0x9b]);
527 /// assert_eq!(yarn.to_utf8().unwrap(), "🐈⬛");
529 /// assert!(ByteYarn::from_byte(0xff).to_utf8().is_err());
531 pub fn to_utf8(self) -> Result
<YarnBox
<'a
, str>, Utf8Error
> {
532 self.to_utf8_or_bytes().map_err(|(_
, e
)| e
)
535 /// Tries to convert this yarn into a UTF-8 yarn via [`str::from_utf8()`].
537 /// If conversion fails, the original yarn is returned with the error.
540 /// # use byteyarn::*;
541 /// let blob = ByteYarn::new(&[0xff; 5]);
542 /// let (bad, _) = blob.to_utf8_or_bytes().unwrap_err();
544 /// assert_eq!(bad, &[0xff; 5]);
546 pub fn to_utf8_or_bytes(self) -> Result
<YarnBox
<'a
, str>, (Self, Utf8Error
)> {
547 if let Err(e
) = str::from_utf8(self.as_bytes()) {
548 return Err((self, e
));
550 unsafe { Ok(YarnBox::from_raw(self.into_raw())) }
553 /// Returns a mutable reference into this yarn's internal buffer.
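/// Unlike `as_mut()`, this function does not fall back to copying: if the
/// buffer is not uniquely owned (e.g., it is an alias of some other buffer
/// or a string constant) and is too large to be inlined, it returns `None`.
/// Yarns small enough to be inlined are inlined first and can then be
/// mutated in place.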
560 /// # use byteyarn::*;
561 /// let mut yarn = ByteYarn::new(b"const but very long");
562 /// assert!(yarn.try_mut().is_none());
564 /// let mut smol = ByteYarn::new(b"smol const");
565 /// smol.try_mut().unwrap()[3] = b'g';
566 /// assert_eq!(smol, b"smog const");
568 pub fn try_mut(&mut self) -> Option
<&mut [u8]> {
569 self.inline_in_place();
570 if !self.raw
.on_heap() && !self.raw
.is_small() {
577 /// Returns a mutable reference into this yarn's internal buffer.
579 /// If the buffer is not uniquely owned (e.g., it is an alias of some other
580 /// buffer or a string constant) this function will first perform a copy and
581 /// possibly a heap allocation.
584 /// # use byteyarn::*;
585 /// let mut yarn = ByteYarn::new(b"const but very long");
586 /// yarn.as_mut()[17] = b'_';
587 /// assert_eq!(yarn, b"const but very lo_g");
589 #[allow(clippy::should_implement_trait)]
590 pub fn as_mut(&mut self) -> &mut [u8] {
591 self.inline_in_place();
592 if !self.raw
.on_heap() && !self.raw
.is_small() {
593 *self = Self::from_boxed_bytes(mem
::take(self).into_boxed_bytes());
596 unsafe { self.raw.as_mut_slice() }
600 impl YarnBox
<'_
, str> {
601 /// Builds a new yarn from the given formatting arguments
602 /// (see [`format_args!()`]), allocating only when absolutely necessary.
604 /// In general, you'll want to use the [`yarn!()`] macro, instead.
605 pub fn from_fmt(args
: fmt
::Arguments
) -> Self {
606 unsafe { YarnBox::from_raw(RawYarn::from_fmt_args(args)) }
609 /// Converts this yarn into a string slice.
610 pub fn as_str(&self) -> &str {
614 /// Converts this yarn into a boxed slice, potentially by copying it.
615 pub fn into_boxed_str(self) -> Box
<str> {
616 self.into_string().into()
619 /// Converts this yarn into a string, potentially by copying it.
620 pub fn into_string(self) -> String
{
621 unsafe { String::from_utf8_unchecked(self.into_vec()) }
625 impl<Buf
> Deref
for YarnBox
<'_
, Buf
>
627 Buf
: crate::Buf
+ ?Sized
,
630 fn deref(&self) -> &Buf
{
635 impl<Buf
> Drop
for YarnBox
<'_
, Buf
>
637 Buf
: crate::Buf
+ ?Sized
,
640 unsafe { self.raw.destroy() }
644 impl<Buf
> Clone
for YarnBox
<'_
, Buf
>
646 Buf
: crate::Buf
+ ?Sized
,
648 fn clone(&self) -> Self {
649 if let Some(yr
) = self.to_ref() {
653 let copy
= RawYarn
::copy_slice(self.as_bytes());
654 unsafe { Self::from_raw(copy) }
658 impl<Buf
: crate::Buf
+ ?Sized
> fmt
::Debug
for YarnBox
<'_
, Buf
> {
659 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
660 fmt
::Debug
::fmt(&self.as_ref(), f
)
664 impl<Buf
: crate::Buf
+ ?Sized
> fmt
::Display
for YarnBox
<'_
, Buf
> {
665 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
666 fmt
::Display
::fmt(&self.as_ref(), f
)
670 impl<Slice
, Buf
> PartialEq
<Slice
> for YarnBox
<'_
, Buf
>
672 Buf
: crate::Buf
+ ?Sized
,
673 Slice
: AsRef
<Buf
> + ?Sized
,
675 fn eq(&self, that
: &Slice
) -> bool
{
676 self.as_slice() == that
.as_ref()
680 impl<Buf
: crate::Buf
+ Eq
+ ?Sized
> Eq
for YarnBox
<'_
, Buf
> {}
682 impl<Slice
, Buf
> PartialOrd
<Slice
> for YarnBox
<'_
, Buf
>
684 Buf
: crate::Buf
+ ?Sized
,
685 Slice
: AsRef
<Buf
> + ?Sized
,
687 fn partial_cmp(&self, that
: &Slice
) -> Option
<Ordering
> {
688 self.as_slice().partial_cmp(that
.as_ref())
692 impl<Buf
: crate::Buf
+ ?Sized
> Ord
for YarnBox
<'_
, Buf
> {
693 fn cmp(&self, that
: &Self) -> Ordering
{
694 self.as_slice().cmp(that
.as_slice())
698 impl<Buf
: crate::Buf
+ ?Sized
> Hash
for YarnBox
<'_
, Buf
> {
699 fn hash
<H
: Hasher
>(&self, state
: &mut H
) {
700 self.as_slice().hash(state
)
704 impl<Buf
: crate::Buf
+ ?Sized
> Default
for YarnBox
<'_
, Buf
> {
705 fn default() -> Self {
706 <&Self>::default().clone()
710 impl<Buf
: crate::Buf
+ ?Sized
> Default
for &YarnBox
<'_
, Buf
> {
711 fn default() -> Self {