3 use std
::cell
::RefCell
;
4 use std
::collections
::hash_map
::Entry
;
5 use std
::collections
::HashMap
;
6 use std
::convert
::TryInto
;
7 use std
::io
::{Read, Seek, SeekFrom}
;
11 use crate::read
::ReadRef
;
13 /// An implementation of `ReadRef` for data in a stream that implements
16 /// Contains a cache of read-only blocks of data, allowing references to
17 /// them to be returned. Entries in the cache are never removed.
18 /// Entries are keyed on the offset and size of the read.
19 /// Currently overlapping reads are considered separate reads.
21 pub struct ReadCache
<R
: Read
+ Seek
> {
22 cache
: RefCell
<ReadCacheInternal
<R
>>,
/// Mutable state held inside a `ReadCache`: the stream and everything that
/// has been read from it so far.
struct ReadCacheInternal<R: Read + Seek> {
    /// The underlying stream that cache misses are read from.
    read: R,
    /// Results of fixed-size reads, keyed by `(offset, size)`.
    bufs: HashMap<(u64, u64), Box<[u8]>>,
    /// Results of delimiter-terminated reads, keyed by
    /// `(start offset, delimiter)`.
    strings: HashMap<(u64, u8), Box<[u8]>>,
}
32 impl<R
: Read
+ Seek
> ReadCache
<R
> {
33 /// Create an empty `ReadCache` for the given stream.
34 pub fn new(read
: R
) -> Self {
36 cache
: RefCell
::new(ReadCacheInternal
{
39 strings
: HashMap
::new(),
44 /// Return an implementation of `ReadRef` that restricts reads
45 /// to the given range of the stream.
46 pub fn range(&self, offset
: u64, size
: u64) -> ReadCacheRange
<'_
, R
> {
54 /// Free buffers used by the cache.
55 pub fn clear(&mut self) {
56 self.cache
.borrow_mut().bufs
.clear();
59 /// Unwrap this `ReadCache<R>`, returning the underlying reader.
60 pub fn into_inner(self) -> R
{
61 self.cache
.into_inner().read
65 impl<'a
, R
: Read
+ Seek
> ReadRef
<'a
> for &'a ReadCache
<R
> {
66 fn len(self) -> Result
<u64, ()> {
67 let cache
= &mut *self.cache
.borrow_mut();
68 cache
.read
.seek(SeekFrom
::End(0)).map_err(|_
| ())
71 fn read_bytes_at(self, offset
: u64, size
: u64) -> Result
<&'a
[u8], ()> {
75 let cache
= &mut *self.cache
.borrow_mut();
76 let buf
= match cache
.bufs
.entry((offset
, size
)) {
77 Entry
::Occupied(entry
) => entry
.into_mut(),
78 Entry
::Vacant(entry
) => {
79 let size
= size
.try_into().map_err(|_
| ())?
;
82 .seek(SeekFrom
::Start(offset
as u64))
84 let mut bytes
= vec
![0; size
].into_boxed_slice();
85 cache
.read
.read_exact(&mut bytes
).map_err(|_
| ())?
;
89 // Extend the lifetime to that of self.
90 // This is OK because we never mutate or remove entries.
91 Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) }
)
94 fn read_bytes_at_until(self, range
: Range
<u64>, delimiter
: u8) -> Result
<&'a
[u8], ()> {
95 let cache
= &mut *self.cache
.borrow_mut();
96 let buf
= match cache
.strings
.entry((range
.start
, delimiter
)) {
97 Entry
::Occupied(entry
) => entry
.into_mut(),
98 Entry
::Vacant(entry
) => {
101 .seek(SeekFrom
::Start(range
.start
))
104 let max_check
: usize = (range
.end
- range
.start
).try_into().map_err(|_
| ())?
;
105 // Strings should be relatively small.
106 // TODO: make this configurable?
107 let max_check
= max_check
.min(4096);
109 let mut bytes
= Vec
::new();
112 bytes
.resize((checked
+ 256).min(max_check
), 0);
113 let read
= cache
.read
.read(&mut bytes
[checked
..]).map_err(|_
| ())?
;
117 if let Some(len
) = memchr
::memchr(delimiter
, &bytes
[checked
..][..read
]) {
118 bytes
.truncate(checked
+ len
);
119 break entry
.insert(bytes
.into_boxed_slice());
122 if checked
>= max_check
{
128 // Extend the lifetime to that of self.
129 // This is OK because we never mutate or remove entries.
130 Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) }
)
134 /// An implementation of `ReadRef` for a range of data in a stream that
135 /// implements `Read + Seek`.
137 /// Shares an underlying `ReadCache` with a lifetime of `'a`.
139 pub struct ReadCacheRange
<'a
, R
: Read
+ Seek
> {
145 impl<'a
, R
: Read
+ Seek
> Clone
for ReadCacheRange
<'a
, R
> {
146 fn clone(&self) -> Self {
155 impl<'a
, R
: Read
+ Seek
> Copy
for ReadCacheRange
<'a
, R
> {}
157 impl<'a
, R
: Read
+ Seek
> ReadRef
<'a
> for ReadCacheRange
<'a
, R
> {
158 fn len(self) -> Result
<u64, ()> {
162 fn read_bytes_at(self, offset
: u64, size
: u64) -> Result
<&'a
[u8], ()> {
166 let end
= offset
.checked_add(size
).ok_or(())?
;
170 let r_offset
= self.offset
.checked_add(offset
).ok_or(())?
;
171 self.r
.read_bytes_at(r_offset
, size
)
174 fn read_bytes_at_until(self, range
: Range
<u64>, delimiter
: u8) -> Result
<&'a
[u8], ()> {
175 let r_start
= self.offset
.checked_add(range
.start
).ok_or(())?
;
176 let r_end
= self.offset
.checked_add(range
.end
).ok_or(())?
;
177 let bytes
= self.r
.read_bytes_at_until(r_start
..r_end
, delimiter
)?
;
178 let size
= bytes
.len().try_into().map_err(|_
| ())?
;
179 let end
= range
.start
.checked_add(size
).ok_or(())?
;