use std::io;
-use bstr::BStr;
-use bstring::BString;
+use ext_slice::ByteSlice;
+use ext_vec::ByteVec;
/// An extention trait for
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
/// Returns an iterator over the lines of this reader, where each line
/// is represented as a byte string.
///
- /// Each item yielded by this iterator is a `io::Result<BString>`, where
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
/// an error is yielded if there was a problem reading from the underlying
/// reader.
///
/// lines.push(line);
/// }
/// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem");
- /// assert_eq!(lines[1], "ipsum");
- /// assert_eq!(lines[2], "dolor");
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
- fn byte_lines(self) -> ByteLines<Self> where Self: Sized {
+ fn byte_lines(self) -> ByteLines<Self>
+ where
+ Self: Sized,
+ {
ByteLines { buf: self }
}
+ /// Returns an iterator over byte-terminated records of this reader, where
+ /// each record is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next record in the iterator is returned. The record
+ /// does *not* contain its trailing terminator.
+ ///
+ /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
+ /// that it has no special handling for `\r`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// for result in cursor.byte_records(b'\x00') {
+ /// let record = result?;
+ /// records.push(record);
+ /// }
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_records(self, terminator: u8) -> ByteRecords<Self>
+ where
+ Self: Sized,
+ {
+ ByteRecords { terminator, buf: self }
+ }
+
/// Executes the given closure on each line in the underlying reader.
///
/// If the closure returns an error (or if the underlying reader returns an
///
/// let mut lines = vec![];
/// cursor.for_byte_line(|line| {
- /// lines.push(line.to_bstring());
+ /// lines.push(line.to_vec());
/// Ok(true)
/// })?;
/// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem");
- /// assert_eq!(lines[1], "ipsum");
- /// assert_eq!(lines[2], "dolor");
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
- fn for_byte_line<F>(
- mut self,
- mut for_each_line: F,
+ fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_line_with_terminator(|line| {
+ for_each_line(&trim_line_slice(&line))
+ })
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
+ /// iterator. Namely, records do _not_ contain a trailing terminator byte.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record<F>(
+ self,
+ terminator: u8,
+ mut for_each_record: F,
) -> io::Result<()>
- where Self: Sized,
- F: FnMut(&BStr) -> io::Result<bool>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
{
- let mut bytes = BString::new();
- while self.read_until(b'\n', bytes.as_mut_vec())? > 0 {
- trim_line(&mut bytes);
- if !for_each_line(&bytes)? {
- break;
- }
- bytes.clear();
- }
- Ok(())
+ self.for_byte_record_with_terminator(terminator, |chunk| {
+ for_each_record(&trim_record_slice(&chunk, terminator))
+ })
}
/// Executes the given closure on each line in the underlying reader.
/// This routine is useful for iterating over lines as quickly as
/// possible. Namely, a single allocation is reused for each line.
///
+ /// This is identical to `for_byte_record_with_terminator` with a
+ /// terminator of `\n`.
+ ///
/// # Examples
///
/// Basic usage:
///
/// let mut lines = vec![];
/// cursor.for_byte_line_with_terminator(|line| {
- /// lines.push(line.to_bstring());
+ /// lines.push(line.to_vec());
/// Ok(true)
/// })?;
/// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem\n");
- /// assert_eq!(lines[1], "ipsum\r\n");
- /// assert_eq!(lines[2], "dolor");
+ /// assert_eq!(lines[0], "lorem\n".as_bytes());
+ /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
/// # Ok(()) }; example().unwrap()
/// ```
fn for_byte_line_with_terminator<F>(
+ self,
+ for_each_line: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_record_with_terminator(b'\n', for_each_line)
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
+ /// the lines given to the closure *do* include the record terminator, if
+ /// one exists.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::B;
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], B(b"lorem\x00"));
+ /// assert_eq!(records[1], B("ipsum\x00"));
+ /// assert_eq!(records[2], B("dolor"));
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record_with_terminator<F>(
mut self,
- mut for_each_line: F,
+ terminator: u8,
+ mut for_each_record: F,
) -> io::Result<()>
- where Self: Sized,
- F: FnMut(&BStr) -> io::Result<bool>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
{
- let mut bytes = BString::new();
- while self.read_until(b'\n', bytes.as_mut_vec())? > 0 {
- if !for_each_line(&bytes)? {
+ let mut bytes = vec![];
+ let mut res = Ok(());
+ let mut consumed = 0;
+ 'outer: loop {
+ // Lend out complete record slices from our buffer
+ {
+ let mut buf = self.fill_buf()?;
+ while let Some(index) = buf.find_byte(terminator) {
+ let (record, rest) = buf.split_at(index + 1);
+ buf = rest;
+ consumed += record.len();
+ match for_each_record(&record) {
+ Ok(false) => break 'outer,
+ Err(err) => {
+ res = Err(err);
+ break 'outer;
+ }
+ _ => (),
+ }
+ }
+
+ // Copy the final record fragment to our local buffer. This
+ // saves read_until() from re-scanning a buffer we know
+ // contains no remaining terminators.
+ bytes.extend_from_slice(&buf);
+ consumed += buf.len();
+ }
+
+ self.consume(consumed);
+ consumed = 0;
+
+ // N.B. read_until uses a different version of memchr that may
+ // be slower than the memchr crate that bstr uses. However, this
+ // should only run for a fairly small number of records, assuming a
+ // decent buffer size.
+ self.read_until(terminator, &mut bytes)?;
+ if bytes.is_empty() || !for_each_record(&bytes)? {
break;
}
bytes.clear();
}
- Ok(())
+ self.consume(consumed);
+ res
}
}
buf: B,
}
+/// An iterator over records from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// A byte record is any sequence of bytes terminated by a particular byte
+/// chosen by the caller. For example, NUL separated byte strings are said to
+/// be NUL-terminated byte records.
+///
+/// This iterator is generally created by calling the
+/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteRecords<B> {
+ buf: B,
+ terminator: u8,
+}
+
impl<B: io::BufRead> Iterator for ByteLines<B> {
- type Item = io::Result<BString>;
+ type Item = io::Result<Vec<u8>>;
- fn next(&mut self) -> Option<io::Result<BString>> {
- let mut bytes = BString::new();
- match self.buf.read_until(b'\n', bytes.as_mut_vec()) {
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(b'\n', &mut bytes) {
Err(e) => Some(Err(e)),
Ok(0) => None,
Ok(_) => {
}
}
-fn trim_line(line: &mut BString) {
- if line.last() == Some(b'\n') {
+impl<B: io::BufRead> Iterator for ByteRecords<B> {
+ type Item = io::Result<Vec<u8>>;
+
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(self.terminator, &mut bytes) {
+ Err(e) => Some(Err(e)),
+ Ok(0) => None,
+ Ok(_) => {
+ trim_record(&mut bytes, self.terminator);
+ Some(Ok(bytes))
+ }
+ }
+ }
+}
+
+fn trim_line(line: &mut Vec<u8>) {
+ if line.last_byte() == Some(b'\n') {
line.pop_byte();
- if line.last() == Some(b'\r') {
+ if line.last_byte() == Some(b'\r') {
line.pop_byte();
}
}
}
+
+fn trim_line_slice(mut line: &[u8]) -> &[u8] {
+ if line.last_byte() == Some(b'\n') {
+ line = &line[..line.len() - 1];
+ if line.last_byte() == Some(b'\r') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ line
+}
+
+fn trim_record(record: &mut Vec<u8>, terminator: u8) {
+ if record.last_byte() == Some(terminator) {
+ record.pop_byte();
+ }
+}
+
+fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
+ if record.last_byte() == Some(terminator) {
+ record = &record[..record.len() - 1];
+ }
+ record
+}
+
+#[cfg(test)]
+mod tests {
+ use super::BufReadExt;
+ use bstring::BString;
+
+ fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line_with_terminator(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ #[test]
+ fn lines_without_terminator() {
+ assert_eq!(collect_lines(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines("\n"), vec![""]);
+ assert_eq!(collect_lines("\n\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("\r\n"), vec![""]);
+ assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+
+ #[test]
+ fn lines_with_terminator() {
+ assert_eq!(collect_lines_term(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines_term("\n"), vec!["\n"]);
+ assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
+ assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
+ assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]);
+ assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
+ assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
+ assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
+ assert_eq!(
+ collect_lines_term("abc\r\nxyz\r\n"),
+ vec!["abc\r\n", "xyz\r\n"]
+ );
+ assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+}