[rustc.git] / vendor / bstr / src / io.rs

/*!
Utilities for working with I/O using byte strings.

This module currently only exports a single trait, `BufReadExt`, which provides
facilities for conveniently and efficiently working with lines as byte strings.

More APIs may be added in the future.
*/

use std::io;

use ext_slice::ByteSlice;
use ext_vec::ByteVec;

/// An extension trait for
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
/// that adds convenience routines for reading lines and byte-terminated
/// records as byte strings.
pub trait BufReadExt: io::BufRead {
    /// Consumes this reader and returns an iterator over its lines, where
    /// every line is handed back as a byte string.
    ///
    /// The items produced by the iterator are `io::Result<Vec<u8>>` values.
    /// An error item is produced when reading from the underlying reader
    /// fails.
    ///
    /// A successfully read line does *not* carry its trailing `\n` or
    /// `\r\n`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
    ///
    /// let mut lines = vec![];
    /// for result in cursor.byte_lines() {
    ///     let line = result?;
    ///     lines.push(line);
    /// }
    /// assert_eq!(lines.len(), 3);
    /// assert_eq!(lines[0], "lorem".as_bytes());
    /// assert_eq!(lines[1], "ipsum".as_bytes());
    /// assert_eq!(lines[2], "dolor".as_bytes());
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn byte_lines(self) -> ByteLines<Self>
    where
        Self: Sized,
    {
        let buf = self;
        ByteLines { buf }
    }

    /// Consumes this reader and returns an iterator over its byte-terminated
    /// records, where every record is handed back as a byte string.
    ///
    /// The items produced by the iterator are `io::Result<Vec<u8>>` values.
    /// An error item is produced when reading from the underlying reader
    /// fails.
    ///
    /// A successfully read record does *not* carry its trailing terminator.
    ///
    /// Unlike `byte_lines()`, `byte_records(b'\n')` applies no special
    /// treatment to `\r`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
    ///
    /// let mut records = vec![];
    /// for result in cursor.byte_records(b'\x00') {
    ///     let record = result?;
    ///     records.push(record);
    /// }
    /// assert_eq!(records.len(), 3);
    /// assert_eq!(records[0], "lorem".as_bytes());
    /// assert_eq!(records[1], "ipsum".as_bytes());
    /// assert_eq!(records[2], "dolor".as_bytes());
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn byte_records(self, terminator: u8) -> ByteRecords<Self>
    where
        Self: Sized,
    {
        ByteRecords { buf: self, terminator: terminator }
    }

    /// Calls the given closure once for every line in the underlying reader.
    ///
    /// Iteration stops as soon as the closure or the underlying reader
    /// reports an error, and that error is returned. Iteration also stops,
    /// without an error, when the closure returns `Ok(false)`.
    ///
    /// The closure sees exactly the values that the
    /// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
    /// iterator would yield, i.e. lines _without_ their trailing `\n` or
    /// `\r\n` bytes.
    ///
    /// This routine is meant for iterating over lines as quickly as
    /// possible: lines are lent to the closure as slices and a single
    /// allocation is reused rather than allocating per line.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
    ///
    /// let mut lines = vec![];
    /// cursor.for_byte_line(|line| {
    ///     lines.push(line.to_vec());
    ///     Ok(true)
    /// })?;
    /// assert_eq!(lines.len(), 3);
    /// assert_eq!(lines[0], "lorem".as_bytes());
    /// assert_eq!(lines[1], "ipsum".as_bytes());
    /// assert_eq!(lines[2], "dolor".as_bytes());
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
    where
        Self: Sized,
        F: FnMut(&[u8]) -> io::Result<bool>,
    {
        self.for_byte_line_with_terminator(|raw| {
            // Strip the `\n` / `\r\n` before the caller gets to see the line.
            let line = trim_line_slice(raw);
            for_each_line(line)
        })
    }

    /// Calls the given closure once for every byte-terminated record in the
    /// underlying reader.
    ///
    /// Iteration stops as soon as the closure or the underlying reader
    /// reports an error, and that error is returned. Iteration also stops,
    /// without an error, when the closure returns `Ok(false)`.
    ///
    /// The closure sees exactly the values that the
    /// [`byte_records`](trait.BufReadExt.html#method.byte_records)
    /// iterator would yield, i.e. records _without_ a trailing terminator
    /// byte.
    ///
    /// This routine is meant for iterating over records as quickly as
    /// possible: records are lent to the closure as slices and a single
    /// allocation is reused rather than allocating per record.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
    ///
    /// let mut records = vec![];
    /// cursor.for_byte_record(b'\x00', |record| {
    ///     records.push(record.to_vec());
    ///     Ok(true)
    /// })?;
    /// assert_eq!(records.len(), 3);
    /// assert_eq!(records[0], "lorem".as_bytes());
    /// assert_eq!(records[1], "ipsum".as_bytes());
    /// assert_eq!(records[2], "dolor".as_bytes());
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn for_byte_record<F>(
        self,
        terminator: u8,
        mut for_each_record: F,
    ) -> io::Result<()>
    where
        Self: Sized,
        F: FnMut(&[u8]) -> io::Result<bool>,
    {
        self.for_byte_record_with_terminator(terminator, |raw| {
            // Strip the terminator before the caller gets to see the record.
            let record = trim_record_slice(raw, terminator);
            for_each_record(record)
        })
    }

    /// Calls the given closure once for every line in the underlying reader,
    /// keeping line terminators intact.
    ///
    /// Iteration stops as soon as the closure or the underlying reader
    /// reports an error, and that error is returned. Iteration also stops,
    /// without an error, when the closure returns `Ok(false)`.
    ///
    /// In contrast to
    /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
    /// the lines passed to the closure *do* include the line terminator,
    /// whenever one is present.
    ///
    /// This routine is meant for iterating over lines as quickly as
    /// possible: lines are lent to the closure as slices and a single
    /// allocation is reused rather than allocating per line.
    ///
    /// It behaves exactly like `for_byte_record_with_terminator` called with
    /// a terminator of `\n`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
    ///
    /// let mut lines = vec![];
    /// cursor.for_byte_line_with_terminator(|line| {
    ///     lines.push(line.to_vec());
    ///     Ok(true)
    /// })?;
    /// assert_eq!(lines.len(), 3);
    /// assert_eq!(lines[0], "lorem\n".as_bytes());
    /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
    /// assert_eq!(lines[2], "dolor".as_bytes());
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn for_byte_line_with_terminator<F>(
        self,
        for_each_line: F,
    ) -> io::Result<()>
    where
        Self: Sized,
        F: FnMut(&[u8]) -> io::Result<bool>,
    {
        const LINE_FEED: u8 = b'\n';
        self.for_byte_record_with_terminator(LINE_FEED, for_each_line)
    }

    /// Calls the given closure once for every byte-terminated record in the
    /// underlying reader, keeping record terminators intact.
    ///
    /// Iteration stops as soon as the closure or the underlying reader
    /// reports an error, and that error is returned. Iteration also stops,
    /// without an error, when the closure returns `Ok(false)`.
    ///
    /// In contrast to
    /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
    /// the records passed to the closure *do* include the record terminator,
    /// whenever one is present.
    ///
    /// This routine is meant for iterating over records as quickly as
    /// possible: records are lent to the closure as slices and a single
    /// allocation is reused rather than allocating per record.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::io;
    ///
    /// use bstr::B;
    /// use bstr::io::BufReadExt;
    ///
    /// # fn example() -> Result<(), io::Error> {
    /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
    ///
    /// let mut records = vec![];
    /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
    ///     records.push(record.to_vec());
    ///     Ok(true)
    /// })?;
    /// assert_eq!(records.len(), 3);
    /// assert_eq!(records[0], B(b"lorem\x00"));
    /// assert_eq!(records[1], B("ipsum\x00"));
    /// assert_eq!(records[2], B("dolor"));
    /// # Ok(()) }; example().unwrap()
    /// ```
    fn for_byte_record_with_terminator<F>(
        mut self,
        terminator: u8,
        mut for_each_record: F,
    ) -> io::Result<()>
    where
        Self: Sized,
        F: FnMut(&[u8]) -> io::Result<bool>,
    {
        // Scratch space for a record that is not fully contained in the
        // reader's current buffer. It is reused across iterations.
        let mut scratch = Vec::new();
        // What we report once the loop has been left via a labeled break.
        let mut outcome = Ok(());
        // Bytes of the reader's buffer handed out but not yet consumed.
        let mut pending = 0;
        'refill: loop {
            // First, lend every complete record straight out of the
            // reader's own buffer, without copying.
            {
                let mut window = self.fill_buf()?;
                while let Some(at) = window.find_byte(terminator) {
                    let (record, tail) = window.split_at(at + 1);
                    window = tail;
                    pending += record.len();
                    match for_each_record(record) {
                        Ok(true) => {}
                        Ok(false) => break 'refill,
                        Err(err) => {
                            outcome = Err(err);
                            break 'refill;
                        }
                    }
                }

                // Whatever is left holds no terminator. Stash it in the
                // scratch buffer so that read_until() below does not have to
                // scan these bytes a second time.
                scratch.extend_from_slice(window);
                pending += window.len();
            }

            self.consume(pending);
            pending = 0;

            // N.B. read_until relies on a different memchr implementation
            // that may be slower than the memchr crate used by bstr. With a
            // reasonably sized buffer this path should only be taken for a
            // fairly small number of records, though.
            self.read_until(terminator, &mut scratch)?;
            if scratch.is_empty() {
                break;
            }
            if !for_each_record(&scratch)? {
                break;
            }
            scratch.clear();
        }
        // Only non-zero when the loop was left from inside the lending
        // phase; mark the records already handed out as consumed.
        self.consume(pending);
        outcome
    }
}

// Blanket implementation: every `io::BufRead` type gets the `BufReadExt`
// methods through the trait's default method bodies.
impl<B> BufReadExt for B where B: io::BufRead {}

/// An iterator over lines from an instance of
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
///
/// Each item is an `io::Result<Vec<u8>>`. Lines are read up to and including
/// `\n`, and a trailing `\n` or `\r\n` is removed before the line is yielded.
///
/// This iterator is generally created by calling the
/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
/// method on the
/// [`BufReadExt`](trait.BufReadExt.html)
/// trait.
#[derive(Debug)]
pub struct ByteLines<B> {
    // The wrapped reader that lines are pulled from.
    buf: B,
}

/// An iterator over records from an instance of
/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
///
/// A byte record is any sequence of bytes terminated by a particular byte
/// chosen by the caller. For example, NUL separated byte strings are said to
/// be NUL-terminated byte records.
///
/// Each item is an `io::Result<Vec<u8>>` from which a trailing terminator
/// byte, if present, has been removed.
///
/// This iterator is generally created by calling the
/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
/// method on the
/// [`BufReadExt`](trait.BufReadExt.html)
/// trait.
#[derive(Debug)]
pub struct ByteRecords<B> {
    // The wrapped reader that records are pulled from.
    buf: B,
    // The byte that marks the end of each record.
    terminator: u8,
}

impl<B: io::BufRead> Iterator for ByteLines<B> {
    type Item = io::Result<Vec<u8>>;

    /// Reads the next line into a freshly allocated buffer.
    ///
    /// Returns `None` once the reader yields no more bytes, `Some(Err(..))`
    /// if the read fails, and otherwise the line with its trailing `\n` or
    /// `\r\n` removed.
    fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
        let mut line = Vec::new();
        let read = match self.buf.read_until(b'\n', &mut line) {
            Ok(count) => count,
            Err(err) => return Some(Err(err)),
        };
        // Zero bytes read means the reader is exhausted.
        if read == 0 {
            return None;
        }
        trim_line(&mut line);
        Some(Ok(line))
    }
}

impl<B: io::BufRead> Iterator for ByteRecords<B> {
    type Item = io::Result<Vec<u8>>;

    /// Reads the next record into a freshly allocated buffer.
    ///
    /// Returns `None` once the reader yields no more bytes, `Some(Err(..))`
    /// if the read fails, and otherwise the record with its trailing
    /// terminator byte removed.
    fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
        let mut record = Vec::new();
        let read = match self.buf.read_until(self.terminator, &mut record) {
            Ok(count) => count,
            Err(err) => return Some(Err(err)),
        };
        // Zero bytes read means the reader is exhausted.
        if read == 0 {
            return None;
        }
        trim_record(&mut record, self.terminator);
        Some(Ok(record))
    }
}

/// Removes one trailing `\n` from `line` in place and, if the byte before it
/// is `\r`, removes that as well. A line that does not end in `\n` is left
/// untouched.
fn trim_line(line: &mut Vec<u8>) {
    if line.last_byte() != Some(b'\n') {
        return;
    }
    line.pop_byte();
    // Only a `\r` that directly preceded the `\n` counts as part of the
    // line terminator.
    if line.last_byte() == Some(b'\r') {
        line.pop_byte();
    }
}

/// Returns `line` without one trailing `\n` and, if the byte before it is
/// `\r`, without that byte either. A line that does not end in `\n` is
/// returned unchanged.
fn trim_line_slice(line: &[u8]) -> &[u8] {
    if line.last_byte() != Some(b'\n') {
        return line;
    }
    let body = &line[..line.len() - 1];
    // Only a `\r` that directly preceded the `\n` counts as part of the
    // line terminator.
    if body.last_byte() == Some(b'\r') {
        &body[..body.len() - 1]
    } else {
        body
    }
}

/// Removes a single trailing `terminator` byte from `record` in place, if
/// the record ends with one.
fn trim_record(record: &mut Vec<u8>, terminator: u8) {
    match record.last_byte() {
        Some(last) if last == terminator => {
            record.pop_byte();
        }
        _ => {}
    }
}

/// Returns `record` without a single trailing `terminator` byte, or
/// unchanged if it does not end with one.
fn trim_record_slice(record: &[u8], terminator: u8) -> &[u8] {
    match record.last_byte() {
        Some(last) if last == terminator => &record[..record.len() - 1],
        _ => record,
    }
}

#[cfg(test)]
mod tests {
    use super::BufReadExt;
    use bstring::BString;

    /// Runs `for_byte_line` over `slice` and returns every line the closure
    /// was given, i.e. lines with their terminators stripped.
    fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
        let mut collected = Vec::new();
        let outcome = slice.as_ref().for_byte_line(|bytes| {
            collected.push(BString::from(bytes.to_vec()));
            Ok(true)
        });
        outcome.unwrap();
        collected
    }

    /// Runs `for_byte_line_with_terminator` over `slice` and returns every
    /// line the closure was given, i.e. lines with their terminators intact.
    fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
        let mut collected = Vec::new();
        let outcome = slice.as_ref().for_byte_line_with_terminator(|bytes| {
            collected.push(BString::from(bytes.to_vec()));
            Ok(true)
        });
        outcome.unwrap();
        collected
    }

    #[test]
    fn lines_without_terminator() {
        // Empty input produces no lines at all.
        assert_eq!(collect_lines(""), Vec::<BString>::new());

        // `\n` terminated input.
        assert_eq!(collect_lines("\n"), vec![""]);
        assert_eq!(collect_lines("\n\n"), vec!["", ""]);
        assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
        assert_eq!(collect_lines("a\nb"), vec!["a", "b"]);
        assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
        assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);

        // `\r\n` terminated input.
        assert_eq!(collect_lines("\r\n"), vec![""]);
        assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
        assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
        assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
        assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
        assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);

        // A lone `\r` is not a line terminator.
        assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]);
    }

    #[test]
    fn lines_with_terminator() {
        // Empty input produces no lines at all.
        assert_eq!(collect_lines_term(""), Vec::<BString>::new());

        // `\n` terminated input.
        assert_eq!(collect_lines_term("\n"), vec!["\n"]);
        assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
        assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
        assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]);
        assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
        assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);

        // `\r\n` terminated input.
        assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
        assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
        assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
        assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
        assert_eq!(
            collect_lines_term("abc\r\nxyz\r\n"),
            vec!["abc\r\n", "xyz\r\n"]
        );
        assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);

        // A lone `\r` is not a line terminator.
        assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]);
    }
}
Commit	Line	Data
dfeec247 XL	1	/*!
	2	Utilities for working with I/O using byte strings.
	3
	4	This module currently only exports a single trait, `BufReadExt`, which provides
	5	facilities for conveniently and efficiently working with lines as byte strings.
	6
	7	More APIs may be added in the future.
	8	*/
	9
	10	use std::io;
	11
f035d41b XL	12	use ext_slice::ByteSlice;
f035d41b XL	13	use ext_vec::ByteVec;
dfeec247 XL	14
	15	/// An extention trait for
	16	/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
	17	/// which provides convenience APIs for dealing with byte strings.
	18	pub trait BufReadExt: io::BufRead {
	19	/// Returns an iterator over the lines of this reader, where each line
	20	/// is represented as a byte string.
	21	///
f035d41b	22	/// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
dfeec247 XL	23	/// an error is yielded if there was a problem reading from the underlying
	24	/// reader.
	25	///
	26	/// On success, the next line in the iterator is returned. The line does
	27	/// not contain a trailing `\n` or `\r\n`.
	28	///
	29	/// # Examples
	30	///
	31	/// Basic usage:
	32	///
	33	/// ```
	34	/// use std::io;
	35	///
	36	/// use bstr::io::BufReadExt;
	37	///
	38	/// # fn example() -> Result<(), io::Error> {
	39	/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
	40	///
	41	/// let mut lines = vec![];
	42	/// for result in cursor.byte_lines() {
	43	/// let line = result?;
	44	/// lines.push(line);
	45	/// }
	46	/// assert_eq!(lines.len(), 3);
f035d41b XL	47	/// assert_eq!(lines[0], "lorem".as_bytes());
	48	/// assert_eq!(lines[1], "ipsum".as_bytes());
	49	/// assert_eq!(lines[2], "dolor".as_bytes());
dfeec247 XL	50	/// # Ok(()) }; example().unwrap()
dfeec247 XL	51	/// ```
f035d41b XL	52	fn byte_lines(self) -> ByteLines<Self>
	53	where
	54	Self: Sized,
	55	{
dfeec247 XL	56	ByteLines { buf: self }
	57	}
	58
f035d41b XL	59	/// Returns an iterator over byte-terminated records of this reader, where
	60	/// each record is represented as a byte string.
	61	///
	62	/// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
	63	/// an error is yielded if there was a problem reading from the underlying
	64	/// reader.
	65	///
	66	/// On success, the next record in the iterator is returned. The record
	67	/// does not contain its trailing terminator.
	68	///
	69	/// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
	70	/// that it has no special handling for `\r`.
	71	///
	72	/// # Examples
	73	///
	74	/// Basic usage:
	75	///
	76	/// ```
	77	/// use std::io;
	78	///
	79	/// use bstr::io::BufReadExt;
	80	///
	81	/// # fn example() -> Result<(), io::Error> {
	82	/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
	83	///
	84	/// let mut records = vec![];
	85	/// for result in cursor.byte_records(b'\x00') {
	86	/// let record = result?;
	87	/// records.push(record);
	88	/// }
	89	/// assert_eq!(records.len(), 3);
	90	/// assert_eq!(records[0], "lorem".as_bytes());
	91	/// assert_eq!(records[1], "ipsum".as_bytes());
	92	/// assert_eq!(records[2], "dolor".as_bytes());
	93	/// # Ok(()) }; example().unwrap()
	94	/// ```
	95	fn byte_records(self, terminator: u8) -> ByteRecords<Self>
	96	where
	97	Self: Sized,
	98	{
	99	ByteRecords { terminator, buf: self }
	100	}
	101
dfeec247 XL	102	/// Executes the given closure on each line in the underlying reader.
	103	///
	104	/// If the closure returns an error (or if the underlying reader returns an
	105	/// error), then iteration is stopped and the error is returned. If false
	106	/// is returned, then iteration is stopped and no error is returned.
	107	///
	108	/// The closure given is called on exactly the same values as yielded by
	109	/// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
	110	/// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
	111	///
	112	/// This routine is useful for iterating over lines as quickly as
	113	/// possible. Namely, a single allocation is reused for each line.
	114	///
	115	/// # Examples
	116	///
	117	/// Basic usage:
	118	///
	119	/// ```
	120	/// use std::io;
	121	///
	122	/// use bstr::io::BufReadExt;
	123	///
	124	/// # fn example() -> Result<(), io::Error> {
	125	/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
	126	///
	127	/// let mut lines = vec![];
	128	/// cursor.for_byte_line(\|line\| {
f035d41b	129	/// lines.push(line.to_vec());
dfeec247 XL	130	/// Ok(true)
	131	/// })?;
	132	/// assert_eq!(lines.len(), 3);
f035d41b XL	133	/// assert_eq!(lines[0], "lorem".as_bytes());
	134	/// assert_eq!(lines[1], "ipsum".as_bytes());
	135	/// assert_eq!(lines[2], "dolor".as_bytes());
dfeec247 XL	136	/// # Ok(()) }; example().unwrap()
dfeec247 XL	137	/// ```
f035d41b XL	138	fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
	139	where
	140	Self: Sized,
	141	F: FnMut(&[u8]) -> io::Result<bool>,
	142	{
	143	self.for_byte_line_with_terminator(\|line\| {
	144	for_each_line(&trim_line_slice(&line))
	145	})
	146	}
	147
	148	/// Executes the given closure on each byte-terminated record in the
	149	/// underlying reader.
	150	///
	151	/// If the closure returns an error (or if the underlying reader returns an
	152	/// error), then iteration is stopped and the error is returned. If false
	153	/// is returned, then iteration is stopped and no error is returned.
	154	///
	155	/// The closure given is called on exactly the same values as yielded by
	156	/// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
	157	/// iterator. Namely, records do _not_ contain a trailing terminator byte.
	158	///
	159	/// This routine is useful for iterating over records as quickly as
	160	/// possible. Namely, a single allocation is reused for each record.
	161	///
	162	/// # Examples
	163	///
	164	/// Basic usage:
	165	///
	166	/// ```
	167	/// use std::io;
	168	///
	169	/// use bstr::io::BufReadExt;
	170	///
	171	/// # fn example() -> Result<(), io::Error> {
	172	/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
	173	///
	174	/// let mut records = vec![];
	175	/// cursor.for_byte_record(b'\x00', \|record\| {
	176	/// records.push(record.to_vec());
	177	/// Ok(true)
	178	/// })?;
	179	/// assert_eq!(records.len(), 3);
	180	/// assert_eq!(records[0], "lorem".as_bytes());
	181	/// assert_eq!(records[1], "ipsum".as_bytes());
	182	/// assert_eq!(records[2], "dolor".as_bytes());
	183	/// # Ok(()) }; example().unwrap()
	184	/// ```
	185	fn for_byte_record<F>(
	186	self,
	187	terminator: u8,
	188	mut for_each_record: F,
dfeec247	189	) -> io::Result<()>
f035d41b XL	190	where
	191	Self: Sized,
	192	F: FnMut(&[u8]) -> io::Result<bool>,
dfeec247	193	{
f035d41b XL	194	self.for_byte_record_with_terminator(terminator, \|chunk\| {
	195	for_each_record(&trim_record_slice(&chunk, terminator))
	196	})
dfeec247 XL	197	}
	198
	199	/// Executes the given closure on each line in the underlying reader.
	200	///
	201	/// If the closure returns an error (or if the underlying reader returns an
	202	/// error), then iteration is stopped and the error is returned. If false
	203	/// is returned, then iteration is stopped and no error is returned.
	204	///
	205	/// Unlike
	206	/// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
	207	/// the lines given to the closure do include the line terminator, if one
	208	/// exists.
	209	///
	210	/// This routine is useful for iterating over lines as quickly as
	211	/// possible. Namely, a single allocation is reused for each line.
	212	///
f035d41b XL	213	/// This is identical to `for_byte_record_with_terminator` with a
	214	/// terminator of `\n`.
	215	///
dfeec247 XL	216	/// # Examples
	217	///
	218	/// Basic usage:
	219	///
	220	/// ```
	221	/// use std::io;
	222	///
	223	/// use bstr::io::BufReadExt;
	224	///
	225	/// # fn example() -> Result<(), io::Error> {
	226	/// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
	227	///
	228	/// let mut lines = vec![];
	229	/// cursor.for_byte_line_with_terminator(\|line\| {
f035d41b	230	/// lines.push(line.to_vec());
dfeec247 XL	231	/// Ok(true)
	232	/// })?;
	233	/// assert_eq!(lines.len(), 3);
f035d41b XL	234	/// assert_eq!(lines[0], "lorem\n".as_bytes());
	235	/// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
	236	/// assert_eq!(lines[2], "dolor".as_bytes());
dfeec247 XL	237	/// # Ok(()) }; example().unwrap()
	238	/// ```
	239	fn for_byte_line_with_terminator<F>(
f035d41b XL	240	self,
	241	for_each_line: F,
	242	) -> io::Result<()>
	243	where
	244	Self: Sized,
	245	F: FnMut(&[u8]) -> io::Result<bool>,
	246	{
	247	self.for_byte_record_with_terminator(b'\n', for_each_line)
	248	}
	249
	250	/// Executes the given closure on each byte-terminated record in the
	251	/// underlying reader.
	252	///
	253	/// If the closure returns an error (or if the underlying reader returns an
	254	/// error), then iteration is stopped and the error is returned. If false
	255	/// is returned, then iteration is stopped and no error is returned.
	256	///
	257	/// Unlike
	258	/// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
	259	/// the lines given to the closure do include the record terminator, if
	260	/// one exists.
	261	///
	262	/// This routine is useful for iterating over records as quickly as
	263	/// possible. Namely, a single allocation is reused for each record.
	264	///
	265	/// # Examples
	266	///
	267	/// Basic usage:
	268	///
	269	/// ```
	270	/// use std::io;
	271	///
	272	/// use bstr::B;
	273	/// use bstr::io::BufReadExt;
	274	///
	275	/// # fn example() -> Result<(), io::Error> {
	276	/// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
	277	///
	278	/// let mut records = vec![];
	279	/// cursor.for_byte_record_with_terminator(b'\x00', \|record\| {
	280	/// records.push(record.to_vec());
	281	/// Ok(true)
	282	/// })?;
	283	/// assert_eq!(records.len(), 3);
	284	/// assert_eq!(records[0], B(b"lorem\x00"));
	285	/// assert_eq!(records[1], B("ipsum\x00"));
	286	/// assert_eq!(records[2], B("dolor"));
	287	/// # Ok(()) }; example().unwrap()
	288	/// ```
	289	fn for_byte_record_with_terminator<F>(
dfeec247	290	mut self,
f035d41b XL	291	terminator: u8,
f035d41b XL	292	mut for_each_record: F,
dfeec247	293	) -> io::Result<()>
f035d41b XL	294	where
	295	Self: Sized,
	296	F: FnMut(&[u8]) -> io::Result<bool>,
dfeec247	297	{
f035d41b XL	298	let mut bytes = vec![];
	299	let mut res = Ok(());
	300	let mut consumed = 0;
	301	'outer: loop {
	302	// Lend out complete record slices from our buffer
	303	{
	304	let mut buf = self.fill_buf()?;
	305	while let Some(index) = buf.find_byte(terminator) {
	306	let (record, rest) = buf.split_at(index + 1);
	307	buf = rest;
	308	consumed += record.len();
	309	match for_each_record(&record) {
	310	Ok(false) => break 'outer,
	311	Err(err) => {
	312	res = Err(err);
	313	break 'outer;
	314	}
	315	_ => (),
	316	}
	317	}
	318
	319	// Copy the final record fragment to our local buffer. This
	320	// saves read_until() from re-scanning a buffer we know
	321	// contains no remaining terminators.
	322	bytes.extend_from_slice(&buf);
	323	consumed += buf.len();
	324	}
	325
	326	self.consume(consumed);
	327	consumed = 0;
	328
	329	// N.B. read_until uses a different version of memchr that may
	330	// be slower than the memchr crate that bstr uses. However, this
	331	// should only run for a fairly small number of records, assuming a
	332	// decent buffer size.
	333	self.read_until(terminator, &mut bytes)?;
	334	if bytes.is_empty() \|\| !for_each_record(&bytes)? {
dfeec247 XL	335	break;
	336	}
	337	bytes.clear();
	338	}
f035d41b XL	339	self.consume(consumed);
f035d41b XL	340	res
dfeec247 XL	341	}
	342	}
	343
	344	impl<B: io::BufRead> BufReadExt for B {}
	345
	346	/// An iterator over lines from an instance of
	347	/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
	348	///
	349	/// This iterator is generally created by calling the
	350	/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
	351	/// method on the
	352	/// [`BufReadExt`](trait.BufReadExt.html)
	353	/// trait.
	354	#[derive(Debug)]
	355	pub struct ByteLines<B> {
	356	buf: B,
	357	}
	358
f035d41b XL	359	/// An iterator over records from an instance of
	360	/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
	361	///
	362	/// A byte record is any sequence of bytes terminated by a particular byte
	363	/// chosen by the caller. For example, NUL separated byte strings are said to
	364	/// be NUL-terminated byte records.
	365	///
	366	/// This iterator is generally created by calling the
	367	/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
	368	/// method on the
	369	/// [`BufReadExt`](trait.BufReadExt.html)
	370	/// trait.
	371	#[derive(Debug)]
	372	pub struct ByteRecords<B> {
	373	buf: B,
	374	terminator: u8,
	375	}
	376
dfeec247	377	impl<B: io::BufRead> Iterator for ByteLines<B> {
f035d41b	378	type Item = io::Result<Vec<u8>>;
dfeec247	379
f035d41b XL	380	fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
	381	let mut bytes = vec![];
	382	match self.buf.read_until(b'\n', &mut bytes) {
dfeec247 XL	383	Err(e) => Some(Err(e)),
	384	Ok(0) => None,
	385	Ok(_) => {
	386	trim_line(&mut bytes);
	387	Some(Ok(bytes))
	388	}
	389	}
	390	}
	391	}
	392
f035d41b XL	393	impl<B: io::BufRead> Iterator for ByteRecords<B> {
	394	type Item = io::Result<Vec<u8>>;
	395
	396	fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
	397	let mut bytes = vec![];
	398	match self.buf.read_until(self.terminator, &mut bytes) {
	399	Err(e) => Some(Err(e)),
	400	Ok(0) => None,
	401	Ok(_) => {
	402	trim_record(&mut bytes, self.terminator);
	403	Some(Ok(bytes))
	404	}
	405	}
	406	}
	407	}
	408
	409	fn trim_line(line: &mut Vec<u8>) {
	410	if line.last_byte() == Some(b'\n') {
dfeec247	411	line.pop_byte();
f035d41b	412	if line.last_byte() == Some(b'\r') {
dfeec247 XL	413	line.pop_byte();
	414	}
	415	}
	416	}
f035d41b XL	417
	418	fn trim_line_slice(mut line: &[u8]) -> &[u8] {
	419	if line.last_byte() == Some(b'\n') {
	420	line = &line[..line.len() - 1];
	421	if line.last_byte() == Some(b'\r') {
	422	line = &line[..line.len() - 1];
	423	}
	424	}
	425	line
	426	}
	427
	428	fn trim_record(record: &mut Vec<u8>, terminator: u8) {
	429	if record.last_byte() == Some(terminator) {
	430	record.pop_byte();
	431	}
	432	}
	433
	434	fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
	435	if record.last_byte() == Some(terminator) {
	436	record = &record[..record.len() - 1];
	437	}
	438	record
	439	}
	440
	441	#[cfg(test)]
	442	mod tests {
	443	use super::BufReadExt;
	444	use bstring::BString;
	445
	446	fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
	447	let mut lines = vec![];
	448	slice
	449	.as_ref()
	450	.for_byte_line(\|line\| {
	451	lines.push(BString::from(line.to_vec()));
	452	Ok(true)
	453	})
	454	.unwrap();
	455	lines
	456	}
	457
	458	fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
	459	let mut lines = vec![];
	460	slice
	461	.as_ref()
	462	.for_byte_line_with_terminator(\|line\| {
	463	lines.push(BString::from(line.to_vec()));
	464	Ok(true)
	465	})
	466	.unwrap();
	467	lines
	468	}
	469
	470	#[test]
	471	fn lines_without_terminator() {
	472	assert_eq!(collect_lines(""), Vec::<BString>::new());
	473
	474	assert_eq!(collect_lines("\n"), vec![""]);
	475	assert_eq!(collect_lines("\n\n"), vec!["", ""]);
	476	assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
	477	assert_eq!(collect_lines("a\nb"), vec!["a", "b"]);
	478	assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
	479	assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);
	480
481	assert_eq!(collect_lines("\r\n"), vec![""]);
482	assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
483	assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
484	assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
485	assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
486	assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);
487
488	assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]);
489	}
490
491	#[test]
492	fn lines_with_terminator() {
493	assert_eq!(collect_lines_term(""), Vec::<BString>::new());
494
495	assert_eq!(collect_lines_term("\n"), vec!["\n"]);
496	assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
497	assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
498	assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]);
499	assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
500	assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);
501
502	assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
503	assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
504	assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
505	assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
506	assert_eq!(
507	collect_lines_term("abc\r\nxyz\r\n"),
508	vec!["abc\r\n", "xyz\r\n"]
509	);
510	assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);
511
512	assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]);
513	}
514	}