[rustc.git] / vendor / heck / src / lib.rs

//! **heck** is a case conversion library.
//!
//! This library exists to provide case conversion between common cases like
//! CamelCase and snake_case. It is intended to be Unicode-aware, internally
//! consistent, and reasonably well performing.
//!
//! ## Definition of a word boundary
//!
//! Word boundaries are defined as the "unicode words" defined in the
//! `unicode_segmentation` library, as well as within those words in this
//! manner:
//!
//! 1. All underscore characters are considered word boundaries.
//! 2. If an uppercase character is followed by lowercase letters, a word
//! boundary is considered to be just prior to that uppercase character.
//! 3. If multiple uppercase characters are consecutive, they are considered to
//! be within a single word, except that the last will be part of the next word
//! if it is followed by lowercase characters (see rule 2).
//!
//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
//! segmented `XML|Http|Request`.
//!
//! Characters not within words (such as spaces, punctuations, and underscores)
//! are not included in the output string except as they are a part of the case
//! being converted to. Multiple adjacent word boundaries (such as a series of
//! underscores) are folded into one. ("hello__world" in snake case is therefore
//! "hello_world", not the exact same string). Leading or trailing word boundary
//! indicators are dropped, except insofar as CamelCase capitalizes the first
//! word.
//!
//! ### Cases contained in this library:
//!
//! 1. UpperCamelCase
//! 2. lowerCamelCase
//! 3. snake_case
//! 4. kebab-case
//! 5. SHOUTY_SNAKE_CASE
//! 6. Title Case
//! 7. SHOUTY-KEBAB-CASE
#![deny(missing_docs)]
#![forbid(unsafe_code)]

mod kebab;
mod lower_camel;
mod shouty_kebab;
mod shouty_snake;
mod snake;
mod title;
mod upper_camel;

pub use kebab::{AsKebabCase, ToKebabCase};
pub use lower_camel::{AsLowerCamelCase, ToLowerCamelCase};
pub use shouty_kebab::{AsShoutyKebabCase, ToShoutyKebabCase};
pub use shouty_snake::{
    AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase,
};
pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase};
pub use title::{AsTitleCase, ToTitleCase};
pub use upper_camel::{
    AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase,
};

use std::fmt;

/// Splits `s` into words by delegating to the `unicode_words` iterator of the
/// `unicode_segmentation` crate.
///
/// Only compiled when the `unicode` feature is enabled.
#[cfg(feature = "unicode")]
fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords {
    // Fully-qualified trait call, so no local `use` is required.
    unicode_segmentation::UnicodeSegmentation::unicode_words(s)
}
/// Splits `s` at every character that is not an ASCII letter or digit.
///
/// Only compiled when the `unicode` feature is disabled. Adjacent separators
/// are not merged here, so the iterator may yield empty segments.
#[cfg(not(feature = "unicode"))]
fn get_iterator(s: &str) -> impl Iterator<Item = &str> {
    let is_separator = |candidate: char| !candidate.is_ascii_alphanumeric();
    s.split(is_separator)
}

/// Walks `s` word by word and writes the converted result to `f`.
///
/// The words produced by `get_iterator` are split further at underscores and
/// at case changes. For every resulting segment `with_word` is called with
/// the segment text; `boundary` is called before every segment except the
/// very first one. Any error returned by either callback is propagated
/// immediately.
fn transform<F, G>(
    s: &str,
    mut with_word: F,
    mut boundary: G,
    f: &mut fmt::Formatter,
) -> fmt::Result
where
    F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result,
    G: FnMut(&mut fmt::Formatter) -> fmt::Result,
{
    /// Scanner state: the case of the most recent cased character seen in
    /// the segment currently being accumulated.
    ///
    /// `Boundary` means no cased character (lowercase or uppercase) has been
    /// seen since the last split. Otherwise the state is `Lowercase` or
    /// `Uppercase`, according to the last cased character.
    #[derive(Clone, Copy, PartialEq)]
    enum WordMode {
        /// No lowercase or uppercase character has been seen in the current
        /// segment yet.
        Boundary,
        /// The most recent cased character of the current segment is
        /// lowercase.
        Lowercase,
        /// The most recent cased character of the current segment is
        /// uppercase.
        Uppercase,
    }

    // Writes one segment, preceded by a boundary unless it is the first
    // segment of the whole output.
    let mut is_first = true;
    let mut emit = |segment: &str, out: &mut fmt::Formatter| -> fmt::Result {
        if is_first {
            is_first = false;
        } else {
            boundary(out)?;
        }
        with_word(segment, out)
    };

    for word in get_iterator(s) {
        let mut chars = word.char_indices().peekable();
        // Byte offset in `word` where the pending segment starts.
        let mut start = 0;
        let mut mode = WordMode::Boundary;

        while let Some((idx, ch)) = chars.next() {
            // Underscores never appear in the output; when one sits right at
            // the start of the pending segment, move the start past it.
            if ch == '_' {
                if start == idx {
                    start += 1;
                }
                continue;
            }

            let (next_idx, next_ch) = match chars.peek() {
                Some(&upcoming) => upcoming,
                None => {
                    // Last character of the word: flush what is left.
                    emit(&word[start..], &mut *f)?;
                    break;
                }
            };

            // State after absorbing `ch`, provided no split happens here.
            let mode_after = if ch.is_lowercase() {
                WordMode::Lowercase
            } else if ch.is_uppercase() {
                WordMode::Uppercase
            } else {
                mode
            };

            // Split *after* `ch`: an underscore follows, or a lowercase run
            // is followed by an uppercase character.
            let split_after = next_ch == '_'
                || (mode_after == WordMode::Lowercase && next_ch.is_uppercase());
            // Split *before* `ch`: `ch` ends an uppercase run and starts a
            // new capitalized word (e.g. the `H` in `XMLHttp`).
            let split_before =
                mode == WordMode::Uppercase && ch.is_uppercase() && next_ch.is_lowercase();

            if split_after {
                emit(&word[start..next_idx], &mut *f)?;
                start = next_idx;
                mode = WordMode::Boundary;
            } else if split_before {
                emit(&word[start..idx], &mut *f)?;
                start = idx;
                mode = WordMode::Boundary;
            } else {
                mode = mode_after;
            }
        }
    }

    Ok(())
}

/// Writes the lowercase form of `s` to `f`, one character at a time.
///
/// A capital sigma (`Σ`) that is the last character of `s` is written as the
/// final-form sigma `ς`; every other character goes through
/// `char::to_lowercase`.
fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    let mut remaining = s.chars();
    while let Some(current) = remaining.next() {
        // Nothing left after `current` means it is the final character.
        let is_last = remaining.as_str().is_empty();
        if is_last && current == 'Σ' {
            f.write_str("ς")?;
        } else {
            write!(f, "{}", current.to_lowercase())?;
        }
    }

    Ok(())
}

/// Writes the uppercase form of `s` to `f`, converting each character with
/// `char::to_uppercase` and stopping at the first write error.
fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    s.chars()
        .try_for_each(|current| write!(f, "{}", current.to_uppercase()))
}

/// Writes `s` to `f` with its first character uppercased and the remainder
/// passed through `lowercase`. An empty `s` writes nothing.
fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    let mut remaining = s.chars();
    if let Some(head) = remaining.next() {
        write!(f, "{}", head.to_uppercase())?;

        // Only hand over the tail when there actually is one.
        let tail = remaining.as_str();
        if !tail.is_empty() {
            lowercase(tail, f)?;
        }
    }

    Ok(())
}
Commit	Line	Data
0731742a XL	1	//! heck is a case conversion library.
	2	//!
	3	//! This library exists to provide case conversion between common cases like
	4	//! CamelCase and snake_case. It is intended to be unicode aware, internally,
	5	//! consistent, and reasonably well performing.
	6	//!
	7	//! ## Definition of a word boundary
	8	//!
	9	//! Word boundaries are defined as the "unicode words" defined in the
136023e0 XL	10	//! `unicode_segmentation` library, as well as within those words in this
136023e0 XL	11	//! manner:
0731742a XL	12	//!
0731742a XL	13	//! 1. All underscore characters are considered word boundaries.
136023e0 XL	14	//! 2. If an uppercase character is followed by lowercase letters, a word
	15	//! boundary is considered to be just prior to that uppercase character.
	16	//! 3. If multiple uppercase characters are consecutive, they are considered to
	17	//! be within a single word, except that the last will be part of the next word
	18	//! if it is followed by lowercase characters (see rule 2).
0731742a XL	19	//!
	20	//! That is, "HelloWorld" is segmented `Hello\|World` whereas "XMLHttpRequest" is
	21	//! segmented `XML\|Http\|Request`.
	22	//!
	23	//! Characters not within words (such as spaces, punctuations, and underscores)
	24	//! are not included in the output string except as they are a part of the case
	25	//! being converted to. Multiple adjacent word boundaries (such as a series of
	26	//! underscores) are folded into one. ("hello__world" in snake case is therefore
	27	//! "hello_world", not the exact same string). Leading or trailing word boundary
136023e0 XL	28	//! indicators are dropped, except insofar as CamelCase capitalizes the first
136023e0 XL	29	//! word.
0731742a XL	30	//!
	31	//! ### Cases contained in this library:
	32	//!
923072b8 FG	33	//! 1. UpperCamelCase
	34	//! 2. lowerCamelCase
	35	//! 3. snake_case
	36	//! 4. kebab-case
	37	//! 5. SHOUTY_SNAKE_CASE
0731742a	38	//! 6. Title Case
5869c6ff	39	//! 7. SHOUTY-KEBAB-CASE
0731742a	40	#![deny(missing_docs)]
923072b8	41	#![forbid(unsafe_code)]
0731742a	42
0731742a	43	mod kebab;
923072b8	44	mod lower_camel;
5869c6ff	45	mod shouty_kebab;
0731742a XL	46	mod shouty_snake;
	47	mod snake;
	48	mod title;
923072b8	49	mod upper_camel;
0731742a	50
923072b8 FG	51	pub use kebab::{AsKebabCase, ToKebabCase};
	52	pub use lower_camel::{AsLowerCamelCase, ToLowerCamelCase};
	53	pub use shouty_kebab::{AsShoutyKebabCase, ToShoutyKebabCase};
	54	pub use shouty_snake::{
	55	AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase,
	56	};
	57	pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase};
	58	pub use title::{AsTitleCase, ToTitleCase};
	59	pub use upper_camel::{
	60	AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase,
	61	};
0731742a	62
923072b8	63	use std::fmt;
0731742a	64
923072b8 FG	65	#[cfg(feature = "unicode")]
	66	fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords {
	67	use unicode_segmentation::UnicodeSegmentation;
	68	s.unicode_words()
	69	}
	70	#[cfg(not(feature = "unicode"))]
	71	fn get_iterator(s: &str) -> impl Iterator<Item = &str> {
	72	s.split(\|letter: char\| !letter.is_ascii_alphanumeric())
	73	}
	74
	75	fn transform<F, G>(
	76	s: &str,
	77	mut with_word: F,
	78	mut boundary: G,
	79	f: &mut fmt::Formatter,
	80	) -> fmt::Result
0731742a	81	where
923072b8 FG	82	F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result,
923072b8 FG	83	G: FnMut(&mut fmt::Formatter) -> fmt::Result,
0731742a	84	{
136023e0 XL	85	/// Tracks the current 'mode' of the transformation algorithm as it scans
136023e0 XL	86	/// the input string.
0731742a	87	///
136023e0 XL	88	/// The mode is a tri-state which tracks the case of the last cased
	89	/// character of the current word. If there is no cased character
	90	/// (either lowercase or uppercase) since the previous word boundary,
	91	/// than the mode is `Boundary`. If the last cased character is lowercase,
	92	/// then the mode is `Lowercase`. Othertherwise, the mode is
	93	/// `Uppercase`.
0731742a XL	94	#[derive(Clone, Copy, PartialEq)]
0731742a XL	95	enum WordMode {
136023e0 XL	96	/// There have been no lowercase or uppercase characters in the current
136023e0 XL	97	/// word.
0731742a XL	98	Boundary,
	99	/// The previous cased character in the current word is lowercase.
	100	Lowercase,
	101	/// The previous cased character in the current word is uppercase.
	102	Uppercase,
	103	}
	104
0731742a XL	105	let mut first_word = true;
0731742a XL	106
923072b8	107	for word in get_iterator(s) {
0731742a XL	108	let mut char_indices = word.char_indices().peekable();
	109	let mut init = 0;
	110	let mut mode = WordMode::Boundary;
	111
	112	while let Some((i, c)) = char_indices.next() {
	113	// Skip underscore characters
	114	if c == '_' {
136023e0 XL	115	if init == i {
	116	init += 1;
	117	}
	118	continue;
0731742a XL	119	}
	120
	121	if let Some(&(next_i, next)) = char_indices.peek() {
136023e0 XL	122	// The mode including the current character, assuming the
136023e0 XL	123	// current character does not result in a word boundary.
0731742a XL	124	let next_mode = if c.is_lowercase() {
	125	WordMode::Lowercase
	126	} else if c.is_uppercase() {
	127	WordMode::Uppercase
	128	} else {
	129	mode
	130	};
	131
	132	// Word boundary after if next is underscore or current is
	133	// not uppercase and next is uppercase
	134	if next == '_' \|\| (next_mode == WordMode::Lowercase && next.is_uppercase()) {
136023e0	135	if !first_word {
923072b8	136	boundary(f)?;
136023e0	137	}
923072b8	138	with_word(&word[init..next_i], f)?;
0731742a XL	139	first_word = false;
	140	init = next_i;
	141	mode = WordMode::Boundary;
	142
	143	// Otherwise if current and previous are uppercase and next
	144	// is lowercase, word boundary before
	145	} else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() {
136023e0	146	if !first_word {
923072b8	147	boundary(f)?;
136023e0 XL	148	} else {
	149	first_word = false;
	150	}
923072b8	151	with_word(&word[init..i], f)?;
0731742a XL	152	init = i;
	153	mode = WordMode::Boundary;
	154
	155	// Otherwise no word boundary, just update the mode
	156	} else {
	157	mode = next_mode;
	158	}
	159	} else {
	160	// Collect trailing characters as a word
136023e0	161	if !first_word {
923072b8	162	boundary(f)?;
136023e0 XL	163	} else {
	164	first_word = false;
	165	}
923072b8	166	with_word(&word[init..], f)?;
0731742a XL	167	break;
	168	}
	169	}
	170	}
	171
923072b8	172	Ok(())
0731742a XL	173	}
0731742a XL	174
923072b8	175	fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
0731742a XL	176	let mut chars = s.chars().peekable();
	177	while let Some(c) = chars.next() {
	178	if c == 'Σ' && chars.peek().is_none() {
923072b8	179	write!(f, "ς")?;
0731742a	180	} else {
923072b8	181	write!(f, "{}", c.to_lowercase())?;
0731742a XL	182	}
0731742a XL	183	}
923072b8 FG	184
923072b8 FG	185	Ok(())
0731742a XL	186	}
0731742a XL	187
923072b8	188	fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
0731742a	189	for c in s.chars() {
923072b8	190	write!(f, "{}", c.to_uppercase())?;
0731742a	191	}
923072b8 FG	192
923072b8 FG	193	Ok(())
0731742a XL	194	}
0731742a XL	195
923072b8	196	fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
0731742a XL	197	let mut char_indices = s.char_indices();
0731742a XL	198	if let Some((_, c)) = char_indices.next() {
923072b8	199	write!(f, "{}", c.to_uppercase())?;
0731742a	200	if let Some((i, _)) = char_indices.next() {
923072b8	201	lowercase(&s[i..], f)?;
0731742a XL	202	}
0731742a XL	203	}
923072b8 FG	204
923072b8 FG	205	Ok(())
0731742a	206	}