[ceph.git] / ceph / src / boost / libs / spirit / include / boost / spirit / home / support / detail / lexer / parser / tokeniser / re_tokeniser.hpp

// tokeniser.hpp
// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_LEXER_RE_TOKENISER_HPP
#define BOOST_LEXER_RE_TOKENISER_HPP

// memcpy()
#include <cstring>
#include <map>
#include "num_token.hpp"
#include "../../runtime_error.hpp"
#include "../../size_t.hpp"
#include <sstream>
#include "../../string_token.hpp"
#include "re_tokeniser_helper.hpp"

namespace boost
{
namespace lexer
{
namespace detail
{
template<typename CharT>
class basic_re_tokeniser
{
public:
    // Token object that next () fills in on every call.
    typedef basic_num_token<CharT> num_token;
    // Tokeniser state that the functions below read from and advance.
    typedef basic_re_tokeniser_state<CharT> state;
    // Character set representation, built from a negation flag and the
    // set's characters (see create_charset_token ()).
    typedef basic_string_token<CharT> string_token;
    // String type used by string_token for its characters.
    typedef typename string_token::string string;
    // Maps each distinct character set to the numeric id assigned to it.
    typedef std::map<string_token, std::size_t> token_map;
    // Pair type used when inserting a new entry into token_map.
    typedef std::pair<string_token, std::size_t> token_pair;

    // Reads the next regex token from state_ and stores it in token_.
    //
    // Double quotes are consumed here and toggle state_._in_string. A
    // backslash always introduces an escape sequence, even inside a quoted
    // string; every other character inside a quoted string is taken
    // literally. Outside a string the meta characters are mapped to their
    // token types, and any remaining character becomes a CHARSET token
    // whose id is looked up in (or added to) map_.
    //
    // Throws runtime_error for (among other things) an unterminated
    // string, unbalanced parentheses and the unsupported '/' operator.
    static void next (state &state_, token_map &map_, num_token &token_)
    {
        CharT curr_ = 0;
        bool finished_ = state_.next (curr_);

        token_.min_max (0, false, 0);

        // Swallow any run of quotes, flipping string mode once per quote.
        for (; !finished_ && curr_ == '"'; finished_ = state_.next (curr_))
        {
            state_._in_string ^= 1;
        }

        if (finished_)
        {
            if (state_._in_string)
            {
                throw runtime_error ("Unexpected end of regex "
                    "(missing '\"').");
            }

            if (state_._paren_count)
            {
                throw runtime_error ("Unexpected end of regex "
                    "(missing ')').");
            }

            token_.set (num_token::END, null_token);
            return;
        }

        if (curr_ == '\\')
        {
            // Escape sequences are honoured inside strings too.
            escape (state_, map_, token_);
            return;
        }

        if (state_._in_string)
        {
            // Inside a string every other character is a plain literal.
            create_charset_token (string (1, curr_), false, map_, token_);
            return;
        }

        // Neither escaped nor quoted: look for meta characters.
        switch (curr_)
        {
        case '(':
            token_.set (num_token::OPENPAREN, null_token);
            ++state_._paren_count;
            read_options (state_);
            break;
        case ')':
            if (--state_._paren_count < 0)
            {
                std::ostringstream msg_;

                msg_ << "Number of open parenthesis < 0 at index " <<
                    state_.index () - 1 << '.';
                throw runtime_error (msg_.str ().c_str ());
            }

            token_.set (num_token::CLOSEPAREN, null_token);

            // Restore the flags that were saved when the group was opened.
            if (!state_._flags_stack.empty ())
            {
                state_._flags = state_._flags_stack.top ();
                state_._flags_stack.pop ();
            }

            break;
        case '?':
            if (state_.eos () || *state_._curr != '?')
            {
                token_.set (num_token::OPT, null_token);
            }
            else
            {
                // A directly following '?' selects the AOPT variant.
                token_.set (num_token::AOPT, null_token);
                state_.increment ();
            }

            break;
        case '*':
            if (state_.eos () || *state_._curr != '?')
            {
                token_.set (num_token::ZEROORMORE, null_token);
            }
            else
            {
                // A directly following '?' selects the AZEROORMORE variant.
                token_.set (num_token::AZEROORMORE, null_token);
                state_.increment ();
            }

            break;
        case '+':
            if (state_.eos () || *state_._curr != '?')
            {
                token_.set (num_token::ONEORMORE, null_token);
            }
            else
            {
                // A directly following '?' selects the AONEORMORE variant.
                token_.set (num_token::AONEORMORE, null_token);
                state_.increment ();
            }

            break;
        case '{':
            open_curly (state_, token_);
            break;
        case '|':
            token_.set (num_token::OR, null_token);
            break;
        case '^':
            // Only a '^' that is the very first character is an assertion.
            if (state_._curr == state_._start + 1)
            {
                token_.set (num_token::CHARSET, bol_token);
                state_._seen_BOL_assertion = true;
            }
            else
            {
                create_charset_token (string (1, curr_), false,
                    map_, token_);
            }

            break;
        case '$':
            // Only a '$' that is the very last character is an assertion.
            if (state_._curr == state_._end)
            {
                token_.set (num_token::CHARSET, eol_token);
                state_._seen_EOL_assertion = true;
            }
            else
            {
                create_charset_token (string (1, curr_), false,
                    map_, token_);
            }

            break;
        case '.':
        {
            // '.' is emitted as a negated set of the excluded characters.
            string excluded_;

            if (state_._flags & dot_not_newline)
            {
                excluded_ = '\n';
            }

            create_charset_token (excluded_, true, map_, token_);
            break;
        }
        case '[':
            charset (state_, map_, token_);
            break;
        case '/':
            throw runtime_error("Lookahead ('/') is not supported yet.");
        default:
        {
            // With icase set, a cased letter matches both of its forms.
            const bool both_cases_ = (state_._flags & icase) &&
                (std::isupper (curr_, state_._locale) ||
                std::islower (curr_, state_._locale));

            if (!both_cases_)
            {
                create_charset_token (string (1, curr_), false,
                    map_, token_);
            }
            else
            {
                const CharT upper_ = std::toupper (curr_, state_._locale);
                const CharT lower_ = std::tolower (curr_, state_._locale);
                string pair_ (1, upper_);

                pair_ += lower_;
                create_charset_token (pair_, false, map_, token_);
            }

            break;
        }
        }
    }

private:
    // Helper supplying the escape_sequence () and charset () parsers that
    // escape () and charset () below delegate to.
    typedef basic_re_tokeniser_helper<CharT> tokeniser_helper;

    // Called straight after a '(' has been consumed.
    //
    // If the group starts with '?', the current flags are pushed onto
    // state_._flags_stack and the option letters up to ':' are applied:
    //   i - set icase          (-i clears it)
    //   s - clear dot_not_newline (-s sets it)
    // A '-' flips the negation applied to the next option letter. Any
    // other character throws runtime_error.
    //
    // For a plain group the flags are pushed only when the stack is
    // already non-empty, so that the matching ')' pops a matching entry.
    static void read_options (state &state_)
    {
        const bool has_options_ = !state_.eos () && *state_._curr == '?';

        if (!has_options_)
        {
            if (!state_._flags_stack.empty ())
            {
                state_._flags_stack.push (state_._flags);
            }

            return;
        }

        CharT opt_ = 0;
        bool finished_ = false;
        bool invert_ = false;

        // Step over the '?' and fetch the first option character.
        state_.increment ();
        finished_ = state_.next (opt_);
        state_._flags_stack.push (state_._flags);

        for (; !finished_ && opt_ != ':'; finished_ = state_.next (opt_))
        {
            if (opt_ == '-')
            {
                invert_ = !invert_;
            }
            else if (opt_ == 'i')
            {
                state_._flags = invert_ ?
                    static_cast<regex_flags> (state_._flags & ~icase) :
                    static_cast<regex_flags> (state_._flags | icase);
                invert_ = false;
            }
            else if (opt_ == 's')
            {
                state_._flags = invert_ ?
                    static_cast<regex_flags>
                        (state_._flags | dot_not_newline) :
                    static_cast<regex_flags>
                        (state_._flags & ~dot_not_newline);
                invert_ = false;
            }
            else
            {
                std::ostringstream msg_;

                msg_ << "Unknown option at index " <<
                    state_.index () - 1 << '.';
                throw runtime_error (msg_.str ().c_str ());
            }
        }

        // Running out of input here is reported by the end of string
        // handling in next ().
    }

    // Processes the escape sequence that follows a backslash.
    //
    // tokeniser_helper::escape_sequence () either yields a single
    // character (null return) or a string of str_len_ characters. In the
    // latter case the string, minus its first character, is parsed as a
    // character set via charset () using the current flags and locale.
    // NOTE(review): presumably that string is a bracket expression whose
    // leading '[' is being skipped - confirm against the helper.
    static void escape (state &state_, token_map &map_, num_token &token_)
    {
        CharT literal_ = 0;
        std::size_t set_len_ = 0;
        const CharT *set_ = tokeniser_helper::escape_sequence (state_,
            literal_, set_len_);

        if (!set_)
        {
            create_charset_token (string (1, literal_), false, map_, token_);
            return;
        }

        state expansion_ (set_ + 1, set_ + set_len_, state_._flags,
            state_._locale);

        charset (expansion_, map_, token_);
    }

    // Lets tokeniser_helper::charset () collect the characters of a set
    // and its negation flag from state_, then turns the result into a
    // CHARSET token.
    static void charset (state &state_, token_map &map_, num_token &token_)
    {
        bool is_negated_ = false;
        string members_;

        tokeniser_helper::charset (state_, members_, is_negated_);
        create_charset_token (members_, is_negated_, map_, token_);
    }

    static void create_charset_token (const string &charset_,
        const bool negated_, token_map &map_, num_token &token_)
    {
        std::size_t id_ = null_token;
        string_token stok_ (negated_, charset_);

        stok_.remove_duplicates ();
        stok_.normalise ();

        typename token_map::const_iterator iter_ = map_.find (stok_);

        if (iter_ == map_.end ())
        {
            id_ = map_.size ();
            map_.insert (token_pair (stok_, id_));
        }
        else
        {
            id_ = iter_->second;
        }

        token_.set (num_token::CHARSET, id_);
    }

    // Dispatches on the character that follows a '{'.
    //
    // A decimal digit starts a repeat specification, handled by
    // repeat_n (); anything else is treated as a macro reference and
    // handled by macro (). Throws runtime_error if the regex ends
    // immediately after the '{'.
    static void open_curly (state &state_, num_token &token_)
    {
        if (state_.eos ())
        {
            throw runtime_error ("Unexpected end of regex "
                "(missing '}').");
        }

        const bool is_repeat_ = *state_._curr >= '0' && *state_._curr <= '9';

        if (!is_repeat_)
        {
            macro (state_, token_);
            return;
        }

        repeat_n (state_, token_);

        // A '?' straight after the closing '}' switches to AREPEATN.
        // NOTE(review): the type is overwritten even when repeat_n ()
        // reduced the repeat to '*', '+' or '?' - confirm the consumer
        // of AREPEATN copes with that.
        if (!state_.eos () && *state_._curr == '?')
        {
            token_._type = num_token::AREPEATN;
            state_.increment ();
        }
    }

    // Parses a repeat specification; the opening '{' has already been
    // consumed and the next character is known to be a digit.
    //
    // SYNTAX:
    //   {n[,[n]]}
    // SEMANTIC RULES:
    //   {0}       - INVALID (throws)
    //   {0,}      - becomes '*'
    //   {0,0}     - INVALID (throws)
    //   {0,1}     - becomes '?'
    //   {1,}      - becomes '+'
    //   {min,max} with min == max - treated as {min}
    //   {min,max} with max < min  - INVALID (throws)
    // All failures are reported by throwing runtime_error.
    static void repeat_n (state &state_, num_token &token_)
    {
        CharT curr_ = 0;
        bool finished_ = state_.next (curr_);

        // Accumulate the decimal digits of the lower bound.
        for (; !finished_ && curr_ >= '0' && curr_ <= '9';
            finished_ = state_.next (curr_))
        {
            token_._min *= 10;
            token_._min += curr_ - '0';
        }

        if (finished_)
        {
            throw runtime_error ("Unexpected end of regex "
                "(missing '}').");
        }

        // Set while an explicit upper bound still needs the max < min check.
        bool has_range_ = false;
        // Cleared when the repeat has been replaced by '*', '+' or '?'.
        bool emit_repeat_ = true;

        token_._comma = curr_ == ',';

        if (token_._comma)
        {
            finished_ = state_.next (curr_);

            if (finished_)
            {
                throw runtime_error ("Unexpected end of regex "
                    "(missing '}').");
            }

            if (curr_ != '}')
            {
                if (!(curr_ >= '0' && curr_ <= '9'))
                {
                    std::ostringstream msg_;

                    msg_ << "Missing '}' at index " <<
                        state_.index () - 1 << '.';
                    throw runtime_error (msg_.str ().c_str ());
                }

                has_range_ = true;

                // Accumulate the decimal digits of the upper bound.
                do
                {
                    token_._max *= 10;
                    token_._max += curr_ - '0';
                    finished_ = state_.next (curr_);
                } while (!finished_ && curr_ >= '0' && curr_ <= '9');

                if (finished_)
                {
                    throw runtime_error ("Unexpected end of regex "
                        "(missing '}').");
                }

                if (token_._min == 0 && token_._max == 1)
                {
                    // Small optimisation: {0,1} is the same as '?'.
                    token_.set (num_token::OPT, null_token);
                    emit_repeat_ = false;
                }
                else if (token_._min == token_._max)
                {
                    // Small optimisation: {n,n} is the same as {n}.
                    has_range_ = false;
                    token_._comma = false;
                    token_._max = 0;
                }
            }
            else if (token_._min == 0)
            {
                // Small optimisation: {0,} is the same as '*'.
                token_.set (num_token::ZEROORMORE, null_token);
                emit_repeat_ = false;
            }
            else if (token_._min == 1)
            {
                // Small optimisation: {1,} is the same as '+'.
                token_.set (num_token::ONEORMORE, null_token);
                emit_repeat_ = false;
            }
        }

        if (curr_ != '}')
        {
            std::ostringstream msg_;

            msg_ << "Missing '}' at index " << state_.index () - 1 << '.';
            throw runtime_error (msg_.str ().c_str ());
        }

        if (!emit_repeat_)
        {
            return;
        }

        // SEMANTIC VALIDATION follows.
        // {0,} has already become '*', so the comma needs no check here.
        if (token_._min == 0 && token_._max == 0)
        {
            std::ostringstream msg_;

            msg_ << "Cannot have exactly zero repeats preceding index " <<
                state_.index () << '.';
            throw runtime_error (msg_.str ().c_str ());
        }

        if (has_range_ && token_._max < token_._min)
        {
            std::ostringstream msg_;

            msg_ << "Max less than min preceding index " <<
                state_.index () << '.';
            throw runtime_error (msg_.str ().c_str ());
        }

        token_.set (num_token::REPEATN, null_token);
    }

    // Parses a macro reference; the opening '{' has already been consumed.
    //
    // The name must start with '_' or an ASCII letter and may continue
    // with '_', '-', ASCII letters and digits, and must be closed by '}'.
    // On success token_ becomes a MACRO token and the name is copied,
    // zero terminated, into token_._macro. Throws runtime_error for an
    // invalid first character, a missing '}', premature end of input or
    // a name longer than max_macro_len.
    static void macro (state &state_, num_token &token_)
    {
        const CharT *name_ = state_._curr;
        CharT curr_ = 0;

        state_.next (curr_);

        const bool valid_first_ = curr_ == '_' ||
            (curr_ >= 'A' && curr_ <= 'Z') ||
            (curr_ >= 'a' && curr_ <= 'z');

        if (!valid_first_)
        {
            std::ostringstream msg_;

            msg_ << "Invalid MACRO name at index " <<
                state_.index () - 1 << '.';
            throw runtime_error (msg_.str ().c_str ());
        }

        // Consume the rest of the name, stopping at the first character
        // that cannot be part of it.
        for (;;)
        {
            if (state_.next (curr_))
            {
                throw runtime_error ("Unexpected end of regex "
                    "(missing '}').");
            }

            const bool name_char_ = curr_ == '_' || curr_ == '-' ||
                (curr_ >= 'A' && curr_ <= 'Z') ||
                (curr_ >= 'a' && curr_ <= 'z') ||
                (curr_ >= '0' && curr_ <= '9');

            if (!name_char_)
            {
                break;
            }
        }

        if (curr_ != '}')
        {
            std::ostringstream msg_;

            msg_ << "Missing '}' at index " << state_.index () - 1 << '.';
            throw runtime_error (msg_.str ().c_str ());
        }

        // state_._curr is one past the '}', hence the extra - 1.
        std::size_t name_len_ = state_._curr - 1 - name_;

        if (name_len_ > max_macro_len)
        {
            // The stream is only used to narrow CharT for the message.
            std::basic_stringstream<CharT> narrower_;
            std::ostringstream msg_;

            msg_ << "MACRO name '";

            for (; name_len_; --name_len_, ++name_)
            {
                msg_ << narrower_.narrow (*name_, ' ');
            }

            msg_ << "' too long.";
            throw runtime_error (msg_.str ());
        }

        token_.set (num_token::MACRO, null_token);

        // Some systems have memcpy in namespace std.
        using namespace std;

        memcpy (token_._macro, name_, name_len_ * sizeof (CharT));
        token_._macro[name_len_] = 0;
    }
};
}
}
}

#endif
Commit	Line	Data
7c673cae FG	1	// tokeniser.hpp
	2	// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
	3	//
	4	// Distributed under the Boost Software License, Version 1.0. (See accompanying
	5	// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	6	#ifndef BOOST_LEXER_RE_TOKENISER_HPP
	7	#define BOOST_LEXER_RE_TOKENISER_HPP
	8
	9	// memcpy()
	10	#include <cstring>
	11	#include <map>
	12	#include "num_token.hpp"
	13	#include "../../runtime_error.hpp"
	14	#include "../../size_t.hpp"
	15	#include <sstream>
	16	#include "../../string_token.hpp"
	17	#include "re_tokeniser_helper.hpp"
	18
	19	namespace boost
	20	{
	21	namespace lexer
	22	{
	23	namespace detail
	24	{
	25	template<typename CharT>
	26	class basic_re_tokeniser
	27	{
	28	public:
	29	typedef basic_num_token<CharT> num_token;
	30	typedef basic_re_tokeniser_state<CharT> state;
	31	typedef basic_string_token<CharT> string_token;
	32	typedef typename string_token::string string;
	33	typedef std::map<string_token, std::size_t> token_map;
	34	typedef std::pair<string_token, std::size_t> token_pair;
	35
	36	static void next (state &state_, token_map &map_, num_token &token_)
	37	{
	38	CharT ch_ = 0;
	39	bool eos_ = state_.next (ch_);
	40
	41	token_.min_max (0, false, 0);
	42
	43	while (!eos_ && ch_ == '"')
	44	{
	45	state_._in_string ^= 1;
	46	eos_ = state_.next (ch_);
	47	}
	48
	49	if (eos_)
	50	{
	51	if (state_._in_string)
	52	{
	53	throw runtime_error ("Unexpected end of regex "
	54	"(missing '\"').");
	55	}
	56
	57	if (state_._paren_count)
	58	{
	59	throw runtime_error ("Unexpected end of regex "
	60	"(missing ')').");
	61	}
	62
	63	token_.set (num_token::END, null_token);
	64	}
65	else
66	{
67	if (ch_ == '\\')
68	{
69	// Even if we are in a string, respect escape sequences...
70	escape (state_, map_, token_);
71	}
72	else if (state_._in_string)
73	{
74	// All other meta characters lose their special meaning
75	// inside a string.
76	create_charset_token (string (1, ch_), false, map_, token_);
77	}
78	else
79	{
80	// Not an escape sequence and not inside a string, so
81	// check for meta characters.
82	switch (ch_)
83	{
84	case '(':
85	token_.set (num_token::OPENPAREN, null_token);
86	++state_._paren_count;
87	read_options (state_);
88	break;
89	case ')':
90	--state_._paren_count;
91
92	if (state_._paren_count < 0)
93	{
94	std::ostringstream ss_;
95
96	ss_ << "Number of open parenthesis < 0 at index " <<
97	state_.index () - 1 << '.';
98	throw runtime_error (ss_.str ().c_str ());
99	}
100
101	token_.set (num_token::CLOSEPAREN, null_token);
102
103	if (!state_._flags_stack.empty ())
104	{
105	state_._flags = state_._flags_stack.top ();
106	state_._flags_stack.pop ();
107	}
108	break;
109	case '?':
110	if (!state_.eos () && *state_._curr == '?')
111	{
112	token_.set (num_token::AOPT, null_token);
113	state_.increment ();
114	}
115	else
116	{
117	token_.set (num_token::OPT, null_token);
118	}
119
120	break;
121	case '*':
122	if (!state_.eos () && *state_._curr == '?')
123	{
124	token_.set (num_token::AZEROORMORE, null_token);
125	state_.increment ();
126	}
127	else
128	{
129	token_.set (num_token::ZEROORMORE, null_token);
130	}
131
132	break;
133	case '+':
134	if (!state_.eos () && *state_._curr == '?')
135	{
136	token_.set (num_token::AONEORMORE, null_token);
137	state_.increment ();
138	}
139	else
140	{
141	token_.set (num_token::ONEORMORE, null_token);
142	}
143
144	break;
145	case '{':
146	open_curly (state_, token_);
147	break;
	148	case '|':
149	token_.set (num_token::OR, null_token);
150	break;
151	case '^':
152	if (state_._curr - 1 == state_._start)
153	{
154	token_.set (num_token::CHARSET, bol_token);
155	state_._seen_BOL_assertion = true;
156	}
157	else
158	{
159	create_charset_token (string (1, ch_), false,
160	map_, token_);
161	}
162
163	break;
164	case '$':
165	if (state_._curr == state_._end)
166	{
167	token_.set (num_token::CHARSET, eol_token);
168	state_._seen_EOL_assertion = true;
169	}
170	else
171	{
172	create_charset_token (string (1, ch_), false,
173	map_, token_);
174	}
175
176	break;
177	case '.':
178	{
179	string dot_;
180
181	if (state_._flags & dot_not_newline)
182	{
183	dot_ = '\n';
184	}
185
186	create_charset_token (dot_, true, map_, token_);
187	break;
188	}
189	case '[':
190	{
191	charset (state_, map_, token_);
192	break;
193	}
194	case '/':
195	throw runtime_error("Lookahead ('/') is not supported yet.");
196	break;
197	default:
198	if ((state_._flags & icase) &&
	199	(std::isupper (ch_, state_._locale) ||
200	std::islower (ch_, state_._locale)))
201	{
202	CharT upper_ = std::toupper (ch_, state_._locale);
203	CharT lower_ = std::tolower (ch_, state_._locale);
204
205	string str_ (1, upper_);
206
207	str_ += lower_;
208	create_charset_token (str_, false, map_, token_);
209	}
210	else
211	{
212	create_charset_token (string (1, ch_), false,
213	map_, token_);
214	}
215
216	break;
217	}
218	}
219	}
220	}
221
222	private:
223	typedef basic_re_tokeniser_helper<CharT> tokeniser_helper;
224
225	static void read_options (state &state_)
226	{
227	if (!state_.eos () && *state_._curr == '?')
228	{
229	CharT ch_ = 0;
230	bool eos_ = false;
231	bool negate_ = false;
232
233	state_.increment ();
234	eos_ = state_.next (ch_);
235	state_._flags_stack.push (state_._flags);
236
237	while (!eos_ && ch_ != ':')
238	{
239	switch (ch_)
240	{
241	case '-':
242	negate_ ^= 1;
243	break;
244	case 'i':
245	if (negate_)
246	{
247	state_._flags = static_cast<regex_flags>
248	(state_._flags & ~icase);
249	}
250	else
251	{
252	state_._flags = static_cast<regex_flags>
	253	(state_._flags | icase);
254	}
255
256	negate_ = false;
257	break;
258	case 's':
259	if (negate_)
260	{
261	state_._flags = static_cast<regex_flags>
	262	(state_._flags | dot_not_newline);
263	}
264	else
265	{
266	state_._flags = static_cast<regex_flags>
267	(state_._flags & ~dot_not_newline);
268	}
269
270	negate_ = false;
271	break;
272	default:
273	{
274	std::ostringstream ss_;
275
276	ss_ << "Unknown option at index " <<
277	state_.index () - 1 << '.';
278	throw runtime_error (ss_.str ().c_str ());
279	}
280	}
281
282	eos_ = state_.next (ch_);
283	}
284
285	// End of string handler will handle early termination
286	}
287	else if (!state_._flags_stack.empty ())
288	{
289	state_._flags_stack.push (state_._flags);
290	}
291	}
292
293	static void escape (state &state_, token_map &map_, num_token &token_)
294	{
295	CharT ch_ = 0;
296	std::size_t str_len_ = 0;
297	const CharT *str_ = tokeniser_helper::escape_sequence (state_,
298	ch_, str_len_);
299
300	if (str_)
301	{
302	state state2_ (str_ + 1, str_ + str_len_, state_._flags,
303	state_._locale);
304
305	charset (state2_, map_, token_);
306	}
307	else
308	{
309	create_charset_token (string (1, ch_), false, map_, token_);
310	}
311	}
312
313	static void charset (state &state_, token_map &map_, num_token &token_)
314	{
315	string chars_;
316	bool negated_ = false;
317
318	tokeniser_helper::charset (state_, chars_, negated_);
319	create_charset_token (chars_, negated_, map_, token_);
320	}
321
322	static void create_charset_token (const string &charset_,
323	const bool negated_, token_map &map_, num_token &token_)
324	{
325	std::size_t id_ = null_token;
326	string_token stok_ (negated_, charset_);
327
328	stok_.remove_duplicates ();
329	stok_.normalise ();
330
331	typename token_map::const_iterator iter_ = map_.find (stok_);
332
333	if (iter_ == map_.end ())
334	{
335	id_ = map_.size ();
336	map_.insert (token_pair (stok_, id_));
337	}
338	else
339	{
340	id_ = iter_->second;
341	}
342
343	token_.set (num_token::CHARSET, id_);
344	}
345
346	static void open_curly (state &state_, num_token &token_)
347	{
348	if (state_.eos ())
349	{
350	throw runtime_error ("Unexpected end of regex "
351	"(missing '}').");
352	}
	353	else if (*state_._curr >= '0' && *state_._curr <= '9')
354	{
355	repeat_n (state_, token_);
356
357	if (!state_.eos () && *state_._curr == '?')
358	{
359	token_._type = num_token::AREPEATN;
360	state_.increment ();
361	}
362	}
363	else
364	{
365	macro (state_, token_);
366	}
367	}
368
369	// SYNTAX:
370	// {n[,[n]]}
371	// SEMANTIC RULES:
372	// {0} - INVALID (throw exception)
373	// {0,} = *
374	// {0,0} - INVALID (throw exception)
375	// {0,1} = ?
376	// {1,} = +
377	// {min,max} where min == max - {min}
378	// {min,max} where max < min - INVALID (throw exception)
379	static void repeat_n (state &state_, num_token &token_)
380	{
381	CharT ch_ = 0;
382	bool eos_ = state_.next (ch_);
383
384	while (!eos_ && ch_ >= '0' && ch_ <= '9')
385	{
386	token_._min *= 10;
387	token_._min += ch_ - '0';
388	eos_ = state_.next (ch_);
389	}
390
391	if (eos_)
392	{
393	throw runtime_error ("Unexpected end of regex "
394	"(missing '}').");
395	}
396
397	bool min_max_ = false;
398	bool repeatn_ = true;
399
400	token_._comma = ch_ == ',';
401
402	if (token_._comma)
403	{
404	eos_ = state_.next (ch_);
405
406	if (eos_)
407	{
408	throw runtime_error ("Unexpected end of regex "
409	"(missing '}').");
410	}
411
412	if (ch_ == '}')
413	{
414	// Small optimisation: Check for '*' equivalency.
415	if (token_._min == 0)
416	{
417	token_.set (num_token::ZEROORMORE, null_token);
418	repeatn_ = false;
419	}
420	// Small optimisation: Check for '+' equivalency.
421	else if (token_._min == 1)
422	{
423	token_.set (num_token::ONEORMORE, null_token);
424	repeatn_ = false;
425	}
426	}
427	else
428	{
	429	if (ch_ < '0' || ch_ > '9')
430	{
431	std::ostringstream ss_;
432
433	ss_ << "Missing '}' at index " <<
434	state_.index () - 1 << '.';
435	throw runtime_error (ss_.str ().c_str ());
436	}
437
438	min_max_ = true;
439
440	do
441	{
442	token_._max *= 10;
443	token_._max += ch_ - '0';
444	eos_ = state_.next (ch_);
445	} while (!eos_ && ch_ >= '0' && ch_ <= '9');
446
447	if (eos_)
448	{
449	throw runtime_error ("Unexpected end of regex "
450	"(missing '}').");
451	}
452
453	// Small optimisation: Check for '?' equivalency.
454	if (token_._min == 0 && token_._max == 1)
455	{
456	token_.set (num_token::OPT, null_token);
457	repeatn_ = false;
458	}
459	// Small optimisation: if min == max, then min.
460	else if (token_._min == token_._max)
461	{
462	token_._comma = false;
463	min_max_ = false;
464	token_._max = 0;
465	}
466	}
467	}
468
469	if (ch_ != '}')
470	{
471	std::ostringstream ss_;
472
473	ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
474	throw runtime_error (ss_.str ().c_str ());
475	}
476
477	if (repeatn_)
478	{
479	// SEMANTIC VALIDATION follows:
480	// NOTE: {0,} has already become *
481	// therefore we don't check for a comma.
482	if (token_._min == 0 && token_._max == 0)
483	{
484	std::ostringstream ss_;
485
486	ss_ << "Cannot have exactly zero repeats preceding index " <<
487	state_.index () << '.';
488	throw runtime_error (ss_.str ().c_str ());
489	}
490
491	if (min_max_ && token_._max < token_._min)
492	{
493	std::ostringstream ss_;
494
495	ss_ << "Max less than min preceding index " <<
496	state_.index () << '.';
497	throw runtime_error (ss_.str ().c_str ());
498	}
499
500	token_.set (num_token::REPEATN, null_token);
501	}
502	}
503
504	static void macro (state &state_, num_token &token_)
505	{
506	CharT ch_ = 0;
507	bool eos_ = false;
508	const CharT *start_ = state_._curr;
509
510	state_.next (ch_);
511
512	if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') &&
513	!(ch_ >= 'a' && ch_ <= 'z'))
514	{
515	std::ostringstream ss_;
516
517	ss_ << "Invalid MACRO name at index " <<
518	state_.index () - 1 << '.';
519	throw runtime_error (ss_.str ().c_str ());
520	}
521
522	do
523	{
524	eos_ = state_.next (ch_);
525
526	if (eos_)
527	{
528	throw runtime_error ("Unexpected end of regex "
529	"(missing '}').");
530	}
	531	} while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') ||
	532	(ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9'));
533
534	if (ch_ != '}')
535	{
536	std::ostringstream ss_;
537
538	ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
539	throw runtime_error (ss_.str ().c_str ());
540	}
541
542	std::size_t len_ = state_._curr - 1 - start_;
543
544	if (len_ > max_macro_len)
545	{
546	std::basic_stringstream<CharT> ss_;
547	std::ostringstream os_;
548
549	os_ << "MACRO name '";
550
551	while (len_)
552	{
553	os_ << ss_.narrow (*start_++, ' ');
554	--len_;
555	}
556
557	os_ << "' too long.";
558	throw runtime_error (os_.str ());
559	}
560
561	token_.set (num_token::MACRO, null_token);
562
563	// Some systems have memcpy in namespace std.
564	using namespace std;
565
566	memcpy (token_._macro, start_, len_ * sizeof (CharT));
567	token_._macro[len_] = 0;
568	}
569	};
570	}
571	}
572	}
573
574	#endif