]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright 2004-9 Trustees of Indiana University |
2 | ||
3 | // Distributed under the Boost Software License, Version 1.0. | |
4 | // (See accompanying file LICENSE_1_0.txt or copy at | |
5 | // http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | // | |
92f5a8d4 | 8 | // read_graphviz_new.cpp - |
7c673cae FG |
9 | // Initialize a model of the BGL's MutableGraph concept and an associated |
10 | // collection of property maps using a graph expressed in the GraphViz | |
92f5a8d4 | 11 | // DOT Language. |
7c673cae FG |
12 | // |
13 | // Based on the grammar found at: | |
92f5a8d4 | 14 | // https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html |
7c673cae FG |
15 | // |
16 | // Jeremiah rewrite used grammar found at: | |
17 | // http://www.graphviz.org/doc/info/lang.html | |
18 | // and page 34 or http://www.graphviz.org/pdf/dotguide.pdf | |
19 | // | |
20 | // See documentation for this code at: | |
21 | // http://www.boost.org/libs/graph/doc/read_graphviz.html | |
22 | // | |
23 | ||
24 | // Author: Jeremiah Willcock | |
25 | // Ronald Garcia | |
26 | // | |
27 | ||
28 | #define BOOST_GRAPH_SOURCE | |
29 | #include <boost/assert.hpp> | |
30 | #include <boost/ref.hpp> | |
31 | #include <boost/function/function2.hpp> | |
32 | #include <boost/property_map/dynamic_property_map.hpp> | |
33 | #include <boost/graph/graph_traits.hpp> | |
34 | #include <boost/detail/workaround.hpp> | |
35 | #include <boost/algorithm/string/case_conv.hpp> | |
36 | #include <algorithm> | |
37 | #include <exception> // for std::exception | |
38 | #include <string> | |
39 | #include <vector> | |
40 | #include <set> | |
41 | #include <utility> | |
42 | #include <map> | |
43 | #include <iostream> | |
44 | #include <cstdlib> | |
45 | #include <boost/throw_exception.hpp> | |
46 | #include <boost/regex.hpp> | |
47 | #include <boost/function.hpp> | |
48 | #include <boost/bind.hpp> | |
49 | #include <boost/graph/dll_import_export.hpp> | |
50 | #include <boost/graph/graphviz.hpp> | |
51 | ||
52 | namespace boost { | |
53 | ||
54 | namespace read_graphviz_detail { | |
55 | struct token { | |
56 | enum token_type { | |
57 | kw_strict, | |
58 | kw_graph, | |
59 | kw_digraph, | |
60 | kw_node, | |
61 | kw_edge, | |
62 | kw_subgraph, | |
63 | left_brace, | |
64 | right_brace, | |
65 | semicolon, | |
66 | equal, | |
67 | left_bracket, | |
68 | right_bracket, | |
69 | comma, | |
70 | colon, | |
71 | dash_greater, | |
72 | dash_dash, | |
73 | plus, | |
74 | left_paren, | |
75 | right_paren, | |
76 | at, | |
77 | identifier, | |
78 | quoted_string, // Only used internally in tokenizer | |
79 | eof, | |
80 | invalid | |
81 | }; | |
82 | token_type type; | |
83 | std::string normalized_value; // May have double-quotes removed and/or some escapes replaced | |
84 | token(token_type type, const std::string& normalized_value) | |
85 | : type(type), normalized_value(normalized_value) {} | |
86 | token(): type(invalid), normalized_value("") {} | |
87 | friend std::ostream& operator<<(std::ostream& o, const token& t) { | |
88 | switch (t.type) { | |
89 | case token::kw_strict: o << "<strict>"; break; | |
90 | case token::kw_graph: o << "<graph>"; break; | |
91 | case token::kw_digraph: o << "<digraph>"; break; | |
92 | case token::kw_node: o << "<node>"; break; | |
93 | case token::kw_edge: o << "<edge>"; break; | |
94 | case token::kw_subgraph: o << "<subgraph>"; break; | |
95 | case token::left_brace: o << "<left_brace>"; break; | |
96 | case token::right_brace: o << "<right_brace>"; break; | |
97 | case token::semicolon: o << "<semicolon>"; break; | |
98 | case token::equal: o << "<equal>"; break; | |
99 | case token::left_bracket: o << "<left_bracket>"; break; | |
100 | case token::right_bracket: o << "<right_bracket>"; break; | |
101 | case token::comma: o << "<comma>"; break; | |
102 | case token::colon: o << "<colon>"; break; | |
103 | case token::dash_greater: o << "<dash-greater>"; break; | |
104 | case token::dash_dash: o << "<dash-dash>"; break; | |
105 | case token::plus: o << "<plus>"; break; | |
106 | case token::left_paren: o << "<left_paren>"; break; | |
107 | case token::right_paren: o << "<right_paren>"; break; | |
108 | case token::at: o << "<at>"; break; | |
109 | case token::identifier: o << "<identifier>"; break; | |
110 | case token::quoted_string: o << "<quoted_string>"; break; | |
111 | case token::eof: o << "<eof>"; break; | |
112 | default: o << "<invalid type>"; break; | |
113 | } | |
114 | o << " '" << t.normalized_value << "'"; | |
115 | return o; | |
116 | } | |
117 | }; | |
118 | ||
119 | bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char) { | |
120 | if (bad_char == '\0') { | |
121 | return bad_graphviz_syntax(errmsg + " (at end of input)"); | |
122 | } else { | |
123 | return bad_graphviz_syntax(errmsg + " (char is '" + bad_char + "')"); | |
124 | } | |
125 | } | |
126 | ||
127 | bad_graphviz_syntax parse_error(const std::string& errmsg, const token& bad_token) { | |
128 | return bad_graphviz_syntax(errmsg + " (token is \"" + boost::lexical_cast<std::string>(bad_token) + "\")"); | |
129 | } | |
130 | ||
131 | struct tokenizer { | |
132 | std::string::const_iterator begin, end; | |
133 | std::vector<token> lookahead; | |
134 | // Precomputed regexes | |
135 | boost::regex stuff_to_skip; | |
136 | boost::regex basic_id_token; | |
137 | boost::regex punctuation_token; | |
138 | boost::regex number_token; | |
139 | boost::regex quoted_string_token; | |
140 | boost::regex xml_tag_token; | |
141 | boost::regex cdata; | |
142 | ||
143 | tokenizer(const std::string& str) : begin(str.begin()), end(str.end()) | |
144 | { | |
145 | std::string end_of_token = "(?=(?:\\W))"; | |
146 | std::string whitespace = "(?:\\s+)"; | |
147 | std::string slash_slash_comment = "(?://.*?$)"; | |
148 | std::string slash_star_comment = "(?:/\\*.*?\\*/)"; | |
149 | std::string hash_comment = "(?:^#.*?$)"; | |
150 | std::string backslash_newline = "(?:[\\\\][\\n])"; | |
151 | stuff_to_skip = "\\A(?:" + whitespace + "|" + | |
152 | slash_slash_comment + "|" + | |
153 | slash_star_comment + "|" + | |
154 | hash_comment + "|" + | |
155 | backslash_newline + ")*"; | |
156 | basic_id_token = "\\A([[:alpha:]_](?:\\w*))"; | |
157 | punctuation_token = "\\A([][{};=,:+()@]|[-][>-])"; | |
158 | number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))"; | |
159 | quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")"; | |
160 | xml_tag_token = "\\A<(/?)(?:[^!?'\"]|(?:'[^']*?')|(?:\"[^\"]*?\"))*?(/?)>"; | |
161 | cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E"; | |
162 | } | |
163 | ||
164 | void skip() { | |
165 | boost::match_results<std::string::const_iterator> results; | |
166 | #ifndef NDEBUG | |
167 | bool found = | |
168 | #endif | |
169 | boost::regex_search(begin, end, results, stuff_to_skip); | |
170 | #ifndef NDEBUG | |
171 | BOOST_ASSERT (found); | |
172 | #endif | |
173 | boost::sub_match<std::string::const_iterator> sm1 = results.suffix(); | |
174 | BOOST_ASSERT (sm1.second == end); | |
175 | begin = sm1.first; | |
176 | } | |
177 | ||
178 | token get_token_raw() { | |
179 | if (!lookahead.empty()) { | |
180 | token t = lookahead.front(); | |
181 | lookahead.erase(lookahead.begin()); | |
182 | return t; | |
183 | } | |
184 | skip(); | |
185 | if (begin == end) return token(token::eof, ""); | |
186 | // Look for keywords first | |
187 | bool found; | |
188 | boost::match_results<std::string::const_iterator> results; | |
189 | found = boost::regex_search(begin, end, results, basic_id_token); | |
190 | if (found) { | |
191 | std::string str = results[1].str(); | |
192 | std::string str_lower = boost::algorithm::to_lower_copy(str); | |
193 | begin = results.suffix().first; | |
194 | if (str_lower == "strict") { | |
195 | return token(token::kw_strict, str); | |
196 | } else if (str_lower == "graph") { | |
197 | return token(token::kw_graph, str); | |
198 | } else if (str_lower == "digraph") { | |
199 | return token(token::kw_digraph, str); | |
200 | } else if (str_lower == "node") { | |
201 | return token(token::kw_node, str); | |
202 | } else if (str_lower == "edge") { | |
203 | return token(token::kw_edge, str); | |
204 | } else if (str_lower == "subgraph") { | |
205 | return token(token::kw_subgraph, str); | |
206 | } else { | |
207 | return token(token::identifier, str); | |
208 | } | |
209 | } | |
210 | found = boost::regex_search(begin, end, results, punctuation_token); | |
211 | if (found) { | |
212 | std::string str = results[1].str(); | |
213 | begin = results.suffix().first; | |
214 | switch (str[0]) { | |
215 | case '[': return token(token::left_bracket, str); | |
216 | case ']': return token(token::right_bracket, str); | |
217 | case '{': return token(token::left_brace, str); | |
218 | case '}': return token(token::right_brace, str); | |
219 | case ';': return token(token::semicolon, str); | |
220 | case '=': return token(token::equal, str); | |
221 | case ',': return token(token::comma, str); | |
222 | case ':': return token(token::colon, str); | |
223 | case '+': return token(token::plus, str); | |
224 | case '(': return token(token::left_paren, str); | |
225 | case ')': return token(token::right_paren, str); | |
226 | case '@': return token(token::at, str); | |
227 | case '-': { | |
228 | switch (str[1]) { | |
229 | case '-': return token(token::dash_dash, str); | |
230 | case '>': return token(token::dash_greater, str); | |
231 | default: BOOST_ASSERT (!"Definition of punctuation_token does not match switch statement"); | |
232 | } | |
233 | } | |
234 | default: BOOST_ASSERT (!"Definition of punctuation_token does not match switch statement"); | |
235 | } | |
236 | } | |
237 | found = boost::regex_search(begin, end, results, number_token); | |
238 | if (found) { | |
239 | std::string str = results[1].str(); | |
240 | begin = results.suffix().first; | |
241 | return token(token::identifier, str); | |
242 | } | |
243 | found = boost::regex_search(begin, end, results, quoted_string_token); | |
244 | if (found) { | |
245 | std::string str = results[1].str(); | |
246 | begin = results.suffix().first; | |
247 | // Remove the beginning and ending quotes | |
248 | BOOST_ASSERT (str.size() >= 2); | |
249 | str.erase(str.begin()); | |
250 | str.erase(str.end() - 1); | |
251 | // Unescape quotes in the middle, but nothing else (see format spec) | |
252 | for (size_t i = 0; i + 1 < str.size() /* May change */; ++i) { | |
253 | if (str[i] == '\\' && str[i + 1] == '"') { | |
254 | str.erase(str.begin() + i); | |
255 | // Don't need to adjust i | |
256 | } else if (str[i] == '\\' && str[i + 1] == '\n') { | |
257 | str.erase(str.begin() + i); | |
258 | str.erase(str.begin() + i); | |
259 | --i; // Invert ++ that will be applied | |
260 | } | |
261 | } | |
262 | return token(token::quoted_string, str); | |
263 | } | |
264 | if (*begin == '<') { | |
265 | std::string::const_iterator saved_begin = begin; | |
266 | int counter = 0; | |
267 | do { | |
268 | if (begin == end) throw_lex_error("Unclosed HTML string"); | |
269 | if (*begin != '<') { | |
270 | ++begin; | |
271 | continue; | |
272 | } | |
273 | found = boost::regex_search(begin, end, results, xml_tag_token); | |
274 | if (found) { | |
275 | begin = results.suffix().first; | |
276 | if (results[1].str() == "/") { // Close tag | |
277 | --counter; | |
278 | } else if (results[2].str() == "/") { // Empty tag | |
279 | } else { // Open tag | |
280 | ++counter; | |
281 | } | |
282 | continue; | |
283 | } | |
284 | found = boost::regex_search(begin, end, results, cdata); | |
285 | if (found) { | |
286 | begin = results.suffix().first; | |
287 | continue; | |
288 | } | |
289 | throw_lex_error("Invalid contents in HTML string"); | |
290 | } while (counter > 0); | |
291 | return token(token::identifier, std::string(saved_begin, begin)); | |
292 | } else { | |
293 | throw_lex_error("Invalid character"); | |
294 | return token(); | |
295 | } | |
296 | } | |
297 | ||
298 | token peek_token_raw() { | |
299 | if (lookahead.empty()) { | |
300 | token t = get_token_raw(); | |
301 | lookahead.push_back(t); | |
302 | } | |
303 | return lookahead.front(); | |
304 | } | |
305 | ||
306 | token get_token() { // Handle string concatenation | |
307 | token t = get_token_raw(); | |
308 | if (t.type != token::quoted_string) return t; | |
309 | std::string str = t.normalized_value; | |
310 | while (peek_token_raw().type == token::plus) { | |
311 | get_token_raw(); | |
312 | token t2 = get_token_raw(); | |
313 | if (t2.type != token::quoted_string) { | |
314 | throw_lex_error("Must have quoted string after string concatenation"); | |
315 | } | |
316 | str += t2.normalized_value; | |
317 | } | |
318 | return token(token::identifier, str); // Note that quoted_string does not get passed to the parser | |
319 | } | |
320 | ||
321 | void throw_lex_error(const std::string& errmsg) { | |
322 | boost::throw_exception(lex_error(errmsg, (begin == end ? '\0' : *begin))); | |
323 | } | |
324 | }; | |
325 | ||
326 | struct edge_endpoint { | |
327 | bool is_subgraph; | |
328 | node_and_port node_ep; | |
329 | subgraph_name subgraph_ep; | |
330 | ||
331 | static edge_endpoint node(const node_and_port& ep) { | |
332 | edge_endpoint r; | |
333 | r.is_subgraph = false; | |
334 | r.node_ep = ep; | |
335 | return r; | |
336 | } | |
337 | ||
338 | static edge_endpoint subgraph(const subgraph_name& ep) { | |
339 | edge_endpoint r; | |
340 | r.is_subgraph = true; | |
341 | r.subgraph_ep = ep; | |
342 | return r; | |
343 | } | |
344 | }; | |
345 | ||
346 | struct node_or_subgraph_ref { | |
347 | bool is_subgraph; | |
348 | std::string name; // Name for subgraphs or nodes, "___root___" for root graph | |
349 | }; | |
350 | ||
351 | static node_or_subgraph_ref noderef(const node_name& n) { | |
352 | node_or_subgraph_ref r; | |
353 | r.is_subgraph = false; | |
354 | r.name = n; | |
355 | return r; | |
356 | } | |
357 | ||
358 | static node_or_subgraph_ref subgraphref(const subgraph_name& n) { | |
359 | node_or_subgraph_ref r; | |
360 | r.is_subgraph = true; | |
361 | r.name = n; | |
362 | return r; | |
363 | } | |
364 | ||
365 | typedef std::vector<node_or_subgraph_ref> subgraph_member_list; | |
366 | ||
367 | struct subgraph_info { | |
368 | properties def_node_props; | |
369 | properties def_edge_props; | |
370 | subgraph_member_list members; | |
371 | }; | |
372 | ||
373 | struct parser { | |
374 | tokenizer the_tokenizer; | |
375 | std::vector<token> lookahead; | |
376 | parser_result& r; | |
377 | std::map<subgraph_name, subgraph_info> subgraphs; | |
378 | std::string current_subgraph_name; | |
379 | int sgcounter; // Counter for anonymous subgraphs | |
380 | std::set<std::pair<node_name, node_name> > existing_edges; // Used for checking in strict graphs | |
381 | ||
382 | subgraph_info& current() {return subgraphs[current_subgraph_name];} | |
383 | properties& current_graph_props() {return r.graph_props[current_subgraph_name];} | |
384 | subgraph_member_list& current_members() {return current().members;} | |
385 | ||
386 | parser(const std::string& gr, parser_result& result) | |
387 | : the_tokenizer(gr), lookahead(), r(result), sgcounter(0) { | |
388 | current_subgraph_name = "___root___"; | |
389 | current() = subgraph_info(); // Initialize root graph | |
390 | current_graph_props().clear(); | |
391 | current_members().clear(); | |
392 | } | |
393 | ||
394 | token get() { | |
395 | if (lookahead.empty()) { | |
396 | token t = the_tokenizer.get_token(); | |
397 | return t; | |
398 | } else { | |
399 | token t = lookahead.front(); | |
400 | lookahead.erase(lookahead.begin()); | |
401 | return t; | |
402 | } | |
403 | } | |
404 | ||
405 | token peek() { | |
406 | if (lookahead.empty()) { | |
407 | lookahead.push_back(the_tokenizer.get_token()); | |
408 | } | |
409 | return lookahead.front(); | |
410 | } | |
411 | ||
412 | void error(const std::string& str) { | |
413 | boost::throw_exception(parse_error(str, peek())); | |
414 | } | |
415 | ||
416 | void parse_graph(bool want_directed) { | |
417 | bool is_strict = false; | |
418 | bool is_directed = false; | |
419 | std::string name; | |
420 | if (peek().type == token::kw_strict) {get(); is_strict = true;} | |
421 | switch (peek().type) { | |
422 | case token::kw_graph: is_directed = false; break; | |
423 | case token::kw_digraph: is_directed = true; break; | |
424 | default: error("Wanted \"graph\" or \"digraph\""); | |
425 | } | |
426 | r.graph_is_directed = is_directed; // Used to check edges | |
427 | r.graph_is_strict = is_strict; | |
428 | if (want_directed != r.graph_is_directed) { | |
429 | if (want_directed) { | |
430 | boost::throw_exception(boost::undirected_graph_error()); | |
431 | } else { | |
432 | boost::throw_exception(boost::directed_graph_error()); | |
433 | } | |
434 | } | |
435 | get(); | |
436 | switch (peek().type) { | |
437 | case token::identifier: name = peek().normalized_value; get(); break; | |
438 | case token::left_brace: break; | |
439 | default: error("Wanted a graph name or left brace"); | |
440 | } | |
441 | if (peek().type == token::left_brace) get(); else error("Wanted a left brace to start the graph"); | |
442 | parse_stmt_list(); | |
443 | if (peek().type == token::right_brace) get(); else error("Wanted a right brace to end the graph"); | |
444 | if (peek().type == token::eof) {} else error("Wanted end of file"); | |
445 | } | |
446 | ||
447 | void parse_stmt_list() { | |
448 | while (true) { | |
449 | if (peek().type == token::right_brace) return; | |
450 | parse_stmt(); | |
451 | if (peek().type == token::semicolon) get(); | |
452 | } | |
453 | } | |
454 | ||
455 | void parse_stmt() { | |
456 | switch (peek().type) { | |
457 | case token::kw_node: | |
458 | case token::kw_edge: | |
459 | case token::kw_graph: parse_attr_stmt(); break; | |
460 | case token::kw_subgraph: | |
461 | case token::left_brace: | |
462 | case token::identifier: { | |
463 | token id = get(); | |
464 | if (id.type == token::identifier && peek().type == token::equal) { // Graph property | |
465 | get(); | |
466 | if (peek().type != token::identifier) error("Wanted identifier as right side of ="); | |
467 | token id2 = get(); | |
468 | current_graph_props()[id.normalized_value] = id2.normalized_value; | |
469 | } else { | |
470 | edge_endpoint ep = parse_endpoint_rest(id); | |
471 | if (peek().type == token::dash_dash || peek().type == token::dash_greater) { // Edge | |
472 | parse_edge_stmt(ep); | |
473 | } else { | |
474 | if (!ep.is_subgraph) { // Only nodes can have attribute lists | |
475 | // This node already exists because of its first mention | |
476 | // (properties set to defaults by parse_node_and_port, called | |
477 | // by parse_endpoint_rest) | |
478 | properties this_node_props; | |
479 | if (peek().type == token::left_bracket) { | |
480 | parse_attr_list(this_node_props); | |
481 | } | |
482 | for (properties::const_iterator i = this_node_props.begin(); | |
483 | i != this_node_props.end(); ++i) { | |
484 | // Override old properties with same names | |
485 | r.nodes[ep.node_ep.name][i->first] = i->second; | |
486 | } | |
487 | current_members().push_back(noderef(ep.node_ep.name)); | |
488 | } else { | |
489 | current_members().push_back(subgraphref(ep.subgraph_ep)); | |
490 | } | |
491 | } | |
492 | } | |
493 | break; | |
494 | } | |
495 | default: error("Invalid start token for statement"); | |
496 | } | |
497 | } | |
498 | ||
499 | void parse_attr_stmt() { | |
500 | switch (get().type) { | |
501 | case token::kw_graph: parse_attr_list(current_graph_props()); break; | |
502 | case token::kw_node: parse_attr_list(current().def_node_props); break; | |
503 | case token::kw_edge: parse_attr_list(current().def_edge_props); break; | |
504 | default: BOOST_ASSERT (!"Bad attr_stmt case"); | |
505 | } | |
506 | } | |
507 | ||
508 | edge_endpoint parse_endpoint() { | |
509 | switch (peek().type) { | |
510 | case token::kw_subgraph: | |
511 | case token::left_brace: | |
512 | case token::identifier: { | |
513 | token first = get(); | |
514 | return parse_endpoint_rest(first); | |
515 | } | |
516 | default: { | |
517 | error("Wanted \"subgraph\", \"{\", or identifier to start node or subgraph"); | |
518 | return edge_endpoint(); | |
519 | } | |
520 | } | |
521 | } | |
522 | ||
523 | edge_endpoint parse_endpoint_rest(const token& first_token) { | |
524 | switch (first_token.type) { | |
525 | case token::kw_subgraph: | |
526 | case token::left_brace: return edge_endpoint::subgraph(parse_subgraph(first_token)); | |
527 | default: return edge_endpoint::node(parse_node_and_port(first_token)); | |
528 | } | |
529 | } | |
530 | ||
531 | subgraph_name parse_subgraph(const token& first_token) { | |
532 | std::string name; | |
533 | bool is_anonymous = true; | |
534 | if (first_token.type == token::kw_subgraph) { | |
535 | if (peek().type == token::identifier) { | |
536 | name = get().normalized_value; | |
537 | is_anonymous = false; | |
538 | } | |
539 | } | |
540 | if (is_anonymous) { | |
541 | name = "___subgraph_" + | |
542 | boost::lexical_cast<std::string>(++sgcounter); | |
543 | } | |
544 | if (subgraphs.find(name) == subgraphs.end()) { | |
545 | subgraphs[name] = current(); // Initialize properties and defaults | |
546 | subgraphs[name].members.clear(); // Except member list | |
547 | } | |
548 | if (first_token.type == token::kw_subgraph && peek().type != token::left_brace) { | |
549 | if (is_anonymous) error("Subgraph reference needs a name"); | |
550 | return name; | |
551 | } | |
552 | subgraph_name old_sg = current_subgraph_name; | |
553 | current_subgraph_name = name; | |
554 | if (peek().type == token::left_brace) get(); else error("Wanted left brace to start subgraph"); | |
555 | parse_stmt_list(); | |
556 | if (peek().type == token::right_brace) get(); else error("Wanted right brace to end subgraph"); | |
557 | current_subgraph_name = old_sg; | |
558 | return name; | |
559 | } | |
560 | ||
561 | node_and_port parse_node_and_port(const token& name) { | |
562 | // A node ID is a node name, followed optionally by a port angle and a | |
563 | // port location (in either order); a port location is either :id, | |
564 | // :id:id, or :(id,id); the last two forms are treated as equivalent | |
565 | // although I am not sure about that. | |
566 | node_and_port id; | |
567 | id.name = name.normalized_value; | |
568 | parse_more: | |
569 | switch (peek().type) { | |
570 | case token::at: { | |
571 | get(); | |
572 | if (peek().type != token::identifier) error("Wanted identifier as port angle"); | |
573 | if (id.angle != "") error("Duplicate port angle"); | |
574 | id.angle = get().normalized_value; | |
575 | goto parse_more; | |
576 | } | |
577 | case token::colon: { | |
578 | get(); | |
579 | if (!id.location.empty()) error("Duplicate port location"); | |
580 | switch (peek().type) { | |
581 | case token::identifier: { | |
582 | id.location.push_back(get().normalized_value); | |
583 | switch (peek().type) { | |
584 | case token::colon: { | |
585 | get(); | |
586 | if (peek().type != token::identifier) error("Wanted identifier as port location"); | |
587 | id.location.push_back(get().normalized_value); | |
588 | goto parse_more; | |
589 | } | |
590 | default: goto parse_more; | |
591 | } | |
592 | } | |
593 | case token::left_paren: { | |
594 | get(); | |
595 | if (peek().type != token::identifier) error("Wanted identifier as first element of port location"); | |
596 | id.location.push_back(get().normalized_value); | |
597 | if (peek().type != token::comma) error("Wanted comma between parts of port location"); | |
598 | get(); | |
599 | if (peek().type != token::identifier) error("Wanted identifier as second element of port location"); | |
600 | id.location.push_back(get().normalized_value); | |
601 | if (peek().type != token::right_paren) error("Wanted right parenthesis to close port location"); | |
602 | get(); | |
603 | goto parse_more; | |
604 | } | |
605 | default: error("Wanted identifier or left parenthesis as start of port location"); | |
606 | } | |
607 | } | |
608 | default: break; | |
609 | } | |
610 | if (r.nodes.find(id.name) == r.nodes.end()) { // First mention | |
611 | r.nodes[id.name] = current().def_node_props; | |
612 | } | |
613 | return id; | |
614 | } | |
615 | ||
616 | void parse_edge_stmt(const edge_endpoint& lhs) { | |
617 | std::vector<edge_endpoint> nodes_in_chain(1, lhs); | |
618 | while (true) { | |
619 | bool leave_loop = true; | |
620 | switch (peek().type) { | |
621 | case token::dash_dash: { | |
622 | if (r.graph_is_directed) error("Using -- in directed graph"); | |
623 | get(); | |
624 | nodes_in_chain.push_back(parse_endpoint()); | |
625 | leave_loop = false; | |
626 | break; | |
627 | } | |
628 | case token::dash_greater: { | |
629 | if (!r.graph_is_directed) error("Using -> in undirected graph"); | |
630 | get(); | |
631 | nodes_in_chain.push_back(parse_endpoint()); | |
632 | leave_loop = false; | |
633 | break; | |
634 | } | |
635 | default: leave_loop = true; break; | |
636 | } | |
637 | if (leave_loop) break; | |
638 | } | |
639 | properties this_edge_props = current().def_edge_props; | |
640 | if (peek().type == token::left_bracket) parse_attr_list(this_edge_props); | |
641 | BOOST_ASSERT (nodes_in_chain.size() >= 2); // Should be in node parser otherwise | |
642 | for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i) { | |
643 | do_orig_edge(nodes_in_chain[i], nodes_in_chain[i + 1], this_edge_props); | |
644 | } | |
645 | } | |
646 | ||
647 | // Do an edge from the file, the edge may need to be expanded if it connects to a subgraph | |
648 | void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt, const properties& props) { | |
649 | std::set<node_and_port> sources = get_recursive_members(src); | |
650 | std::set<node_and_port> targets = get_recursive_members(tgt); | |
651 | for (std::set<node_and_port>::const_iterator i = sources.begin(); i != sources.end(); ++i) { | |
652 | for (std::set<node_and_port>::const_iterator j = targets.begin(); j != targets.end(); ++j) { | |
653 | do_edge(*i, *j, props); | |
654 | } | |
655 | } | |
656 | } | |
657 | ||
658 | // Get nodes in an edge_endpoint, recursively | |
659 | std::set<node_and_port> get_recursive_members(const edge_endpoint& orig_ep) { | |
660 | std::set<node_and_port> result; | |
661 | std::vector<edge_endpoint> worklist(1, orig_ep); | |
662 | std::set<subgraph_name> done; | |
663 | while (!worklist.empty()) { | |
664 | edge_endpoint ep = worklist.back(); | |
665 | worklist.pop_back(); | |
666 | if (ep.is_subgraph) { | |
667 | if (done.find(ep.subgraph_ep) == done.end()) { | |
668 | done.insert(ep.subgraph_ep); | |
669 | std::map<subgraph_name, subgraph_info>::const_iterator | |
670 | info_i = subgraphs.find(ep.subgraph_ep); | |
671 | if (info_i != subgraphs.end()) { | |
672 | const subgraph_member_list& members = info_i->second.members; | |
673 | for (subgraph_member_list::const_iterator i = members.begin(); | |
674 | i != members.end(); ++i) { | |
675 | node_or_subgraph_ref ref = *i; | |
676 | if (ref.is_subgraph) { | |
677 | worklist.push_back(edge_endpoint::subgraph(ref.name)); | |
678 | } else { | |
679 | node_and_port np; | |
680 | np.name = ref.name; | |
681 | worklist.push_back(edge_endpoint::node(np)); | |
682 | } | |
683 | } | |
684 | } | |
685 | } | |
686 | } else { | |
687 | result.insert(ep.node_ep); | |
688 | } | |
689 | } | |
690 | return result; | |
691 | } | |
692 | ||
693 | // Do a fixed-up edge, with only nodes as endpoints | |
694 | void do_edge(const node_and_port& src, const node_and_port& tgt, const properties& props) { | |
695 | if (r.graph_is_strict) { | |
696 | if (src.name == tgt.name) return; | |
697 | std::pair<node_name, node_name> tag(src.name, tgt.name); | |
698 | if (existing_edges.find(tag) != existing_edges.end()) { | |
699 | return; // Parallel edge | |
700 | } | |
701 | existing_edges.insert(tag); | |
702 | } | |
703 | edge_info e; | |
704 | e.source = src; | |
705 | e.target = tgt; | |
706 | e.props = props; | |
707 | r.edges.push_back(e); | |
708 | } | |
709 | ||
710 | void parse_attr_list(properties& props) { | |
711 | while (true) { | |
712 | if (peek().type == token::left_bracket) get(); else error("Wanted left bracket to start attribute list"); | |
713 | while (true) { | |
714 | switch (peek().type) { | |
715 | case token::right_bracket: break; | |
716 | case token::identifier: { | |
717 | std::string lhs = get().normalized_value; | |
718 | std::string rhs = "true"; | |
719 | if (peek().type == token::equal) { | |
720 | get(); | |
721 | if (peek().type != token::identifier) error("Wanted identifier as value of attribute"); | |
722 | rhs = get().normalized_value; | |
723 | } | |
724 | props[lhs] = rhs; | |
725 | break; | |
726 | } | |
727 | default: error("Wanted identifier as name of attribute"); | |
728 | } | |
729 | if (peek().type == token::comma || peek().type == token::semicolon) get(); | |
730 | else if(peek().type == token::right_bracket) break; | |
731 | } | |
732 | if (peek().type == token::right_bracket) get(); else error("Wanted right bracket to end attribute list"); | |
733 | if (peek().type != token::left_bracket) break; | |
734 | } | |
735 | } | |
736 | }; | |
737 | ||
738 | void parse_graphviz_from_string(const std::string& str, parser_result& result, bool want_directed) { | |
739 | parser p(str, result); | |
740 | p.parse_graph(want_directed); | |
741 | } | |
742 | ||
743 | // Some debugging stuff | |
744 | std::ostream& operator<<(std::ostream& o, const node_and_port& n) { | |
745 | o << n.name; | |
746 | for (size_t i = 0; i < n.location.size(); ++i) { | |
747 | o << ":" << n.location[i]; | |
748 | } | |
749 | if (!n.angle.empty()) o << "@" << n.angle; | |
750 | return o; | |
751 | } | |
752 | ||
753 | // Can't be operator<< because properties is just an std::map | |
754 | std::string props_to_string(const properties& props) { | |
755 | std::string result = "["; | |
756 | for (properties::const_iterator i = props.begin(); i != props.end(); ++i) { | |
757 | if (i != props.begin()) result += ", "; | |
758 | result += i->first + "=" + i->second; | |
759 | } | |
760 | result += "]"; | |
761 | return result; | |
762 | } | |
763 | ||
764 | void translate_results_to_graph(const parser_result& r, ::boost::detail::graph::mutate_graph* mg) { | |
765 | typedef boost::detail::graph::edge_t edge; | |
766 | for (std::map<node_name, properties>::const_iterator i = r.nodes.begin(); i != r.nodes.end(); ++i) { | |
767 | // std::cerr << i->first << " " << props_to_string(i->second) << std::endl; | |
768 | mg->do_add_vertex(i->first); | |
769 | for (properties::const_iterator j = i->second.begin(); j != i->second.end(); ++j) { | |
770 | mg->set_node_property(j->first, i->first, j->second); | |
771 | } | |
772 | } | |
773 | for (std::vector<edge_info>::const_iterator i = r.edges.begin(); i != r.edges.end(); ++i) { | |
774 | const edge_info& ei = *i; | |
775 | // std::cerr << ei.source << " -> " << ei.target << " " << props_to_string(ei.props) << std::endl; | |
776 | edge e = edge::new_edge(); | |
777 | mg->do_add_edge(e, ei.source.name, ei.target.name); | |
778 | for (properties::const_iterator j = ei.props.begin(); j != ei.props.end(); ++j) { | |
779 | mg->set_edge_property(j->first, e, j->second); | |
780 | } | |
781 | } | |
782 | std::map<subgraph_name, properties>::const_iterator root_graph_props_i = r.graph_props.find("___root___"); | |
783 | BOOST_ASSERT (root_graph_props_i != r.graph_props.end()); // Should not happen | |
784 | const properties& root_graph_props = root_graph_props_i->second; | |
785 | // std::cerr << "ending graph " << props_to_string(root_graph_props) << std::endl; | |
786 | for (properties::const_iterator i = root_graph_props.begin(); i != root_graph_props.end(); ++i) { | |
787 | mg->set_graph_property(i->first, i->second); | |
788 | } | |
789 | mg->finish_building_graph(); | |
790 | } | |
791 | ||
792 | } // end namespace read_graphviz_detail | |
793 | ||
794 | namespace detail { | |
795 | namespace graph { | |
796 | ||
797 | BOOST_GRAPH_DECL bool read_graphviz_new(const std::string& str, boost::detail::graph::mutate_graph* mg) { | |
798 | read_graphviz_detail::parser_result parsed_file; | |
799 | read_graphviz_detail::parse_graphviz_from_string(str, parsed_file, mg->is_directed()); | |
800 | read_graphviz_detail::translate_results_to_graph(parsed_file, mg); | |
801 | return true; | |
802 | } | |
803 | ||
804 | } // end namespace graph | |
805 | } // end namespace detail | |
806 | ||
807 | } // end namespace boost | |
808 | ||
809 | // GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23, | |
810 | // 2004)", grammar from references in read_graphviz_new.hpp): | |
811 | ||
812 | // Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or | |
813 | // subgraph can have multiple parents (sources online say that the layout | |
814 | // algorithms can't handle non-tree structures of clusters, but it seems to | |
815 | // read them the same from the file). The containment relation is required to | |
816 | // be a DAG, though; it appears that a node or subgraph can't be moved into an | |
817 | // ancestor of a subgraph where it already was (we don't enforce that but do a | |
818 | // DFS when finding members to prevent cycles). Nodes get their properties by | |
819 | // when they are first mentioned, and can only have them overridden after that | |
820 | // by explicit properties on that particular node. Closing and reopening the | |
821 | // same subgraph name adds to its members, and graph properties and node/edge | |
822 | // defaults are preserved in that subgraph. The members of a subgraph used in | |
823 | // an edge are gathered when the edge is read, even if new members are added to | |
824 | // the subgraph later. Ports are allowed in a lot more places in the grammar | |
825 | // than Dot uses. For example, declaring a node seems to ignore ports, and I | |
826 | // don't think it's possible to set properties on a particular port. Adding an | |
827 | // edge between two ports on the same node seems to make Dot unhappy (crashed | |
828 | // for me). | |
829 | ||
830 | // Test graph for GraphViz behavior on strange combinations of subgraphs and | |
831 | // such. I don't have anywhere else to put this file. | |
832 | ||
833 | #if 0 | |
834 | dIGRaph foo { | |
835 | node [color=blue] | |
836 | subgraph a -> b | |
837 | subgraph a {c} | |
838 | subgraph a -> d | |
839 | subgraph a {node [color=red]; e} | |
840 | subgraph a -> f | |
841 | subgraph a {g} -> h | |
842 | subgraph a {node [color=green]; i} -> j | |
843 | subgraph a {node [color=yellow]} | |
844 | ||
845 | subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}} | |
846 | node [color=pink] zz | |
847 | subgraph a0 {x1} | |
848 | subgraph a0 {subgraph a1 {x2}} | |
849 | ||
850 | subgraph a0 -> x3 | |
851 | subgraph a0 {subgraph a1 -> x3} | |
852 | x3 | |
853 | subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7} | |
854 | x2 [color=yyy] | |
855 | subgraph cluster_ax {foo; subgraph a0} | |
856 | subgraph a0 {foo2} | |
857 | subgraph cluster_ax {foo3} | |
858 | // subgraph a0 -> subgraph a0 | |
859 | ||
860 | bgcolor=yellow | |
861 | subgraph cluster_a2 {y1} | |
862 | // y1:n -> y1:(s,3)@se | |
863 | y1@se [color=green] | |
864 | y1@n [color=red] | |
865 | } | |
866 | #endif |