]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/quickbook/src/files.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / tools / quickbook / src / files.cpp
1 /*=============================================================================
2 Copyright (c) 2002 2004 2006 Joel de Guzman
3 Copyright (c) 2004 Eric Niebler
4 http://spirit.sourceforge.net/
5
6 Use, modification and distribution is subject to the Boost Software
7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8 http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include "files.hpp"
11 #include <boost/filesystem/fstream.hpp>
12 #include <boost/unordered_map.hpp>
13 #include <boost/range/algorithm/upper_bound.hpp>
14 #include <boost/range/algorithm/transform.hpp>
15 #include <boost/foreach.hpp>
16 #include <fstream>
17 #include <iterator>
18 #include <vector>
19
20 namespace quickbook
21 {
22 namespace
23 {
24 boost::unordered_map<fs::path, file_ptr> files;
25 }
26
// Try to consume the 'length' characters starting at 'chars' from the front
// of [begin, end).
//
// Returns true once all 'length' characters have matched; 'begin' is then
// positioned just past them and nothing is written to 'out'.
//
// Returns false if the input ends or a character differs. 'begin' has
// already been advanced past the characters that did match, so those
// characters are written to 'out' to avoid losing them. Although, given
// how UTF-8 works, if anything was consumed before the mismatch the file
// is probably broken.

template <typename InputIterator, typename OutputIterator>
bool check_bom(InputIterator& begin, InputIterator end,
        OutputIterator out, char const* chars, int length)
{
    char const* expected = chars;

    for (; begin != end && *begin == *expected; ) {
        ++begin;
        ++expected;
        if (--length == 0) return true;
    }

    // No full match: replay the characters that were skipped.
    for (char const* replay = chars; replay != expected; ++replay) {
        *out = *replay;
        ++out;
    }

    return false;
}
50
// Inspect the start of [begin, end) for a byte order mark.
//
// Returns "UTF-8", "UTF-16" or "UTF-32" when the corresponding mark is
// found (the mark is consumed from 'begin'), and "" when the input is
// empty or does not start with a recognised mark. Characters that
// check_bom consumed during a failed match are written to 'out'.

template <typename InputIterator, typename OutputIterator>
std::string read_bom(InputIterator& begin, InputIterator end,
        OutputIterator out)
{
    if (begin == end) return "";

    char const* const utf8_mark = "\xef\xbb\xbf";
    char const* const utf32be_mark = "\0\0\xfe\xff";
    char const* const utf32le_mark = "\xff\xfe\0\0";

    // The first byte alone decides which mark is worth trying.
    unsigned char const first = *begin;

    if (first == 0xEF) {
        // UTF-8
        return check_bom(begin, end, out, utf8_mark, 3) ? "UTF-8" : "";
    }

    if (first == 0xFF) {
        // UTF-16/UTF-32 little endian: both start with FF FE, UTF-32
        // continues with two zero bytes.
        if (!check_bom(begin, end, out, utf32le_mark, 2)) return "";
        return check_bom(begin, end, out, utf32le_mark + 2, 2) ?
            "UTF-32" : "UTF-16";
    }

    if (first == 0) {
        // UTF-32 big endian
        return check_bom(begin, end, out, utf32be_mark, 4) ? "UTF-32" : "";
    }

    if (first == 0xFE) {
        // UTF-16 big endian: the last two bytes of the UTF-32 BE mark.
        return check_bom(begin, end, out, utf32be_mark + 2, 2) ? "UTF-16" : "";
    }

    return "";
}
78
79 // Copy a string, converting mac and windows style newlines to unix
80 // newlines.
81
82 template <typename InputIterator, typename OutputIterator>
83 void normalize(InputIterator begin, InputIterator end,
84 OutputIterator out)
85 {
86 std::string encoding = read_bom(begin, end, out);
87
88 if(encoding != "UTF-8" && encoding != "")
89 throw load_error(encoding +
90 " is not supported. Please use UTF-8.");
91
92 while(begin != end) {
93 if(*begin == '\r') {
94 *out++ = '\n';
95 ++begin;
96 if(begin != end && *begin == '\n') ++begin;
97 }
98 else {
99 *out++ = *begin++;
100 }
101 }
102 }
103
104 file_ptr load(fs::path const& filename, unsigned qbk_version)
105 {
106 boost::unordered_map<fs::path, file_ptr>::iterator pos
107 = files.find(filename);
108
109 if (pos == files.end())
110 {
111 fs::ifstream in(filename, std::ios_base::in);
112
113 if (!in)
114 throw load_error("Could not open input file.");
115
116 // Turn off white space skipping on the stream
117 in.unsetf(std::ios::skipws);
118
119 std::string source;
120 normalize(
121 std::istream_iterator<char>(in),
122 std::istream_iterator<char>(),
123 std::back_inserter(source));
124
125 if (in.bad())
126 throw load_error("Error reading input file.");
127
128 bool inserted;
129
130 boost::tie(pos, inserted) = files.emplace(
131 filename, new file(filename, source, qbk_version));
132
133 assert(inserted);
134 }
135
136 return pos->second;
137 }
138
139 std::ostream& operator<<(std::ostream& out, file_position const& x)
140 {
141 return out << "line: " << x.line << ", column: " << x.column;
142 }
143
144 file_position relative_position(
145 boost::string_ref::const_iterator begin,
146 boost::string_ref::const_iterator iterator)
147 {
148 file_position pos;
149 boost::string_ref::const_iterator line_begin = begin;
150
151 while (begin != iterator)
152 {
153 if (*begin == '\r')
154 {
155 ++begin;
156 ++pos.line;
157 line_begin = begin;
158 }
159 else if (*begin == '\n')
160 {
161 ++begin;
162 ++pos.line;
163 line_begin = begin;
164 if (begin == iterator) break;
165 if (*begin == '\r')
166 {
167 ++begin;
168 line_begin = begin;
169 }
170 }
171 else
172 {
173 ++begin;
174 }
175 }
176
177 pos.column = iterator - line_begin + 1;
178 return pos;
179 }
180
181 file_position file::position_of(boost::string_ref::const_iterator iterator) const
182 {
183 return relative_position(source().begin(), iterator);
184 }
185
186 // Mapped files.
187
// Marks a point in a mapped file's text together with the offset in the
// original file that it corresponds to, and how positions after that
// point are to be translated (see section_types).
struct mapped_file_section
{
    enum section_types
    {
        normal,     // offsets map one to one from the section start
        empty,      // every position maps to the section's original_pos
        indented    // text was re-indented; mapping is done line by line
    };

    // Offset into the original file's source.
    std::string::size_type original_pos;
    // Offset into the mapped file's source.
    std::string::size_type our_pos;
    // How positions inside this section are translated.
    section_types section_type;

    mapped_file_section(
            std::string::size_type original_pos,
            std::string::size_type our_pos,
            section_types section_type = normal)
        : original_pos(original_pos)
        , our_pos(our_pos)
        , section_type(section_type)
    {
    }
};
207
208 struct mapped_section_original_cmp
209 {
210 bool operator()(mapped_file_section const& x,
211 mapped_file_section const& y)
212 {
213 return x.original_pos < y.original_pos;
214 }
215
216 bool operator()(mapped_file_section const& x,
217 std::string::size_type const& y)
218 {
219 return x.original_pos < y;
220 }
221
222 bool operator()(std::string::size_type const& x,
223 mapped_file_section const& y)
224 {
225 return x < y.original_pos;
226 }
227 };
228
229 struct mapped_section_pos_cmp
230 {
231 bool operator()(mapped_file_section const& x,
232 mapped_file_section const& y)
233 {
234 return x.our_pos < y.our_pos;
235 }
236
237 bool operator()(mapped_file_section const& x,
238 std::string::size_type const& y)
239 {
240 return x.our_pos < y;
241 }
242
243 bool operator()(std::string::size_type const& x,
244 mapped_file_section const& y)
245 {
246 return x < y.our_pos;
247 }
248 };
249
250 struct mapped_file : file
251 {
252 mapped_file(file_ptr original) :
253 file(*original, std::string()),
254 original(original), mapped_sections()
255 {}
256
257 file_ptr original;
258 std::vector<mapped_file_section> mapped_sections;
259
260 void add_empty_mapped_file_section(boost::string_ref::const_iterator pos) {
261 std::string::size_type original_pos =
262 pos - original->source().begin();
263
264 if (mapped_sections.empty() ||
265 mapped_sections.back().section_type !=
266 mapped_file_section::empty ||
267 mapped_sections.back().original_pos != original_pos)
268 {
269 mapped_sections.push_back(mapped_file_section(
270 original_pos, source().size(),
271 mapped_file_section::empty));
272 }
273 }
274
275 void add_mapped_file_section(boost::string_ref::const_iterator pos) {
276 mapped_sections.push_back(mapped_file_section(
277 pos - original->source().begin(), source().size()));
278 }
279
280 void add_indented_mapped_file_section(boost::string_ref::const_iterator pos)
281 {
282 mapped_sections.push_back(mapped_file_section(
283 pos - original->source().begin(), source().size(),
284 mapped_file_section::indented));
285 }
286
287 std::string::size_type to_original_pos(
288 std::vector<mapped_file_section>::const_iterator section,
289 std::string::size_type pos) const
290 {
291 switch (section->section_type) {
292 case mapped_file_section::normal:
293 return pos - section->our_pos + section->original_pos;
294
295 case mapped_file_section::empty:
296 return section->original_pos;
297
298 case mapped_file_section::indented: {
299 // Will contain the start of the current line.
300 boost::string_ref::size_type our_line = section->our_pos;
301
302 // Will contain the number of lines in the block before
303 // the current line.
304 unsigned newline_count = 0;
305
306 for(boost::string_ref::size_type i = section->our_pos;
307 i != pos; ++i)
308 {
309 if (source()[i] == '\n') {
310 our_line = i + 1;
311 ++newline_count;
312 }
313 }
314
315 // The start of the line in the original source.
316 boost::string_ref::size_type original_line =
317 section->original_pos;
318
319 while(newline_count > 0) {
320 if (original->source()[original_line] == '\n')
321 --newline_count;
322 ++original_line;
323 }
324
325 // The start of line content (i.e. after indentation).
326 our_line = skip_indentation(source(), our_line);
327
328 // The position is in the middle of indentation, so
329 // just return the start of the whitespace, which should
330 // be good enough.
331 if (our_line > pos) return original_line;
332
333 original_line =
334 skip_indentation(original->source(), original_line);
335
336 // Confirm that we are actually in the same position.
337 assert(original->source()[original_line] ==
338 source()[our_line]);
339
340 // Calculate the position
341 return original_line + (pos - our_line);
342 }
343 default:
344 assert(false);
345 return section->original_pos;
346 }
347 }
348
349 std::vector<mapped_file_section>::const_iterator find_section(
350 boost::string_ref::const_iterator pos) const
351 {
352 std::vector<mapped_file_section>::const_iterator section =
353 boost::upper_bound(mapped_sections,
354 std::string::size_type(pos - source().begin()),
355 mapped_section_pos_cmp());
356 assert(section != mapped_sections.begin());
357 --section;
358
359 return section;
360 }
361
362 virtual file_position position_of(boost::string_ref::const_iterator) const;
363
364 private:
365
366 static std::string::size_type skip_indentation(
367 boost::string_ref src, std::string::size_type i)
368 {
369 while (i != src.size() && (src[i] == ' ' || src[i] == '\t')) ++i;
370 return i;
371 }
372
373 };
374
375 namespace {
376 std::list<mapped_file> mapped_files;
377 }
378
379 struct mapped_file_builder_data
380 {
381 mapped_file_builder_data() { reset(); }
382 void reset() { new_file.reset(); }
383
384 boost::intrusive_ptr<mapped_file> new_file;
385 };
386
387 mapped_file_builder::mapped_file_builder() : data(0) {}
388 mapped_file_builder::~mapped_file_builder() { delete data; }
389
390 void mapped_file_builder::start(file_ptr f)
391 {
392 if (!data) {
393 data = new mapped_file_builder_data;
394 }
395
396 assert(!data->new_file);
397 data->new_file = new mapped_file(f);
398 }
399
400 file_ptr mapped_file_builder::release()
401 {
402 file_ptr r = data->new_file;
403 data->reset();
404 return r;
405 }
406
407 void mapped_file_builder::clear()
408 {
409 data->reset();
410 }
411
412 bool mapped_file_builder::empty() const
413 {
414 return data->new_file->source().empty();
415 }
416
417 mapped_file_builder::pos mapped_file_builder::get_pos() const
418 {
419 return data->new_file->source().size();
420 }
421
422 void mapped_file_builder::add_at_pos(boost::string_ref x, iterator pos)
423 {
424 data->new_file->add_empty_mapped_file_section(pos);
425 data->new_file->source_.append(x.begin(), x.end());
426 }
427
428 void mapped_file_builder::add(boost::string_ref x)
429 {
430 data->new_file->add_mapped_file_section(x.begin());
431 data->new_file->source_.append(x.begin(), x.end());
432 }
433
434 void mapped_file_builder::add(mapped_file_builder const& x)
435 {
436 add(x, 0, x.data->new_file->source_.size());
437 }
438
439 void mapped_file_builder::add(mapped_file_builder const& x,
440 pos begin, pos end)
441 {
442 assert(data->new_file->original == x.data->new_file->original);
443 assert(begin <= x.data->new_file->source_.size());
444 assert(end <= x.data->new_file->source_.size());
445
446 if (begin != end)
447 {
448 std::vector<mapped_file_section>::const_iterator start =
449 x.data->new_file->find_section(
450 x.data->new_file->source().begin() + begin);
451
452 std::string::size_type size = data->new_file->source_.size();
453
454 data->new_file->mapped_sections.push_back(mapped_file_section(
455 x.data->new_file->to_original_pos(start, begin),
456 size, start->section_type));
457
458 for (++start; start != x.data->new_file->mapped_sections.end() &&
459 start->our_pos < end; ++start)
460 {
461 data->new_file->mapped_sections.push_back(mapped_file_section(
462 start->original_pos, start->our_pos - begin + size,
463 start->section_type));
464 }
465
466 data->new_file->source_.append(
467 x.data->new_file->source_.begin() + begin,
468 x.data->new_file->source_.begin() + end);
469 }
470 }
471
472 boost::string_ref::size_type indentation_count(boost::string_ref x)
473 {
474 unsigned count = 0;
475
476 for(boost::string_ref::const_iterator begin = x.begin(), end = x.end();
477 begin != end; ++begin)
478 {
479 switch(*begin)
480 {
481 case ' ':
482 ++count;
483 break;
484 case '\t':
485 // hardcoded tab to 4 for now
486 count = count - (count % 4) + 4;
487 break;
488 default:
489 assert(false);
490 }
491 }
492
493 return count;
494 }
495
496 void mapped_file_builder::unindent_and_add(boost::string_ref x)
497 {
498 // I wanted to do everything using a string_ref, but unfortunately
499 // they don't have all the overloads used in here. So...
500 std::string const program(x.begin(), x.end());
501
502 // Erase leading blank lines and newlines:
503 std::string::size_type start = program.find_first_not_of(" \t\r\n");
504 if (start == std::string::npos) return;
505
506 start = program.find_last_of("\r\n", start);
507 start = start == std::string::npos ? 0 : start + 1;
508
509 assert(start < program.size());
510
511 // Get the first line indentation
512 std::string::size_type indent = program.find_first_not_of(" \t", start) - start;
513 boost::string_ref::size_type full_indent = indentation_count(
514 boost::string_ref(&program[start], indent));
515
516 std::string::size_type pos = start;
517
518 // Calculate the minimum indent from the rest of the lines
519 // Detecting a mix of spaces and tabs.
520 while (std::string::npos != (pos = program.find_first_of("\r\n", pos)))
521 {
522 pos = program.find_first_not_of("\r\n", pos);
523 if (std::string::npos == pos) break;
524
525 std::string::size_type n = program.find_first_not_of(" \t", pos);
526 if (n == std::string::npos) break;
527
528 char ch = program[n];
529 if (ch == '\r' || ch == '\n') continue; // ignore empty lines
530
531 indent = (std::min)(indent, n-pos);
532 full_indent = (std::min)(full_indent, indentation_count(
533 boost::string_ref(&program[pos], n-pos)));
534 }
535
536 // Detect if indentation is mixed.
537 bool mixed_indentation = false;
538 boost::string_ref first_indent(&program[start], indent);
539 pos = start;
540
541 while (std::string::npos != (pos = program.find_first_of("\r\n", pos)))
542 {
543 pos = program.find_first_not_of("\r\n", pos);
544 if (std::string::npos == pos) break;
545
546 std::string::size_type n = program.find_first_not_of(" \t", pos);
547 if (n == std::string::npos || n-pos < indent) continue;
548
549 if (boost::string_ref(&program[pos], indent) != first_indent) {
550 mixed_indentation = true;
551 break;
552 }
553 }
554
555 // Trim white spaces from column 0..indent
556 std::string unindented_program;
557 std::string::size_type copy_start = start;
558 pos = start;
559
560 do {
561 if (std::string::npos == (pos = program.find_first_not_of("\r\n", pos)))
562 break;
563
564 unindented_program.append(program.begin() + copy_start, program.begin() + pos);
565 copy_start = pos;
566
567 // Find the end of the indentation.
568 std::string::size_type next = program.find_first_not_of(" \t", pos);
569 if (next == std::string::npos) next = program.size();
570
571 if (mixed_indentation)
572 {
573 unsigned length = indentation_count(boost::string_ref(
574 &program[pos], next - pos));
575
576 if (length > full_indent) {
577 std::string new_indentation(length - full_indent, ' ');
578 unindented_program.append(new_indentation);
579 }
580
581 copy_start = next;
582 }
583 else
584 {
585 copy_start = (std::min)(pos + indent, next);
586 }
587
588 pos = next;
589 } while (std::string::npos !=
590 (pos = program.find_first_of("\r\n", pos)));
591
592 unindented_program.append(program.begin() + copy_start, program.end());
593
594 data->new_file->add_indented_mapped_file_section(x.begin());
595 data->new_file->source_.append(unindented_program);
596 }
597
598 file_position mapped_file::position_of(boost::string_ref::const_iterator pos) const
599 {
600 return original->position_of(original->source().begin() +
601 to_original_pos(find_section(pos), pos - source().begin()));
602 }
603 }