1 /*=============================================================================
2 Copyright (c) 2002 2004 2006 Joel de Guzman
3 Copyright (c) 2004 Eric Niebler
4 http://spirit.sourceforge.net/
6 Use, modification and distribution is subject to the Boost Software
7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8 http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
11 #include <boost/filesystem/fstream.hpp>
12 #include <boost/unordered_map.hpp>
13 #include <boost/range/algorithm/upper_bound.hpp>
14 #include <boost/range/algorithm/transform.hpp>
15 #include <boost/foreach.hpp>
// Table of files keyed by path. load() looks a path up here first and
// emplaces a new entry when the path is not present yet.
24 boost::unordered_map
<fs::path
, file_ptr
> files
;
// Read the first few bytes in a file to see if it starts with a byte order
// mark. If it doesn't, then write the characters we've already read in.
// Although, given how UTF-8 works, if we've read anything in, the file is
// probably broken.
//
// begin   Current read position; advanced past every byte that matched,
//         whether or not the whole mark matched in the end.
// end     End of the input.
// out     Receives the consumed bytes again when the match fails part way,
//         so that no input is lost.
// chars   The expected byte sequence. It may contain embedded zero bytes,
//         which is why the length is passed separately.
// length  Number of bytes in chars to match.
//
// Returns true if all `length` bytes matched, false otherwise.
template <typename InputIterator, typename OutputIterator>
bool check_bom(InputIterator& begin, InputIterator end,
        OutputIterator out, char const* chars, int length)
{
    // Nothing to match: trivially successful, and avoids reading *chars.
    if(length <= 0) return true;

    char const* ptr = chars;

    while(begin != end && *begin == *ptr) {
        // Consume the matched byte on both sides before testing for the end
        // of the mark, so `begin` ends up just past the mark on success.
        ++begin;
        ++ptr;
        --length;
        if(length == 0) return true;
    }

    // Failed to match, so write the skipped characters to storage:
    while(chars != ptr) *out++ = *chars++;

    return false;
}
// Detect a byte order mark at the start of [begin, end).
//
// Returns the name of the encoding the mark indicates ("UTF-8", "UTF-16"
// or "UTF-32"), or an empty string when the input is empty or the relevant
// check_bom call does not match. The byte comparison is delegated to
// check_bom, which takes `begin` by reference and is also handed `out`.
51 template <typename InputIterator
, typename OutputIterator
>
52 std::string
read_bom(InputIterator
& begin
, InputIterator end
,
55 if(begin
== end
) return "";
// Byte patterns of the marks. The UTF-16 marks are the two-byte halves of
// the UTF-32 ones, so they are addressed as offsets into these literals.
57 const char* utf8
= "\xef\xbb\xbf" ;
58 const char* utf32be
= "\0\0\xfe\xff";
59 const char* utf32le
= "\xff\xfe\0\0";
// The first byte selects which mark to test for.
61 unsigned char c
= *begin
;
65 return check_bom(begin
, end
, out
, utf8
, 3) ? "UTF-8" : "";
67 case 0xFF: // UTF-16/UTF-32 little endian
// FF FE is tested first; two following zero bytes then mean UTF-32,
// anything else UTF-16.
68 return !check_bom(begin
, end
, out
, utf32le
, 2) ? "" :
69 check_bom(begin
, end
, out
, utf32le
+ 2, 2) ? "UTF-32" : "UTF-16";
70 case 0: // UTF-32 big endian
71 return check_bom(begin
, end
, out
, utf32be
, 4) ? "UTF-32" : "";
72 case 0xFE: // UTF-16 big endian
// utf32be + 2 points at the FE FF tail of the UTF-32 pattern.
73 return check_bom(begin
, end
, out
, utf32be
+ 2, 2) ? "UTF-16" : "";
79 // Copy a string, converting mac and windows style newlines to unix
// newlines. A leading byte order mark is examined first via read_bom.
82 template <typename InputIterator
, typename OutputIterator
>
83 void normalize(InputIterator begin
, InputIterator end
,
86 std::string encoding
= read_bom(begin
, end
, out
);
// Only UTF-8, or no recognised mark at all (""), is accepted; any other
// detected encoding is reported through load_error.
88 if(encoding
!= "UTF-8" && encoding
!= "")
89 throw load_error(encoding
+
90 " is not supported. Please use UTF-8.");
// Skips a '\n' at the current position.
// NOTE(review): presumably the second half of a "\r\n" pair; the
// surrounding loop is not visible here, so confirm.
96 if(begin
!= end
&& *begin
== '\n') ++begin
;
// Return the file for `filename`, looking it up in `files` first.
//
// When the path is not in the table, the file is opened with fs::ifstream,
// read through istream_iterator<char> into `source`, and a new `file`
// built from (filename, source, qbk_version) is emplaced into `files`.
//
// Throws load_error with "Could not open input file." or "Error reading
// input file."; the conditions guarding those throws are not visible here.
104 file_ptr
load(fs::path
const& filename
, unsigned qbk_version
)
106 boost::unordered_map
<fs::path
, file_ptr
>::iterator pos
107 = files
.find(filename
);
// Not in the table yet.
109 if (pos
== files
.end())
111 fs::ifstream
in(filename
, std::ios_base::in
);
114 throw load_error("Could not open input file.");
116 // Turn off white space skipping on the stream
// (istream_iterator<char> reads with operator>>, which would otherwise
// drop whitespace characters).
117 in
.unsetf(std::ios::skipws
);
121 std::istream_iterator
<char>(in
),
122 std::istream_iterator
<char>(),
123 std::back_inserter(source
));
126 throw load_error("Error reading input file.");
// emplace returns an (iterator, inserted) pair; `pos` is updated to the
// stored entry.
130 boost::tie(pos
, inserted
) = files
.emplace(
131 filename
, new file(filename
, source
, qbk_version
));
// Stream a file_position as "line: <line>, column: <column>" and return
// the stream so that insertions can be chained.
139 std::ostream
& operator<<(std::ostream
& out
, file_position
const& x
)
141 return out
<< "line: " << x
.line
<< ", column: " << x
.column
;
// Compute the file_position of `iterator`, measured from `begin`.
//
// Walks from `begin` towards `iterator`, tracking in `line_begin` where the
// current line starts. The column is the 1-based distance of `iterator`
// from `line_begin`.
// NOTE(review): the line-counting branches are only partly visible here
// (just the '\n' branch and an early break), so the exact newline handling
// should be confirmed against the full source.
144 file_position
relative_position(
145 boost::string_ref::const_iterator begin
,
146 boost::string_ref::const_iterator iterator
)
149 boost::string_ref::const_iterator line_begin
= begin
;
151 while (begin
!= iterator
)
159 else if (*begin
== '\n')
164 if (begin
== iterator
) break;
// Columns are 1-based.
177 pos
.column
= iterator
- line_begin
+ 1;
// Position of `iterator` within this file, measured from the start of
// source().
181 file_position
file::position_of(boost::string_ref::const_iterator iterator
) const
183 return relative_position(source().begin(), iterator
);
// One entry in a mapped_file's section table: text starting at offset
// `our_pos` in the mapped file's source came from offset `original_pos` in
// the original file's source. `section_type` (normal, empty or indented,
// as used elsewhere in this file) selects how positions inside the section
// are translated back by mapped_file::to_original_pos.
188 struct mapped_file_section
196 std::string::size_type original_pos
;
197 std::string::size_type our_pos
;
198 section_types section_type
;
// Constructor arguments; section_type defaults to `normal`.
201 std::string::size_type original_pos
,
202 std::string::size_type our_pos
,
203 section_types section_type
= normal
) :
204 original_pos(original_pos
), our_pos(our_pos
),
205 section_type(section_type
) {}
// Ordering of sections by their offset in the original file. The mixed
// overloads compare a section against a bare offset in either argument
// order, so the functor can be used to search for an offset directly.
208 struct mapped_section_original_cmp
// section < section
210 bool operator()(mapped_file_section
const& x
,
211 mapped_file_section
const& y
)
213 return x
.original_pos
< y
.original_pos
;
// section < offset
216 bool operator()(mapped_file_section
const& x
,
217 std::string::size_type
const& y
)
219 return x
.original_pos
< y
;
// offset < section
222 bool operator()(std::string::size_type
const& x
,
223 mapped_file_section
const& y
)
225 return x
< y
.original_pos
;
// Ordering of sections by their offset in the mapped file (our_pos). The
// mixed overloads compare a section against a bare offset in either
// argument order; mapped_file::find_section passes this functor to
// boost::upper_bound together with an offset.
229 struct mapped_section_pos_cmp
// section < section
231 bool operator()(mapped_file_section
const& x
,
232 mapped_file_section
const& y
)
234 return x
.our_pos
< y
.our_pos
;
// section < offset
237 bool operator()(mapped_file_section
const& x
,
238 std::string::size_type
const& y
)
240 return x
.our_pos
< y
;
// offset < section
243 bool operator()(std::string::size_type
const& x
,
244 mapped_file_section
const& y
)
246 return x
< y
.our_pos
;
// A file whose source is assembled from pieces of another file
// (`original`). `mapped_sections` records, for each piece appended, where
// it starts in this file's source and where it came from in the original,
// so that a position in this file can be translated back to a position in
// the original (see to_original_pos and position_of).
250 struct mapped_file
: file
// Starts with an empty source, constructed from the original file.
252 mapped_file(file_ptr original
) :
253 file(*original
, std::string()),
254 original(original
), mapped_sections()
258 std::vector
<mapped_file_section
> mapped_sections
;
// Record an `empty` section at the current end of our source, mapped to
// `pos` in the original. Nothing is added if the last section is already
// an `empty` one for the same original offset.
260 void add_empty_mapped_file_section(boost::string_ref::const_iterator pos
) {
261 std::string::size_type original_pos
=
262 pos
- original
->source().begin();
264 if (mapped_sections
.empty() ||
265 mapped_sections
.back().section_type
!=
266 mapped_file_section::empty
||
267 mapped_sections
.back().original_pos
!= original_pos
)
269 mapped_sections
.push_back(mapped_file_section(
270 original_pos
, source().size(),
271 mapped_file_section::empty
));
// Record a section (default type) at the current end of our source,
// mapped to `pos` in the original.
275 void add_mapped_file_section(boost::string_ref::const_iterator pos
) {
276 mapped_sections
.push_back(mapped_file_section(
277 pos
- original
->source().begin(), source().size()));
// Record an `indented` section at the current end of our source, mapped
// to `pos` in the original.
280 void add_indented_mapped_file_section(boost::string_ref::const_iterator pos
)
282 mapped_sections
.push_back(mapped_file_section(
283 pos
- original
->source().begin(), source().size(),
284 mapped_file_section::indented
));
// Translate `pos`, an offset into our source lying in `section`, into an
// offset into the original source.
287 std::string::size_type
to_original_pos(
288 std::vector
<mapped_file_section
>::const_iterator section
,
289 std::string::size_type pos
) const
291 switch (section
->section_type
) {
// normal: the text was copied unchanged, so the offset within the
// section carries over directly.
292 case mapped_file_section::normal
:
293 return pos
- section
->our_pos
+ section
->original_pos
;
// empty: every position maps to the section's original offset.
295 case mapped_file_section::empty
:
296 return section
->original_pos
;
// indented: leading whitespace differs between the two sources, so the
// position is matched line by line and then by offset past indentation.
298 case mapped_file_section::indented
: {
299 // Will contain the start of the current line.
300 boost::string_ref::size_type our_line
= section
->our_pos
;
302 // Will contain the number of lines in the block before the current line.
304 unsigned newline_count
= 0;
306 for(boost::string_ref::size_type i
= section
->our_pos
;
309 if (source()[i
] == '\n') {
315 // The start of the line in the original source.
316 boost::string_ref::size_type original_line
=
317 section
->original_pos
;
319 while(newline_count
> 0) {
320 if (original
->source()[original_line
] == '\n')
325 // The start of line content (i.e. after indentation).
326 our_line
= skip_indentation(source(), our_line
);
328 // The position is in the middle of indentation, so
329 // just return the start of the whitespace, which should be safe.
331 if (our_line
> pos
) return original_line
;
334 skip_indentation(original
->source(), original_line
);
336 // Confirm that we are actually in the same position.
337 assert(original
->source()[original_line
] ==
340 // Calculate the position
341 return original_line
+ (pos
- our_line
);
345 return section
->original_pos
;
// Locate the section that covers `pos` (an iterator into our source).
// upper_bound yields the first section whose our_pos is greater than the
// offset; the assert requires that some section starts at or before it.
// NOTE(review): the returned value is not visible here; presumably the
// section just before `section`, so confirm.
349 std::vector
<mapped_file_section
>::const_iterator
find_section(
350 boost::string_ref::const_iterator pos
) const
352 std::vector
<mapped_file_section
>::const_iterator section
=
353 boost::upper_bound(mapped_sections
,
354 std::string::size_type(pos
- source().begin()),
355 mapped_section_pos_cmp());
356 assert(section
!= mapped_sections
.begin());
// Defined out of line: reports the position in the original file.
362 virtual file_position
position_of(boost::string_ref::const_iterator
) const;
// Advance `i` past any run of spaces and tabs in `src`, stopping at the
// end of `src`.
366 static std::string::size_type
skip_indentation(
367 boost::string_ref src
, std::string::size_type i
)
369 while (i
!= src
.size() && (src
[i
] == ' ' || src
[i
] == '\t')) ++i
;
// List of mapped_file objects.
// NOTE(review): not referenced anywhere in the visible part of this file;
// confirm whether it is still used.
376 std::list
<mapped_file
> mapped_files
;
// State held by a mapped_file_builder: the mapped_file currently being
// assembled, or a null pointer when there is none.
379 struct mapped_file_builder_data
381 mapped_file_builder_data() { reset(); }
// Drop the reference to the file being built.
382 void reset() { new_file
.reset(); }
384 boost::intrusive_ptr
<mapped_file
> new_file
;
// The builder starts without any data object (`data` is null); start()
// allocates one. The destructor deletes `data`, which is a no-op when it
// is still null.
387 mapped_file_builder::mapped_file_builder() : data(0) {}
388 mapped_file_builder::~mapped_file_builder() { delete data
; }
// Begin building a new mapped file whose original is `f`.
//
// Asserts that no file is currently being built, then creates a fresh
// mapped_file for `f`.
// NOTE(review): the allocation of `data` is presumably guarded by a check
// that is not visible here, so confirm.
390 void mapped_file_builder::start(file_ptr f
)
393 data
= new mapped_file_builder_data
;
396 assert(!data
->new_file
);
397 data
->new_file
= new mapped_file(f
);
// Hand the file being built over to the caller as a file_ptr.
// NOTE(review): the remainder of the body is not visible here; presumably
// the builder's own reference is reset before `r` is returned, so confirm.
400 file_ptr
mapped_file_builder::release()
402 file_ptr r
= data
->new_file
;
// NOTE(review): body not visible here; presumably discards the file
// currently being built, so confirm.
407 void mapped_file_builder::clear()
// True when nothing has been appended to the file being built yet.
// Dereferences data->new_file, so a file must be being built.
412 bool mapped_file_builder::empty() const
414 return data
->new_file
->source().empty();
// Current length of the source built so far, i.e. the offset at which the
// next appended text will start.
417 mapped_file_builder::pos
mapped_file_builder::get_pos() const
419 return data
->new_file
->source().size();
// Append the text `x` and map all of it to the single position `pos` in
// the original file, by recording an `empty` section before appending.
422 void mapped_file_builder::add_at_pos(boost::string_ref x
, iterator pos
)
424 data
->new_file
->add_empty_mapped_file_section(pos
);
425 data
->new_file
->source_
.append(x
.begin(), x
.end());
// Append the text `x`, recording a section that maps it to x.begin().
// x.begin() is used as a position in the original file's source (the
// section code subtracts original->source().begin() from it).
428 void mapped_file_builder::add(boost::string_ref x
)
430 data
->new_file
->add_mapped_file_section(x
.begin());
431 data
->new_file
->source_
.append(x
.begin(), x
.end());
// Append everything built so far by another builder `x`; forwards to the
// range overload with the range [0, size of x's source).
434 void mapped_file_builder::add(mapped_file_builder
const& x
)
436 add(x
, 0, x
.data
->new_file
->source_
.size());
// Append the part [begin, end) of the source built by another builder `x`,
// carrying over its section table so that positions still map back to the
// original file.
//
// Both builders must share the same original file, and `begin` and `end`
// must not exceed the size of x's source (all three are asserted).
439 void mapped_file_builder::add(mapped_file_builder
const& x
,
442 assert(data
->new_file
->original
== x
.data
->new_file
->original
);
443 assert(begin
<= x
.data
->new_file
->source_
.size());
444 assert(end
<= x
.data
->new_file
->source_
.size());
// Section of x that `begin` falls in.
448 std::vector
<mapped_file_section
>::const_iterator start
=
449 x
.data
->new_file
->find_section(
450 x
.data
->new_file
->source().begin() + begin
);
// Offset at which the copied text will start in our source.
452 std::string::size_type size
= data
->new_file
->source_
.size();
// The first section may be entered part way through, so its original
// offset is computed for `begin` rather than taken from its start.
454 data
->new_file
->mapped_sections
.push_back(mapped_file_section(
455 x
.data
->new_file
->to_original_pos(start
, begin
),
456 size
, start
->section_type
));
// Later sections that start before `end` are copied with our_pos rebased
// from x's coordinates to ours.
458 for (++start
; start
!= x
.data
->new_file
->mapped_sections
.end() &&
459 start
->our_pos
< end
; ++start
)
461 data
->new_file
->mapped_sections
.push_back(mapped_file_section(
462 start
->original_pos
, start
->our_pos
- begin
+ size
,
463 start
->section_type
));
// Finally copy the text itself.
466 data
->new_file
->source_
.append(
467 x
.data
->new_file
->source_
.begin() + begin
,
468 x
.data
->new_file
->source_
.begin() + end
);
// Compute a width count for the characters of `x`, walking it from
// beginning to end.
// NOTE(review): the per-character branches are not visible here. The one
// visible update advances `count` to the next multiple of 4, which per the
// comment below is the tab handling; confirm how other characters count.
472 boost::string_ref::size_type
indentation_count(boost::string_ref x
)
476 for(boost::string_ref::const_iterator begin
= x
.begin(), end
= x
.end();
477 begin
!= end
; ++begin
)
485 // hardcoded tab to 4 for now
486 count
= count
- (count
% 4) + 4;
// Append the block of text `x` with its common leading indentation
// removed, recording it as an `indented` section mapped to x.begin().
//
// Steps, as far as they are visible here:
//   1. Skip leading blank lines; return without adding anything if `x` is
//      all whitespace.
//   2. Take the first line's indentation, then reduce it to the minimum
//      over the remaining non-empty lines, both as a character count
//      (`indent`) and as a width from indentation_count (`full_indent`).
//   3. Check whether any line's first `indent` characters differ from the
//      first line's (`mixed_indentation`).
//   4. Copy the text line by line, dropping the indentation; for mixed
//      indentation the remaining indent is re-emitted as spaces.
496 void mapped_file_builder::unindent_and_add(boost::string_ref x
)
498 // I wanted to do everything using a string_ref, but unfortunately
499 // they don't have all the overloads used in here. So...
500 std::string
const program(x
.begin(), x
.end());
502 // Erase leading blank lines and newlines:
503 std::string::size_type start
= program
.find_first_not_of(" \t\r\n");
504 if (start
== std::string::npos
) return;
// Back up to the beginning of the line that holds the first non-blank
// character (or to 0 if there is no newline before it).
506 start
= program
.find_last_of("\r\n", start
);
507 start
= start
== std::string::npos
? 0 : start
+ 1;
509 assert(start
< program
.size());
511 // Get the first line indentation
512 std::string::size_type indent
= program
.find_first_not_of(" \t", start
) - start
;
513 boost::string_ref::size_type full_indent
= indentation_count(
514 boost::string_ref(&program
[start
], indent
));
516 std::string::size_type pos
= start
;
518 // Calculate the minimum indent from the rest of the lines
519 // Detecting a mix of spaces and tabs.
520 while (std::string::npos
!= (pos
= program
.find_first_of("\r\n", pos
)))
// Move to the first character of the next non-empty line.
522 pos
= program
.find_first_not_of("\r\n", pos
);
523 if (std::string::npos
== pos
) break;
525 std::string::size_type n
= program
.find_first_not_of(" \t", pos
);
526 if (n
== std::string::npos
) break;
528 char ch
= program
[n
];
529 if (ch
== '\r' || ch
== '\n') continue; // ignore empty lines
531 indent
= (std::min
)(indent
, n
-pos
);
532 full_indent
= (std::min
)(full_indent
, indentation_count(
533 boost::string_ref(&program
[pos
], n
-pos
)));
536 // Detect if indentation is mixed.
537 bool mixed_indentation
= false;
// The first line's leading `indent` characters are the reference that
// every other line is compared against.
538 boost::string_ref
first_indent(&program
[start
], indent
);
541 while (std::string::npos
!= (pos
= program
.find_first_of("\r\n", pos
)))
543 pos
= program
.find_first_not_of("\r\n", pos
);
544 if (std::string::npos
== pos
) break;
546 std::string::size_type n
= program
.find_first_not_of(" \t", pos
);
// Lines with less than `indent` whitespace are not compared.
547 if (n
== std::string::npos
|| n
-pos
< indent
) continue;
549 if (boost::string_ref(&program
[pos
], indent
) != first_indent
) {
550 mixed_indentation
= true;
555 // Trim white spaces from column 0..indent
556 std::string unindented_program
;
557 std::string::size_type copy_start
= start
;
561 if (std::string::npos
== (pos
= program
.find_first_not_of("\r\n", pos
)))
// Copy everything up to the start of this line, i.e. the previous line's
// content and its line ending.
564 unindented_program
.append(program
.begin() + copy_start
, program
.begin() + pos
);
567 // Find the end of the indentation.
568 std::string::size_type next
= program
.find_first_not_of(" \t", pos
);
569 if (next
== std::string::npos
) next
= program
.size();
571 if (mixed_indentation
)
// Re-emit the indentation as spaces: this line's counted width minus the
// common width.
573 unsigned length
= indentation_count(boost::string_ref(
574 &program
[pos
], next
- pos
));
576 if (length
> full_indent
) {
577 std::string
new_indentation(length
- full_indent
, ' ');
578 unindented_program
.append(new_indentation
);
// Skip at most `indent` characters, and never past the end of this
// line's whitespace.
585 copy_start
= (std::min
)(pos
+ indent
, next
);
589 } while (std::string::npos
!=
590 (pos
= program
.find_first_of("\r\n", pos
)));
// Copy whatever remains after the last processed line start.
592 unindented_program
.append(program
.begin() + copy_start
, program
.end());
594 data
->new_file
->add_indented_mapped_file_section(x
.begin());
595 data
->new_file
->source_
.append(unindented_program
);
// Position of `pos` (an iterator into this mapped file's source) expressed
// as a position in the original file: the offset is translated back with
// find_section and to_original_pos, then resolved by the original file.
598 file_position
mapped_file::position_of(boost::string_ref::const_iterator pos
) const
600 return original
->position_of(original
->source().begin() +
601 to_original_pos(find_section(pos
), pos
- source().begin()));