]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/locale/include/boost/locale/boundary/index.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / locale / include / boost / locale / boundary / index.hpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
9 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
10
11 #include <boost/locale/config.hpp>
12 #include <boost/locale/boundary/types.hpp>
13 #include <boost/locale/boundary/facets.hpp>
14 #include <boost/locale/boundary/segment.hpp>
15 #include <boost/locale/boundary/boundary_point.hpp>
16 #include <boost/iterator/iterator_facade.hpp>
17 #include <boost/type_traits/is_same.hpp>
18 #include <boost/shared_ptr.hpp>
19 #include <boost/cstdint.hpp>
20 #include <boost/assert.hpp>
21 #ifdef BOOST_MSVC
22 # pragma warning(push)
23 # pragma warning(disable : 4275 4251 4231 4660)
24 #endif
25 #include <string>
26 #include <locale>
27 #include <vector>
28 #include <iterator>
29 #include <algorithm>
30 #include <stdexcept>
31
32 #include <iostream>
33
34 namespace boost {
35
36 namespace locale {
37
38 namespace boundary {
39 ///
40 /// \defgroup boundary Boundary Analysis
41 ///
42 /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence boundaries
43 ///
44 /// @{
45 ///
46
47 /// \cond INTERNAL
48
49 namespace details {
50
51 template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
52 struct mapping_traits {
53 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
54 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
55 {
56 std::basic_string<char_type> str(b,e);
57 return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
58 }
59 };
60
61 template<typename CharType,typename SomeIteratorType>
62 struct linear_iterator_traits {
63 static const bool is_linear =
64 is_same<SomeIteratorType,CharType*>::value
65 || is_same<SomeIteratorType,CharType const*>::value
66 || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
67 || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value
68 || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
69 || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value
70 ;
71 };
72
73
74
75 template<typename IteratorType>
76 struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
77
78 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
79
80
81
82 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
83 {
84 index_type result;
85
86 //
87 // Optimize for most common cases
88 //
89 // C++0x requires that string is continious in memory and all known
90 // string implementations
91 // do this because of c_str() support.
92 //
93
94 if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
95 {
96 char_type const *begin = &*b;
97 char_type const *end = begin + (e-b);
98 index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
99 result.swap(tmp);
100 }
101 else {
102 std::basic_string<char_type> str(b,e);
103 index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
104 result.swap(tmp);
105 }
106 return result;
107 }
108 };
109
110 template<typename BaseIterator>
111 class mapping {
112 public:
113 typedef BaseIterator base_iterator;
114 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
115
116
117 mapping(boundary_type type,
118 base_iterator begin,
119 base_iterator end,
120 std::locale const &loc)
121 :
122 index_(new index_type()),
123 begin_(begin),
124 end_(end)
125 {
126 index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
127 index_->swap(idx);
128 }
129
130 mapping()
131 {
132 }
133
134 index_type const &index() const
135 {
136 return *index_;
137 }
138
139 base_iterator begin() const
140 {
141 return begin_;
142 }
143
144 base_iterator end() const
145 {
146 return end_;
147 }
148
149 private:
150 boost::shared_ptr<index_type> index_;
151 base_iterator begin_,end_;
152 };
153
154 template<typename BaseIterator>
155 class segment_index_iterator :
156 public boost::iterator_facade<
157 segment_index_iterator<BaseIterator>,
158 segment<BaseIterator>,
159 boost::bidirectional_traversal_tag,
160 segment<BaseIterator> const &
161 >
162 {
163 public:
164 typedef BaseIterator base_iterator;
165 typedef mapping<base_iterator> mapping_type;
166 typedef segment<base_iterator> segment_type;
167
168 segment_index_iterator() : current_(0,0),map_(0)
169 {
170 }
171
172 segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
173 map_(map),
174 mask_(mask),
175 full_select_(full_select)
176 {
177 set(p);
178 }
179 segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
180 map_(map),
181 mask_(mask),
182 full_select_(full_select)
183 {
184 if(is_begin)
185 set_begin();
186 else
187 set_end();
188 }
189
190 segment_type const &dereference() const
191 {
192 return value_;
193 }
194
195 bool equal(segment_index_iterator const &other) const
196 {
197 return map_ == other.map_ && current_.second == other.current_.second;
198 }
199
200 void increment()
201 {
202 std::pair<size_t,size_t> next = current_;
203 if(full_select_) {
204 next.first = next.second;
205 while(next.second < size()) {
206 next.second++;
207 if(valid_offset(next.second))
208 break;
209 }
210 if(next.second == size())
211 next.first = next.second - 1;
212 }
213 else {
214 while(next.second < size()) {
215 next.first = next.second;
216 next.second++;
217 if(valid_offset(next.second))
218 break;
219 }
220 }
221 update_current(next);
222 }
223
224 void decrement()
225 {
226 std::pair<size_t,size_t> next = current_;
227 if(full_select_) {
228 while(next.second >1) {
229 next.second--;
230 if(valid_offset(next.second))
231 break;
232 }
233 next.first = next.second;
234 while(next.first >0) {
235 next.first--;
236 if(valid_offset(next.first))
237 break;
238 }
239 }
240 else {
241 while(next.second >1) {
242 next.second--;
243 if(valid_offset(next.second))
244 break;
245 }
246 next.first = next.second - 1;
247 }
248 update_current(next);
249 }
250
251 private:
252
253 void set_end()
254 {
255 current_.first = size() - 1;
256 current_.second = size();
257 value_ = segment_type(map_->end(),map_->end(),0);
258 }
259 void set_begin()
260 {
261 current_.first = current_.second = 0;
262 value_ = segment_type(map_->begin(),map_->begin(),0);
263 increment();
264 }
265
266 void set(base_iterator p)
267 {
268 size_t dist=std::distance(map_->begin(),p);
269 index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
270 index_type::const_iterator
271 boundary_point=std::upper_bound(b,e,break_info(dist));
272 while(boundary_point != e && (boundary_point->rule & mask_)==0)
273 boundary_point++;
274
275 current_.first = current_.second = boundary_point - b;
276
277 if(full_select_) {
278 while(current_.first > 0) {
279 current_.first --;
280 if(valid_offset(current_.first))
281 break;
282 }
283 }
284 else {
285 if(current_.first > 0)
286 current_.first --;
287 }
288 value_.first = map_->begin();
289 std::advance(value_.first,get_offset(current_.first));
290 value_.second = value_.first;
291 std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
292
293 update_rule();
294 }
295
296 void update_current(std::pair<size_t,size_t> pos)
297 {
298 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
299 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
300 std::advance(value_.first,first_diff);
301 std::advance(value_.second,second_diff);
302 current_ = pos;
303 update_rule();
304 }
305
306 void update_rule()
307 {
308 if(current_.second != size()) {
309 value_.rule(index()[current_.second].rule);
310 }
311 }
312 size_t get_offset(size_t ind) const
313 {
314 if(ind == size())
315 return index().back().offset;
316 return index()[ind].offset;
317 }
318
319 bool valid_offset(size_t offset) const
320 {
321 return offset == 0
322 || offset == size() // make sure we not acess index[size]
323 || (index()[offset].rule & mask_)!=0;
324 }
325
326 size_t size() const
327 {
328 return index().size();
329 }
330
331 index_type const &index() const
332 {
333 return map_->index();
334 }
335
336
337 segment_type value_;
338 std::pair<size_t,size_t> current_;
339 mapping_type const *map_;
340 rule_type mask_;
341 bool full_select_;
342 };
343
344 template<typename BaseIterator>
345 class boundary_point_index_iterator :
346 public boost::iterator_facade<
347 boundary_point_index_iterator<BaseIterator>,
348 boundary_point<BaseIterator>,
349 boost::bidirectional_traversal_tag,
350 boundary_point<BaseIterator> const &
351 >
352 {
353 public:
354 typedef BaseIterator base_iterator;
355 typedef mapping<base_iterator> mapping_type;
356 typedef boundary_point<base_iterator> boundary_point_type;
357
358 boundary_point_index_iterator() : current_(0),map_(0)
359 {
360 }
361
362 boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
363 map_(map),
364 mask_(mask)
365 {
366 if(is_begin)
367 set_begin();
368 else
369 set_end();
370 }
371 boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
372 map_(map),
373 mask_(mask)
374 {
375 set(p);
376 }
377
378 boundary_point_type const &dereference() const
379 {
380 return value_;
381 }
382
383 bool equal(boundary_point_index_iterator const &other) const
384 {
385 return map_ == other.map_ && current_ == other.current_;
386 }
387
388 void increment()
389 {
390 size_t next = current_;
391 while(next < size()) {
392 next++;
393 if(valid_offset(next))
394 break;
395 }
396 update_current(next);
397 }
398
399 void decrement()
400 {
401 size_t next = current_;
402 while(next>0) {
403 next--;
404 if(valid_offset(next))
405 break;
406 }
407 update_current(next);
408 }
409
410 private:
411 void set_end()
412 {
413 current_ = size();
414 value_ = boundary_point_type(map_->end(),0);
415 }
416 void set_begin()
417 {
418 current_ = 0;
419 value_ = boundary_point_type(map_->begin(),0);
420 }
421
422 void set(base_iterator p)
423 {
424 size_t dist = std::distance(map_->begin(),p);
425
426 index_type::const_iterator b=index().begin();
427 index_type::const_iterator e=index().end();
428 index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
429
430 if(ptr==index().end())
431 current_=size()-1;
432 else
433 current_=ptr - index().begin();
434
435 while(!valid_offset(current_))
436 current_ ++;
437
438 std::ptrdiff_t diff = get_offset(current_) - dist;
439 std::advance(p,diff);
440 value_.iterator(p);
441 update_rule();
442 }
443
444 void update_current(size_t pos)
445 {
446 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
447 base_iterator i=value_.iterator();
448 std::advance(i,diff);
449 current_ = pos;
450 value_.iterator(i);
451 update_rule();
452 }
453
454 void update_rule()
455 {
456 if(current_ != size()) {
457 value_.rule(index()[current_].rule);
458 }
459 }
460 size_t get_offset(size_t ind) const
461 {
462 if(ind == size())
463 return index().back().offset;
464 return index()[ind].offset;
465 }
466
467 bool valid_offset(size_t offset) const
468 {
469 return offset == 0
470 || offset + 1 >= size() // last and first are always valid regardless of mark
471 || (index()[offset].rule & mask_)!=0;
472 }
473
474 size_t size() const
475 {
476 return index().size();
477 }
478
479 index_type const &index() const
480 {
481 return map_->index();
482 }
483
484
485 boundary_point_type value_;
486 size_t current_;
487 mapping_type const *map_;
488 rule_type mask_;
489 };
490
491
492 } // details
493
494 /// \endcond
495
496 template<typename BaseIterator>
497 class segment_index;
498
499 template<typename BaseIterator>
500 class boundary_point_index;
501
502
503 ///
504 /// \brief This class holds an index of segments in the text range and allows to iterate over them
505 ///
506 /// This class provides \ref begin() and \ref end() member functions that return bidirectional iterators
507 /// to the \ref segment objects.
508 ///
509 /// It provides two options on way of selecting segments:
510 ///
511 /// - \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to
512 /// various masks %as \ref word_any.
513 /// \n
514 /// The default is to select any types of boundaries.
515 /// \n
516 /// For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators
517 /// would iterate only over the words containing Kana letters and \ref word_any would select all types of
518 /// words excluding ranges that consist of white space and punctuation marks. So iterating over the text
519 /// "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be", instead
520 /// of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?".
521 /// - \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous
522 /// %boundary point does not fit the selected rule.
523 /// \n
524 /// For example: We want to fetch all sentences from the following text: "Hello! How\nare you?".
525 /// \n
526 /// This text contains three %boundary points separating it to sentences by different rules:
527 /// - The exclamation mark "!" ends the sentence "Hello!"
528 /// - The line feed that splits the sentence "How\nare you?" into two parts.
529 /// - The question mark that ends the second sentence.
530 /// \n
531 /// If you would only change the \ref rule() to \ref sentence_term then the segment_index would
532 /// provide two sentences "Hello!" and "are you?" %as only them actually terminated with required
533 /// terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include
534 /// all the text up to previous valid %boundary point and would return two expected sentences:
535 /// "Hello!" and "How\nare you?".
536 ///
537 /// This class allows to find a segment according to the given iterator in range using \ref find() member
538 /// function.
539 ///
540 /// \note
541 ///
542 /// - Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text
543 /// invalidates existing iterators and they can't be used any more.
544 /// - segment_index can be created from boundary_point_index or other segment_index that was created with
545 /// same \ref boundary_type. This is very fast operation %as they shared same index
546 /// and it does not require its regeneration.
547 ///
548 /// \see
549 ///
550 /// - \ref boundary_point_index
551 /// - \ref segment
552 /// - \ref boundary_point
553 ///
554
555 template<typename BaseIterator>
556 class segment_index {
557 public:
558
559 ///
560 /// The type of the iterator used to iterate over the original text
561 ///
562 typedef BaseIterator base_iterator;
563 #ifdef BOOST_LOCALE_DOXYGEN
564 ///
565 /// The bidirectional iterator that iterates over \ref value_type objects.
566 ///
567 /// - The iterators may be invalidated by use of any non-const member function
568 /// including but not limited to \ref rule(rule_type) and \ref full_select(bool).
569 /// - The returned value_type object is valid %as long %as iterator points to it.
570 /// So this following code is wrong %as t used after p was updated:
571 /// \code
572 /// segment_index<some_iterator>::iterator p=index.begin();
573 /// segment<some_iterator> &t = *p;
574 /// ++p;
575 /// cout << t.str() << endl;
576 /// \endcode
577 ///
578 typedef unspecified_iterator_type iterator;
579 ///
580 /// \copydoc iterator
581 ///
582 typedef unspecified_iterator_type const_iterator;
583 #else
584 typedef details::segment_index_iterator<base_iterator> iterator;
585 typedef details::segment_index_iterator<base_iterator> const_iterator;
586 #endif
587 ///
588 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
589 /// an object that represents selected segment.
590 ///
591 typedef segment<base_iterator> value_type;
592
593 ///
594 /// Default constructor.
595 ///
596 /// \note
597 ///
598 /// When this object is constructed by default it does not include a valid index, thus
599 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
600 /// behavior
601 ///
602 segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
603 {
604 }
605 ///
606 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
607 /// in range [begin,end) using a rule \a mask for locale \a loc.
608 ///
609 segment_index(boundary_type type,
610 base_iterator begin,
611 base_iterator end,
612 rule_type mask,
613 std::locale const &loc=std::locale())
614 :
615 map_(type,begin,end,loc),
616 mask_(mask),
617 full_select_(false)
618 {
619 }
620 ///
621 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
622 /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc.
623 ///
624 segment_index(boundary_type type,
625 base_iterator begin,
626 base_iterator end,
627 std::locale const &loc=std::locale())
628 :
629 map_(type,begin,end,loc),
630 mask_(0xFFFFFFFFu),
631 full_select_(false)
632 {
633 }
634
635 ///
636 /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information
637 /// and used default rule (all possible segments)
638 ///
639 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
640 /// range it is much better to create one from another rather then indexing the same
641 /// range twice.
642 ///
643 /// \note \ref rule() flags are not copied
644 ///
645 segment_index(boundary_point_index<base_iterator> const &);
646 ///
647 /// Copy an index from a \ref boundary_point_index. It copies all indexing information
648 /// and uses the default rule (all possible segments)
649 ///
650 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
651 /// range it is much better to create one from another rather then indexing the same
652 /// range twice.
653 ///
654 /// \note \ref rule() flags are not copied
655 ///
656 segment_index const &operator = (boundary_point_index<base_iterator> const &);
657
658
659 ///
660 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
661 /// in range [begin,end) for locale \a loc.
662 ///
663 /// \note \ref rule() and \ref full_select() remain unchanged.
664 ///
665 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
666 {
667 map_ = mapping_type(type,begin,end,loc);
668 }
669
670 ///
671 /// Get the \ref iterator on the beginning of the segments range.
672 ///
673 /// Preconditions: the segment_index should have a mapping
674 ///
675 /// \note
676 ///
677 /// The returned iterator is invalidated by access to any non-const member functions of this object
678 ///
679 iterator begin() const
680 {
681 return iterator(true,&map_,mask_,full_select_);
682 }
683
684 ///
685 /// Get the \ref iterator on the ending of the segments range.
686 ///
687 /// Preconditions: the segment_index should have a mapping
688 ///
689 /// The returned iterator is invalidated by access to any non-const member functions of this object
690 ///
691 iterator end() const
692 {
693 return iterator(false,&map_,mask_,full_select_);
694 }
695
696 ///
697 /// Find a first valid segment following a position \a p.
698 ///
699 /// If \a p is inside a valid segment this segment is selected:
700 ///
701 /// For example: For \ref word %boundary analysis with \ref word_any rule():
702 ///
703 /// - "to| be or ", would point to "be",
704 /// - "t|o be or ", would point to "to",
705 /// - "to be or| ", would point to end.
706 ///
707 ///
708 /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator
709 /// to the text in the mapped range.
710 ///
711 /// The returned iterator is invalidated by access to any non-const member functions of this object
712 ///
713 iterator find(base_iterator p) const
714 {
715 return iterator(p,&map_,mask_,full_select_);
716 }
717
718 ///
719 /// Get the mask of rules that are used
720 ///
721 rule_type rule() const
722 {
723 return mask_;
724 }
725 ///
726 /// Set the mask of rules that are used
727 ///
728 void rule(rule_type v)
729 {
730 mask_ = v;
731 }
732
733 ///
734 /// Get the full_select property value - should segment include in the range
735 /// values that not belong to specific \ref rule() or not.
736 ///
737 /// The default value is false.
738 ///
739 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
740 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
741 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
742 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
743 /// following part "are you?"
744 ///
745
746 bool full_select() const
747 {
748 return full_select_;
749 }
750
751 ///
752 /// Set the full_select property value - should segment include in the range
753 /// values that not belong to specific \ref rule() or not.
754 ///
755 /// The default value is false.
756 ///
757 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
758 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
759 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
760 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
761 /// following part "are you?"
762 ///
763
764 void full_select(bool v)
765 {
766 full_select_ = v;
767 }
768
769 private:
770 friend class boundary_point_index<base_iterator>;
771 typedef details::mapping<base_iterator> mapping_type;
772 mapping_type map_;
773 rule_type mask_;
774 bool full_select_;
775 };
776
777 ///
778 /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating
779 /// over them.
780 ///
781 /// This class provides \ref begin() and \ref end() member functions that return bidirectional iterators
782 /// to the \ref boundary_point objects.
783 ///
784 /// It provides an option that affects selecting %boundary points according to different rules:
785 /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific
786 /// types of %boundary points like \ref sentence_term.
787 ///
788 /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default
789 /// rule is used the %boundary points would be:
790 ///
791 /// - "|Hello! How\nare you?"
792 /// - "Hello! |How\nare you?"
793 /// - "Hello! How\n|are you?"
794 /// - "Hello! How\nare you?|"
795 ///
796 /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be:
797 ///
798 /// - "|Hello! How\nare you?"
799 /// - "Hello! |How\nare you?"
800 /// - "Hello! How\nare you?|"
801 ///
802 /// Such that a %boundary point defined by a line feed character would be ignored.
803 ///
804 /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member
805 /// function.
806 ///
807 /// \note
808 /// - Even an empty text range [x,x) considered to have a one %boundary point x.
809 /// - \a a and \a b points of the range [a,b) are always considered %boundary points
810 /// regardless the rules used.
811 /// - Changing the \ref rule() option or, of course, re-indexing the text
812 /// invalidates existing iterators and they can't be used any more.
813 /// - boundary_point_index can be created from segment_index or other boundary_point_index that was created with
814 /// same \ref boundary_type. This is very fast operation %as they shared same index
815 /// and it does not require its regeneration.
816 ///
817 /// \see
818 ///
819 /// - \ref segment_index
820 /// - \ref boundary_point
821 /// - \ref segment
822 ///
823
824
825 template<typename BaseIterator>
826 class boundary_point_index {
827 public:
828 ///
829 /// The type of the iterator used to iterate over the original text
830 ///
831 typedef BaseIterator base_iterator;
832 #ifdef BOOST_LOCALE_DOXYGEN
833 ///
834 /// The bidirectional iterator that iterates over \ref value_type objects.
835 ///
836 /// - The iterators may be invalidated by use of any non-const member function
837 /// including but not limited to \ref rule(rule_type) member function.
838 /// - The returned value_type object is valid %as long %as iterator points to it.
839 /// So this following code is wrong %as t used after p was updated:
840 /// \code
841 /// boundary_point_index<some_iterator>::iterator p=index.begin();
842 /// boundary_point<some_iterator> &t = *p;
843 /// ++p;
844 /// rule_type r = t->rule();
845 /// \endcode
846 ///
847 typedef unspecified_iterator_type iterator;
848 ///
849 /// \copydoc iterator
850 ///
851 typedef unspecified_iterator_type const_iterator;
852 #else
853 typedef details::boundary_point_index_iterator<base_iterator> iterator;
854 typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
855 #endif
856 ///
857 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
858 /// an object that represents the selected \ref boundary_point "boundary point".
859 ///
860 typedef boundary_point<base_iterator> value_type;
861
862 ///
863 /// Default constructor.
864 ///
865 /// \note
866 ///
867 /// When this object is constructed by default it does not include a valid index, thus
868 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
869 /// behavior
870 ///
871 boundary_point_index() : mask_(0xFFFFFFFFu)
872 {
873 }
874
875 ///
876 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
877 /// in range [begin,end) using a rule \a mask for locale \a loc.
878 ///
879 boundary_point_index(boundary_type type,
880 base_iterator begin,
881 base_iterator end,
882 rule_type mask,
883 std::locale const &loc=std::locale())
884 :
885 map_(type,begin,end,loc),
886 mask_(mask)
887 {
888 }
889 ///
890 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
891 /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc.
892 ///
893 boundary_point_index(boundary_type type,
894 base_iterator begin,
895 base_iterator end,
896 std::locale const &loc=std::locale())
897 :
898 map_(type,begin,end,loc),
899 mask_(0xFFFFFFFFu)
900 {
901 }
902
903 ///
904 /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
905 /// and uses the default rule (all possible %boundary points)
906 ///
907 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
908 /// range it is much better to create one from another rather then indexing the same
909 /// range twice.
910 ///
911 /// \note \ref rule() flags are not copied
912 ///
913 boundary_point_index(segment_index<base_iterator> const &other);
914 ///
915 /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information
916 /// and keeps the current \ref rule() unchanged
917 ///
918 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
919 /// range it is much better to create one from another rather then indexing the same
920 /// range twice.
921 ///
922 /// \note \ref rule() flags are not copied
923 ///
924 boundary_point_index const &operator=(segment_index<base_iterator> const &other);
925
926 ///
927 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
928 /// in range [begin,end) for locale \a loc.
929 ///
930 /// \note \ref rule() remains unchanged.
931 ///
932 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
933 {
934 map_ = mapping_type(type,begin,end,loc);
935 }
936
937 ///
938 /// Get the \ref iterator on the beginning of the %boundary points range.
939 ///
940 /// Preconditions: this boundary_point_index should have a mapping
941 ///
942 /// \note
943 ///
944 /// The returned iterator is invalidated by access to any non-const member functions of this object
945 ///
946 iterator begin() const
947 {
948 return iterator(true,&map_,mask_);
949 }
950
951 ///
952 /// Get the \ref iterator on the ending of the %boundary points range.
953 ///
954 /// Preconditions: this boundary_point_index should have a mapping
955 ///
956 /// \note
957 ///
958 /// The returned iterator is invalidated by access to any non-const member functions of this object
959 ///
960 iterator end() const
961 {
962 return iterator(false,&map_,mask_);
963 }
964
965 ///
966 /// Find a first valid %boundary point on a position \a p or following it.
967 ///
968 /// For example: For \ref word %boundary analysis of the text "to be or"
969 ///
970 /// - "|to be", would return %boundary point at "|to be",
971 /// - "t|o be", would point to "to| be"
972 ///
973 /// Preconditions: the boundary_point_index should have a mapping and \a p should be valid iterator
974 /// to the text in the mapped range.
975 ///
976 /// The returned iterator is invalidated by access to any non-const member functions of this object
977 ///
978 iterator find(base_iterator p) const
979 {
980 return iterator(p,&map_,mask_);
981 }
982
983 ///
984 /// Get the mask of rules that are used
985 ///
986 rule_type rule() const
987 {
988 return mask_;
989 }
990 ///
991 /// Set the mask of rules that are used
992 ///
993 void rule(rule_type v)
994 {
995 mask_ = v;
996 }
997
998 private:
999
1000 friend class segment_index<base_iterator>;
1001 typedef details::mapping<base_iterator> mapping_type;
1002 mapping_type map_;
1003 rule_type mask_;
1004 };
1005
1006 /// \cond INTERNAL
1007 template<typename BaseIterator>
1008 segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
1009 map_(other.map_),
1010 mask_(0xFFFFFFFFu),
1011 full_select_(false)
1012 {
1013 }
1014
1015 template<typename BaseIterator>
1016 boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
1017 map_(other.map_),
1018 mask_(0xFFFFFFFFu)
1019 {
1020 }
1021
1022 template<typename BaseIterator>
1023 segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
1024 {
1025 map_ = other.map_;
1026 return *this;
1027 }
1028
1029 template<typename BaseIterator>
1030 boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
1031 {
1032 map_ = other.map_;
1033 return *this;
1034 }
1035 /// \endcond
1036
1037 typedef segment_index<std::string::const_iterator> ssegment_index; ///< convenience typedef
1038 typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< convenience typedef
1039 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1040 typedef segment_index<std::u16string::const_iterator> u16ssegment_index;///< convenience typedef
1041 #endif
1042 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1043 typedef segment_index<std::u32string::const_iterator> u32ssegment_index;///< convenience typedef
1044 #endif
1045
1046 typedef segment_index<char const *> csegment_index; ///< convenience typedef
1047 typedef segment_index<wchar_t const *> wcsegment_index; ///< convenience typedef
1048 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1049 typedef segment_index<char16_t const *> u16csegment_index; ///< convenience typedef
1050 #endif
1051 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1052 typedef segment_index<char32_t const *> u32csegment_index; ///< convenience typedef
1053 #endif
1054
1055 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;///< convenience typedef
1056 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;///< convenience typedef
1057 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1058 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;///< convenience typedef
1059 #endif
1060 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1061 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;///< convenience typedef
1062 #endif
1063
1064 typedef boundary_point_index<char const *> cboundary_point_index; ///< convenience typedef
1065 typedef boundary_point_index<wchar_t const *> wcboundary_point_index; ///< convenience typedef
1066 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1067 typedef boundary_point_index<char16_t const *> u16cboundary_point_index;///< convenience typedef
1068 #endif
1069 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1070 typedef boundary_point_index<char32_t const *> u32cboundary_point_index;///< convenience typedef
1071 #endif
1072
1073
1074
1075 } // boundary
1076
1077 } // locale
1078 } // boost
1079
1080 ///
1081 /// \example boundary.cpp
1082 /// Example of using segment_index
1083 /// \example wboundary.cpp
1084 /// Example of using segment_index over wide strings
1085 ///
1086
1087 #ifdef BOOST_MSVC
1088 #pragma warning(pop)
1089 #endif
1090
1091 #endif
1092 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4