]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Boost string_algo library example file ---------------------------------// |
2 | ||
3 | // Copyright Pavol Droba 2002-2003. Use, modification and | |
4 | // distribution is subject to the Boost Software License, Version | |
5 | // 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | ||
8 | // See http://www.boost.org for updates, documentation, and revision history. | |
9 | ||
10 | /* | |
11 | RLE compression using replace framework. Goal is to compress a sequence of | |
12 | repeating characters into 3 bytes ( repeat mark, character and repetition count ). | |
13 | For simplification, it works only on numeric-value sequences. | |
14 | */ | |
15 | ||
16 | #include <string> | |
17 | #include <iostream> | |
18 | #include <limits> | |
20effc67 | 19 | #include <iterator> |
7c673cae FG |
20 | #include <boost/algorithm/string/find_format.hpp> |
21 | #include <boost/algorithm/string/finder.hpp> | |
22 | ||
23 | using namespace std; | |
24 | using namespace boost; | |
25 | ||
26 | // replace mark specification, specialize for a specific element type | |
27 | template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); }; | |
28 | ||
29 | // Compression ----------------------------------------------------------------------- | |
30 | ||
31 | ||
32 | // compress finder -rle | |
33 | /* | |
34 | Find a sequence which can be compressed. It has to be at least 3-character long | |
35 | sequence of repetitive characters | |
36 | */ | |
37 | struct find_compressF | |
38 | { | |
39 | // Construction | |
40 | find_compressF() {} | |
41 | ||
42 | // Operation | |
43 | template<typename ForwardIteratorT> | |
44 | iterator_range<ForwardIteratorT> operator()( | |
45 | ForwardIteratorT Begin, | |
46 | ForwardIteratorT End ) const | |
47 | { | |
48 | typedef ForwardIteratorT input_iterator_type; | |
20effc67 | 49 | typedef typename std::iterator_traits<input_iterator_type>::value_type value_type; |
7c673cae FG |
50 | typedef iterator_range<input_iterator_type> result_type; |
51 | ||
52 | // begin of the matching segment | |
53 | input_iterator_type MStart=End; | |
54 | // Repetition counter | |
55 | value_type Cnt=0; | |
56 | ||
57 | // Search for a sequence of repetitive characters | |
58 | for(input_iterator_type It=Begin; It!=End;) | |
59 | { | |
60 | input_iterator_type It2=It++; | |
61 | ||
62 | if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() ) | |
63 | { | |
64 | return result_type( MStart, It ); | |
65 | } | |
66 | ||
67 | if ( *It==*It2 ) | |
68 | { | |
69 | if ( MStart==End ) | |
70 | { | |
71 | // Mark the start | |
72 | MStart=It2; | |
73 | } | |
74 | ||
75 | // Increate repetition counter | |
76 | Cnt++; | |
77 | } | |
78 | else | |
79 | { | |
80 | if ( MStart!=End ) | |
81 | { | |
82 | if ( Cnt>2 ) | |
83 | return result_type( MStart, It ); | |
84 | else | |
85 | { | |
86 | MStart=End; | |
87 | Cnt=0; | |
88 | } | |
89 | } | |
90 | } | |
91 | } | |
92 | ||
93 | return result_type( End, End ); | |
94 | } | |
95 | }; | |
96 | ||
97 | // rle compress format | |
98 | /* | |
99 | Transform a sequence into repeat mark, character and count | |
100 | */ | |
101 | template<typename SeqT> | |
102 | struct format_compressF | |
103 | { | |
104 | private: | |
105 | typedef SeqT result_type; | |
106 | typedef typename SeqT::value_type value_type; | |
107 | ||
108 | public: | |
109 | // Construction | |
110 | format_compressF() {}; | |
111 | ||
112 | // Operation | |
113 | template< typename ReplaceT > | |
114 | result_type operator()( const ReplaceT& Replace ) const | |
115 | { | |
116 | SeqT r; | |
117 | if(!Replace.empty()) | |
118 | { | |
119 | r.push_back( repeat_mark<value_type>() ); | |
120 | r.push_back( *(Replace.begin()) ); | |
121 | r.push_back( value_type( Replace.size() ) ); | |
122 | } | |
123 | ||
124 | return r; | |
125 | } | |
126 | }; | |
127 | ||
128 | // Decompression ----------------------------------------------------------------------- | |
129 | ||
130 | ||
131 | // find decompress-rle functor | |
132 | /* | |
133 | find a repetition block | |
134 | */ | |
135 | struct find_decompressF | |
136 | { | |
137 | // Construction | |
138 | find_decompressF() {} | |
139 | ||
140 | // Operation | |
141 | template<typename ForwardIteratorT> | |
142 | iterator_range<ForwardIteratorT> operator()( | |
143 | ForwardIteratorT Begin, | |
144 | ForwardIteratorT End ) const | |
145 | { | |
146 | typedef ForwardIteratorT input_iterator_type; | |
20effc67 | 147 | typedef typename std::iterator_traits<input_iterator_type>::value_type value_type; |
7c673cae FG |
148 | typedef iterator_range<input_iterator_type> result_type; |
149 | ||
150 | for(input_iterator_type It=Begin; It!=End; It++) | |
151 | { | |
152 | if( *It==repeat_mark<value_type>() ) | |
153 | { | |
154 | // Repeat mark found, extract body | |
155 | input_iterator_type It2=It++; | |
20effc67 | 156 | |
7c673cae FG |
157 | if ( It==End ) break; |
158 | It++; | |
159 | if ( It==End ) break; | |
160 | It++; | |
20effc67 | 161 | |
7c673cae FG |
162 | return result_type( It2, It ); |
163 | } | |
164 | } | |
165 | ||
166 | return result_type( End, End ); | |
167 | } | |
168 | }; | |
169 | ||
170 | // rle decompress format | |
171 | /* | |
172 | transform a repetition block into a sequence of characters | |
173 | */ | |
174 | template< typename SeqT > | |
175 | struct format_decompressF | |
176 | { | |
177 | private: | |
178 | typedef SeqT result_type; | |
179 | typedef typename SeqT::value_type value_type; | |
180 | ||
181 | public: | |
182 | // Construction | |
183 | format_decompressF() {}; | |
184 | ||
185 | // Operation | |
186 | template< typename ReplaceT > | |
187 | result_type operator()( const ReplaceT& Replace ) const | |
188 | { | |
189 | SeqT r; | |
190 | ||
191 | if(!Replace.empty()) | |
192 | { | |
193 | // extract info | |
194 | typename ReplaceT::const_iterator It=Replace.begin(); | |
195 | ||
196 | value_type Value=*(++It); | |
197 | value_type Repeat=*(++It); | |
198 | ||
199 | for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value ); | |
200 | } | |
201 | ||
202 | return r; | |
203 | } | |
204 | }; | |
205 | ||
206 | ||
207 | int main() | |
208 | { | |
209 | cout << "* RLE Compression Example *" << endl << endl; | |
210 | ||
211 | string original("123_AA_*ZZZZZZZZZZZZZZ*34"); | |
212 | ||
213 | // copy compression | |
214 | string compress=find_format_all_copy( | |
215 | original, | |
216 | find_compressF(), | |
217 | format_compressF<string>() ); | |
218 | ||
219 | cout << "Compressed string: " << compress << endl; | |
220 | ||
221 | // Copy decompression | |
222 | string decompress=find_format_all_copy( | |
223 | compress, | |
224 | find_decompressF(), | |
225 | format_decompressF<string>() ); | |
226 | ||
227 | cout << "Decompressed string: " << decompress << endl; | |
228 | ||
229 | // in-place compression | |
230 | find_format_all( | |
231 | original, | |
232 | find_compressF(), | |
233 | format_compressF<string>() ); | |
234 | ||
235 | cout << "Compressed string: " << original << endl; | |
236 | ||
237 | // in-place decompression | |
238 | find_format_all( | |
239 | original, | |
240 | find_decompressF(), | |
241 | format_decompressF<string>() ); | |
242 | ||
243 | cout << "Decompressed string: " << original << endl; | |
244 | ||
245 | cout << endl; | |
246 | ||
247 | return 0; | |
248 | } |