]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright 2003. Vladimir Prus | |
3 | * Distributed under the Boost Software License, Version 1.0. | |
1e59de90 TL |
4 | * (See accompanying file LICENSE.txt or copy at |
5 | * https://www.bfgroup.xyz/b2/LICENSE.txt) | |
7c673cae FG |
6 | */ |
7 | ||
8 | #include "../mem.h" | |
9 | #include "../native.h" | |
f67539c2 | 10 | #include "../jam_strings.h" |
7c673cae FG |
11 | #include "../subst.h" |
12 | ||
13 | /* | |
14 | rule split ( string separator ) | |
15 | { | |
16 | local result ; | |
17 | local s = $(string) ; | |
18 | ||
19 | local match = 1 ; | |
20 | while $(match) | |
21 | { | |
22 | match = [ MATCH ^(.*)($(separator))(.*) : $(s) ] ; | |
23 | if $(match) | |
24 | { | |
25 | match += "" ; # in case 3rd item was empty - works around MATCH bug | |
26 | result = $(match[3]) $(result) ; | |
27 | s = $(match[1]) ; | |
28 | } | |
29 | } | |
30 | return $(s) $(result) ; | |
31 | } | |
32 | */ | |
33 | ||
34 | LIST * regex_split( FRAME * frame, int flags ) | |
35 | { | |
36 | LIST * args = lol_get( frame->args, 0 ); | |
37 | OBJECT * s; | |
38 | OBJECT * separator; | |
39 | regexp * re; | |
92f5a8d4 | 40 | const char * pos, * prev; |
7c673cae FG |
41 | LIST * result = L0; |
42 | LISTITER iter = list_begin( args ); | |
43 | s = list_item( iter ); | |
44 | separator = list_item( list_next( iter ) ); | |
1e59de90 | 45 | |
7c673cae FG |
46 | re = regex_compile( separator ); |
47 | ||
92f5a8d4 | 48 | prev = pos = object_str( s ); |
7c673cae FG |
49 | while ( regexec( re, pos ) ) |
50 | { | |
1e59de90 | 51 | result = list_push_back( result, object_new_range( prev, int32_t(re->startp[ 0 ] - prev) ) ); |
92f5a8d4 TL |
52 | prev = re->endp[ 0 ]; |
53 | /* Handle empty matches */ | |
54 | if ( *pos == '\0' ) | |
55 | break; | |
56 | else if ( pos == re->endp[ 0 ] ) | |
57 | pos++; | |
58 | else | |
59 | pos = re->endp[ 0 ]; | |
7c673cae FG |
60 | } |
61 | ||
62 | result = list_push_back( result, object_new( pos ) ); | |
63 | ||
64 | return result; | |
65 | } | |
66 | ||
67 | /* | |
68 | rule replace ( | |
69 | string # The string to modify. | |
70 | match # The characters to replace. | |
71 | replacement # The string to replace with. | |
72 | ) | |
73 | { | |
74 | local result = "" ; | |
75 | local parts = 1 ; | |
76 | while $(parts) | |
77 | { | |
78 | parts = [ MATCH ^(.*)($(match))(.*) : $(string) ] ; | |
79 | if $(parts) | |
80 | { | |
81 | parts += "" ; | |
82 | result = "$(replacement)$(parts[3])$(result)" ; | |
83 | string = $(parts[1]) ; | |
84 | } | |
85 | } | |
86 | string ?= "" ; | |
87 | result = "$(string)$(result)" ; | |
88 | return $(result) ; | |
89 | } | |
90 | */ | |
91 | ||
92 | LIST * regex_replace( FRAME * frame, int flags ) | |
93 | { | |
94 | LIST * args = lol_get( frame->args, 0 ); | |
95 | OBJECT * s; | |
96 | OBJECT * match; | |
97 | OBJECT * replacement; | |
98 | regexp * re; | |
99 | const char * pos; | |
100 | string buf[ 1 ]; | |
101 | LIST * result; | |
102 | LISTITER iter = list_begin( args ); | |
103 | s = list_item( iter ); | |
104 | iter = list_next( iter ); | |
105 | match = list_item( iter ); | |
106 | iter = list_next( iter ); | |
107 | replacement = list_item(iter ); | |
1e59de90 | 108 | |
7c673cae | 109 | re = regex_compile( match ); |
1e59de90 | 110 | |
7c673cae FG |
111 | string_new( buf ); |
112 | ||
113 | pos = object_str( s ); | |
114 | while ( regexec( re, pos ) ) | |
115 | { | |
116 | string_append_range( buf, pos, re->startp[ 0 ] ); | |
117 | string_append( buf, object_str( replacement ) ); | |
92f5a8d4 TL |
118 | /* Handle empty matches */ |
119 | if ( *pos == '\0' ) | |
120 | break; | |
121 | else if ( pos == re->endp[ 0 ] ) | |
122 | string_push_back( buf, *pos++ ); | |
123 | else | |
124 | pos = re->endp[ 0 ]; | |
7c673cae FG |
125 | } |
126 | string_append( buf, pos ); | |
127 | ||
128 | result = list_new( object_new( buf->value ) ); | |
129 | ||
130 | string_free( buf ); | |
131 | ||
132 | return result; | |
133 | } | |
134 | ||
135 | /* | |
136 | rule transform ( list * : pattern : indices * ) | |
137 | { | |
138 | indices ?= 1 ; | |
139 | local result ; | |
140 | for local e in $(list) | |
141 | { | |
142 | local m = [ MATCH $(pattern) : $(e) ] ; | |
143 | if $(m) | |
144 | { | |
145 | result += $(m[$(indices)]) ; | |
146 | } | |
147 | } | |
148 | return $(result) ; | |
149 | } | |
150 | */ | |
151 | ||
152 | LIST * regex_transform( FRAME * frame, int flags ) | |
153 | { | |
154 | LIST * const l = lol_get( frame->args, 0 ); | |
155 | LIST * const pattern = lol_get( frame->args, 1 ); | |
156 | LIST * const indices_list = lol_get( frame->args, 2 ); | |
157 | int * indices = 0; | |
158 | int size; | |
159 | LIST * result = L0; | |
160 | ||
161 | if ( !list_empty( indices_list ) ) | |
162 | { | |
163 | int * p; | |
164 | LISTITER iter = list_begin( indices_list ); | |
165 | LISTITER const end = list_end( indices_list ); | |
166 | size = list_length( indices_list ); | |
167 | indices = (int *)BJAM_MALLOC( size * sizeof( int ) ); | |
168 | for ( p = indices; iter != end; iter = list_next( iter ) ) | |
169 | *p++ = atoi( object_str( list_item( iter ) ) ); | |
170 | } | |
171 | else | |
172 | { | |
173 | size = 1; | |
174 | indices = (int *)BJAM_MALLOC( sizeof( int ) ); | |
175 | *indices = 1; | |
176 | } | |
177 | ||
178 | { | |
179 | /* Result is cached and intentionally never freed */ | |
180 | regexp * const re = regex_compile( list_front( pattern ) ); | |
181 | ||
182 | LISTITER iter = list_begin( l ); | |
183 | LISTITER const end = list_end( l ); | |
184 | ||
185 | string buf[ 1 ]; | |
186 | string_new( buf ); | |
187 | ||
188 | for ( ; iter != end; iter = list_next( iter ) ) | |
189 | { | |
190 | if ( regexec( re, object_str( list_item( iter ) ) ) ) | |
191 | { | |
192 | int i = 0; | |
193 | for ( ; i < size; ++i ) | |
194 | { | |
195 | int const index = indices[ i ]; | |
196 | /* Skip empty submatches. Not sure it is right in all cases, | |
197 | * but surely is right for the case for which this routine | |
198 | * is optimized -- header scanning. | |
199 | */ | |
200 | if ( re->startp[ index ] != re->endp[ index ] ) | |
201 | { | |
202 | string_append_range( buf, re->startp[ index ], | |
203 | re->endp[ index ] ); | |
204 | result = list_push_back( result, object_new( buf->value | |
205 | ) ); | |
206 | string_truncate( buf, 0 ); | |
207 | } | |
208 | } | |
209 | } | |
210 | } | |
211 | string_free( buf ); | |
212 | } | |
213 | ||
214 | BJAM_FREE( indices ); | |
215 | return result; | |
216 | } | |
217 | ||
218 | ||
219 | void init_regex() | |
220 | { | |
221 | { | |
222 | char const * args[] = { "string", "separator", 0 }; | |
223 | declare_native_rule( "regex", "split", args, regex_split, 1 ); | |
224 | } | |
225 | { | |
226 | char const * args[] = { "string", "match", "replacement", 0 }; | |
227 | declare_native_rule( "regex", "replace", args, regex_replace, 1 ); | |
228 | } | |
229 | { | |
230 | char const * args[] = { "list", "*", ":", "pattern", ":", "indices", "*", 0 }; | |
231 | declare_native_rule( "regex", "transform", args, regex_transform, 2 ); | |
232 | } | |
233 | } |