]>
Commit | Line | Data |
---|---|---|
31f18b77 FG |
1 | #include <list> |
2 | #include <map> | |
3 | #include <string> | |
4 | #include <iostream> | |
5 | #include <boost/algorithm/string.hpp> | |
6 | ||
7 | #include "common/ceph_json.h" | |
8 | #include "rgw_common.h" | |
9 | #include "rgw_es_query.h" | |
10 | ||
11 | using namespace std; | |
12 | ||
13 | #define dout_context g_ceph_context | |
14 | #define dout_subsys ceph_subsys_rgw | |
15 | ||
16 | bool pop_front(list<string>& l, string *s) | |
17 | { | |
18 | if (l.empty()) { | |
19 | return false; | |
20 | } | |
21 | *s = l.front(); | |
22 | l.pop_front(); | |
23 | return true; | |
24 | } | |
25 | ||
26 | map<string, int> operator_map = { | |
27 | { "or", 1 }, | |
28 | { "and", 2 }, | |
29 | { "<", 3 }, | |
30 | { "<=", 3 }, | |
31 | { "==", 3 }, | |
32 | { ">=", 3 }, | |
33 | { ">", 3 }, | |
34 | }; | |
35 | ||
36 | bool is_operator(const string& s) | |
37 | { | |
38 | return (operator_map.find(s) != operator_map.end()); | |
39 | } | |
40 | ||
41 | int operand_value(const string& op) | |
42 | { | |
43 | auto i = operator_map.find(op); | |
44 | if (i == operator_map.end()) { | |
45 | return 0; | |
46 | } | |
47 | ||
48 | return i->second; | |
49 | } | |
50 | ||
51 | int check_precedence(const string& op1, const string& op2) | |
52 | { | |
53 | return operand_value(op1) - operand_value(op2); | |
54 | } | |
55 | ||
56 | static bool infix_to_prefix(list<string>& source, list<string> *out) | |
57 | { | |
58 | list<string> operator_stack; | |
59 | list<string> operand_stack; | |
60 | ||
61 | operator_stack.push_front("("); | |
62 | source.push_back(")"); | |
63 | ||
64 | for (string& entity : source) { | |
65 | if (entity == "(") { | |
66 | operator_stack.push_front(entity); | |
67 | } else if (entity == ")") { | |
68 | string popped_operator; | |
69 | if (!pop_front(operator_stack, &popped_operator)) { | |
70 | return false; | |
71 | } | |
72 | ||
73 | while (popped_operator != "(") { | |
74 | operand_stack.push_front(popped_operator); | |
75 | if (!pop_front(operator_stack, &popped_operator)) { | |
76 | return false; | |
77 | } | |
78 | } | |
79 | ||
80 | } else if (is_operator(entity)) { | |
81 | string popped_operator; | |
82 | if (!pop_front(operator_stack, &popped_operator)) { | |
83 | return false; | |
84 | } | |
85 | ||
86 | int precedence = check_precedence(popped_operator, entity); | |
87 | ||
88 | while (precedence >= 0) { | |
89 | operand_stack.push_front(popped_operator); | |
90 | if (!pop_front(operator_stack, &popped_operator)) { | |
91 | return false; | |
92 | } | |
93 | precedence = check_precedence(popped_operator, entity); | |
94 | } | |
95 | ||
96 | operator_stack.push_front(popped_operator); | |
97 | operator_stack.push_front(entity); | |
98 | } else { | |
99 | operand_stack.push_front(entity); | |
100 | } | |
101 | ||
102 | } | |
103 | ||
104 | if (!operator_stack.empty()) { | |
105 | return false; | |
106 | } | |
107 | ||
108 | out->swap(operand_stack); | |
109 | return true; | |
110 | } | |
111 | ||
112 | class ESQueryNode { | |
113 | protected: | |
114 | ESQueryCompiler *compiler; | |
115 | public: | |
116 | ESQueryNode(ESQueryCompiler *_compiler) : compiler(_compiler) {} | |
117 | virtual ~ESQueryNode() {} | |
118 | ||
119 | virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) = 0; | |
120 | ||
121 | virtual void dump(Formatter *f) const = 0; | |
122 | }; | |
123 | ||
124 | static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr); | |
125 | ||
126 | class ESQueryNode_Bool : public ESQueryNode { | |
127 | string op; | |
128 | ESQueryNode *first{nullptr}; | |
129 | ESQueryNode *second{nullptr}; | |
130 | public: | |
131 | ESQueryNode_Bool(ESQueryCompiler *compiler) : ESQueryNode(compiler) {} | |
132 | ESQueryNode_Bool(ESQueryCompiler *compiler, const string& _op, ESQueryNode *_first, ESQueryNode *_second) :ESQueryNode(compiler), op(_op), first(_first), second(_second) {} | |
133 | bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override { | |
134 | bool valid = s->pop(&op); | |
135 | if (!valid) { | |
136 | *perr = "incorrect expression"; | |
137 | return false; | |
138 | } | |
139 | valid = alloc_node(compiler, s, &first, perr) && | |
140 | alloc_node(compiler, s, &second, perr); | |
141 | if (!valid) { | |
142 | return false; | |
143 | } | |
144 | *pnode = this; | |
145 | return true; | |
146 | } | |
147 | virtual ~ESQueryNode_Bool() { | |
148 | delete first; | |
149 | delete second; | |
150 | } | |
151 | ||
152 | void dump(Formatter *f) const { | |
153 | f->open_object_section("bool"); | |
154 | const char *section = (op == "and" ? "must" : "should"); | |
155 | f->open_array_section(section); | |
156 | encode_json("entry", *first, f); | |
157 | encode_json("entry", *second, f); | |
158 | f->close_section(); | |
159 | f->close_section(); | |
160 | } | |
161 | ||
162 | }; | |
163 | ||
164 | class ESQueryNodeLeafVal { | |
165 | public: | |
166 | ESQueryNodeLeafVal() = default; | |
167 | virtual ~ESQueryNodeLeafVal() {} | |
168 | ||
169 | virtual bool init(const string& str_val, string *perr) = 0; | |
170 | virtual void encode_json(const string& field, Formatter *f) const = 0; | |
171 | }; | |
172 | ||
173 | class ESQueryNodeLeafVal_Str : public ESQueryNodeLeafVal { | |
174 | string val; | |
175 | public: | |
176 | ESQueryNodeLeafVal_Str() {} | |
177 | bool init(const string& str_val, string *perr) override { | |
178 | val = str_val; | |
179 | return true; | |
180 | } | |
181 | void encode_json(const string& field, Formatter *f) const { | |
182 | ::encode_json(field.c_str(), val.c_str(), f); | |
183 | } | |
184 | }; | |
185 | ||
186 | class ESQueryNodeLeafVal_Int : public ESQueryNodeLeafVal { | |
224ce89b | 187 | int64_t val{0}; |
31f18b77 FG |
188 | public: |
189 | ESQueryNodeLeafVal_Int() {} | |
190 | bool init(const string& str_val, string *perr) override { | |
191 | string err; | |
192 | val = strict_strtoll(str_val.c_str(), 10, &err); | |
193 | if (!err.empty()) { | |
194 | *perr = string("failed to parse integer: ") + err; | |
195 | return false; | |
196 | } | |
197 | return true; | |
198 | } | |
199 | void encode_json(const string& field, Formatter *f) const { | |
200 | ::encode_json(field.c_str(), val, f); | |
201 | } | |
202 | }; | |
203 | ||
204 | class ESQueryNodeLeafVal_Date : public ESQueryNodeLeafVal { | |
205 | ceph::real_time val; | |
206 | public: | |
207 | ESQueryNodeLeafVal_Date() {} | |
208 | bool init(const string& str_val, string *perr) override { | |
209 | if (parse_time(str_val.c_str(), &val) < 0) { | |
210 | *perr = string("failed to parse date: ") + str_val; | |
211 | return false; | |
212 | } | |
213 | return true; | |
214 | } | |
215 | void encode_json(const string& field, Formatter *f) const { | |
216 | string s; | |
217 | rgw_to_iso8601(val, &s); | |
218 | ::encode_json(field.c_str(), s, f); | |
219 | } | |
220 | }; | |
221 | ||
222 | class ESQueryNode_Op : public ESQueryNode { | |
223 | protected: | |
224 | string op; | |
225 | string field; | |
226 | string str_val; | |
227 | ESQueryNodeLeafVal *val{nullptr}; | |
228 | ESEntityTypeMap::EntityType entity_type{ESEntityTypeMap::ES_ENTITY_NONE}; | |
229 | bool allow_restricted{false}; | |
230 | ||
231 | bool val_from_str(string *perr) { | |
232 | switch (entity_type) { | |
233 | case ESEntityTypeMap::ES_ENTITY_DATE: | |
234 | val = new ESQueryNodeLeafVal_Date; | |
235 | break; | |
236 | case ESEntityTypeMap::ES_ENTITY_INT: | |
237 | val = new ESQueryNodeLeafVal_Int; | |
238 | break; | |
239 | default: | |
240 | val = new ESQueryNodeLeafVal_Str; | |
241 | } | |
242 | return val->init(str_val, perr); | |
243 | } | |
244 | bool do_init(ESQueryNode **pnode, string *perr) { | |
245 | field = compiler->unalias_field(field); | |
246 | ESQueryNode *effective_node; | |
247 | if (!handle_nested(&effective_node, perr)) { | |
248 | return false; | |
249 | } | |
250 | if (!val_from_str(perr)) { | |
251 | return false; | |
252 | } | |
253 | *pnode = effective_node; | |
254 | return true; | |
255 | } | |
256 | ||
257 | public: | |
258 | ESQueryNode_Op(ESQueryCompiler *compiler) : ESQueryNode(compiler) {} | |
259 | ~ESQueryNode_Op() { | |
260 | delete val; | |
261 | } | |
262 | virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override { | |
263 | bool valid = s->pop(&op) && | |
264 | s->pop(&str_val) && | |
265 | s->pop(&field); | |
266 | if (!valid) { | |
267 | *perr = "invalid expression"; | |
268 | return false; | |
269 | } | |
270 | return do_init(pnode, perr); | |
271 | } | |
272 | bool handle_nested(ESQueryNode **pnode, string *perr); | |
273 | ||
274 | void set_allow_restricted(bool allow) { | |
275 | allow_restricted = allow; | |
276 | } | |
277 | ||
278 | virtual void dump(Formatter *f) const = 0; | |
279 | }; | |
280 | ||
281 | class ESQueryNode_Op_Equal : public ESQueryNode_Op { | |
282 | public: | |
283 | ESQueryNode_Op_Equal(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {} | |
284 | ESQueryNode_Op_Equal(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) { | |
285 | op = "=="; | |
286 | field = f; | |
287 | str_val = v; | |
288 | } | |
289 | ||
290 | bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override { | |
291 | if (op.empty()) { | |
292 | return ESQueryNode_Op::init(s, pnode, perr); | |
293 | } | |
294 | return do_init(pnode, perr); | |
295 | } | |
296 | ||
297 | virtual void dump(Formatter *f) const { | |
298 | f->open_object_section("term"); | |
299 | val->encode_json(field, f); | |
300 | f->close_section(); | |
301 | } | |
302 | }; | |
303 | ||
304 | class ESQueryNode_Op_Range : public ESQueryNode_Op { | |
305 | string range_str; | |
306 | public: | |
307 | ESQueryNode_Op_Range(ESQueryCompiler *compiler, const string& rs) : ESQueryNode_Op(compiler), range_str(rs) {} | |
308 | ||
309 | virtual void dump(Formatter *f) const { | |
310 | f->open_object_section("range"); | |
311 | f->open_object_section(field.c_str()); | |
312 | val->encode_json(range_str, f); | |
313 | f->close_section(); | |
314 | f->close_section(); | |
315 | } | |
316 | }; | |
317 | ||
318 | class ESQueryNode_Op_Nested_Parent : public ESQueryNode_Op { | |
319 | public: | |
320 | ESQueryNode_Op_Nested_Parent(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {} | |
321 | ||
322 | virtual string get_custom_leaf_field_name() = 0; | |
323 | }; | |
324 | ||
325 | template <class T> | |
326 | class ESQueryNode_Op_Nested : public ESQueryNode_Op_Nested_Parent { | |
327 | string name; | |
328 | ESQueryNode *next; | |
329 | public: | |
330 | ESQueryNode_Op_Nested(ESQueryCompiler *compiler, const string& _name, ESQueryNode *_next) : ESQueryNode_Op_Nested_Parent(compiler), | |
331 | name(_name), next(_next) {} | |
332 | ~ESQueryNode_Op_Nested() { | |
333 | delete next; | |
334 | } | |
335 | ||
336 | virtual void dump(Formatter *f) const { | |
337 | f->open_object_section("nested"); | |
338 | string s = string("meta.custom-") + type_str(); | |
339 | encode_json("path", s.c_str(), f); | |
340 | f->open_object_section("query"); | |
341 | f->open_object_section("bool"); | |
342 | f->open_array_section("must"); | |
343 | f->open_object_section("entry"); | |
344 | f->open_object_section("match"); | |
345 | string n = s + ".name"; | |
346 | encode_json(n.c_str(), name.c_str(), f); | |
347 | f->close_section(); | |
348 | f->close_section(); | |
349 | encode_json("entry", *next, f); | |
350 | f->close_section(); | |
351 | f->close_section(); | |
352 | f->close_section(); | |
353 | f->close_section(); | |
354 | } | |
355 | ||
356 | string type_str() const; | |
357 | string get_custom_leaf_field_name() { | |
358 | return string("meta.custom-") + type_str() + ".value"; | |
359 | } | |
360 | }; | |
361 | ||
362 | template<> | |
363 | string ESQueryNode_Op_Nested<string>::type_str() const { | |
364 | return "string"; | |
365 | } | |
366 | ||
367 | template<> | |
368 | string ESQueryNode_Op_Nested<int64_t>::type_str() const { | |
369 | return "int"; | |
370 | } | |
371 | ||
372 | template<> | |
373 | string ESQueryNode_Op_Nested<ceph::real_time>::type_str() const { | |
374 | return "date"; | |
375 | } | |
376 | ||
377 | bool ESQueryNode_Op::handle_nested(ESQueryNode **pnode, string *perr) | |
378 | { | |
379 | string field_name = field; | |
380 | const string& custom_prefix = compiler->get_custom_prefix(); | |
381 | if (!boost::algorithm::starts_with(field_name, custom_prefix)) { | |
382 | *pnode = this; | |
383 | auto m = compiler->get_generic_type_map(); | |
384 | if (m) { | |
385 | bool found = m->find(field_name, &entity_type) && | |
386 | (allow_restricted || !compiler->is_restricted(field_name)); | |
387 | if (!found) { | |
388 | *perr = string("unexpected generic field '") + field_name + "'"; | |
389 | } | |
390 | return found; | |
391 | } | |
392 | *perr = "query parser does not support generic types"; | |
393 | return false; | |
394 | } | |
395 | ||
396 | field_name = field_name.substr(custom_prefix.size()); | |
397 | auto m = compiler->get_custom_type_map(); | |
398 | if (m) { | |
399 | m->find(field_name, &entity_type); | |
400 | /* ignoring returned bool, for now just treat it as string */ | |
401 | } | |
402 | ||
403 | ESQueryNode_Op_Nested_Parent *new_node; | |
404 | switch (entity_type) { | |
405 | case ESEntityTypeMap::ES_ENTITY_INT: | |
406 | new_node = new ESQueryNode_Op_Nested<int64_t>(compiler, field_name, this); | |
407 | break; | |
408 | case ESEntityTypeMap::ES_ENTITY_DATE: | |
409 | new_node = new ESQueryNode_Op_Nested<ceph::real_time>(compiler, field_name, this); | |
410 | break; | |
411 | default: | |
412 | new_node = new ESQueryNode_Op_Nested<string>(compiler, field_name, this); | |
413 | } | |
414 | ||
415 | field = new_node->get_custom_leaf_field_name(); | |
416 | *pnode = new_node; | |
417 | ||
418 | return true; | |
419 | } | |
420 | ||
421 | static bool is_bool_op(const string& str) | |
422 | { | |
423 | return (str == "or" || str == "and"); | |
424 | } | |
425 | ||
426 | static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr) | |
427 | { | |
428 | string op; | |
429 | bool valid = s->peek(&op); | |
430 | if (!valid) { | |
431 | *perr = "incorrect expression"; | |
432 | return false; | |
433 | } | |
434 | ||
435 | ESQueryNode *node; | |
436 | ||
437 | if (is_bool_op(op)) { | |
438 | node = new ESQueryNode_Bool(compiler); | |
439 | } else if (op == "==") { | |
440 | node = new ESQueryNode_Op_Equal(compiler); | |
441 | } else { | |
442 | static map<string, string> range_op_map = { | |
443 | { "<", "lt"}, | |
444 | { "<=", "lte"}, | |
445 | { ">=", "gte"}, | |
446 | { ">", "gt"}, | |
447 | }; | |
448 | ||
449 | auto iter = range_op_map.find(op); | |
450 | if (iter == range_op_map.end()) { | |
451 | *perr = string("invalid operator: ") + op; | |
452 | return false; | |
453 | } | |
454 | ||
455 | node = new ESQueryNode_Op_Range(compiler, iter->second); | |
456 | } | |
457 | ||
458 | if (!node->init(s, pnode, perr)) { | |
459 | delete node; | |
460 | return false; | |
461 | } | |
462 | return true; | |
463 | } | |
464 | ||
465 | ||
466 | bool is_key_char(char c) | |
467 | { | |
468 | switch (c) { | |
469 | case '(': | |
470 | case ')': | |
471 | case '<': | |
472 | case '>': | |
473 | case '@': | |
474 | case ',': | |
475 | case ';': | |
476 | case ':': | |
477 | case '\\': | |
478 | case '"': | |
479 | case '/': | |
480 | case '[': | |
481 | case ']': | |
482 | case '?': | |
483 | case '=': | |
484 | case '{': | |
485 | case '}': | |
486 | case ' ': | |
487 | case '\t': | |
488 | return false; | |
489 | }; | |
490 | return (isascii(c) > 0); | |
491 | } | |
492 | ||
493 | static bool is_op_char(char c) | |
494 | { | |
495 | switch (c) { | |
496 | case '<': | |
497 | case '=': | |
498 | case '>': | |
499 | return true; | |
500 | }; | |
501 | return false; | |
502 | } | |
503 | ||
504 | static bool is_val_char(char c) | |
505 | { | |
506 | if (isspace(c)) { | |
507 | return false; | |
508 | } | |
509 | return (c != ')'); | |
510 | } | |
511 | ||
512 | void ESInfixQueryParser::skip_whitespace(const char *str, int size, int& pos) { | |
513 | while (pos < size && isspace(str[pos])) { | |
514 | ++pos; | |
515 | } | |
516 | } | |
517 | ||
518 | bool ESInfixQueryParser::get_next_token(bool (*filter)(char)) { | |
519 | skip_whitespace(str, size, pos); | |
520 | int token_start = pos; | |
521 | while (pos < size && filter(str[pos])) { | |
522 | ++pos; | |
523 | } | |
524 | if (pos == token_start) { | |
525 | return false; | |
526 | } | |
527 | string token = string(str + token_start, pos - token_start); | |
528 | args.push_back(token); | |
529 | return true; | |
530 | } | |
531 | ||
532 | bool ESInfixQueryParser::parse_condition() { | |
533 | /* | |
534 | * condition: <key> <operator> <val> | |
535 | * | |
536 | * whereas key: needs to conform to http header field restrictions | |
537 | * operator: one of the following: < <= == >= > | |
538 | * val: ascii, terminated by either space or ')' (or end of string) | |
539 | */ | |
540 | ||
541 | /* parse key */ | |
542 | bool valid = get_next_token(is_key_char) && | |
543 | get_next_token(is_op_char) && | |
544 | get_next_token(is_val_char); | |
545 | ||
546 | if (!valid) { | |
547 | return false; | |
548 | } | |
549 | ||
550 | return true; | |
551 | } | |
552 | ||
553 | bool ESInfixQueryParser::parse_and_or() { | |
554 | skip_whitespace(str, size, pos); | |
555 | if (pos + 3 <= size && strncmp(str + pos, "and", 3) == 0) { | |
556 | pos += 3; | |
557 | args.push_back("and"); | |
558 | return true; | |
559 | } | |
560 | ||
561 | if (pos + 2 <= size && strncmp(str + pos, "or", 2) == 0) { | |
562 | pos += 2; | |
563 | args.push_back("or"); | |
564 | return true; | |
565 | } | |
566 | ||
567 | return false; | |
568 | } | |
569 | ||
570 | bool ESInfixQueryParser::parse_specific_char(const char *pchar) { | |
571 | skip_whitespace(str, size, pos); | |
572 | if (pos >= size) { | |
573 | return false; | |
574 | } | |
575 | if (str[pos] != *pchar) { | |
576 | return false; | |
577 | } | |
578 | ||
579 | args.push_back(pchar); | |
580 | ++pos; | |
581 | return true; | |
582 | } | |
583 | ||
584 | bool ESInfixQueryParser::parse_open_bracket() { | |
585 | return parse_specific_char("("); | |
586 | } | |
587 | ||
588 | bool ESInfixQueryParser::parse_close_bracket() { | |
589 | return parse_specific_char(")"); | |
590 | } | |
591 | ||
592 | bool ESInfixQueryParser::parse(list<string> *result) { | |
593 | /* | |
594 | * expression: [(]<condition>[[and/or]<condition>][)][and/or]... | |
595 | */ | |
596 | ||
597 | while (pos < size) { | |
598 | parse_open_bracket(); | |
599 | if (!parse_condition()) { | |
600 | return false; | |
601 | } | |
602 | parse_close_bracket(); | |
603 | parse_and_or(); | |
604 | } | |
605 | ||
606 | result->swap(args); | |
607 | ||
608 | return true; | |
609 | } | |
610 | ||
611 | bool ESQueryCompiler::convert(list<string>& infix, string *perr) { | |
612 | list<string> prefix; | |
613 | if (!infix_to_prefix(infix, &prefix)) { | |
614 | *perr = "invalid query"; | |
615 | return false; | |
616 | } | |
617 | stack.assign(prefix); | |
618 | if (!alloc_node(this, &stack, &query_root, perr)) { | |
619 | return false; | |
620 | } | |
621 | if (!stack.done()) { | |
622 | *perr = "invalid query"; | |
623 | return false; | |
624 | } | |
625 | return true; | |
626 | } | |
627 | ||
628 | ESQueryCompiler::~ESQueryCompiler() { | |
629 | delete query_root; | |
630 | } | |
631 | ||
632 | bool ESQueryCompiler::compile(string *perr) { | |
633 | list<string> infix; | |
634 | if (!parser.parse(&infix)) { | |
635 | *perr = "failed to parse query"; | |
636 | return false; | |
637 | } | |
638 | ||
639 | if (!convert(infix, perr)) { | |
640 | return false; | |
641 | } | |
642 | ||
643 | for (auto& c : eq_conds) { | |
644 | ESQueryNode_Op_Equal *eq_node = new ESQueryNode_Op_Equal(this, c.first, c.second); | |
645 | eq_node->set_allow_restricted(true); /* can access restricted fields */ | |
646 | ESQueryNode *effective_node; | |
647 | if (!eq_node->init(nullptr, &effective_node, perr)) { | |
648 | delete eq_node; | |
649 | return false; | |
650 | } | |
651 | query_root = new ESQueryNode_Bool(this, "and", effective_node, query_root); | |
652 | } | |
653 | ||
654 | return true; | |
655 | } | |
656 | ||
657 | void ESQueryCompiler::dump(Formatter *f) const { | |
658 | encode_json("query", *query_root, f); | |
659 | } | |
660 |