]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_es_query.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / rgw / rgw_es_query.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include <list>
5 #include <map>
6 #include <string>
7 #include <iostream>
8 #include <boost/algorithm/string.hpp>
9
10 #include "common/ceph_json.h"
11 #include "rgw_common.h"
12 #include "rgw_es_query.h"
13
14
15 #define dout_context g_ceph_context
16 #define dout_subsys ceph_subsys_rgw
17
/*
 * Remove the first entry of `l` and copy it into `*s`.
 *
 * Returns false (leaving both `l` and `*s` untouched) when the list is
 * empty, true otherwise.
 */
bool pop_front(std::list<std::string>& l, std::string *s)
{
  const bool has_entry = !l.empty();
  if (has_entry) {
    *s = l.front();
    l.pop_front();
  }
  return has_entry;
}
27
/*
 * Infix operators known to the query language, mapped to the numeric
 * rank that check_precedence() compares. Strings that are not listed
 * here are not operators.
 */
std::map<std::string, int> operator_map = {
  /* boolean connectives */
  { "or",  1 },
  { "and", 2 },
  /* comparisons, all of equal rank */
  { "<",   3 },
  { "<=",  3 },
  { "==",  3 },
  { "!=",  3 },
  { ">=",  3 },
  { ">",   3 },
};
38
39 bool is_operator(const string& s)
40 {
41 return (operator_map.find(s) != operator_map.end());
42 }
43
44 int operand_value(const string& op)
45 {
46 auto i = operator_map.find(op);
47 if (i == operator_map.end()) {
48 return 0;
49 }
50
51 return i->second;
52 }
53
54 int check_precedence(const string& op1, const string& op2)
55 {
56 return operand_value(op1) - operand_value(op2);
57 }
58
59 static bool infix_to_prefix(list<string>& source, list<string> *out)
60 {
61 list<string> operator_stack;
62 list<string> operand_stack;
63
64 operator_stack.push_front("(");
65 source.push_back(")");
66
67 for (string& entity : source) {
68 if (entity == "(") {
69 operator_stack.push_front(entity);
70 } else if (entity == ")") {
71 string popped_operator;
72 if (!pop_front(operator_stack, &popped_operator)) {
73 return false;
74 }
75
76 while (popped_operator != "(") {
77 operand_stack.push_front(popped_operator);
78 if (!pop_front(operator_stack, &popped_operator)) {
79 return false;
80 }
81 }
82
83 } else if (is_operator(entity)) {
84 string popped_operator;
85 if (!pop_front(operator_stack, &popped_operator)) {
86 return false;
87 }
88
89 int precedence = check_precedence(popped_operator, entity);
90
91 while (precedence >= 0) {
92 operand_stack.push_front(popped_operator);
93 if (!pop_front(operator_stack, &popped_operator)) {
94 return false;
95 }
96 precedence = check_precedence(popped_operator, entity);
97 }
98
99 operator_stack.push_front(popped_operator);
100 operator_stack.push_front(entity);
101 } else {
102 operand_stack.push_front(entity);
103 }
104
105 }
106
107 if (!operator_stack.empty()) {
108 return false;
109 }
110
111 out->swap(operand_stack);
112 return true;
113 }
114
115 class ESQueryNode {
116 protected:
117 ESQueryCompiler *compiler;
118 public:
119 ESQueryNode(ESQueryCompiler *_compiler) : compiler(_compiler) {}
120 virtual ~ESQueryNode() {}
121
122 virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) = 0;
123
124 virtual void dump(Formatter *f) const = 0;
125 };
126
127 static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr);
128
129 class ESQueryNode_Bool : public ESQueryNode {
130 string op;
131 ESQueryNode *first{nullptr};
132 ESQueryNode *second{nullptr};
133 public:
134 explicit ESQueryNode_Bool(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
135 ESQueryNode_Bool(ESQueryCompiler *compiler, const string& _op, ESQueryNode *_first, ESQueryNode *_second) :ESQueryNode(compiler), op(_op), first(_first), second(_second) {}
136 bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
137 bool valid = s->pop(&op);
138 if (!valid) {
139 *perr = "incorrect expression";
140 return false;
141 }
142 valid = alloc_node(compiler, s, &first, perr) &&
143 alloc_node(compiler, s, &second, perr);
144 if (!valid) {
145 return false;
146 }
147 *pnode = this;
148 return true;
149 }
150 virtual ~ESQueryNode_Bool() {
151 delete first;
152 delete second;
153 }
154
155 void dump(Formatter *f) const override {
156 f->open_object_section("bool");
157 const char *section = (op == "and" ? "must" : "should");
158 f->open_array_section(section);
159 encode_json("entry", *first, f);
160 encode_json("entry", *second, f);
161 f->close_section();
162 f->close_section();
163 }
164
165 };
166
167 class ESQueryNodeLeafVal {
168 public:
169 ESQueryNodeLeafVal() = default;
170 virtual ~ESQueryNodeLeafVal() {}
171
172 virtual bool init(const string& str_val, string *perr) = 0;
173 virtual void encode_json(const string& field, Formatter *f) const = 0;
174 };
175
176 class ESQueryNodeLeafVal_Str : public ESQueryNodeLeafVal {
177 string val;
178 public:
179 ESQueryNodeLeafVal_Str() {}
180 bool init(const string& str_val, string *perr) override {
181 val = str_val;
182 return true;
183 }
184 void encode_json(const string& field, Formatter *f) const override {
185 ::encode_json(field.c_str(), val.c_str(), f);
186 }
187 };
188
189 class ESQueryNodeLeafVal_Int : public ESQueryNodeLeafVal {
190 int64_t val{0};
191 public:
192 ESQueryNodeLeafVal_Int() {}
193 bool init(const string& str_val, string *perr) override {
194 string err;
195 val = strict_strtoll(str_val.c_str(), 10, &err);
196 if (!err.empty()) {
197 *perr = string("failed to parse integer: ") + err;
198 return false;
199 }
200 return true;
201 }
202 void encode_json(const string& field, Formatter *f) const override {
203 ::encode_json(field.c_str(), val, f);
204 }
205 };
206
207 class ESQueryNodeLeafVal_Date : public ESQueryNodeLeafVal {
208 ceph::real_time val;
209 public:
210 ESQueryNodeLeafVal_Date() {}
211 bool init(const string& str_val, string *perr) override {
212 if (parse_time(str_val.c_str(), &val) < 0) {
213 *perr = string("failed to parse date: ") + str_val;
214 return false;
215 }
216 return true;
217 }
218 void encode_json(const string& field, Formatter *f) const override {
219 string s;
220 rgw_to_iso8601(val, &s);
221 ::encode_json(field.c_str(), s, f);
222 }
223 };
224
225 class ESQueryNode_Op : public ESQueryNode {
226 protected:
227 string op;
228 string field;
229 string str_val;
230 ESQueryNodeLeafVal *val{nullptr};
231 ESEntityTypeMap::EntityType entity_type{ESEntityTypeMap::ES_ENTITY_NONE};
232 bool allow_restricted{false};
233
234 bool val_from_str(string *perr) {
235 switch (entity_type) {
236 case ESEntityTypeMap::ES_ENTITY_DATE:
237 val = new ESQueryNodeLeafVal_Date;
238 break;
239 case ESEntityTypeMap::ES_ENTITY_INT:
240 val = new ESQueryNodeLeafVal_Int;
241 break;
242 default:
243 val = new ESQueryNodeLeafVal_Str;
244 }
245 return val->init(str_val, perr);
246 }
247 bool do_init(ESQueryNode **pnode, string *perr) {
248 field = compiler->unalias_field(field);
249 ESQueryNode *effective_node;
250 if (!handle_nested(&effective_node, perr)) {
251 return false;
252 }
253 if (!val_from_str(perr)) {
254 return false;
255 }
256 *pnode = effective_node;
257 return true;
258 }
259
260 public:
261 ESQueryNode_Op(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
262 ~ESQueryNode_Op() {
263 delete val;
264 }
265 virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
266 bool valid = s->pop(&op) &&
267 s->pop(&str_val) &&
268 s->pop(&field);
269 if (!valid) {
270 *perr = "invalid expression";
271 return false;
272 }
273 return do_init(pnode, perr);
274 }
275 bool handle_nested(ESQueryNode **pnode, string *perr);
276
277 void set_allow_restricted(bool allow) {
278 allow_restricted = allow;
279 }
280
281 virtual void dump(Formatter *f) const override = 0;
282 };
283
284 class ESQueryNode_Op_Equal : public ESQueryNode_Op {
285 public:
286 explicit ESQueryNode_Op_Equal(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
287 ESQueryNode_Op_Equal(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) {
288 op = "==";
289 field = f;
290 str_val = v;
291 }
292
293 bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
294 if (op.empty()) {
295 return ESQueryNode_Op::init(s, pnode, perr);
296 }
297 return do_init(pnode, perr);
298 }
299
300 virtual void dump(Formatter *f) const override {
301 f->open_object_section("term");
302 val->encode_json(field, f);
303 f->close_section();
304 }
305 };
306
307 class ESQueryNode_Op_NotEqual : public ESQueryNode_Op {
308 public:
309 explicit ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
310 ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) {
311 op = "!=";
312 field = f;
313 str_val = v;
314 }
315
316 bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
317 if (op.empty()) {
318 return ESQueryNode_Op::init(s, pnode, perr);
319 }
320 return do_init(pnode, perr);
321 }
322
323 virtual void dump(Formatter *f) const override {
324 f->open_object_section("bool");
325 f->open_object_section("must_not");
326 f->open_object_section("term");
327 val->encode_json(field, f);
328 f->close_section();
329 f->close_section();
330 f->close_section();
331 }
332 };
333
334 class ESQueryNode_Op_Range : public ESQueryNode_Op {
335 string range_str;
336 public:
337 ESQueryNode_Op_Range(ESQueryCompiler *compiler, const string& rs) : ESQueryNode_Op(compiler), range_str(rs) {}
338
339 virtual void dump(Formatter *f) const override {
340 f->open_object_section("range");
341 f->open_object_section(field.c_str());
342 val->encode_json(range_str, f);
343 f->close_section();
344 f->close_section();
345 }
346 };
347
348 class ESQueryNode_Op_Nested_Parent : public ESQueryNode_Op {
349 public:
350 ESQueryNode_Op_Nested_Parent(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
351
352 virtual string get_custom_leaf_field_name() = 0;
353 };
354
355 template <class T>
356 class ESQueryNode_Op_Nested : public ESQueryNode_Op_Nested_Parent {
357 string name;
358 ESQueryNode *next;
359 public:
360 ESQueryNode_Op_Nested(ESQueryCompiler *compiler, const string& _name, ESQueryNode *_next) : ESQueryNode_Op_Nested_Parent(compiler),
361 name(_name), next(_next) {}
362 ~ESQueryNode_Op_Nested() {
363 delete next;
364 }
365
366 virtual void dump(Formatter *f) const override {
367 f->open_object_section("nested");
368 string s = string("meta.custom-") + type_str();
369 encode_json("path", s.c_str(), f);
370 f->open_object_section("query");
371 f->open_object_section("bool");
372 f->open_array_section("must");
373 f->open_object_section("entry");
374 f->open_object_section("match");
375 string n = s + ".name";
376 encode_json(n.c_str(), name.c_str(), f);
377 f->close_section();
378 f->close_section();
379 encode_json("entry", *next, f);
380 f->close_section();
381 f->close_section();
382 f->close_section();
383 f->close_section();
384 }
385
386 string type_str() const;
387 string get_custom_leaf_field_name() override {
388 return string("meta.custom-") + type_str() + ".value";
389 }
390 };
391
392 template<>
393 string ESQueryNode_Op_Nested<string>::type_str() const {
394 return "string";
395 }
396
397 template<>
398 string ESQueryNode_Op_Nested<int64_t>::type_str() const {
399 return "int";
400 }
401
402 template<>
403 string ESQueryNode_Op_Nested<ceph::real_time>::type_str() const {
404 return "date";
405 }
406
407 bool ESQueryNode_Op::handle_nested(ESQueryNode **pnode, string *perr)
408 {
409 string field_name = field;
410 const string& custom_prefix = compiler->get_custom_prefix();
411 if (!boost::algorithm::starts_with(field_name, custom_prefix)) {
412 *pnode = this;
413 auto m = compiler->get_generic_type_map();
414 if (m) {
415 bool found = m->find(field_name, &entity_type) &&
416 (allow_restricted || !compiler->is_restricted(field_name));
417 if (!found) {
418 *perr = string("unexpected generic field '") + field_name + "'";
419 }
420 return found;
421 }
422 *perr = "query parser does not support generic types";
423 return false;
424 }
425
426 field_name = field_name.substr(custom_prefix.size());
427 auto m = compiler->get_custom_type_map();
428 if (m) {
429 m->find(field_name, &entity_type);
430 /* ignoring returned bool, for now just treat it as string */
431 }
432
433 ESQueryNode_Op_Nested_Parent *new_node;
434 switch (entity_type) {
435 case ESEntityTypeMap::ES_ENTITY_INT:
436 new_node = new ESQueryNode_Op_Nested<int64_t>(compiler, field_name, this);
437 break;
438 case ESEntityTypeMap::ES_ENTITY_DATE:
439 new_node = new ESQueryNode_Op_Nested<ceph::real_time>(compiler, field_name, this);
440 break;
441 default:
442 new_node = new ESQueryNode_Op_Nested<string>(compiler, field_name, this);
443 }
444
445 field = new_node->get_custom_leaf_field_name();
446 *pnode = new_node;
447
448 return true;
449 }
450
/* True for the two boolean connectives, "and" and "or" (case sensitive). */
static bool is_bool_op(const std::string& str)
{
  if (str == "and") {
    return true;
  }
  return str == "or";
}
455
456 static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr)
457 {
458 string op;
459 bool valid = s->peek(&op);
460 if (!valid) {
461 *perr = "incorrect expression";
462 return false;
463 }
464
465 ESQueryNode *node;
466
467 if (is_bool_op(op)) {
468 node = new ESQueryNode_Bool(compiler);
469 } else if (op == "==") {
470 node = new ESQueryNode_Op_Equal(compiler);
471 } else if (op == "!=") {
472 node = new ESQueryNode_Op_NotEqual(compiler);
473 } else {
474 static map<string, string> range_op_map = {
475 { "<", "lt"},
476 { "<=", "lte"},
477 { ">=", "gte"},
478 { ">", "gt"},
479 };
480
481 auto iter = range_op_map.find(op);
482 if (iter == range_op_map.end()) {
483 *perr = string("invalid operator: ") + op;
484 return false;
485 }
486
487 node = new ESQueryNode_Op_Range(compiler, iter->second);
488 }
489
490 if (!node->init(s, pnode, perr)) {
491 delete node;
492 return false;
493 }
494 return true;
495 }
496
497
/*
 * True when `c` may appear in a condition key: any character accepted by
 * isascii() except the separators listed below.
 */
bool is_key_char(char c)
{
  static const char separators[] = {
    '(', ')', '<', '>', '!', '@', ',', ';', ':', '\\',
    '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t',
  };

  for (const char sep : separators) {
    if (c == sep) {
      return false;
    }
  }
  return (isascii(c) > 0);
}
525
/* True for the characters comparison operators are made of: ! < = > */
static bool is_op_char(char c)
{
  return c == '!' || c == '<' || c == '=' || c == '>';
}
537
/*
 * True when `c` may be part of a condition value: anything that is
 * neither whitespace nor a closing bracket.
 */
static bool is_val_char(char c)
{
  /*
   * The <ctype.h> classifiers are only defined for EOF and values
   * representable as unsigned char. A plain char holding a non-ASCII byte
   * may be negative, so convert before calling isspace().
   */
  if (isspace(static_cast<unsigned char>(c))) {
    return false;
  }
  return (c != ')');
}
545
546 void ESInfixQueryParser::skip_whitespace(const char *str, int size, int& pos) {
547 while (pos < size && isspace(str[pos])) {
548 ++pos;
549 }
550 }
551
552 bool ESInfixQueryParser::get_next_token(bool (*filter)(char)) {
553 skip_whitespace(str, size, pos);
554 int token_start = pos;
555 while (pos < size && filter(str[pos])) {
556 ++pos;
557 }
558 if (pos == token_start) {
559 return false;
560 }
561 string token = string(str + token_start, pos - token_start);
562 args.push_back(token);
563 return true;
564 }
565
566 bool ESInfixQueryParser::parse_condition() {
567 /*
568 * condition: <key> <operator> <val>
569 *
570 * whereas key: needs to conform to http header field restrictions
571 * operator: one of the following: < <= == != >= >
572 * val: ascii, terminated by either space or ')' (or end of string)
573 */
574
575 /* parse key */
576 bool valid = get_next_token(is_key_char) &&
577 get_next_token(is_op_char) &&
578 get_next_token(is_val_char);
579
580 if (!valid) {
581 return false;
582 }
583
584 return true;
585 }
586
587 bool ESInfixQueryParser::parse_and_or() {
588 skip_whitespace(str, size, pos);
589 if (pos + 3 <= size && strncmp(str + pos, "and", 3) == 0) {
590 pos += 3;
591 args.push_back("and");
592 return true;
593 }
594
595 if (pos + 2 <= size && strncmp(str + pos, "or", 2) == 0) {
596 pos += 2;
597 args.push_back("or");
598 return true;
599 }
600
601 return false;
602 }
603
604 bool ESInfixQueryParser::parse_specific_char(const char *pchar) {
605 skip_whitespace(str, size, pos);
606 if (pos >= size) {
607 return false;
608 }
609 if (str[pos] != *pchar) {
610 return false;
611 }
612
613 args.push_back(pchar);
614 ++pos;
615 return true;
616 }
617
618 bool ESInfixQueryParser::parse_open_bracket() {
619 return parse_specific_char("(");
620 }
621
622 bool ESInfixQueryParser::parse_close_bracket() {
623 return parse_specific_char(")");
624 }
625
626 bool ESInfixQueryParser::parse(list<string> *result) {
627 /*
628 * expression: [(]<condition>[[and/or]<condition>][)][and/or]...
629 */
630
631 while (pos < size) {
632 parse_open_bracket();
633 if (!parse_condition()) {
634 return false;
635 }
636 parse_close_bracket();
637 parse_and_or();
638 }
639
640 result->swap(args);
641
642 return true;
643 }
644
645 bool ESQueryCompiler::convert(list<string>& infix, string *perr) {
646 list<string> prefix;
647 if (!infix_to_prefix(infix, &prefix)) {
648 *perr = "invalid query";
649 return false;
650 }
651 stack.assign(prefix);
652 if (!alloc_node(this, &stack, &query_root, perr)) {
653 return false;
654 }
655 if (!stack.done()) {
656 *perr = "invalid query";
657 return false;
658 }
659 return true;
660 }
661
662 ESQueryCompiler::~ESQueryCompiler() {
663 delete query_root;
664 }
665
666 bool ESQueryCompiler::compile(string *perr) {
667 list<string> infix;
668 if (!parser.parse(&infix)) {
669 *perr = "failed to parse query";
670 return false;
671 }
672
673 if (!convert(infix, perr)) {
674 return false;
675 }
676
677 for (auto& c : eq_conds) {
678 ESQueryNode_Op_Equal *eq_node = new ESQueryNode_Op_Equal(this, c.first, c.second);
679 eq_node->set_allow_restricted(true); /* can access restricted fields */
680 ESQueryNode *effective_node;
681 if (!eq_node->init(nullptr, &effective_node, perr)) {
682 delete eq_node;
683 return false;
684 }
685 query_root = new ESQueryNode_Bool(this, "and", effective_node, query_root);
686 }
687
688 return true;
689 }
690
691 void ESQueryCompiler::dump(Formatter *f) const {
692 encode_json("query", *query_root, f);
693 }
694