]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_es_query.cc
update sources to v12.1.1
[ceph.git] / ceph / src / rgw / rgw_es_query.cc
1 #include <list>
2 #include <map>
3 #include <string>
4 #include <iostream>
5 #include <boost/algorithm/string.hpp>
6
7 #include "common/ceph_json.h"
8 #include "rgw_common.h"
9 #include "rgw_es_query.h"
10
11 using namespace std;
12
13 #define dout_context g_ceph_context
14 #define dout_subsys ceph_subsys_rgw
15
16 bool pop_front(list<string>& l, string *s)
17 {
18 if (l.empty()) {
19 return false;
20 }
21 *s = l.front();
22 l.pop_front();
23 return true;
24 }
25
26 map<string, int> operator_map = {
27 { "or", 1 },
28 { "and", 2 },
29 { "<", 3 },
30 { "<=", 3 },
31 { "==", 3 },
32 { ">=", 3 },
33 { ">", 3 },
34 };
35
36 bool is_operator(const string& s)
37 {
38 return (operator_map.find(s) != operator_map.end());
39 }
40
41 int operand_value(const string& op)
42 {
43 auto i = operator_map.find(op);
44 if (i == operator_map.end()) {
45 return 0;
46 }
47
48 return i->second;
49 }
50
51 int check_precedence(const string& op1, const string& op2)
52 {
53 return operand_value(op1) - operand_value(op2);
54 }
55
56 static bool infix_to_prefix(list<string>& source, list<string> *out)
57 {
58 list<string> operator_stack;
59 list<string> operand_stack;
60
61 operator_stack.push_front("(");
62 source.push_back(")");
63
64 for (string& entity : source) {
65 if (entity == "(") {
66 operator_stack.push_front(entity);
67 } else if (entity == ")") {
68 string popped_operator;
69 if (!pop_front(operator_stack, &popped_operator)) {
70 return false;
71 }
72
73 while (popped_operator != "(") {
74 operand_stack.push_front(popped_operator);
75 if (!pop_front(operator_stack, &popped_operator)) {
76 return false;
77 }
78 }
79
80 } else if (is_operator(entity)) {
81 string popped_operator;
82 if (!pop_front(operator_stack, &popped_operator)) {
83 return false;
84 }
85
86 int precedence = check_precedence(popped_operator, entity);
87
88 while (precedence >= 0) {
89 operand_stack.push_front(popped_operator);
90 if (!pop_front(operator_stack, &popped_operator)) {
91 return false;
92 }
93 precedence = check_precedence(popped_operator, entity);
94 }
95
96 operator_stack.push_front(popped_operator);
97 operator_stack.push_front(entity);
98 } else {
99 operand_stack.push_front(entity);
100 }
101
102 }
103
104 if (!operator_stack.empty()) {
105 return false;
106 }
107
108 out->swap(operand_stack);
109 return true;
110 }
111
112 class ESQueryNode {
113 protected:
114 ESQueryCompiler *compiler;
115 public:
116 ESQueryNode(ESQueryCompiler *_compiler) : compiler(_compiler) {}
117 virtual ~ESQueryNode() {}
118
119 virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) = 0;
120
121 virtual void dump(Formatter *f) const = 0;
122 };
123
124 static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr);
125
126 class ESQueryNode_Bool : public ESQueryNode {
127 string op;
128 ESQueryNode *first{nullptr};
129 ESQueryNode *second{nullptr};
130 public:
131 ESQueryNode_Bool(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
132 ESQueryNode_Bool(ESQueryCompiler *compiler, const string& _op, ESQueryNode *_first, ESQueryNode *_second) :ESQueryNode(compiler), op(_op), first(_first), second(_second) {}
133 bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
134 bool valid = s->pop(&op);
135 if (!valid) {
136 *perr = "incorrect expression";
137 return false;
138 }
139 valid = alloc_node(compiler, s, &first, perr) &&
140 alloc_node(compiler, s, &second, perr);
141 if (!valid) {
142 return false;
143 }
144 *pnode = this;
145 return true;
146 }
147 virtual ~ESQueryNode_Bool() {
148 delete first;
149 delete second;
150 }
151
152 void dump(Formatter *f) const {
153 f->open_object_section("bool");
154 const char *section = (op == "and" ? "must" : "should");
155 f->open_array_section(section);
156 encode_json("entry", *first, f);
157 encode_json("entry", *second, f);
158 f->close_section();
159 f->close_section();
160 }
161
162 };
163
164 class ESQueryNodeLeafVal {
165 public:
166 ESQueryNodeLeafVal() = default;
167 virtual ~ESQueryNodeLeafVal() {}
168
169 virtual bool init(const string& str_val, string *perr) = 0;
170 virtual void encode_json(const string& field, Formatter *f) const = 0;
171 };
172
173 class ESQueryNodeLeafVal_Str : public ESQueryNodeLeafVal {
174 string val;
175 public:
176 ESQueryNodeLeafVal_Str() {}
177 bool init(const string& str_val, string *perr) override {
178 val = str_val;
179 return true;
180 }
181 void encode_json(const string& field, Formatter *f) const {
182 ::encode_json(field.c_str(), val.c_str(), f);
183 }
184 };
185
186 class ESQueryNodeLeafVal_Int : public ESQueryNodeLeafVal {
187 int64_t val{0};
188 public:
189 ESQueryNodeLeafVal_Int() {}
190 bool init(const string& str_val, string *perr) override {
191 string err;
192 val = strict_strtoll(str_val.c_str(), 10, &err);
193 if (!err.empty()) {
194 *perr = string("failed to parse integer: ") + err;
195 return false;
196 }
197 return true;
198 }
199 void encode_json(const string& field, Formatter *f) const {
200 ::encode_json(field.c_str(), val, f);
201 }
202 };
203
204 class ESQueryNodeLeafVal_Date : public ESQueryNodeLeafVal {
205 ceph::real_time val;
206 public:
207 ESQueryNodeLeafVal_Date() {}
208 bool init(const string& str_val, string *perr) override {
209 if (parse_time(str_val.c_str(), &val) < 0) {
210 *perr = string("failed to parse date: ") + str_val;
211 return false;
212 }
213 return true;
214 }
215 void encode_json(const string& field, Formatter *f) const {
216 string s;
217 rgw_to_iso8601(val, &s);
218 ::encode_json(field.c_str(), s, f);
219 }
220 };
221
222 class ESQueryNode_Op : public ESQueryNode {
223 protected:
224 string op;
225 string field;
226 string str_val;
227 ESQueryNodeLeafVal *val{nullptr};
228 ESEntityTypeMap::EntityType entity_type{ESEntityTypeMap::ES_ENTITY_NONE};
229 bool allow_restricted{false};
230
231 bool val_from_str(string *perr) {
232 switch (entity_type) {
233 case ESEntityTypeMap::ES_ENTITY_DATE:
234 val = new ESQueryNodeLeafVal_Date;
235 break;
236 case ESEntityTypeMap::ES_ENTITY_INT:
237 val = new ESQueryNodeLeafVal_Int;
238 break;
239 default:
240 val = new ESQueryNodeLeafVal_Str;
241 }
242 return val->init(str_val, perr);
243 }
244 bool do_init(ESQueryNode **pnode, string *perr) {
245 field = compiler->unalias_field(field);
246 ESQueryNode *effective_node;
247 if (!handle_nested(&effective_node, perr)) {
248 return false;
249 }
250 if (!val_from_str(perr)) {
251 return false;
252 }
253 *pnode = effective_node;
254 return true;
255 }
256
257 public:
258 ESQueryNode_Op(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
259 ~ESQueryNode_Op() {
260 delete val;
261 }
262 virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
263 bool valid = s->pop(&op) &&
264 s->pop(&str_val) &&
265 s->pop(&field);
266 if (!valid) {
267 *perr = "invalid expression";
268 return false;
269 }
270 return do_init(pnode, perr);
271 }
272 bool handle_nested(ESQueryNode **pnode, string *perr);
273
274 void set_allow_restricted(bool allow) {
275 allow_restricted = allow;
276 }
277
278 virtual void dump(Formatter *f) const = 0;
279 };
280
281 class ESQueryNode_Op_Equal : public ESQueryNode_Op {
282 public:
283 ESQueryNode_Op_Equal(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
284 ESQueryNode_Op_Equal(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) {
285 op = "==";
286 field = f;
287 str_val = v;
288 }
289
290 bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
291 if (op.empty()) {
292 return ESQueryNode_Op::init(s, pnode, perr);
293 }
294 return do_init(pnode, perr);
295 }
296
297 virtual void dump(Formatter *f) const {
298 f->open_object_section("term");
299 val->encode_json(field, f);
300 f->close_section();
301 }
302 };
303
304 class ESQueryNode_Op_Range : public ESQueryNode_Op {
305 string range_str;
306 public:
307 ESQueryNode_Op_Range(ESQueryCompiler *compiler, const string& rs) : ESQueryNode_Op(compiler), range_str(rs) {}
308
309 virtual void dump(Formatter *f) const {
310 f->open_object_section("range");
311 f->open_object_section(field.c_str());
312 val->encode_json(range_str, f);
313 f->close_section();
314 f->close_section();
315 }
316 };
317
318 class ESQueryNode_Op_Nested_Parent : public ESQueryNode_Op {
319 public:
320 ESQueryNode_Op_Nested_Parent(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
321
322 virtual string get_custom_leaf_field_name() = 0;
323 };
324
325 template <class T>
326 class ESQueryNode_Op_Nested : public ESQueryNode_Op_Nested_Parent {
327 string name;
328 ESQueryNode *next;
329 public:
330 ESQueryNode_Op_Nested(ESQueryCompiler *compiler, const string& _name, ESQueryNode *_next) : ESQueryNode_Op_Nested_Parent(compiler),
331 name(_name), next(_next) {}
332 ~ESQueryNode_Op_Nested() {
333 delete next;
334 }
335
336 virtual void dump(Formatter *f) const {
337 f->open_object_section("nested");
338 string s = string("meta.custom-") + type_str();
339 encode_json("path", s.c_str(), f);
340 f->open_object_section("query");
341 f->open_object_section("bool");
342 f->open_array_section("must");
343 f->open_object_section("entry");
344 f->open_object_section("match");
345 string n = s + ".name";
346 encode_json(n.c_str(), name.c_str(), f);
347 f->close_section();
348 f->close_section();
349 encode_json("entry", *next, f);
350 f->close_section();
351 f->close_section();
352 f->close_section();
353 f->close_section();
354 }
355
356 string type_str() const;
357 string get_custom_leaf_field_name() {
358 return string("meta.custom-") + type_str() + ".value";
359 }
360 };
361
362 template<>
363 string ESQueryNode_Op_Nested<string>::type_str() const {
364 return "string";
365 }
366
367 template<>
368 string ESQueryNode_Op_Nested<int64_t>::type_str() const {
369 return "int";
370 }
371
372 template<>
373 string ESQueryNode_Op_Nested<ceph::real_time>::type_str() const {
374 return "date";
375 }
376
377 bool ESQueryNode_Op::handle_nested(ESQueryNode **pnode, string *perr)
378 {
379 string field_name = field;
380 const string& custom_prefix = compiler->get_custom_prefix();
381 if (!boost::algorithm::starts_with(field_name, custom_prefix)) {
382 *pnode = this;
383 auto m = compiler->get_generic_type_map();
384 if (m) {
385 bool found = m->find(field_name, &entity_type) &&
386 (allow_restricted || !compiler->is_restricted(field_name));
387 if (!found) {
388 *perr = string("unexpected generic field '") + field_name + "'";
389 }
390 return found;
391 }
392 *perr = "query parser does not support generic types";
393 return false;
394 }
395
396 field_name = field_name.substr(custom_prefix.size());
397 auto m = compiler->get_custom_type_map();
398 if (m) {
399 m->find(field_name, &entity_type);
400 /* ignoring returned bool, for now just treat it as string */
401 }
402
403 ESQueryNode_Op_Nested_Parent *new_node;
404 switch (entity_type) {
405 case ESEntityTypeMap::ES_ENTITY_INT:
406 new_node = new ESQueryNode_Op_Nested<int64_t>(compiler, field_name, this);
407 break;
408 case ESEntityTypeMap::ES_ENTITY_DATE:
409 new_node = new ESQueryNode_Op_Nested<ceph::real_time>(compiler, field_name, this);
410 break;
411 default:
412 new_node = new ESQueryNode_Op_Nested<string>(compiler, field_name, this);
413 }
414
415 field = new_node->get_custom_leaf_field_name();
416 *pnode = new_node;
417
418 return true;
419 }
420
421 static bool is_bool_op(const string& str)
422 {
423 return (str == "or" || str == "and");
424 }
425
426 static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr)
427 {
428 string op;
429 bool valid = s->peek(&op);
430 if (!valid) {
431 *perr = "incorrect expression";
432 return false;
433 }
434
435 ESQueryNode *node;
436
437 if (is_bool_op(op)) {
438 node = new ESQueryNode_Bool(compiler);
439 } else if (op == "==") {
440 node = new ESQueryNode_Op_Equal(compiler);
441 } else {
442 static map<string, string> range_op_map = {
443 { "<", "lt"},
444 { "<=", "lte"},
445 { ">=", "gte"},
446 { ">", "gt"},
447 };
448
449 auto iter = range_op_map.find(op);
450 if (iter == range_op_map.end()) {
451 *perr = string("invalid operator: ") + op;
452 return false;
453 }
454
455 node = new ESQueryNode_Op_Range(compiler, iter->second);
456 }
457
458 if (!node->init(s, pnode, perr)) {
459 delete node;
460 return false;
461 }
462 return true;
463 }
464
465
466 bool is_key_char(char c)
467 {
468 switch (c) {
469 case '(':
470 case ')':
471 case '<':
472 case '>':
473 case '@':
474 case ',':
475 case ';':
476 case ':':
477 case '\\':
478 case '"':
479 case '/':
480 case '[':
481 case ']':
482 case '?':
483 case '=':
484 case '{':
485 case '}':
486 case ' ':
487 case '\t':
488 return false;
489 };
490 return (isascii(c) > 0);
491 }
492
493 static bool is_op_char(char c)
494 {
495 switch (c) {
496 case '<':
497 case '=':
498 case '>':
499 return true;
500 };
501 return false;
502 }
503
504 static bool is_val_char(char c)
505 {
506 if (isspace(c)) {
507 return false;
508 }
509 return (c != ')');
510 }
511
512 void ESInfixQueryParser::skip_whitespace(const char *str, int size, int& pos) {
513 while (pos < size && isspace(str[pos])) {
514 ++pos;
515 }
516 }
517
518 bool ESInfixQueryParser::get_next_token(bool (*filter)(char)) {
519 skip_whitespace(str, size, pos);
520 int token_start = pos;
521 while (pos < size && filter(str[pos])) {
522 ++pos;
523 }
524 if (pos == token_start) {
525 return false;
526 }
527 string token = string(str + token_start, pos - token_start);
528 args.push_back(token);
529 return true;
530 }
531
532 bool ESInfixQueryParser::parse_condition() {
533 /*
534 * condition: <key> <operator> <val>
535 *
536 * whereas key: needs to conform to http header field restrictions
537 * operator: one of the following: < <= == >= >
538 * val: ascii, terminated by either space or ')' (or end of string)
539 */
540
541 /* parse key */
542 bool valid = get_next_token(is_key_char) &&
543 get_next_token(is_op_char) &&
544 get_next_token(is_val_char);
545
546 if (!valid) {
547 return false;
548 }
549
550 return true;
551 }
552
553 bool ESInfixQueryParser::parse_and_or() {
554 skip_whitespace(str, size, pos);
555 if (pos + 3 <= size && strncmp(str + pos, "and", 3) == 0) {
556 pos += 3;
557 args.push_back("and");
558 return true;
559 }
560
561 if (pos + 2 <= size && strncmp(str + pos, "or", 2) == 0) {
562 pos += 2;
563 args.push_back("or");
564 return true;
565 }
566
567 return false;
568 }
569
570 bool ESInfixQueryParser::parse_specific_char(const char *pchar) {
571 skip_whitespace(str, size, pos);
572 if (pos >= size) {
573 return false;
574 }
575 if (str[pos] != *pchar) {
576 return false;
577 }
578
579 args.push_back(pchar);
580 ++pos;
581 return true;
582 }
583
584 bool ESInfixQueryParser::parse_open_bracket() {
585 return parse_specific_char("(");
586 }
587
588 bool ESInfixQueryParser::parse_close_bracket() {
589 return parse_specific_char(")");
590 }
591
592 bool ESInfixQueryParser::parse(list<string> *result) {
593 /*
594 * expression: [(]<condition>[[and/or]<condition>][)][and/or]...
595 */
596
597 while (pos < size) {
598 parse_open_bracket();
599 if (!parse_condition()) {
600 return false;
601 }
602 parse_close_bracket();
603 parse_and_or();
604 }
605
606 result->swap(args);
607
608 return true;
609 }
610
611 bool ESQueryCompiler::convert(list<string>& infix, string *perr) {
612 list<string> prefix;
613 if (!infix_to_prefix(infix, &prefix)) {
614 *perr = "invalid query";
615 return false;
616 }
617 stack.assign(prefix);
618 if (!alloc_node(this, &stack, &query_root, perr)) {
619 return false;
620 }
621 if (!stack.done()) {
622 *perr = "invalid query";
623 return false;
624 }
625 return true;
626 }
627
628 ESQueryCompiler::~ESQueryCompiler() {
629 delete query_root;
630 }
631
632 bool ESQueryCompiler::compile(string *perr) {
633 list<string> infix;
634 if (!parser.parse(&infix)) {
635 *perr = "failed to parse query";
636 return false;
637 }
638
639 if (!convert(infix, perr)) {
640 return false;
641 }
642
643 for (auto& c : eq_conds) {
644 ESQueryNode_Op_Equal *eq_node = new ESQueryNode_Op_Equal(this, c.first, c.second);
645 eq_node->set_allow_restricted(true); /* can access restricted fields */
646 ESQueryNode *effective_node;
647 if (!eq_node->init(nullptr, &effective_node, perr)) {
648 delete eq_node;
649 return false;
650 }
651 query_root = new ESQueryNode_Bool(this, "and", effective_node, query_root);
652 }
653
654 return true;
655 }
656
657 void ESQueryCompiler::dump(Formatter *f) const {
658 encode_json("query", *query_root, f);
659 }
660