// Source: git.proxmox.com, ceph.git (import quincy 17.2.0)
// File: ceph/src/arrow/cpp/src/arrow/compute/exec/expression.h
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// This API is EXPERIMENTAL.
19
20 #pragma once
21
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "arrow/compute/type_fwd.h"
28 #include "arrow/datum.h"
29 #include "arrow/type_fwd.h"
30 #include "arrow/util/variant.h"
31
32 namespace arrow {
33 namespace compute {
34
35 /// An unbound expression which maps a single Datum to another Datum.
36 /// An expression is one of
37 /// - A literal Datum.
38 /// - A reference to a single (potentially nested) field of the input Datum.
39 /// - A call to a compute function, with arguments specified by other Expressions.
40 class ARROW_EXPORT Expression {
41 public:
42 struct Call {
43 std::string function_name;
44 std::vector<Expression> arguments;
45 std::shared_ptr<FunctionOptions> options;
46 // Cached hash value
47 size_t hash;
48
49 // post-Bind properties:
50 std::shared_ptr<Function> function;
51 const Kernel* kernel = NULLPTR;
52 std::shared_ptr<KernelState> kernel_state;
53 ValueDescr descr;
54
55 void ComputeHash();
56 };
57
58 std::string ToString() const;
59 bool Equals(const Expression& other) const;
60 size_t hash() const;
61 struct Hash {
62 size_t operator()(const Expression& expr) const { return expr.hash(); }
63 };
64
65 /// Bind this expression to the given input type, looking up Kernels and field types.
66 /// Some expression simplification may be performed and implicit casts will be inserted.
67 /// Any state necessary for execution will be initialized and returned.
68 Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
69 Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
70
71 // XXX someday
72 // Clone all KernelState in this bound expression. If any function referenced by this
73 // expression has mutable KernelState, it is not safe to execute or apply simplification
74 // passes to it (or copies of it!) from multiple threads. Cloning state produces new
75 // KernelStates where necessary to ensure that Expressions may be manipulated safely
76 // on multiple threads.
77 // Result<ExpressionState> CloneState() const;
78 // Status SetState(ExpressionState);
79
80 /// Return true if all an expression's field references have explicit ValueDescr and all
81 /// of its functions' kernels are looked up.
82 bool IsBound() const;
83
84 /// Return true if this expression is composed only of Scalar literals, field
85 /// references, and calls to ScalarFunctions.
86 bool IsScalarExpression() const;
87
88 /// Return true if this expression is literal and entirely null.
89 bool IsNullLiteral() const;
90
91 /// Return true if this expression could evaluate to true.
92 bool IsSatisfiable() const;
93
94 // XXX someday
95 // Result<PipelineGraph> GetPipelines();
96
97 /// Access a Call or return nullptr if this expression is not a call
98 const Call* call() const;
99 /// Access a Datum or return nullptr if this expression is not a literal
100 const Datum* literal() const;
101 /// Access a FieldRef or return nullptr if this expression is not a field_ref
102 const FieldRef* field_ref() const;
103
104 /// The type and shape to which this expression will evaluate
105 ValueDescr descr() const;
106 std::shared_ptr<DataType> type() const { return descr().type; }
107 // XXX someday
108 // NullGeneralization::type nullable() const;
109
110 struct Parameter {
111 FieldRef ref;
112
113 // post-bind properties
114 ValueDescr descr;
115 int index;
116 };
117 const Parameter* parameter() const;
118
119 Expression() = default;
120 explicit Expression(Call call);
121 explicit Expression(Datum literal);
122 explicit Expression(Parameter parameter);
123
124 private:
125 using Impl = util::Variant<Datum, Parameter, Call>;
126 std::shared_ptr<Impl> impl_;
127
128 ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
129
130 ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
131 };
132
133 inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
134 inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); }
135
136 // Factories
137
138 ARROW_EXPORT
139 Expression literal(Datum lit);
140
141 template <typename Arg>
142 Expression literal(Arg&& arg) {
143 return literal(Datum(std::forward<Arg>(arg)));
144 }
145
146 ARROW_EXPORT
147 Expression field_ref(FieldRef ref);
148
149 ARROW_EXPORT
150 Expression call(std::string function, std::vector<Expression> arguments,
151 std::shared_ptr<FunctionOptions> options = NULLPTR);
152
153 template <typename Options, typename = typename std::enable_if<
154 std::is_base_of<FunctionOptions, Options>::value>::type>
155 Expression call(std::string function, std::vector<Expression> arguments,
156 Options options) {
157 return call(std::move(function), std::move(arguments),
158 std::make_shared<Options>(std::move(options)));
159 }
160
161 /// Assemble a list of all fields referenced by an Expression at any depth.
162 ARROW_EXPORT
163 std::vector<FieldRef> FieldsInExpression(const Expression&);
164
165 /// Check if the expression references any fields.
166 ARROW_EXPORT
167 bool ExpressionHasFieldRefs(const Expression&);
168
169 /// Assemble a mapping from field references to known values.
170 struct ARROW_EXPORT KnownFieldValues;
171 ARROW_EXPORT
172 Result<KnownFieldValues> ExtractKnownFieldValues(
173 const Expression& guaranteed_true_predicate);
174
175 /// \defgroup expression-passes Functions for modification of Expressions
176 ///
177 /// @{
178 ///
179 /// These transform bound expressions. Some transforms utilize a guarantee, which is
180 /// provided as an Expression which is guaranteed to evaluate to true. The
181 /// guaranteed_true_predicate need not be bound, but canonicalization is currently
182 /// deferred to producers of guarantees. For example in order to be recognized as a
183 /// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS
184 /// and literal RHS. Flipping the arguments, "is_in" with a one-long value_set, ... or
185 /// other semantically identical Expressions will not be recognized.
186
187 /// Weak canonicalization which establishes guarantees for subsequent passes. Even
188 /// equivalent Expressions may result in different canonicalized expressions.
189 /// TODO this could be a strong canonicalization
190 ARROW_EXPORT
191 Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
192
193 /// Simplify Expressions based on literal arguments (for example, add(null, x) will always
194 /// be null so replace the call with a null literal). Includes early evaluation of all
195 /// calls whose arguments are entirely literal.
196 ARROW_EXPORT
197 Result<Expression> FoldConstants(Expression);
198
199 /// Simplify Expressions by replacing with known values of the fields which it references.
200 ARROW_EXPORT
201 Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
202 Expression);
203
204 /// Simplify an expression by replacing subexpressions based on a guarantee:
205 /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
206 /// used to remove redundant function calls from a filter expression or to replace a
207 /// reference to a constant-value field with a literal.
208 ARROW_EXPORT
209 Result<Expression> SimplifyWithGuarantee(Expression,
210 const Expression& guaranteed_true_predicate);
211
212 /// @}
213
214 // Execution
215
216 /// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
217 /// RecordBatch which may have missing or incorrectly ordered columns.
218 /// Missing fields will be replaced with null scalars.
219 ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
220 const Datum& partial);
221
222 /// Execute a scalar expression against the provided state and input ExecBatch. This
223 /// expression must be bound.
224 ARROW_EXPORT
225 Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
226 ExecContext* = NULLPTR);
227
228 /// Convenience function for invoking against a RecordBatch
229 ARROW_EXPORT
230 Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
231 const Datum& partial_input, ExecContext* = NULLPTR);
232
233 // Serialization
234
235 ARROW_EXPORT
236 Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
237
238 ARROW_EXPORT
239 Result<Expression> Deserialize(std::shared_ptr<Buffer>);
240
241 // Convenience aliases for factories
242
243 ARROW_EXPORT Expression project(std::vector<Expression> values,
244 std::vector<std::string> names);
245
246 ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
247
248 ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
249
250 ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
251
252 ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
253
254 ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
255
256 ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
257
258 ARROW_EXPORT Expression is_null(Expression lhs, bool nan_is_null = false);
259
260 ARROW_EXPORT Expression is_valid(Expression lhs);
261
262 ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
263 ARROW_EXPORT Expression and_(const std::vector<Expression>&);
264 ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
265 ARROW_EXPORT Expression or_(const std::vector<Expression>&);
266 ARROW_EXPORT Expression not_(Expression operand);
267
268 } // namespace compute
269 } // namespace arrow