1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 // This API is EXPERIMENTAL.
27 #include "arrow/compute/type_fwd.h"
28 #include "arrow/datum.h"
29 #include "arrow/type_fwd.h"
30 #include "arrow/util/variant.h"
35 /// An unbound expression which maps a single Datum to another Datum.
36 /// An expression is one of
37 /// - A literal Datum.
38 /// - A reference to a single (potentially nested) field of the input Datum.
39 /// - A call to a compute function, with arguments specified by other Expressions.
40 class ARROW_EXPORT Expression
{
// NOTE(review): the six data members that follow (function_name .. kernel_state) read
// like the fields of a nested call-description type (the accessor call() further down
// returns `const Call*`). The enclosing declaration is not visible in this excerpt --
// confirm against the full header.
// Name of the compute function this call refers to.
43 std::string function_name
;
// Argument expressions of the call.
44 std::vector
<Expression
> arguments
;
// Options for the function; FunctionOptions is declared elsewhere.
45 std::shared_ptr
<FunctionOptions
> options
;
49 // post-Bind properties:
50 std::shared_ptr
<Function
> function
;
// Defaults to NULLPTR; presumably filled in by Bind() -- confirm.
51 const Kernel
* kernel
= NULLPTR
;
52 std::shared_ptr
<KernelState
> kernel_state
;
// Render this expression as a std::string.
58 std::string
ToString() const;
// Compare this expression with `other`. The free operator== / operator!= defined
// after this class forward to this method.
59 bool Equals(const Expression
& other
) const;
// Hash functor call operator: returns expr.hash().
// NOTE(review): neither the enclosing functor type nor the declaration of hash() is
// visible in this excerpt.
62 size_t operator()(const Expression
& expr
) const { return expr
.hash(); }
65 /// Bind this expression to the given input type, looking up Kernels and field types.
66 /// Some expression simplification may be performed and implicit casts will be inserted.
67 /// Any state necessary for execution will be initialized and returned.
// Both overloads take an optional ExecContext* which defaults to NULLPTR.
68 Result
<Expression
> Bind(const ValueDescr
& in
, ExecContext
* = NULLPTR
) const;
69 Result
<Expression
> Bind(const Schema
& in_schema
, ExecContext
* = NULLPTR
) const;
72 // Clone all KernelState in this bound expression. If any function referenced by this
73 // expression has mutable KernelState, it is not safe to execute or apply simplification
74 // passes to it (or copies of it!) from multiple threads. Cloning state produces new
75 // KernelStates where necessary to ensure that Expressions may be manipulated safely
76 // on multiple threads.
77 // Result<ExpressionState> CloneState() const;
78 // Status SetState(ExpressionState);
80 /// Return true if all of an expression's field references have explicit ValueDescr and
81 /// all of its functions' kernels are looked up.
// NOTE(review): the declaration documented by the comment above is not visible in
// this excerpt.
84 /// Return true if this expression is composed only of Scalar literals, field
85 /// references, and calls to ScalarFunctions.
86 bool IsScalarExpression() const;
88 /// Return true if this expression is literal and entirely null.
89 bool IsNullLiteral() const;
91 /// Return true if this expression could evaluate to true.
92 bool IsSatisfiable() const;
95 // Result<PipelineGraph> GetPipelines();
97 /// Access a Call or return nullptr if this expression is not a call
98 const Call
* call() const;
99 /// Access a Datum or return nullptr if this expression is not a literal
100 const Datum
* literal() const;
101 /// Access a FieldRef or return nullptr if this expression is not a field_ref
102 const FieldRef
* field_ref() const;
104 /// The type and shape to which this expression will evaluate
105 ValueDescr
descr() const;
// Convenience accessor: the `type` member of descr().
106 std::shared_ptr
<DataType
> type() const { return descr().type
; }
108 // NullGeneralization::type nullable() const;
113 // post-bind properties
// NOTE(review): presumably returns nullptr when this expression does not hold a
// Parameter, mirroring call() / literal() above -- confirm.
117 const Parameter
* parameter() const;
// Default construction leaves impl_ as an empty shared_ptr.
119 Expression() = default;
// Explicit constructors, one per alternative of Impl.
120 explicit Expression(Call call
);
121 explicit Expression(Datum literal
);
122 explicit Expression(Parameter parameter
);
// Storage: an Expression holds one of a literal Datum, a Parameter or a Call.
125 using Impl
= util::Variant
<Datum
, Parameter
, Call
>;
// Held through a shared_ptr, so copying an Expression copies the pointer.
126 std::shared_ptr
<Impl
> impl_
;
// NOTE(review): presumably tests whether l and r refer to the same underlying Impl
// rather than comparing by value -- confirm against the implementation.
128 ARROW_EXPORT
friend bool Identical(const Expression
& l
, const Expression
& r
);
// NOTE(review): signature matches the GoogleTest PrintTo customization hook --
// confirm that this is its purpose.
130 ARROW_EXPORT
friend void PrintTo(const Expression
&, std::ostream
*);
133 inline bool operator==(const Expression
& l
, const Expression
& r
) { return l
.Equals(r
); }
134 inline bool operator!=(const Expression
& l
, const Expression
& r
) { return !l
.Equals(r
); }
139 Expression
literal(Datum lit
);
// Convenience overload: perfect-forwards `arg` into a Datum and delegates to
// literal(Datum).
// NOTE(review): the closing brace of this function is not visible in this excerpt.
141 template <typename Arg
>
142 Expression
literal(Arg
&& arg
) {
143 return literal(Datum(std::forward
<Arg
>(arg
)));
147 Expression
field_ref(FieldRef ref
);
150 Expression
call(std::string function
, std::vector
<Expression
> arguments
,
151 std::shared_ptr
<FunctionOptions
> options
= NULLPTR
);
// Overload of call() that accepts the options object by value. It participates in
// overload resolution only when Options derives from FunctionOptions (enable_if on
// std::is_base_of).
// NOTE(review): the line declaring the `options` parameter and the closing brace of
// this function are not visible in this excerpt.
153 template <typename Options
, typename
= typename
std::enable_if
<
154 std::is_base_of
<FunctionOptions
, Options
>::value
>::type
>
155 Expression
call(std::string function
, std::vector
<Expression
> arguments
,
// Move the options into a std::make_shared<Options> and delegate to the overload
// that takes a shared_ptr.
157 return call(std::move(function
), std::move(arguments
),
158 std::make_shared
<Options
>(std::move(options
)));
161 /// Assemble a list of all fields referenced by an Expression at any depth.
163 std::vector
<FieldRef
> FieldsInExpression(const Expression
&);
165 /// Check if the expression references any fields.
167 bool ExpressionHasFieldRefs(const Expression
&);
169 /// Assemble a mapping from field references to known values.
170 struct ARROW_EXPORT KnownFieldValues
;
172 Result
<KnownFieldValues
> ExtractKnownFieldValues(
173 const Expression
& guaranteed_true_predicate
);
175 /// \defgroup expression-passes Functions for modification of Expressions
179 /// These transform bound expressions. Some transforms utilize a guarantee, which is
180 /// provided as an Expression which is guaranteed to evaluate to true. The
181 /// guaranteed_true_predicate need not be bound, but canonicalization is currently
182 /// deferred to producers of guarantees. For example in order to be recognized as a
183 /// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS
184 /// and literal RHS. Flipping the arguments, "is_in" with a single-element value_set, ... or
185 /// other semantically identical Expressions will not be recognized.
187 /// Weak canonicalization which establishes guarantees for subsequent passes. Even
188 /// equivalent Expressions may result in different canonicalized expressions.
189 /// TODO this could be a strong canonicalization
191 Result
<Expression
> Canonicalize(Expression
, ExecContext
* = NULLPTR
);
193 /// Simplify Expressions based on literal arguments (for example, add(null, x) will always
194 /// be null so replace the call with a null literal). Includes early evaluation of all
195 /// calls whose arguments are entirely literal.
197 Result
<Expression
> FoldConstants(Expression
);
199 /// Simplify an Expression by replacing the fields which it references with their
/// known values.
// NOTE(review): this declaration is truncated in this excerpt; the parameter(s)
// following `known_values` are not visible.
201 Result
<Expression
> ReplaceFieldsWithKnownValues(const KnownFieldValues
& known_values
,
204 /// Simplify an expression by replacing subexpressions based on a guarantee:
205 /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
206 /// used to remove redundant function calls from a filter expression or to replace a
207 /// reference to a constant-value field with a literal.
209 Result
<Expression
> SimplifyWithGuarantee(Expression
,
210 const Expression
& guaranteed_true_predicate
);
216 /// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
217 /// RecordBatch which may have missing or incorrectly ordered columns.
218 /// Missing fields will be replaced with null scalars.
219 ARROW_EXPORT Result
<ExecBatch
> MakeExecBatch(const Schema
& full_schema
,
220 const Datum
& partial
);
222 /// Execute a scalar expression against the provided state and input ExecBatch. This
223 /// expression must be bound.
225 Result
<Datum
> ExecuteScalarExpression(const Expression
&, const ExecBatch
& input
,
226 ExecContext
* = NULLPTR
);
228 /// Convenience function for invoking against a RecordBatch
230 Result
<Datum
> ExecuteScalarExpression(const Expression
&, const Schema
& full_schema
,
231 const Datum
& partial_input
, ExecContext
* = NULLPTR
);
236 Result
<std::shared_ptr
<Buffer
>> Serialize(const Expression
&);
239 Result
<Expression
> Deserialize(std::shared_ptr
<Buffer
>);
241 // Convenience aliases for factories
243 ARROW_EXPORT Expression
project(std::vector
<Expression
> values
,
244 std::vector
<std::string
> names
);
246 ARROW_EXPORT Expression
equal(Expression lhs
, Expression rhs
);
248 ARROW_EXPORT Expression
not_equal(Expression lhs
, Expression rhs
);
250 ARROW_EXPORT Expression
less(Expression lhs
, Expression rhs
);
252 ARROW_EXPORT Expression
less_equal(Expression lhs
, Expression rhs
);
254 ARROW_EXPORT Expression
greater(Expression lhs
, Expression rhs
);
256 ARROW_EXPORT Expression
greater_equal(Expression lhs
, Expression rhs
);
258 ARROW_EXPORT Expression
is_null(Expression lhs
, bool nan_is_null
= false);
260 ARROW_EXPORT Expression
is_valid(Expression lhs
);
262 ARROW_EXPORT Expression
and_(Expression lhs
, Expression rhs
);
263 ARROW_EXPORT Expression
and_(const std::vector
<Expression
>&);
264 ARROW_EXPORT Expression
or_(Expression lhs
, Expression rhs
);
265 ARROW_EXPORT Expression
or_(const std::vector
<Expression
>&);
266 ARROW_EXPORT Expression
not_(Expression operand
);
268 } // namespace compute