1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle.
27 #include "arrow/compute/kernel.h"
28 #include "arrow/compute/type_fwd.h"
29 #include "arrow/datum.h"
30 #include "arrow/result.h"
31 #include "arrow/status.h"
32 #include "arrow/util/compare.h"
33 #include "arrow/util/macros.h"
34 #include "arrow/util/visibility.h"
39 /// \defgroup compute-functions Abstract compute function API
43 /// \brief Extension point for defining options outside libarrow (but
44 /// still within this project).
45 class ARROW_EXPORT FunctionOptionsType
{
47 virtual ~FunctionOptionsType() = default;
49 virtual const char* type_name() const = 0;
50 virtual std::string
Stringify(const FunctionOptions
&) const = 0;
51 virtual bool Compare(const FunctionOptions
&, const FunctionOptions
&) const = 0;
52 virtual Result
<std::shared_ptr
<Buffer
>> Serialize(const FunctionOptions
&) const;
53 virtual Result
<std::unique_ptr
<FunctionOptions
>> Deserialize(
54 const Buffer
& buffer
) const;
55 virtual std::unique_ptr
<FunctionOptions
> Copy(const FunctionOptions
&) const = 0;
58 /// \brief Base class for specifying options configuring a function's behavior,
59 /// such as error handling.
60 class ARROW_EXPORT FunctionOptions
: public util::EqualityComparable
<FunctionOptions
> {
62 virtual ~FunctionOptions() = default;
64 const FunctionOptionsType
* options_type() const { return options_type_
; }
65 const char* type_name() const { return options_type()->type_name(); }
67 bool Equals(const FunctionOptions
& other
) const;
68 using util::EqualityComparable
<FunctionOptions
>::Equals
;
69 using util::EqualityComparable
<FunctionOptions
>::operator==;
70 using util::EqualityComparable
<FunctionOptions
>::operator!=;
71 std::string
ToString() const;
72 std::unique_ptr
<FunctionOptions
> Copy() const;
73 /// \brief Serialize an options struct to a buffer.
74 Result
<std::shared_ptr
<Buffer
>> Serialize() const;
75 /// \brief Deserialize an options struct from a buffer.
76 /// Note: this will only look for `type_name` in the default FunctionRegistry;
77 /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
78 /// call FunctionOptionsType::Deserialize().
79 static Result
<std::unique_ptr
<FunctionOptions
>> Deserialize(
80 const std::string
& type_name
, const Buffer
& buffer
);
83 explicit FunctionOptions(const FunctionOptionsType
* type
) : options_type_(type
) {}
84 const FunctionOptionsType
* options_type_
;
87 ARROW_EXPORT
void PrintTo(const FunctionOptions
&, std::ostream
*);
/// \brief Contains the number of required arguments for the function.
///
/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
struct ARROW_EXPORT Arity {
  /// \brief A function taking no arguments
  static Arity Nullary() { return Arity(0, false); }

  /// \brief A function taking 1 argument
  static Arity Unary() { return Arity(1, false); }

  /// \brief A function taking 2 arguments
  static Arity Binary() { return Arity(2, false); }

  /// \brief A function taking 3 arguments
  static Arity Ternary() { return Arity(3, false); }

  /// \brief A function taking a variable number of arguments
  ///
  /// \param[in] min_args the minimum number of arguments required when
  /// invoking the function
  static Arity VarArgs(int min_args = 0) { return Arity(min_args, true); }

  // NOTE: the 0-argument form (default constructor) is required for Cython
  explicit Arity(int num_args = 0, bool is_varargs = false)
      : num_args(num_args), is_varargs(is_varargs) {}

  /// The number of required arguments (or the minimum number for varargs
  /// functions).
  int num_args;

  /// If true, then the num_args is the minimum number of required arguments.
  bool is_varargs = false;
};
/// \brief Documentation attached to a compute function.
struct ARROW_EXPORT FunctionDoc {
  /// \brief A one-line summary of the function, using a verb.
  ///
  /// For example, "Add two numeric arrays or scalars".
  std::string summary;

  /// \brief A detailed description of the function, meant to follow the summary.
  std::string description;

  /// \brief Symbolic names (identifiers) for the function arguments.
  ///
  /// Some bindings may use this to generate nicer function signatures.
  std::vector<std::string> arg_names;

  // TODO add argument descriptions?

  /// \brief Name of the options class, if any.
  std::string options_class;

  FunctionDoc() = default;

  /// \brief Build a FunctionDoc from its parts.
  ///
  /// All arguments are taken by value and moved into the members.
  /// \param[in] summary one-line summary
  /// \param[in] description detailed description
  /// \param[in] arg_names symbolic names of the function arguments
  /// \param[in] options_class name of the options class; empty by default
  FunctionDoc(std::string summary, std::string description,
              std::vector<std::string> arg_names, std::string options_class = "")
      : summary(std::move(summary)),
        description(std::move(description)),
        arg_names(std::move(arg_names)),
        options_class(std::move(options_class)) {}

  /// \brief Reference to a shared empty FunctionDoc (defined out of line).
  static const FunctionDoc& Empty();
};
154 /// \brief Base class for compute functions. Function implementations contain a
155 /// collection of "kernels" which are implementations of the function for
156 /// specific argument types. Selecting a viable kernel for executing a function
157 /// is referred to as "dispatching".
158 class ARROW_EXPORT Function
{
160 /// \brief The kind of function, which indicates in what contexts it is
163 /// A function that performs scalar data operations on whole arrays of
164 /// data. Can generally process Array or Scalar values. The size of the
165 /// output will be the same as the size (or broadcasted size, in the case
166 /// of mixing Array and Scalar inputs) of the input.
169 /// A function with array input and output whose behavior depends on the
170 /// values of the entire arrays passed, rather than the value of each scalar
174 /// A function that computes scalar summary statistics from array input.
177 /// A function that computes grouped summary statistics from array input
178 /// and an array of group identifiers.
181 /// A function that dispatches to other functions and does not contain its
186 virtual ~Function() = default;
188 /// \brief The name of the kernel. The registry enforces uniqueness of names.
189 const std::string
& name() const { return name_
; }
191 /// \brief The kind of kernel, which indicates in what contexts it is valid
193 Function::Kind
kind() const { return kind_
; }
195 /// \brief Contains the number of arguments the function requires, or if the
196 /// function accepts variable numbers of arguments.
197 const Arity
& arity() const { return arity_
; }
199 /// \brief Return the function documentation
200 const FunctionDoc
& doc() const { return *doc_
; }
202 /// \brief Returns the number of registered kernels for this function.
203 virtual int num_kernels() const = 0;
205 /// \brief Return a kernel that can execute the function given the exact
206 /// argument types (without implicit type casts or scalar->array promotions).
208 /// NB: This function is overridden in CastFunction.
209 virtual Result
<const Kernel
*> DispatchExact(
210 const std::vector
<ValueDescr
>& values
) const;
212 /// \brief Return a best-match kernel that can execute the function given the argument
213 /// types, after implicit casts are applied.
215 /// \param[in,out] values Argument types. An element may be modified to indicate that
216 /// the returned kernel only approximately matches the input value descriptors; callers
217 /// are responsible for casting inputs to the type and shape required by the kernel.
218 virtual Result
<const Kernel
*> DispatchBest(std::vector
<ValueDescr
>* values
) const;
220 /// \brief Execute the function eagerly with the passed input arguments with
221 /// kernel dispatch, batch iteration, and memory allocation details taken
224 /// If the `options` pointer is null, then `default_options()` will be used.
226 /// This function can be overridden in subclasses.
227 virtual Result
<Datum
> Execute(const std::vector
<Datum
>& args
,
228 const FunctionOptions
* options
, ExecContext
* ctx
) const;
230 /// \brief Returns the default options for this function.
232 /// Whatever option semantics a Function has, implementations must guarantee
233 /// that default_options() is valid to pass to Execute as options.
234 const FunctionOptions
* default_options() const { return default_options_
; }
236 virtual Status
Validate() const;
239 Function(std::string name
, Function::Kind kind
, const Arity
& arity
,
240 const FunctionDoc
* doc
, const FunctionOptions
* default_options
)
241 : name_(std::move(name
)),
244 doc_(doc
? doc
: &FunctionDoc::Empty()),
245 default_options_(default_options
) {}
247 Status
CheckArity(const std::vector
<InputType
>&) const;
248 Status
CheckArity(const std::vector
<ValueDescr
>&) const;
251 Function::Kind kind_
;
253 const FunctionDoc
* doc_
;
254 const FunctionOptions
* default_options_
= NULLPTR
;
259 template <typename KernelType
>
260 class FunctionImpl
: public Function
{
262 /// \brief Return pointers to current-available kernels for inspection
263 std::vector
<const KernelType
*> kernels() const {
264 std::vector
<const KernelType
*> result
;
265 for (const auto& kernel
: kernels_
) {
266 result
.push_back(&kernel
);
271 int num_kernels() const override
{ return static_cast<int>(kernels_
.size()); }
274 FunctionImpl(std::string name
, Function::Kind kind
, const Arity
& arity
,
275 const FunctionDoc
* doc
, const FunctionOptions
* default_options
)
276 : Function(std::move(name
), kind
, arity
, doc
, default_options
) {}
278 std::vector
<KernelType
> kernels_
;
281 /// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
283 const Kernel
* DispatchExactImpl(const Function
* func
, const std::vector
<ValueDescr
>&);
285 /// \brief Return an error message if no Kernel is found.
287 Status
NoMatchingKernel(const Function
* func
, const std::vector
<ValueDescr
>&);
289 } // namespace detail
291 /// \brief A function that executes elementwise operations on arrays or
292 /// scalars, and therefore whose results generally do not depend on the order
293 /// of the values in the arguments. Accepts and returns arrays that are all of
294 /// the same size. These functions roughly correspond to the functions used in
296 class ARROW_EXPORT ScalarFunction
: public detail::FunctionImpl
<ScalarKernel
> {
298 using KernelType
= ScalarKernel
;
300 ScalarFunction(std::string name
, const Arity
& arity
, const FunctionDoc
* doc
,
301 const FunctionOptions
* default_options
= NULLPTR
)
302 : detail::FunctionImpl
<ScalarKernel
>(std::move(name
), Function::SCALAR
, arity
, doc
,
305 /// \brief Add a kernel with given input/output types, no required state
306 /// initialization, preallocation for fixed-width types, and default null
307 /// handling (intersect validity bitmaps of inputs).
308 Status
AddKernel(std::vector
<InputType
> in_types
, OutputType out_type
,
309 ArrayKernelExec exec
, KernelInit init
= NULLPTR
);
311 /// \brief Add a kernel (function implementation). Returns error if the
312 /// kernel's signature does not match the function's arity.
313 Status
AddKernel(ScalarKernel kernel
);
316 /// \brief A function that executes general array operations that may yield
317 /// outputs of different sizes or have results that depend on the whole array
318 /// contents. These functions roughly correspond to the functions found in
319 /// non-SQL array languages like APL and its derivatives.
320 class ARROW_EXPORT VectorFunction
: public detail::FunctionImpl
<VectorKernel
> {
322 using KernelType
= VectorKernel
;
324 VectorFunction(std::string name
, const Arity
& arity
, const FunctionDoc
* doc
,
325 const FunctionOptions
* default_options
= NULLPTR
)
326 : detail::FunctionImpl
<VectorKernel
>(std::move(name
), Function::VECTOR
, arity
, doc
,
329 /// \brief Add a simple kernel with given input/output types, no required
330 /// state initialization, no data preallocation, and no preallocation of the
332 Status
AddKernel(std::vector
<InputType
> in_types
, OutputType out_type
,
333 ArrayKernelExec exec
, KernelInit init
= NULLPTR
);
335 /// \brief Add a kernel (function implementation). Returns error if the
336 /// kernel's signature does not match the function's arity.
337 Status
AddKernel(VectorKernel kernel
);
340 class ARROW_EXPORT ScalarAggregateFunction
341 : public detail::FunctionImpl
<ScalarAggregateKernel
> {
343 using KernelType
= ScalarAggregateKernel
;
345 ScalarAggregateFunction(std::string name
, const Arity
& arity
, const FunctionDoc
* doc
,
346 const FunctionOptions
* default_options
= NULLPTR
)
347 : detail::FunctionImpl
<ScalarAggregateKernel
>(
348 std::move(name
), Function::SCALAR_AGGREGATE
, arity
, doc
, default_options
) {}
350 /// \brief Add a kernel (function implementation). Returns error if the
351 /// kernel's signature does not match the function's arity.
352 Status
AddKernel(ScalarAggregateKernel kernel
);
355 class ARROW_EXPORT HashAggregateFunction
356 : public detail::FunctionImpl
<HashAggregateKernel
> {
358 using KernelType
= HashAggregateKernel
;
360 HashAggregateFunction(std::string name
, const Arity
& arity
, const FunctionDoc
* doc
,
361 const FunctionOptions
* default_options
= NULLPTR
)
362 : detail::FunctionImpl
<HashAggregateKernel
>(
363 std::move(name
), Function::HASH_AGGREGATE
, arity
, doc
, default_options
) {}
365 /// \brief Add a kernel (function implementation). Returns error if the
366 /// kernel's signature does not match the function's arity.
367 Status
AddKernel(HashAggregateKernel kernel
);
370 /// \brief A function that dispatches to other functions. Must implement
371 /// MetaFunction::ExecuteImpl.
373 /// For Array, ChunkedArray, and Scalar Datum kinds, may rely on the execution
374 /// of concrete Function types, but must handle other Datum kinds on its own.
375 class ARROW_EXPORT MetaFunction
: public Function
{
377 int num_kernels() const override
{ return 0; }
379 Result
<Datum
> Execute(const std::vector
<Datum
>& args
, const FunctionOptions
* options
,
380 ExecContext
* ctx
) const override
;
383 virtual Result
<Datum
> ExecuteImpl(const std::vector
<Datum
>& args
,
384 const FunctionOptions
* options
,
385 ExecContext
* ctx
) const = 0;
387 MetaFunction(std::string name
, const Arity
& arity
, const FunctionDoc
* doc
,
388 const FunctionOptions
* default_options
= NULLPTR
)
389 : Function(std::move(name
), Function::META
, arity
, doc
, default_options
) {}
394 } // namespace compute