]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/function.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / function.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 // NOTE: API is EXPERIMENTAL and will change without going through a
19 // deprecation cycle.
20
21 #pragma once
22
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "arrow/compute/kernel.h"
28 #include "arrow/compute/type_fwd.h"
29 #include "arrow/datum.h"
30 #include "arrow/result.h"
31 #include "arrow/status.h"
32 #include "arrow/util/compare.h"
33 #include "arrow/util/macros.h"
34 #include "arrow/util/visibility.h"
35
36 namespace arrow {
37 namespace compute {
38
39 /// \defgroup compute-functions Abstract compute function API
40 ///
41 /// @{
42
43 /// \brief Extension point for defining options outside libarrow (but
44 /// still within this project).
45 class ARROW_EXPORT FunctionOptionsType {
46 public:
47 virtual ~FunctionOptionsType() = default;
48
49 virtual const char* type_name() const = 0;
50 virtual std::string Stringify(const FunctionOptions&) const = 0;
51 virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
52 virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
53 virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
54 const Buffer& buffer) const;
55 virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
56 };
57
58 /// \brief Base class for specifying options configuring a function's behavior,
59 /// such as error handling.
60 class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
61 public:
62 virtual ~FunctionOptions() = default;
63
64 const FunctionOptionsType* options_type() const { return options_type_; }
65 const char* type_name() const { return options_type()->type_name(); }
66
67 bool Equals(const FunctionOptions& other) const;
68 using util::EqualityComparable<FunctionOptions>::Equals;
69 using util::EqualityComparable<FunctionOptions>::operator==;
70 using util::EqualityComparable<FunctionOptions>::operator!=;
71 std::string ToString() const;
72 std::unique_ptr<FunctionOptions> Copy() const;
73 /// \brief Serialize an options struct to a buffer.
74 Result<std::shared_ptr<Buffer>> Serialize() const;
75 /// \brief Deserialize an options struct from a buffer.
76 /// Note: this will only look for `type_name` in the default FunctionRegistry;
77 /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
78 /// call FunctionOptionsType::Deserialize().
79 static Result<std::unique_ptr<FunctionOptions>> Deserialize(
80 const std::string& type_name, const Buffer& buffer);
81
82 protected:
83 explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
84 const FunctionOptionsType* options_type_;
85 };
86
87 ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
88
89 /// \brief Contains the number of required arguments for the function.
90 ///
91 /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
92 struct ARROW_EXPORT Arity {
93 /// \brief A function taking no arguments
94 static Arity Nullary() { return Arity(0, false); }
95
96 /// \brief A function taking 1 argument
97 static Arity Unary() { return Arity(1, false); }
98
99 /// \brief A function taking 2 arguments
100 static Arity Binary() { return Arity(2, false); }
101
102 /// \brief A function taking 3 arguments
103 static Arity Ternary() { return Arity(3, false); }
104
105 /// \brief A function taking a variable number of arguments
106 ///
107 /// \param[in] min_args the minimum number of arguments required when
108 /// invoking the function
109 static Arity VarArgs(int min_args = 0) { return Arity(min_args, true); }
110
111 // NOTE: the 0-argument form (default constructor) is required for Cython
112 explicit Arity(int num_args = 0, bool is_varargs = false)
113 : num_args(num_args), is_varargs(is_varargs) {}
114
115 /// The number of required arguments (or the minimum number for varargs
116 /// functions).
117 int num_args;
118
119 /// If true, then the num_args is the minimum number of required arguments.
120 bool is_varargs = false;
121 };
122
123 struct ARROW_EXPORT FunctionDoc {
124 /// \brief A one-line summary of the function, using a verb.
125 ///
126 /// For example, "Add two numeric arrays or scalars".
127 std::string summary;
128
129 /// \brief A detailed description of the function, meant to follow the summary.
130 std::string description;
131
132 /// \brief Symbolic names (identifiers) for the function arguments.
133 ///
134 /// Some bindings may use this to generate nicer function signatures.
135 std::vector<std::string> arg_names;
136
137 // TODO add argument descriptions?
138
139 /// \brief Name of the options class, if any.
140 std::string options_class;
141
142 FunctionDoc() = default;
143
144 FunctionDoc(std::string summary, std::string description,
145 std::vector<std::string> arg_names, std::string options_class = "")
146 : summary(std::move(summary)),
147 description(std::move(description)),
148 arg_names(std::move(arg_names)),
149 options_class(std::move(options_class)) {}
150
151 static const FunctionDoc& Empty();
152 };
153
154 /// \brief Base class for compute functions. Function implementations contain a
155 /// collection of "kernels" which are implementations of the function for
156 /// specific argument types. Selecting a viable kernel for executing a function
157 /// is referred to as "dispatching".
158 class ARROW_EXPORT Function {
159 public:
160 /// \brief The kind of function, which indicates in what contexts it is
161 /// valid for use.
162 enum Kind {
163 /// A function that performs scalar data operations on whole arrays of
164 /// data. Can generally process Array or Scalar values. The size of the
165 /// output will be the same as the size (or broadcasted size, in the case
166 /// of mixing Array and Scalar inputs) of the input.
167 SCALAR,
168
169 /// A function with array input and output whose behavior depends on the
170 /// values of the entire arrays passed, rather than the value of each scalar
171 /// value.
172 VECTOR,
173
174 /// A function that computes scalar summary statistics from array input.
175 SCALAR_AGGREGATE,
176
177 /// A function that computes grouped summary statistics from array input
178 /// and an array of group identifiers.
179 HASH_AGGREGATE,
180
181 /// A function that dispatches to other functions and does not contain its
182 /// own kernels.
183 META
184 };
185
186 virtual ~Function() = default;
187
188 /// \brief The name of the kernel. The registry enforces uniqueness of names.
189 const std::string& name() const { return name_; }
190
191 /// \brief The kind of kernel, which indicates in what contexts it is valid
192 /// for use.
193 Function::Kind kind() const { return kind_; }
194
195 /// \brief Contains the number of arguments the function requires, or if the
196 /// function accepts variable numbers of arguments.
197 const Arity& arity() const { return arity_; }
198
199 /// \brief Return the function documentation
200 const FunctionDoc& doc() const { return *doc_; }
201
202 /// \brief Returns the number of registered kernels for this function.
203 virtual int num_kernels() const = 0;
204
205 /// \brief Return a kernel that can execute the function given the exact
206 /// argument types (without implicit type casts or scalar->array promotions).
207 ///
208 /// NB: This function is overridden in CastFunction.
209 virtual Result<const Kernel*> DispatchExact(
210 const std::vector<ValueDescr>& values) const;
211
212 /// \brief Return a best-match kernel that can execute the function given the argument
213 /// types, after implicit casts are applied.
214 ///
215 /// \param[in,out] values Argument types. An element may be modified to indicate that
216 /// the returned kernel only approximately matches the input value descriptors; callers
217 /// are responsible for casting inputs to the type and shape required by the kernel.
218 virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const;
219
220 /// \brief Execute the function eagerly with the passed input arguments with
221 /// kernel dispatch, batch iteration, and memory allocation details taken
222 /// care of.
223 ///
224 /// If the `options` pointer is null, then `default_options()` will be used.
225 ///
226 /// This function can be overridden in subclasses.
227 virtual Result<Datum> Execute(const std::vector<Datum>& args,
228 const FunctionOptions* options, ExecContext* ctx) const;
229
230 /// \brief Returns the default options for this function.
231 ///
232 /// Whatever option semantics a Function has, implementations must guarantee
233 /// that default_options() is valid to pass to Execute as options.
234 const FunctionOptions* default_options() const { return default_options_; }
235
236 virtual Status Validate() const;
237
238 protected:
239 Function(std::string name, Function::Kind kind, const Arity& arity,
240 const FunctionDoc* doc, const FunctionOptions* default_options)
241 : name_(std::move(name)),
242 kind_(kind),
243 arity_(arity),
244 doc_(doc ? doc : &FunctionDoc::Empty()),
245 default_options_(default_options) {}
246
247 Status CheckArity(const std::vector<InputType>&) const;
248 Status CheckArity(const std::vector<ValueDescr>&) const;
249
250 std::string name_;
251 Function::Kind kind_;
252 Arity arity_;
253 const FunctionDoc* doc_;
254 const FunctionOptions* default_options_ = NULLPTR;
255 };
256
257 namespace detail {
258
259 template <typename KernelType>
260 class FunctionImpl : public Function {
261 public:
262 /// \brief Return pointers to current-available kernels for inspection
263 std::vector<const KernelType*> kernels() const {
264 std::vector<const KernelType*> result;
265 for (const auto& kernel : kernels_) {
266 result.push_back(&kernel);
267 }
268 return result;
269 }
270
271 int num_kernels() const override { return static_cast<int>(kernels_.size()); }
272
273 protected:
274 FunctionImpl(std::string name, Function::Kind kind, const Arity& arity,
275 const FunctionDoc* doc, const FunctionOptions* default_options)
276 : Function(std::move(name), kind, arity, doc, default_options) {}
277
278 std::vector<KernelType> kernels_;
279 };
280
281 /// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
282 ARROW_EXPORT
283 const Kernel* DispatchExactImpl(const Function* func, const std::vector<ValueDescr>&);
284
285 /// \brief Return an error message if no Kernel is found.
286 ARROW_EXPORT
287 Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>&);
288
289 } // namespace detail
290
291 /// \brief A function that executes elementwise operations on arrays or
292 /// scalars, and therefore whose results generally do not depend on the order
293 /// of the values in the arguments. Accepts and returns arrays that are all of
294 /// the same size. These functions roughly correspond to the functions used in
295 /// SQL expressions.
296 class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
297 public:
298 using KernelType = ScalarKernel;
299
300 ScalarFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
301 const FunctionOptions* default_options = NULLPTR)
302 : detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity, doc,
303 default_options) {}
304
305 /// \brief Add a kernel with given input/output types, no required state
306 /// initialization, preallocation for fixed-width types, and default null
307 /// handling (intersect validity bitmaps of inputs).
308 Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
309 ArrayKernelExec exec, KernelInit init = NULLPTR);
310
311 /// \brief Add a kernel (function implementation). Returns error if the
312 /// kernel's signature does not match the function's arity.
313 Status AddKernel(ScalarKernel kernel);
314 };
315
316 /// \brief A function that executes general array operations that may yield
317 /// outputs of different sizes or have results that depend on the whole array
318 /// contents. These functions roughly correspond to the functions found in
319 /// non-SQL array languages like APL and its derivatives.
320 class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
321 public:
322 using KernelType = VectorKernel;
323
324 VectorFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
325 const FunctionOptions* default_options = NULLPTR)
326 : detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity, doc,
327 default_options) {}
328
329 /// \brief Add a simple kernel with given input/output types, no required
330 /// state initialization, no data preallocation, and no preallocation of the
331 /// validity bitmap.
332 Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
333 ArrayKernelExec exec, KernelInit init = NULLPTR);
334
335 /// \brief Add a kernel (function implementation). Returns error if the
336 /// kernel's signature does not match the function's arity.
337 Status AddKernel(VectorKernel kernel);
338 };
339
340 class ARROW_EXPORT ScalarAggregateFunction
341 : public detail::FunctionImpl<ScalarAggregateKernel> {
342 public:
343 using KernelType = ScalarAggregateKernel;
344
345 ScalarAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
346 const FunctionOptions* default_options = NULLPTR)
347 : detail::FunctionImpl<ScalarAggregateKernel>(
348 std::move(name), Function::SCALAR_AGGREGATE, arity, doc, default_options) {}
349
350 /// \brief Add a kernel (function implementation). Returns error if the
351 /// kernel's signature does not match the function's arity.
352 Status AddKernel(ScalarAggregateKernel kernel);
353 };
354
355 class ARROW_EXPORT HashAggregateFunction
356 : public detail::FunctionImpl<HashAggregateKernel> {
357 public:
358 using KernelType = HashAggregateKernel;
359
360 HashAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
361 const FunctionOptions* default_options = NULLPTR)
362 : detail::FunctionImpl<HashAggregateKernel>(
363 std::move(name), Function::HASH_AGGREGATE, arity, doc, default_options) {}
364
365 /// \brief Add a kernel (function implementation). Returns error if the
366 /// kernel's signature does not match the function's arity.
367 Status AddKernel(HashAggregateKernel kernel);
368 };
369
370 /// \brief A function that dispatches to other functions. Must implement
371 /// MetaFunction::ExecuteImpl.
372 ///
373 /// For Array, ChunkedArray, and Scalar Datum kinds, may rely on the execution
374 /// of concrete Function types, but must handle other Datum kinds on its own.
375 class ARROW_EXPORT MetaFunction : public Function {
376 public:
377 int num_kernels() const override { return 0; }
378
379 Result<Datum> Execute(const std::vector<Datum>& args, const FunctionOptions* options,
380 ExecContext* ctx) const override;
381
382 protected:
383 virtual Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
384 const FunctionOptions* options,
385 ExecContext* ctx) const = 0;
386
387 MetaFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
388 const FunctionOptions* default_options = NULLPTR)
389 : Function(std::move(name), Function::META, arity, doc, default_options) {}
390 };
391
392 /// @}
393
394 } // namespace compute
395 } // namespace arrow