]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | .. Licensed to the Apache Software Foundation (ASF) under one |
2 | .. or more contributor license agreements. See the NOTICE file | |
3 | .. distributed with this work for additional information | |
4 | .. regarding copyright ownership. The ASF licenses this file | |
5 | .. to you under the Apache License, Version 2.0 (the | |
6 | .. "License"); you may not use this file except in compliance | |
7 | .. with the License. You may obtain a copy of the License at | |
8 | ||
9 | .. http://www.apache.org/licenses/LICENSE-2.0 | |
10 | ||
11 | .. Unless required by applicable law or agreed to in writing, | |
12 | .. software distributed under the License is distributed on an | |
13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | .. KIND, either express or implied. See the License for the | |
15 | .. specific language governing permissions and limitations | |
16 | .. under the License. | |
17 | ||
18 | .. highlight:: c | |
19 | ||
20 | .. _c-stream-interface: | |
21 | ||
22 | ============================ | |
23 | The Arrow C stream interface | |
24 | ============================ | |
25 | ||
26 | .. warning:: | |
27 | This interface is experimental and may evolve based on feedback from | |
28 | early users. ABI stability is not guaranteed yet. Feel free to | |
29 | `contact us <https://arrow.apache.org/community/>`__. | |
30 | ||
31 | The C stream interface builds on the structures defined in the | |
32 | :ref:`C data interface <c-data-interface>` and combines them into a higher-level | |
33 | specification so as to ease the communication of streaming data within a single | |
34 | process. | |
35 | ||
36 | Semantics | |
37 | ========= | |
38 | ||
39 | An Arrow C stream exposes a streaming source of data chunks, each with the | |
40 | same schema. Chunks are obtained by calling a blocking pull-style iteration | |
41 | function. | |
42 | ||
43 | Structure definition | |
44 | ==================== | |
45 | ||
46 | The C stream interface is defined by a single ``struct`` definition:: | |
47 | ||
48 | struct ArrowArrayStream { | |
49 | // Callbacks providing stream functionality | |
50 | int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); | |
51 | int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); | |
52 | const char* (*get_last_error)(struct ArrowArrayStream*); | |
53 | ||
54 | // Release callback | |
55 | void (*release)(struct ArrowArrayStream*); | |
56 | ||
57 | // Opaque producer-specific data | |
58 | void* private_data; | |
59 | }; | |
60 | ||
61 | The ArrowArrayStream structure | |
62 | ------------------------------ | |
63 | ||
64 | The ``ArrowArrayStream`` provides the required callbacks to interact with a | |
65 | streaming source of Arrow arrays. It has the following fields: | |
66 | ||
67 | .. c:member:: int (*ArrowArrayStream.get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out) | |
68 | ||
69 | *Mandatory.* This callback allows the consumer to query the schema of | |
70 | the chunks of data in the stream. The schema is the same for all | |
71 | data chunks. | |
72 | ||
73 | This callback must NOT be called on a released ``ArrowArrayStream``. | |
74 | ||
75 | *Return value:* 0 on success, a non-zero | |
76 | :ref:`error code <c-stream-interface-error-codes>` otherwise. | |
77 | ||
78 | .. c:member:: int (*ArrowArrayStream.get_next)(struct ArrowArrayStream*, struct ArrowArray* out) | |
79 | ||
80 | *Mandatory.* This callback allows the consumer to get the next chunk | |
81 | of data in the stream. | |
82 | ||
83 | This callback must NOT be called on a released ``ArrowArrayStream``. | |
84 | ||
85 | *Return value:* 0 on success, a non-zero | |
86 | :ref:`error code <c-stream-interface-error-codes>` otherwise. | |
87 | ||
88 | On success, the consumer must check whether the ``ArrowArray`` is | |
89 | marked :ref:`released <c-data-interface-released>`. If the | |
90 | ``ArrowArray`` is released, then the end of stream has been reached. | |
91 | Otherwise, the ``ArrowArray`` contains a valid data chunk. | |
92 | ||
93 | .. c:member:: const char* (*ArrowArrayStream.get_last_error)(struct ArrowArrayStream*) | |
94 | ||
95 | *Mandatory.* This callback allows the consumer to get a textual description | |
96 | of the last error. | |
97 | ||
98 | This callback must ONLY be called if the last operation on the | |
99 | ``ArrowArrayStream`` returned an error. It must NOT be called on a | |
100 | released ``ArrowArrayStream``. | |
101 | ||
102 | *Return value:* a pointer to a NULL-terminated character string (UTF8-encoded). | |
103 | NULL can also be returned if no detailed description is available. | |
104 | ||
105 | The returned pointer is only guaranteed to be valid until the next call of | |
106 | one of the stream's callbacks. The character string it points to should | |
107 | be copied to consumer-managed storage if it is intended to survive longer. | |
108 | ||
109 | .. c:member:: void (*ArrowArrayStream.release)(struct ArrowArrayStream*) | |
110 | ||
111 | *Mandatory.* A pointer to a producer-provided release callback. | |
112 | ||
113 | .. c:member:: void* ArrowArrayStream.private_data | |
114 | ||
115 | *Optional.* An opaque pointer to producer-provided private data. | |
116 | ||
117 | Consumers MUST NOT process this member. Lifetime of this member | |
118 | is handled by the producer, and especially by the release callback. | |
119 | ||
120 | ||
121 | .. _c-stream-interface-error-codes: | |
122 | ||
123 | Error codes | |
124 | ----------- | |
125 | ||
126 | The ``get_schema`` and ``get_next`` callbacks may return an error in the form | |
127 | of a non-zero integer code. Such error codes should be interpreted like | |
128 | ``errno`` numbers (as defined by the local platform). Note that the symbolic | |
129 | forms of these constants are stable from platform to platform, but their numeric | |
130 | values are platform-specific. | |
131 | ||
132 | In particular, it is recommended to recognize the following values: | |
133 | ||
134 | * ``EINVAL``: for a parameter or input validation error | |
135 | * ``ENOMEM``: for a memory allocation failure (out of memory) | |
136 | * ``EIO``: for a generic input/output error | |
137 | ||
138 | .. seealso:: | |
139 | `Standard POSIX error codes <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html>`__. | |
140 | ||
141 | `Error codes recognized by the Windows C runtime library | |
142 | <https://docs.microsoft.com/en-us/cpp/c-runtime-library/errno-doserrno-sys-errlist-and-sys-nerr>`__. | |
143 | ||
144 | Result lifetimes | |
145 | ---------------- | |
146 | ||
147 | The data returned by the ``get_schema`` and ``get_next`` callbacks must be | |
148 | released independently. Their lifetimes are not tied to that of the | |
149 | ``ArrowArrayStream``. | |
150 | ||
151 | Stream lifetime | |
152 | --------------- | |
153 | ||
154 | Lifetime of the C stream is managed using a release callback with similar | |
155 | usage as in the :ref:`C data interface <c-data-interface-released>`. | |
156 | ||
157 | ||
158 | C consumer example | |
159 | ================== | |
160 | ||
161 | Let's say a particular database provides the following C API to execute | |
162 | a SQL query and return the result set as an Arrow C stream:: | |
163 | ||
164 | void MyDB_Query(const char* query, struct ArrowArrayStream* result_set); | |
165 | ||
166 | Then a consumer could use the following code to iterate over the results:: | |
167 | ||
168 | static void handle_error(int errcode, struct ArrowArrayStream* stream) { | |
169 | // Print stream error | |
170 | const char* errdesc = stream->get_last_error(stream); | |
171 | if (errdesc != NULL) { | |
172 | fputs(errdesc, stderr); | |
173 | } else { | |
174 | fputs(strerror(errcode), stderr); | |
175 | } | |
176 | // Release stream and abort | |
177 | stream->release(stream); | |
178 | exit(1); | |
179 | } | |
180 | ||
181 | void run_query() { | |
182 | struct ArrowArrayStream stream; | |
183 | struct ArrowSchema schema; | |
184 | struct ArrowArray chunk; | |
185 | int errcode; | |
186 | ||
187 | MyDB_Query("SELECT * FROM my_table", &stream); | |
188 | ||
189 | // Query result set schema | |
190 | errcode = stream.get_schema(&stream, &schema); | |
191 | if (errcode != 0) { | |
192 | handle_error(errcode, &stream); | |
193 | } | |
194 | ||
195 | int64_t num_rows = 0; | |
196 | ||
197 | // Iterate over results: loop until error or end of stream | |
198 | while ((errcode = stream.get_next(&stream, &chunk)) == 0 && | |
199 | chunk.release != NULL) { | |
200 | // Do something with chunk... | |
201 | fprintf(stderr, "Result chunk: got %lld rows\n", (long long) chunk.length); | |
202 | num_rows += chunk.length; | |
203 | ||
204 | // Release chunk | |
205 | chunk.release(&chunk); | |
206 | } | |
207 | ||
208 | // Was it an error? | |
209 | if (errcode != 0) { | |
210 | handle_error(errcode, &stream); | |
211 | } | |
212 | ||
213 | fprintf(stderr, "Result stream ended: total %lld rows\n", (long long) num_rows); | |
214 | ||
215 | // Release schema and stream | |
216 | schema.release(&schema); | |
217 | stream.release(&stream); | |
218 | } |