]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one | |
3 | * or more contributor license agreements. See the NOTICE file | |
4 | * distributed with this work for additional information | |
5 | * regarding copyright ownership. The ASF licenses this file | |
6 | * to you under the Apache License, Version 2.0 (the | |
7 | * "License"); you may not use this file except in compliance | |
8 | * with the License. You may obtain a copy of the License at | |
9 | * | |
10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
11 | * | |
12 | * Unless required by applicable law or agreed to in writing, | |
13 | * software distributed under the License is distributed on an | |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
15 | * KIND, either express or implied. See the License for the | |
16 | * specific language governing permissions and limitations | |
17 | * under the License. | |
18 | */ | |
19 | ||
20 | #include <arrow-glib/error.hpp> | |
21 | #include <arrow-glib/input-stream.hpp> | |
22 | #include <arrow-glib/orc-file-reader.hpp> | |
23 | #include <arrow-glib/record-batch.hpp> | |
24 | #include <arrow-glib/schema.hpp> | |
25 | #include <arrow-glib/table.hpp> | |
26 | ||
27 | G_BEGIN_DECLS | |
28 | ||
29 | /** | |
30 | * SECTION: orc-file-reader | |
31 | * @section_id: orc-file-reader | |
32 | * @title: ORC reader | |
33 | * @include: arrow-glib/orc-file-reader.h | |
34 | * | |
35 | * #GArrowORCFileReader is a class for reading stripes in ORC file | |
36 | * format from input. | |
37 | */ | |
38 | ||
39 | typedef struct GArrowORCFileReaderPrivate_ { | |
40 | GArrowSeekableInputStream *input; | |
41 | arrow::adapters::orc::ORCFileReader *orc_file_reader; | |
42 | GArray *field_indices; | |
43 | } GArrowORCFileReaderPrivate; | |
44 | ||
45 | enum { | |
46 | PROP_0, | |
47 | PROP_INPUT, | |
48 | PROP_ORC_FILE_READER | |
49 | }; | |
50 | ||
51 | G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader, | |
52 | garrow_orc_file_reader, | |
53 | G_TYPE_OBJECT); | |
54 | ||
55 | #define GARROW_ORC_FILE_READER_GET_PRIVATE(obj) \ | |
56 | static_cast<GArrowORCFileReaderPrivate *>( \ | |
57 | garrow_orc_file_reader_get_instance_private( \ | |
58 | GARROW_ORC_FILE_READER(obj))) | |
59 | ||
60 | static void | |
61 | garrow_orc_file_reader_dispose(GObject *object) | |
62 | { | |
63 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); | |
64 | ||
65 | if (priv->input) { | |
66 | g_object_unref(priv->input); | |
67 | priv->input = NULL; | |
68 | } | |
69 | ||
70 | G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object); | |
71 | } | |
72 | ||
73 | static void | |
74 | garrow_orc_file_reader_finalize(GObject *object) | |
75 | { | |
76 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); | |
77 | ||
78 | delete priv->orc_file_reader; | |
79 | ||
80 | if (priv->field_indices) { | |
81 | g_array_free(priv->field_indices, TRUE); | |
82 | } | |
83 | ||
84 | G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object); | |
85 | } | |
86 | ||
87 | static void | |
88 | garrow_orc_file_reader_set_property(GObject *object, | |
89 | guint prop_id, | |
90 | const GValue *value, | |
91 | GParamSpec *pspec) | |
92 | { | |
93 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); | |
94 | ||
95 | switch (prop_id) { | |
96 | case PROP_INPUT: | |
97 | priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value)); | |
98 | break; | |
99 | case PROP_ORC_FILE_READER: | |
100 | priv->orc_file_reader = | |
101 | static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value)); | |
102 | break; | |
103 | default: | |
104 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); | |
105 | break; | |
106 | } | |
107 | } | |
108 | ||
109 | static void | |
110 | garrow_orc_file_reader_get_property(GObject *object, | |
111 | guint prop_id, | |
112 | GValue *value, | |
113 | GParamSpec *pspec) | |
114 | { | |
115 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object); | |
116 | ||
117 | switch (prop_id) { | |
118 | case PROP_INPUT: | |
119 | g_value_set_object(value, priv->input); | |
120 | break; | |
121 | default: | |
122 | G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); | |
123 | break; | |
124 | } | |
125 | } | |
126 | ||
127 | static void | |
128 | garrow_orc_file_reader_init(GArrowORCFileReader *object) | |
129 | { | |
130 | } | |
131 | ||
132 | static void | |
133 | garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass) | |
134 | { | |
135 | auto gobject_class = G_OBJECT_CLASS(klass); | |
136 | ||
137 | gobject_class->dispose = garrow_orc_file_reader_dispose; | |
138 | gobject_class->finalize = garrow_orc_file_reader_finalize; | |
139 | gobject_class->set_property = garrow_orc_file_reader_set_property; | |
140 | gobject_class->get_property = garrow_orc_file_reader_get_property; | |
141 | ||
142 | GParamSpec *spec; | |
143 | spec = g_param_spec_object("input", | |
144 | "Input", | |
145 | "The input stream", | |
146 | GARROW_TYPE_SEEKABLE_INPUT_STREAM, | |
147 | static_cast<GParamFlags>(G_PARAM_READWRITE | | |
148 | G_PARAM_CONSTRUCT_ONLY)); | |
149 | g_object_class_install_property(gobject_class, PROP_INPUT, spec); | |
150 | ||
151 | spec = g_param_spec_pointer("orc-file-reader", | |
152 | "arrow::adapters::orc::ORCFileReader", | |
153 | "The raw arrow::adapters::orc::ORCFileReader *", | |
154 | static_cast<GParamFlags>(G_PARAM_WRITABLE | | |
155 | G_PARAM_CONSTRUCT_ONLY)); | |
156 | g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec); | |
157 | } | |
158 | ||
159 | ||
160 | /** | |
161 | * garrow_orc_file_reader_new: | |
162 | * @file: The file to be read. | |
163 | * @error: (nullable): Return location for a #GError or %NULL. | |
164 | * | |
165 | * Returns: (nullable): A newly created #GArrowORCFileReader | |
166 | * or %NULL on error. | |
167 | * | |
168 | * Since: 0.10.0 | |
169 | */ | |
170 | GArrowORCFileReader * | |
171 | garrow_orc_file_reader_new(GArrowSeekableInputStream *input, | |
172 | GError **error) | |
173 | { | |
174 | auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input); | |
175 | auto pool = arrow::default_memory_pool(); | |
176 | auto arrow_reader_result = | |
177 | arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file, | |
178 | pool); | |
179 | if (garrow::check(error, arrow_reader_result, "[orc-file-reader][new]")) { | |
180 | return garrow_orc_file_reader_new_raw(input, | |
181 | (*arrow_reader_result).release()); | |
182 | } else { | |
183 | return NULL; | |
184 | } | |
185 | } | |
186 | ||
187 | /** | |
188 | * garrow_orc_file_reader_set_field_indexes: | |
189 | * @reader: A #GArrowORCFileReader. | |
190 | * @field_indexes: (nullable) (array length=n_field_indexes): | |
191 | * The field indexes to be read. | |
192 | * @n_field_indexes: The number of the specified indexes. | |
193 | * | |
194 | * Since: 0.10.0 | |
195 | * | |
196 | * Deprecated: 0.12.0: | |
197 | * Use garrow_orc_file_reader_set_field_indices() instead. | |
198 | */ | |
199 | void | |
200 | garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, | |
201 | const gint *field_indexes, | |
202 | guint n_field_indexes) | |
203 | { | |
204 | garrow_orc_file_reader_set_field_indices(reader, | |
205 | field_indexes, | |
206 | n_field_indexes); | |
207 | } | |
208 | ||
209 | /** | |
210 | * garrow_orc_file_reader_set_field_indices: | |
211 | * @reader: A #GArrowORCFileReader. | |
212 | * @field_indices: (nullable) (array length=n_field_indices): | |
213 | * The field indices to be read. | |
214 | * @n_field_indices: The number of the specified indices. | |
215 | * | |
216 | * Since: 0.12.0 | |
217 | */ | |
218 | void | |
219 | garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, | |
220 | const gint *field_indices, | |
221 | guint n_field_indices) | |
222 | { | |
223 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); | |
224 | if (priv->field_indices) { | |
225 | g_array_free(priv->field_indices, TRUE); | |
226 | } | |
227 | if (n_field_indices == 0) { | |
228 | priv->field_indices = NULL; | |
229 | } else { | |
230 | priv->field_indices = g_array_sized_new(FALSE, | |
231 | FALSE, | |
232 | sizeof(gint), | |
233 | n_field_indices); | |
234 | g_array_append_vals(priv->field_indices, field_indices, n_field_indices); | |
235 | } | |
236 | } | |
237 | ||
238 | /** | |
239 | * garrow_orc_file_reader_get_field_indexes: | |
240 | * @reader: A #GArrowORCFileReader. | |
241 | * @n_field_indexes: The number of the specified indexes. | |
242 | * | |
243 | * Returns: (nullable) (array length=n_field_indexes) (transfer none): | |
244 | * The field indexes to be read. | |
245 | * | |
246 | * Since: 0.10.0 | |
247 | * | |
248 | * Deprecated: 0.12.0: | |
249 | * Use garrow_orc_file_reader_get_field_indices() instead. | |
250 | */ | |
251 | const gint * | |
252 | garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, | |
253 | guint *n_field_indexes) | |
254 | { | |
255 | return garrow_orc_file_reader_get_field_indices(reader, n_field_indexes); | |
256 | } | |
257 | ||
258 | /** | |
259 | * garrow_orc_file_reader_get_field_indices: | |
260 | * @reader: A #GArrowORCFileReader. | |
261 | * @n_field_indices: The number of the specified indices. | |
262 | * | |
263 | * Returns: (nullable) (array length=n_field_indices) (transfer none): | |
264 | * The field indices to be read. | |
265 | * | |
266 | * Since: 0.12.0 | |
267 | */ | |
268 | const gint * | |
269 | garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, | |
270 | guint *n_field_indices) | |
271 | { | |
272 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); | |
273 | if (priv->field_indices) { | |
274 | *n_field_indices = priv->field_indices->len; | |
275 | return reinterpret_cast<gint *>(priv->field_indices->data); | |
276 | } else { | |
277 | *n_field_indices = 0; | |
278 | return NULL; | |
279 | } | |
280 | } | |
281 | ||
282 | /** | |
283 | * garrow_orc_file_reader_read_type: | |
284 | * @reader: A #GArrowORCFileReader. | |
285 | * @error: (nullable): Return location for a #GError or %NULL. | |
286 | * | |
287 | * Returns: (nullable) (transfer full): A newly read type as | |
288 | * #GArrowSchema or %NULL on error. | |
289 | * | |
290 | * Since: 0.10.0 | |
291 | */ | |
292 | GArrowSchema * | |
293 | garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, | |
294 | GError **error) | |
295 | { | |
296 | auto arrow_reader = garrow_orc_file_reader_get_raw(reader); | |
297 | auto arrow_schema_result = arrow_reader->ReadSchema(); | |
298 | if (garrow::check(error, | |
299 | arrow_schema_result, | |
300 | "[orc-file-reader][read-type]")) { | |
301 | auto arrow_schema = *arrow_schema_result; | |
302 | return garrow_schema_new_raw(&arrow_schema); | |
303 | } else { | |
304 | return NULL; | |
305 | } | |
306 | } | |
307 | ||
308 | /** | |
309 | * garrow_orc_file_reader_read_stripes: | |
310 | * @reader: A #GArrowORCFileReader. | |
311 | * @error: (nullable): Return location for a #GError or %NULL. | |
312 | * | |
313 | * Returns: (nullable) (transfer full): A newly read stripes as | |
314 | * #GArrowTable or %NULL on error. | |
315 | * | |
316 | * Since: 0.10.0 | |
317 | */ | |
318 | GArrowTable * | |
319 | garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, | |
320 | GError **error) | |
321 | { | |
322 | auto arrow_reader = garrow_orc_file_reader_get_raw(reader); | |
323 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); | |
324 | if (priv->field_indices) { | |
325 | std::vector<int> arrow_field_indices; | |
326 | auto field_indices = priv->field_indices; | |
327 | for (guint i = 0; i < field_indices->len; ++i) { | |
328 | arrow_field_indices.push_back(g_array_index(field_indices, gint, i)); | |
329 | } | |
330 | auto arrow_table_result = arrow_reader->Read(arrow_field_indices); | |
331 | if (garrow::check(error, | |
332 | arrow_table_result, | |
333 | "[orc-file-reader][read-stripes]")) { | |
334 | auto arrow_table = *arrow_table_result; | |
335 | return garrow_table_new_raw(&arrow_table); | |
336 | } else { | |
337 | return NULL; | |
338 | } | |
339 | } else { | |
340 | auto arrow_table_result = arrow_reader->Read(); | |
341 | if (garrow::check(error, | |
342 | arrow_table_result, | |
343 | "[orc-file-reader][read-stripes]")) { | |
344 | auto arrow_table = *arrow_table_result; | |
345 | return garrow_table_new_raw(&arrow_table); | |
346 | } else { | |
347 | return NULL; | |
348 | } | |
349 | } | |
350 | } | |
351 | ||
352 | /** | |
353 | * garrow_orc_file_reader_read_stripe: | |
354 | * @reader: A #GArrowORCFileReader. | |
355 | * @i: The stripe index to be read. | |
356 | * @error: (nullable): Return location for a #GError or %NULL. | |
357 | * | |
358 | * Returns: (nullable) (transfer full): A newly read stripe as | |
359 | * #GArrowRecordBatch or %NULL on error. | |
360 | * | |
361 | * Since: 0.10.0 | |
362 | */ | |
363 | GArrowRecordBatch * | |
364 | garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader, | |
365 | gint64 i, | |
366 | GError **error) | |
367 | { | |
368 | auto arrow_reader = garrow_orc_file_reader_get_raw(reader); | |
369 | if (i < 0) { | |
370 | i += arrow_reader->NumberOfStripes(); | |
371 | } | |
372 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); | |
373 | if (priv->field_indices) { | |
374 | std::vector<int> arrow_field_indices; | |
375 | auto field_indices = priv->field_indices; | |
376 | for (guint j = 0; j < field_indices->len; ++j) { | |
377 | arrow_field_indices.push_back(g_array_index(field_indices, gint, j)); | |
378 | } | |
379 | std::shared_ptr<arrow::RecordBatch> arrow_record_batch; | |
380 | auto arrow_record_batch_result = | |
381 | arrow_reader->ReadStripe(i, arrow_field_indices); | |
382 | if (garrow::check(error, | |
383 | arrow_record_batch_result, | |
384 | "[orc-file-reader][read-stripe]")) { | |
385 | auto arrow_record_batch = *arrow_record_batch_result; | |
386 | return garrow_record_batch_new_raw(&arrow_record_batch); | |
387 | } else { | |
388 | return NULL; | |
389 | } | |
390 | } else { | |
391 | auto arrow_record_batch_result = arrow_reader->ReadStripe(i); | |
392 | if (garrow::check(error, | |
393 | arrow_record_batch_result, | |
394 | "[orc-file-reader][read-stripe]")) { | |
395 | auto arrow_record_batch = *arrow_record_batch_result; | |
396 | return garrow_record_batch_new_raw(&arrow_record_batch); | |
397 | } else { | |
398 | return NULL; | |
399 | } | |
400 | } | |
401 | } | |
402 | ||
403 | /** | |
404 | * garrow_orc_file_reader_get_n_stripes: | |
405 | * @reader: A #GArrowORCFileReader. | |
406 | * | |
407 | * Returns: The number of stripes in the file. | |
408 | * | |
409 | * Since: 0.10.0 | |
410 | */ | |
411 | gint64 | |
412 | garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader) | |
413 | { | |
414 | auto arrow_reader = garrow_orc_file_reader_get_raw(reader); | |
415 | return arrow_reader->NumberOfStripes(); | |
416 | } | |
417 | ||
418 | /** | |
419 | * garrow_orc_file_reader_get_n_rows: | |
420 | * @reader: A #GArrowORCFileReader. | |
421 | * | |
422 | * Returns: The number of rows in the file. | |
423 | * | |
424 | * Since: 0.10.0 | |
425 | */ | |
426 | gint64 | |
427 | garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader) | |
428 | { | |
429 | auto arrow_reader = garrow_orc_file_reader_get_raw(reader); | |
430 | return arrow_reader->NumberOfRows(); | |
431 | } | |
432 | ||
433 | ||
434 | G_END_DECLS | |
435 | ||
436 | ||
437 | GArrowORCFileReader * | |
438 | garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input, | |
439 | arrow::adapters::orc::ORCFileReader *arrow_reader) | |
440 | { | |
441 | auto reader = | |
442 | GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER, | |
443 | "input", input, | |
444 | "orc-file-reader", arrow_reader, | |
445 | NULL)); | |
446 | return reader; | |
447 | } | |
448 | ||
449 | arrow::adapters::orc::ORCFileReader * | |
450 | garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader) | |
451 | { | |
452 | auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader); | |
453 | return priv->orc_file_reader; | |
454 | } |