]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/c_glib/arrow-glib/orc-file-reader.cpp
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / c_glib / arrow-glib / orc-file-reader.cpp
CommitLineData
1d09f67e
TL
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#include <arrow-glib/error.hpp>
21#include <arrow-glib/input-stream.hpp>
22#include <arrow-glib/orc-file-reader.hpp>
23#include <arrow-glib/record-batch.hpp>
24#include <arrow-glib/schema.hpp>
25#include <arrow-glib/table.hpp>
26
27G_BEGIN_DECLS
28
29/**
30 * SECTION: orc-file-reader
31 * @section_id: orc-file-reader
32 * @title: ORC reader
33 * @include: arrow-glib/orc-file-reader.h
34 *
35 * #GArrowORCFileReader is a class for reading stripes in ORC file
36 * format from input.
37 */
38
39typedef struct GArrowORCFileReaderPrivate_ {
40 GArrowSeekableInputStream *input;
41 arrow::adapters::orc::ORCFileReader *orc_file_reader;
42 GArray *field_indices;
43} GArrowORCFileReaderPrivate;
44
/* GObject property identifiers installed in class_init(). */
enum {
  PROP_0 = 0,            /* placeholder for the unused id 0 */
  PROP_INPUT,            /* "input" */
  PROP_ORC_FILE_READER   /* "orc-file-reader" */
};
50
51G_DEFINE_TYPE_WITH_PRIVATE(GArrowORCFileReader,
52 garrow_orc_file_reader,
53 G_TYPE_OBJECT);
54
55#define GARROW_ORC_FILE_READER_GET_PRIVATE(obj) \
56 static_cast<GArrowORCFileReaderPrivate *>( \
57 garrow_orc_file_reader_get_instance_private( \
58 GARROW_ORC_FILE_READER(obj)))
59
60static void
61garrow_orc_file_reader_dispose(GObject *object)
62{
63 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
64
65 if (priv->input) {
66 g_object_unref(priv->input);
67 priv->input = NULL;
68 }
69
70 G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->dispose(object);
71}
72
73static void
74garrow_orc_file_reader_finalize(GObject *object)
75{
76 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
77
78 delete priv->orc_file_reader;
79
80 if (priv->field_indices) {
81 g_array_free(priv->field_indices, TRUE);
82 }
83
84 G_OBJECT_CLASS(garrow_orc_file_reader_parent_class)->finalize(object);
85}
86
87static void
88garrow_orc_file_reader_set_property(GObject *object,
89 guint prop_id,
90 const GValue *value,
91 GParamSpec *pspec)
92{
93 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
94
95 switch (prop_id) {
96 case PROP_INPUT:
97 priv->input = GARROW_SEEKABLE_INPUT_STREAM(g_value_dup_object(value));
98 break;
99 case PROP_ORC_FILE_READER:
100 priv->orc_file_reader =
101 static_cast<arrow::adapters::orc::ORCFileReader *>(g_value_get_pointer(value));
102 break;
103 default:
104 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
105 break;
106 }
107}
108
109static void
110garrow_orc_file_reader_get_property(GObject *object,
111 guint prop_id,
112 GValue *value,
113 GParamSpec *pspec)
114{
115 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(object);
116
117 switch (prop_id) {
118 case PROP_INPUT:
119 g_value_set_object(value, priv->input);
120 break;
121 default:
122 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
123 break;
124 }
125}
126
127static void
128garrow_orc_file_reader_init(GArrowORCFileReader *object)
129{
130}
131
132static void
133garrow_orc_file_reader_class_init(GArrowORCFileReaderClass *klass)
134{
135 auto gobject_class = G_OBJECT_CLASS(klass);
136
137 gobject_class->dispose = garrow_orc_file_reader_dispose;
138 gobject_class->finalize = garrow_orc_file_reader_finalize;
139 gobject_class->set_property = garrow_orc_file_reader_set_property;
140 gobject_class->get_property = garrow_orc_file_reader_get_property;
141
142 GParamSpec *spec;
143 spec = g_param_spec_object("input",
144 "Input",
145 "The input stream",
146 GARROW_TYPE_SEEKABLE_INPUT_STREAM,
147 static_cast<GParamFlags>(G_PARAM_READWRITE |
148 G_PARAM_CONSTRUCT_ONLY));
149 g_object_class_install_property(gobject_class, PROP_INPUT, spec);
150
151 spec = g_param_spec_pointer("orc-file-reader",
152 "arrow::adapters::orc::ORCFileReader",
153 "The raw arrow::adapters::orc::ORCFileReader *",
154 static_cast<GParamFlags>(G_PARAM_WRITABLE |
155 G_PARAM_CONSTRUCT_ONLY));
156 g_object_class_install_property(gobject_class, PROP_ORC_FILE_READER, spec);
157}
158
159
160/**
161 * garrow_orc_file_reader_new:
162 * @file: The file to be read.
163 * @error: (nullable): Return location for a #GError or %NULL.
164 *
165 * Returns: (nullable): A newly created #GArrowORCFileReader
166 * or %NULL on error.
167 *
168 * Since: 0.10.0
169 */
170GArrowORCFileReader *
171garrow_orc_file_reader_new(GArrowSeekableInputStream *input,
172 GError **error)
173{
174 auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(input);
175 auto pool = arrow::default_memory_pool();
176 auto arrow_reader_result =
177 arrow::adapters::orc::ORCFileReader::Open(arrow_random_access_file,
178 pool);
179 if (garrow::check(error, arrow_reader_result, "[orc-file-reader][new]")) {
180 return garrow_orc_file_reader_new_raw(input,
181 (*arrow_reader_result).release());
182 } else {
183 return NULL;
184 }
185}
186
187/**
188 * garrow_orc_file_reader_set_field_indexes:
189 * @reader: A #GArrowORCFileReader.
190 * @field_indexes: (nullable) (array length=n_field_indexes):
191 * The field indexes to be read.
192 * @n_field_indexes: The number of the specified indexes.
193 *
194 * Since: 0.10.0
195 *
196 * Deprecated: 0.12.0:
197 * Use garrow_orc_file_reader_set_field_indices() instead.
198 */
199void
200garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
201 const gint *field_indexes,
202 guint n_field_indexes)
203{
204 garrow_orc_file_reader_set_field_indices(reader,
205 field_indexes,
206 n_field_indexes);
207}
208
209/**
210 * garrow_orc_file_reader_set_field_indices:
211 * @reader: A #GArrowORCFileReader.
212 * @field_indices: (nullable) (array length=n_field_indices):
213 * The field indices to be read.
214 * @n_field_indices: The number of the specified indices.
215 *
216 * Since: 0.12.0
217 */
218void
219garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader,
220 const gint *field_indices,
221 guint n_field_indices)
222{
223 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
224 if (priv->field_indices) {
225 g_array_free(priv->field_indices, TRUE);
226 }
227 if (n_field_indices == 0) {
228 priv->field_indices = NULL;
229 } else {
230 priv->field_indices = g_array_sized_new(FALSE,
231 FALSE,
232 sizeof(gint),
233 n_field_indices);
234 g_array_append_vals(priv->field_indices, field_indices, n_field_indices);
235 }
236}
237
238/**
239 * garrow_orc_file_reader_get_field_indexes:
240 * @reader: A #GArrowORCFileReader.
241 * @n_field_indexes: The number of the specified indexes.
242 *
243 * Returns: (nullable) (array length=n_field_indexes) (transfer none):
244 * The field indexes to be read.
245 *
246 * Since: 0.10.0
247 *
248 * Deprecated: 0.12.0:
249 * Use garrow_orc_file_reader_get_field_indices() instead.
250 */
251const gint *
252garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
253 guint *n_field_indexes)
254{
255 return garrow_orc_file_reader_get_field_indices(reader, n_field_indexes);
256}
257
258/**
259 * garrow_orc_file_reader_get_field_indices:
260 * @reader: A #GArrowORCFileReader.
261 * @n_field_indices: The number of the specified indices.
262 *
263 * Returns: (nullable) (array length=n_field_indices) (transfer none):
264 * The field indices to be read.
265 *
266 * Since: 0.12.0
267 */
268const gint *
269garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader,
270 guint *n_field_indices)
271{
272 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
273 if (priv->field_indices) {
274 *n_field_indices = priv->field_indices->len;
275 return reinterpret_cast<gint *>(priv->field_indices->data);
276 } else {
277 *n_field_indices = 0;
278 return NULL;
279 }
280}
281
282/**
283 * garrow_orc_file_reader_read_type:
284 * @reader: A #GArrowORCFileReader.
285 * @error: (nullable): Return location for a #GError or %NULL.
286 *
287 * Returns: (nullable) (transfer full): A newly read type as
288 * #GArrowSchema or %NULL on error.
289 *
290 * Since: 0.10.0
291 */
292GArrowSchema *
293garrow_orc_file_reader_read_type(GArrowORCFileReader *reader,
294 GError **error)
295{
296 auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
297 auto arrow_schema_result = arrow_reader->ReadSchema();
298 if (garrow::check(error,
299 arrow_schema_result,
300 "[orc-file-reader][read-type]")) {
301 auto arrow_schema = *arrow_schema_result;
302 return garrow_schema_new_raw(&arrow_schema);
303 } else {
304 return NULL;
305 }
306}
307
308/**
309 * garrow_orc_file_reader_read_stripes:
310 * @reader: A #GArrowORCFileReader.
311 * @error: (nullable): Return location for a #GError or %NULL.
312 *
313 * Returns: (nullable) (transfer full): A newly read stripes as
314 * #GArrowTable or %NULL on error.
315 *
316 * Since: 0.10.0
317 */
318GArrowTable *
319garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader,
320 GError **error)
321{
322 auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
323 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
324 if (priv->field_indices) {
325 std::vector<int> arrow_field_indices;
326 auto field_indices = priv->field_indices;
327 for (guint i = 0; i < field_indices->len; ++i) {
328 arrow_field_indices.push_back(g_array_index(field_indices, gint, i));
329 }
330 auto arrow_table_result = arrow_reader->Read(arrow_field_indices);
331 if (garrow::check(error,
332 arrow_table_result,
333 "[orc-file-reader][read-stripes]")) {
334 auto arrow_table = *arrow_table_result;
335 return garrow_table_new_raw(&arrow_table);
336 } else {
337 return NULL;
338 }
339 } else {
340 auto arrow_table_result = arrow_reader->Read();
341 if (garrow::check(error,
342 arrow_table_result,
343 "[orc-file-reader][read-stripes]")) {
344 auto arrow_table = *arrow_table_result;
345 return garrow_table_new_raw(&arrow_table);
346 } else {
347 return NULL;
348 }
349 }
350}
351
352/**
353 * garrow_orc_file_reader_read_stripe:
354 * @reader: A #GArrowORCFileReader.
355 * @i: The stripe index to be read.
356 * @error: (nullable): Return location for a #GError or %NULL.
357 *
358 * Returns: (nullable) (transfer full): A newly read stripe as
359 * #GArrowRecordBatch or %NULL on error.
360 *
361 * Since: 0.10.0
362 */
363GArrowRecordBatch *
364garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
365 gint64 i,
366 GError **error)
367{
368 auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
369 if (i < 0) {
370 i += arrow_reader->NumberOfStripes();
371 }
372 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
373 if (priv->field_indices) {
374 std::vector<int> arrow_field_indices;
375 auto field_indices = priv->field_indices;
376 for (guint j = 0; j < field_indices->len; ++j) {
377 arrow_field_indices.push_back(g_array_index(field_indices, gint, j));
378 }
379 std::shared_ptr<arrow::RecordBatch> arrow_record_batch;
380 auto arrow_record_batch_result =
381 arrow_reader->ReadStripe(i, arrow_field_indices);
382 if (garrow::check(error,
383 arrow_record_batch_result,
384 "[orc-file-reader][read-stripe]")) {
385 auto arrow_record_batch = *arrow_record_batch_result;
386 return garrow_record_batch_new_raw(&arrow_record_batch);
387 } else {
388 return NULL;
389 }
390 } else {
391 auto arrow_record_batch_result = arrow_reader->ReadStripe(i);
392 if (garrow::check(error,
393 arrow_record_batch_result,
394 "[orc-file-reader][read-stripe]")) {
395 auto arrow_record_batch = *arrow_record_batch_result;
396 return garrow_record_batch_new_raw(&arrow_record_batch);
397 } else {
398 return NULL;
399 }
400 }
401}
402
403/**
404 * garrow_orc_file_reader_get_n_stripes:
405 * @reader: A #GArrowORCFileReader.
406 *
407 * Returns: The number of stripes in the file.
408 *
409 * Since: 0.10.0
410 */
411gint64
412garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader)
413{
414 auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
415 return arrow_reader->NumberOfStripes();
416}
417
418/**
419 * garrow_orc_file_reader_get_n_rows:
420 * @reader: A #GArrowORCFileReader.
421 *
422 * Returns: The number of rows in the file.
423 *
424 * Since: 0.10.0
425 */
426gint64
427garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader)
428{
429 auto arrow_reader = garrow_orc_file_reader_get_raw(reader);
430 return arrow_reader->NumberOfRows();
431}
432
433
434G_END_DECLS
435
436
437GArrowORCFileReader *
438garrow_orc_file_reader_new_raw(GArrowSeekableInputStream *input,
439 arrow::adapters::orc::ORCFileReader *arrow_reader)
440{
441 auto reader =
442 GARROW_ORC_FILE_READER(g_object_new(GARROW_TYPE_ORC_FILE_READER,
443 "input", input,
444 "orc-file-reader", arrow_reader,
445 NULL));
446 return reader;
447}
448
449arrow::adapters::orc::ORCFileReader *
450garrow_orc_file_reader_get_raw(GArrowORCFileReader *reader)
451{
452 auto priv = GARROW_ORC_FILE_READER_GET_PRIVATE(reader);
453 return priv->orc_file_reader;
454}