# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++
# cython: language_level = 3
21 | from pyarrow.includes.common cimport * | |
22 | from pyarrow.includes.libarrow cimport (CChunkedArray, CSchema, CStatus, | |
23 | CTable, CMemoryPool, CBuffer, | |
24 | CKeyValueMetadata, | |
25 | CRandomAccessFile, COutputStream, | |
26 | TimeUnit, CRecordBatchReader) | |
27 | from pyarrow.lib cimport _Weakrefable | |
28 | ||
29 | ||
cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
    # Opaque Parquet C++ schema-node hierarchy.  Only pointer identity is
    # used on the Cython side, so the class bodies are left empty.
    cdef cppclass Node:
        pass

    cdef cppclass GroupNode(Node):
        pass

    cdef cppclass PrimitiveNode(Node):
        pass

    cdef cppclass ColumnPath:
        # Dotted-path representations of a column's position in the schema.
        c_string ToDotString()
        vector[c_string] ToDotVector()
43 | ||
44 | ||
cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
    # Mirrors of the Parquet C++ enumerations and schema types.  The quoted
    # string after each Cython name is the verbatim C++ qualified name that
    # Cython substitutes into generated code, so it must match the C++
    # headers exactly.
    enum ParquetType" parquet::Type::type":
        ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
        ParquetType_INT32" parquet::Type::INT32"
        ParquetType_INT64" parquet::Type::INT64"
        ParquetType_INT96" parquet::Type::INT96"
        ParquetType_FLOAT" parquet::Type::FLOAT"
        ParquetType_DOUBLE" parquet::Type::DOUBLE"
        ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
        ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"

    enum ParquetLogicalTypeId" parquet::LogicalType::Type::type":
        ParquetLogicalType_UNDEFINED" parquet::LogicalType::Type::UNDEFINED"
        ParquetLogicalType_STRING" parquet::LogicalType::Type::STRING"
        ParquetLogicalType_MAP" parquet::LogicalType::Type::MAP"
        ParquetLogicalType_LIST" parquet::LogicalType::Type::LIST"
        ParquetLogicalType_ENUM" parquet::LogicalType::Type::ENUM"
        ParquetLogicalType_DECIMAL" parquet::LogicalType::Type::DECIMAL"
        ParquetLogicalType_DATE" parquet::LogicalType::Type::DATE"
        ParquetLogicalType_TIME" parquet::LogicalType::Type::TIME"
        ParquetLogicalType_TIMESTAMP" parquet::LogicalType::Type::TIMESTAMP"
        ParquetLogicalType_INT" parquet::LogicalType::Type::INT"
        ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON"
        ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON"
        ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID"
        ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE"

    enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit":
        ParquetTimeUnit_UNKNOWN" parquet::LogicalType::TimeUnit::UNKNOWN"
        ParquetTimeUnit_MILLIS" parquet::LogicalType::TimeUnit::MILLIS"
        ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS"
        ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS"

    enum ParquetConvertedType" parquet::ConvertedType::type":
        ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
        ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
        ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
        ParquetConvertedType_MAP_KEY_VALUE \
            " parquet::ConvertedType::MAP_KEY_VALUE"
        ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
        ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
        ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
        ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
        ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
        ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
        ParquetConvertedType_TIMESTAMP_MILLIS \
            " parquet::ConvertedType::TIMESTAMP_MILLIS"
        ParquetConvertedType_TIMESTAMP_MICROS \
            " parquet::ConvertedType::TIMESTAMP_MICROS"
        ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
        ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
        ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
        ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
        ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
        ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
        ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
        ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
        ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
        ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
        ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"

    enum ParquetRepetition" parquet::Repetition::type":
        # BUGFIX: these verbatim cnames previously referenced
        # "parquet::REPETITION::...", a namespace that does not exist in the
        # C++ headers; the enum is declared inside struct parquet::Repetition
        # (see parquet/types.h).  The bug was latent because Cython only
        # emits the cname where the value is actually used.
        ParquetRepetition_REQUIRED" parquet::Repetition::REQUIRED"
        ParquetRepetition_OPTIONAL" parquet::Repetition::OPTIONAL"
        ParquetRepetition_REPEATED" parquet::Repetition::REPEATED"

    enum ParquetEncoding" parquet::Encoding::type":
        ParquetEncoding_PLAIN" parquet::Encoding::PLAIN"
        ParquetEncoding_PLAIN_DICTIONARY" parquet::Encoding::PLAIN_DICTIONARY"
        ParquetEncoding_RLE" parquet::Encoding::RLE"
        ParquetEncoding_BIT_PACKED" parquet::Encoding::BIT_PACKED"
        ParquetEncoding_DELTA_BINARY_PACKED \
            " parquet::Encoding::DELTA_BINARY_PACKED"
        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
            " parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
        ParquetEncoding_DELTA_BYTE_ARRAY" parquet::Encoding::DELTA_BYTE_ARRAY"
        ParquetEncoding_RLE_DICTIONARY" parquet::Encoding::RLE_DICTIONARY"
        ParquetEncoding_BYTE_STREAM_SPLIT \
            " parquet::Encoding::BYTE_STREAM_SPLIT"

    enum ParquetCompression" parquet::Compression::type":
        ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
        ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
        ParquetCompression_GZIP" parquet::Compression::GZIP"
        ParquetCompression_LZO" parquet::Compression::LZO"
        ParquetCompression_BROTLI" parquet::Compression::BROTLI"
        ParquetCompression_LZ4" parquet::Compression::LZ4"
        ParquetCompression_ZSTD" parquet::Compression::ZSTD"

    enum ParquetVersion" parquet::ParquetVersion::type":
        ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
        ParquetVersion_V2_0" parquet::ParquetVersion::PARQUET_2_0"
        ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
        ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"

    enum ParquetSortOrder" parquet::SortOrder::type":
        ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
        ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
        ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"

    # Logical-type class hierarchy; the subclasses expose the extra
    # parameters carried by parameterized logical types.
    cdef cppclass CParquetLogicalType" parquet::LogicalType":
        c_string ToString() const
        c_string ToJSON() const
        ParquetLogicalTypeId type() const

    cdef cppclass CParquetDecimalType \
            " parquet::DecimalLogicalType"(CParquetLogicalType):
        int32_t precision() const
        int32_t scale() const

    cdef cppclass CParquetIntType \
            " parquet::IntLogicalType"(CParquetLogicalType):
        int bit_width() const
        c_bool is_signed() const

    cdef cppclass CParquetTimeType \
            " parquet::TimeLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass CParquetTimestampType \
            " parquet::TimestampLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
        c_bool Equals(const ColumnDescriptor& other)

        shared_ptr[ColumnPath] path()
        int16_t max_definition_level()
        int16_t max_repetition_level()

        ParquetType physical_type()
        const shared_ptr[const CParquetLogicalType]& logical_type()
        ParquetConvertedType converted_type()
        const c_string& name()
        # type_length/precision/scale are only meaningful for
        # FIXED_LEN_BYTE_ARRAY / decimal columns respectively.
        int type_length()
        int type_precision()
        int type_scale()

    cdef cppclass SchemaDescriptor:
        const ColumnDescriptor* Column(int i)
        shared_ptr[Node] schema()
        GroupNode* group()
        c_bool Equals(const SchemaDescriptor& other)
        c_string ToString()
        int num_columns()

    # Renders an encoded statistics value of the given physical type as a
    # human-readable string.
    cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)
195 | ||
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
    # Opaque typed column readers; only handled by pointer on the Cython
    # side, hence the empty bodies.
    cdef cppclass ColumnReader:
        pass

    cdef cppclass BoolReader(ColumnReader):
        pass

    cdef cppclass Int32Reader(ColumnReader):
        pass

    cdef cppclass Int64Reader(ColumnReader):
        pass

    cdef cppclass Int96Reader(ColumnReader):
        pass

    cdef cppclass FloatReader(ColumnReader):
        pass

    cdef cppclass DoubleReader(ColumnReader):
        pass

    cdef cppclass ByteArrayReader(ColumnReader):
        pass

    cdef cppclass RowGroupReader:
        pass

    cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
        # Serialized (plain-encoded) column statistics as stored in the
        # file metadata; the has_* flags say which fields are populated.
        const c_string& max() const
        const c_string& min() const
        int64_t null_count
        int64_t distinct_count
        bint has_min
        bint has_max
        bint has_null_count
        bint has_distinct_count

    cdef cppclass ParquetByteArray" parquet::ByteArray":
        # Variable-length byte-array value: explicit length plus pointer.
        uint32_t len
        const uint8_t* ptr

    cdef cppclass ParquetFLBA" parquet::FLBA":
        # Fixed-length byte-array value; the length comes from the schema.
        const uint8_t* ptr

    cdef cppclass CStatistics" parquet::Statistics":
        # Typed column-chunk statistics base class; min/max accessors live
        # on the typed subclasses below.
        int64_t null_count() const
        int64_t distinct_count() const
        int64_t num_values() const
        bint HasMinMax()
        bint HasNullCount()
        bint HasDistinctCount()
        c_bool Equals(const CStatistics&) const
        void Reset()
        c_string EncodeMin()
        c_string EncodeMax()
        CEncodedStatistics Encode()
        void SetComparator()
        ParquetType physical_type() const
        const ColumnDescriptor* descr() const

    cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
        c_bool min()
        c_bool max()

    cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
        int32_t min()
        int32_t max()

    cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
        int64_t min()
        int64_t max()

    cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
        float min()
        float max()

    cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
        double min()
        double max()

    cdef cppclass CByteArrayStatistics \
            " parquet::ByteArrayStatistics"(CStatistics):
        ParquetByteArray min()
        ParquetByteArray max()

    cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
        ParquetFLBA min()
        ParquetFLBA max()

    cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
        int64_t file_offset() const
        const c_string& file_path() const

        ParquetType type() const
        int64_t num_values() const
        shared_ptr[ColumnPath] path_in_schema() const
        bint is_stats_set() const
        shared_ptr[CStatistics] statistics() const
        ParquetCompression compression() const
        const vector[ParquetEncoding]& encodings() const
        c_bool Equals(const CColumnChunkMetaData&) const

        int64_t has_dictionary_page() const
        int64_t dictionary_page_offset() const
        int64_t data_page_offset() const
        int64_t index_page_offset() const
        int64_t total_compressed_size() const
        int64_t total_uncompressed_size() const

    cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
        c_bool Equals(const CRowGroupMetaData&) const
        int num_columns()
        int64_t num_rows()
        int64_t total_byte_size()
        unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const

    cdef cppclass CFileMetaData" parquet::FileMetaData":
        c_bool Equals(const CFileMetaData&) const
        uint32_t size()
        int num_columns()
        int64_t num_rows()
        int num_row_groups()
        ParquetVersion version()
        const c_string created_by()
        int num_schema_elements()

        void set_file_path(const c_string& path)
        # Can raise in C++ (schema mismatch), hence "except +".
        void AppendRowGroups(const CFileMetaData& other) except +

        unique_ptr[CRowGroupMetaData] RowGroup(int i)
        const SchemaDescriptor* schema()
        shared_ptr[const CKeyValueMetadata] key_value_metadata() const
        void WriteTo(COutputStream* dst) const

    # Deserializes FileMetaData from a Thrift-serialized buffer;
    # metadata_len is in/out (bytes available / bytes consumed).
    cdef shared_ptr[CFileMetaData] CFileMetaData_Make \
        " parquet::FileMetaData::Make"(const void* serialized_metadata,
                                       uint32_t* metadata_len)

    cdef cppclass CReaderProperties" parquet::ReaderProperties":
        c_bool is_buffered_stream_enabled() const
        void enable_buffered_stream()
        void disable_buffered_stream()
        void set_buffer_size(int64_t buf_size)
        int64_t buffer_size() const

    CReaderProperties default_reader_properties()

    cdef cppclass ArrowReaderProperties:
        # Arrow-specific read options (dictionary decoding, batch size,
        # pre-buffering, INT96 timestamp coercion).
        ArrowReaderProperties()
        void set_read_dictionary(int column_index, c_bool read_dict)
        c_bool read_dictionary()
        void set_batch_size(int64_t batch_size)
        int64_t batch_size()
        void set_pre_buffer(c_bool pre_buffer)
        c_bool pre_buffer() const
        void set_coerce_int96_timestamp_unit(TimeUnit unit)
        TimeUnit coerce_int96_timestamp_unit() const

    ArrowReaderProperties default_arrow_reader_properties()

    cdef cppclass ParquetFileReader:
        shared_ptr[CFileMetaData] metadata()
360 | ||
cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
    cdef cppclass WriterProperties:
        cppclass Builder:
            # Fluent builder: each setter returns the builder pointer so
            # calls can be chained.  Overloads taking a column path string
            # apply the setting to that single column only.
            Builder* data_page_version(ParquetDataPageVersion version)
            Builder* version(ParquetVersion version)
            Builder* compression(ParquetCompression codec)
            Builder* compression(const c_string& path,
                                 ParquetCompression codec)
            Builder* compression_level(int compression_level)
            Builder* compression_level(const c_string& path,
                                       int compression_level)
            Builder* disable_dictionary()
            Builder* enable_dictionary()
            Builder* enable_dictionary(const c_string& path)
            Builder* disable_statistics()
            Builder* enable_statistics()
            Builder* enable_statistics(const c_string& path)
            Builder* data_pagesize(int64_t size)
            Builder* encoding(ParquetEncoding encoding)
            Builder* encoding(const c_string& path,
                              ParquetEncoding encoding)
            Builder* write_batch_size(int64_t batch_size)
            shared_ptr[WriterProperties] build()

    cdef cppclass ArrowWriterProperties:
        cppclass Builder:
            # Arrow-to-Parquet conversion options (timestamp handling,
            # schema storage, nested-type naming, writer engine).
            Builder()
            Builder* disable_deprecated_int96_timestamps()
            Builder* enable_deprecated_int96_timestamps()
            Builder* coerce_timestamps(TimeUnit unit)
            Builder* allow_truncated_timestamps()
            Builder* disallow_truncated_timestamps()
            Builder* store_schema()
            Builder* enable_compliant_nested_types()
            Builder* disable_compliant_nested_types()
            Builder* set_engine_version(ArrowWriterEngineVersion version)
            shared_ptr[ArrowWriterProperties] build()
        c_bool support_deprecated_int96_timestamps()
400 | ||
cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil:
    cdef cppclass FileReader:
        # Reads a Parquet file into Arrow data structures (tables, chunked
        # arrays, record batches).
        FileReader(CMemoryPool* pool, unique_ptr[ParquetFileReader] reader)

        CStatus GetSchema(shared_ptr[CSchema]* out)

        CStatus ReadColumn(int i, shared_ptr[CChunkedArray]* out)
        CStatus ReadSchemaField(int i, shared_ptr[CChunkedArray]* out)

        int num_row_groups()
        CStatus ReadRowGroup(int i, shared_ptr[CTable]* out)
        CStatus ReadRowGroup(int i, const vector[int]& column_indices,
                             shared_ptr[CTable]* out)

        CStatus ReadRowGroups(const vector[int]& row_groups,
                              shared_ptr[CTable]* out)
        CStatus ReadRowGroups(const vector[int]& row_groups,
                              const vector[int]& column_indices,
                              shared_ptr[CTable]* out)

        CStatus GetRecordBatchReader(const vector[int]& row_group_indices,
                                     const vector[int]& column_indices,
                                     unique_ptr[CRecordBatchReader]* out)
        CStatus GetRecordBatchReader(const vector[int]& row_group_indices,
                                     unique_ptr[CRecordBatchReader]* out)

        CStatus ReadTable(shared_ptr[CTable]* out)
        CStatus ReadTable(const vector[int]& column_indices,
                          shared_ptr[CTable]* out)

        CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
                             int64_t* num_rows)

        const ParquetFileReader* parquet_reader()

        void set_use_threads(c_bool use_threads)

        void set_batch_size(int64_t batch_size)

    cdef cppclass FileReaderBuilder:
        # Configures and constructs a FileReader from a random-access file.
        FileReaderBuilder()
        CStatus Open(const shared_ptr[CRandomAccessFile]& file,
                     const CReaderProperties& properties,
                     const shared_ptr[CFileMetaData]& metadata)

        ParquetFileReader* raw_reader()
        FileReaderBuilder* memory_pool(CMemoryPool*)
        FileReaderBuilder* properties(const ArrowReaderProperties&)
        CStatus Build(unique_ptr[FileReader]* out)

    # Converts a Parquet schema descriptor to an Arrow schema.
    CStatus FromParquetSchema(
        const SchemaDescriptor* parquet_schema,
        const ArrowReaderProperties& properties,
        const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
        shared_ptr[CSchema]* out)
456 | ||
cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil:

    # Converts an Arrow schema to a Parquet schema descriptor (inverse of
    # FromParquetSchema above).
    CStatus ToParquetSchema(
        const CSchema* arrow_schema,
        const ArrowReaderProperties& properties,
        const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
        shared_ptr[SchemaDescriptor]* out)
464 | ||
465 | ||
cdef extern from "parquet/properties.h" namespace "parquet" nogil:
    # Engine used by the Arrow writer; V2 is the modern path.
    cdef enum ArrowWriterEngineVersion:
        V1 "parquet::ArrowWriterProperties::V1",
        V2 "parquet::ArrowWriterProperties::V2"

    # Opaque enum-class wrapper; values are exposed as module-level
    # constants below because Cython cannot declare C++ enum classes here.
    cdef cppclass ParquetDataPageVersion:
        pass

    cdef ParquetDataPageVersion ParquetDataPageVersion_V1 \
        " parquet::ParquetDataPageVersion::V1"
    cdef ParquetDataPageVersion ParquetDataPageVersion_V2 \
        " parquet::ParquetDataPageVersion::V2"
478 | ||
cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
    cdef cppclass FileWriter:
        # Writes Arrow tables out as a Parquet file.

        @staticmethod
        CStatus Open(const CSchema& schema, CMemoryPool* pool,
                     const shared_ptr[COutputStream]& sink,
                     const shared_ptr[WriterProperties]& properties,
                     const shared_ptr[ArrowWriterProperties]& arrow_properties,
                     unique_ptr[FileWriter]* writer)

        CStatus WriteTable(const CTable& table, int64_t chunk_size)
        CStatus NewRowGroup(int64_t chunk_size)
        CStatus Close()

        # Only populated after Close().
        const shared_ptr[CFileMetaData] metadata() const

    # Writes a standalone _metadata file containing only file metadata.
    CStatus WriteMetaDataFile(
        const CFileMetaData& file_metadata,
        const COutputStream* sink)
498 | ||
499 | ||
# Builds a C++ WriterProperties from Python-level keyword options
# (implemented in the companion .pyx; "=*" marks optional arguments).
cdef shared_ptr[WriterProperties] _create_writer_properties(
    use_dictionary=*,
    compression=*,
    version=*,
    write_statistics=*,
    data_page_size=*,
    compression_level=*,
    use_byte_stream_split=*,
    data_page_version=*) except *
509 | ||
510 | ||
# Builds a C++ ArrowWriterProperties from Python-level keyword options
# (implemented in the companion .pyx; "=*" marks optional arguments).
cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
    use_deprecated_int96_timestamps=*,
    coerce_timestamps=*,
    allow_truncated_timestamps=*,
    writer_engine_version=*,
    use_compliant_nested_type=*) except *
517 | ||
cdef class ParquetSchema(_Weakrefable):
    # Python-visible wrapper around a borrowed SchemaDescriptor pointer.
    cdef:
        FileMetaData parent  # the FileMetaData owning the SchemaDescriptor
        const SchemaDescriptor* schema
522 | ||
cdef class FileMetaData(_Weakrefable):
    # Python-visible wrapper around parquet::FileMetaData.
    cdef:
        shared_ptr[CFileMetaData] sp_metadata  # keeps the C++ object alive
        CFileMetaData* _metadata               # raw pointer into sp_metadata
        ParquetSchema _schema                  # lazily-created schema wrapper

    cdef inline init(self, const shared_ptr[CFileMetaData]& metadata):
        # Bind a C++ FileMetaData: hold the shared_ptr for ownership and
        # cache the raw pointer for cheap access.
        self.sp_metadata = metadata
        self._metadata = metadata.get()
532 | ||
cdef class RowGroupMetaData(_Weakrefable):
    # Python-visible wrapper around parquet::RowGroupMetaData.
    cdef:
        int index  # for pickling support
        unique_ptr[CRowGroupMetaData] up_metadata  # owns the C++ object
        CRowGroupMetaData* metadata                # raw pointer into up_metadata
        FileMetaData parent                        # keeps the file metadata alive
539 | ||
cdef class ColumnChunkMetaData(_Weakrefable):
    # Python-visible wrapper around parquet::ColumnChunkMetaData.
    cdef:
        unique_ptr[CColumnChunkMetaData] up_metadata  # owns the C++ object
        CColumnChunkMetaData* metadata                # raw pointer into up_metadata
        RowGroupMetaData parent                       # keeps the row group alive

    cdef inline init(self, RowGroupMetaData parent, int i):
        # Take ownership of column chunk i's metadata and keep a reference
        # to the parent row group so its lifetime covers ours.
        self.up_metadata = parent.metadata.ColumnChunk(i)
        self.metadata = self.up_metadata.get()
        self.parent = parent
550 | ||
cdef class Statistics(_Weakrefable):
    # Python-visible wrapper around parquet::Statistics.
    cdef:
        shared_ptr[CStatistics] statistics  # shared ownership of the C++ object
        ColumnChunkMetaData parent          # keeps the column chunk alive

    cdef inline init(self, const shared_ptr[CStatistics]& statistics,
                     ColumnChunkMetaData parent):
        # Bind the C++ statistics object and retain the parent chunk so the
        # underlying metadata outlives this wrapper.
        self.statistics = statistics
        self.parent = parent