]>
git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/parquet/test_util.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
// This module implements helper functions shared by the Parquet tests:
// locating the test data directories (via the PARQUET_TEST_DATA environment
// variable) and filling caller-provided buffers with seeded random values.
#include "parquet/test_util.h"

#include <cstdint>
#include <cstdlib>
#include <random>
#include <sstream>
#include <string>
#include <vector>

#include "parquet/column_page.h"
#include "parquet/column_reader.h"
#include "parquet/column_writer.h"
#include "parquet/encoding.h"
#include "parquet/platform.h"
42 const char* get_data_dir() {
43 const auto result
= std::getenv("PARQUET_TEST_DATA");
44 if (!result
|| !result
[0]) {
45 throw ParquetTestException(
46 "Please point the PARQUET_TEST_DATA environment "
47 "variable to the test data directory");
52 std::string
get_bad_data_dir() {
53 // PARQUET_TEST_DATA should point to ARROW_HOME/cpp/submodules/parquet-testing/data
54 // so need to reach one folder up to access the "bad_data" folder.
55 std::string
data_dir(get_data_dir());
57 ss
<< data_dir
<< "/../bad_data";
61 std::string
get_data_file(const std::string
& filename
, bool is_good
) {
67 ss
<< get_bad_data_dir();
70 ss
<< "/" << filename
;
// Fill *out with n pseudo-random bytes.
//
// n:    number of bytes to generate; *out is resized to exactly n elements.
// seed: seed for the std::default_random_engine, so a given (n, seed) pair
//       reproduces the same bytes on the same standard library.
// out:  destination vector; must not be null.
void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) {
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<int> d(0, 255);

  // Size the destination first: operator[] below never grows the vector.
  out->resize(n);
  for (int i = 0; i < n; ++i) {
    (*out)[i] = static_cast<uint8_t>(d(gen));
  }
}
// Fill out[0..n) with pseudo-random booleans.
//
// n:    number of values to write; out must have room for at least n bools.
// p:    parameter of the std::bernoulli_distribution each value is drawn from
//       (the probability of producing true).
// seed: seed for the std::default_random_engine.
// out:  caller-owned destination array.
void random_bools(int n, double p, uint32_t seed, bool* out) {
  std::default_random_engine gen(seed);
  std::bernoulli_distribution d(p);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}
92 void random_Int96_numbers(int n
, uint32_t seed
, int32_t min_value
, int32_t max_value
,
94 std::default_random_engine
gen(seed
);
95 std::uniform_int_distribution
<int32_t> d(min_value
, max_value
);
96 for (int i
= 0; i
< n
; ++i
) {
97 out
[i
].value
[0] = d(gen
);
98 out
[i
].value
[1] = d(gen
);
99 out
[i
].value
[2] = d(gen
);
103 void random_fixed_byte_array(int n
, uint32_t seed
, uint8_t* buf
, int len
, FLBA
* out
) {
104 std::default_random_engine
gen(seed
);
105 std::uniform_int_distribution
<int> d(0, 255);
106 for (int i
= 0; i
< n
; ++i
) {
108 for (int j
= 0; j
< len
; ++j
) {
109 buf
[j
] = static_cast<uint8_t>(d(gen
));
115 void random_byte_array(int n
, uint32_t seed
, uint8_t* buf
, ByteArray
* out
, int min_size
,
117 std::default_random_engine
gen(seed
);
118 std::uniform_int_distribution
<int> d1(min_size
, max_size
);
119 std::uniform_int_distribution
<int> d2(0, 255);
120 for (int i
= 0; i
< n
; ++i
) {
124 for (int j
= 0; j
< len
; ++j
) {
125 buf
[j
] = static_cast<uint8_t>(d2(gen
));
131 void random_byte_array(int n
, uint32_t seed
, uint8_t* buf
, ByteArray
* out
, int max_size
) {
132 random_byte_array(n
, seed
, buf
, out
, 0, max_size
);
136 } // namespace parquet