]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/parquet/test_util.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / parquet / test_util.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// This module provides helpers shared by the Parquet tests: locating files in
// the test data directories and filling caller-supplied buffers with seeded
// pseudo-random values.
21
22#include "parquet/test_util.h"
23
24#include <algorithm>
25#include <chrono>
26#include <limits>
27#include <memory>
28#include <random>
29#include <string>
30#include <utility>
31#include <vector>
32
33#include "parquet/column_page.h"
34#include "parquet/column_reader.h"
35#include "parquet/column_writer.h"
36#include "parquet/encoding.h"
37#include "parquet/platform.h"
38
39namespace parquet {
40namespace test {
41
42const char* get_data_dir() {
43 const auto result = std::getenv("PARQUET_TEST_DATA");
44 if (!result || !result[0]) {
45 throw ParquetTestException(
46 "Please point the PARQUET_TEST_DATA environment "
47 "variable to the test data directory");
48 }
49 return result;
50}
51
52std::string get_bad_data_dir() {
53 // PARQUET_TEST_DATA should point to ARROW_HOME/cpp/submodules/parquet-testing/data
54 // so need to reach one folder up to access the "bad_data" folder.
55 std::string data_dir(get_data_dir());
56 std::stringstream ss;
57 ss << data_dir << "/../bad_data";
58 return ss.str();
59}
60
61std::string get_data_file(const std::string& filename, bool is_good) {
62 std::stringstream ss;
63
64 if (is_good) {
65 ss << get_data_dir();
66 } else {
67 ss << get_bad_data_dir();
68 }
69
70 ss << "/" << filename;
71 return ss.str();
72}
73
// Resizes *out to n elements and overwrites every element with a
// pseudo-random byte in [0, 255].
//
// n:    number of bytes to produce.
// seed: seed for the random engine; the same seed yields the same sequence
//       on a given standard library implementation.
// out:  destination vector; its previous contents are discarded.
void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) {
  std::default_random_engine engine(seed);
  std::uniform_int_distribution<int> byte_dist(0, 255);

  out->resize(n);
  // Elements are filled front to back, one draw per element.
  std::generate(out->begin(), out->end(),
                [&]() { return static_cast<uint8_t>(byte_dist(engine)); });
}
83
// Writes n pseudo-random booleans to out[0..n), each true with probability p.
//
// n:    number of values to write; nothing is written when n <= 0.
// p:    probability handed to std::bernoulli_distribution.
// seed: seed for the random engine.
// out:  caller-provided array with room for at least n elements.
void random_bools(int n, double p, uint32_t seed, bool* out) {
  std::default_random_engine engine(seed);
  std::bernoulli_distribution coin(p);
  std::generate_n(out, n, [&]() { return coin(engine); });
}
91
92void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
93 Int96* out) {
94 std::default_random_engine gen(seed);
95 std::uniform_int_distribution<int32_t> d(min_value, max_value);
96 for (int i = 0; i < n; ++i) {
97 out[i].value[0] = d(gen);
98 out[i].value[1] = d(gen);
99 out[i].value[2] = d(gen);
100 }
101}
102
103void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) {
104 std::default_random_engine gen(seed);
105 std::uniform_int_distribution<int> d(0, 255);
106 for (int i = 0; i < n; ++i) {
107 out[i].ptr = buf;
108 for (int j = 0; j < len; ++j) {
109 buf[j] = static_cast<uint8_t>(d(gen));
110 }
111 buf += len;
112 }
113}
114
115void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
116 int max_size) {
117 std::default_random_engine gen(seed);
118 std::uniform_int_distribution<int> d1(min_size, max_size);
119 std::uniform_int_distribution<int> d2(0, 255);
120 for (int i = 0; i < n; ++i) {
121 int len = d1(gen);
122 out[i].len = len;
123 out[i].ptr = buf;
124 for (int j = 0; j < len; ++j) {
125 buf[j] = static_cast<uint8_t>(d2(gen));
126 }
127 buf += len;
128 }
129}
130
131void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int max_size) {
132 random_byte_array(n, seed, buf, out, 0, max_size);
133}
134
135} // namespace test
136} // namespace parquet