]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/gandiva/tests/generate_data.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / gandiva / tests / generate_data.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <stdlib.h>
19 #include <random>
20 #include <string>
21
22 #include "arrow/util/decimal.h"
23 #include "arrow/util/io_util.h"
24
25 #pragma once
26
27 namespace gandiva {
28
// Abstract source of test values of type C_TYPE.
//
// Concrete generators derive from this interface and implement GenerateData(),
// which returns one value per call.
template <class C_TYPE>
class DataGenerator {
 public:
  // Produces the next value; must be implemented by every concrete generator.
  virtual C_TYPE GenerateData() = 0;

  // Virtual so that generators can be destroyed through a base pointer.
  virtual ~DataGenerator() = default;
};
36
37 class Random {
38 public:
39 Random() : gen_(::arrow::internal::GetRandomSeed()) {}
40 explicit Random(uint64_t seed) : gen_(seed) {}
41
42 int32_t next() { return gen_(); }
43
44 private:
45 std::default_random_engine gen_;
46 };
47
48 class Int32DataGenerator : public DataGenerator<int32_t> {
49 public:
50 Int32DataGenerator() {}
51
52 int32_t GenerateData() { return random_.next(); }
53
54 protected:
55 Random random_;
56 };
57
58 class BoundedInt32DataGenerator : public Int32DataGenerator {
59 public:
60 explicit BoundedInt32DataGenerator(uint32_t upperBound)
61 : Int32DataGenerator(), upperBound_(upperBound) {}
62
63 int32_t GenerateData() {
64 int32_t value = (random_.next() % upperBound_);
65 return value;
66 }
67
68 protected:
69 uint32_t upperBound_;
70 };
71
72 class Int64DataGenerator : public DataGenerator<int64_t> {
73 public:
74 Int64DataGenerator() {}
75
76 int64_t GenerateData() { return random_.next(); }
77
78 protected:
79 Random random_;
80 };
81
82 class Decimal128DataGenerator : public DataGenerator<arrow::Decimal128> {
83 public:
84 explicit Decimal128DataGenerator(bool large) : large_(large) {}
85
86 arrow::Decimal128 GenerateData() {
87 uint64_t low = random_.next();
88 int64_t high = random_.next();
89 if (large_) {
90 high += (1ull << 62);
91 }
92 return arrow::Decimal128(high, low);
93 }
94
95 protected:
96 bool large_;
97 Random random_;
98 };
99
100 class FastUtf8DataGenerator : public DataGenerator<std::string> {
101 public:
102 explicit FastUtf8DataGenerator(int max_len) : max_len_(max_len), cur_char_('a') {}
103
104 std::string GenerateData() {
105 std::string generated_str;
106
107 int slen = random_.next() % max_len_;
108 for (int i = 0; i < slen; ++i) {
109 generated_str += generate_next_char();
110 }
111 return generated_str;
112 }
113
114 private:
115 char generate_next_char() {
116 ++cur_char_;
117 if (cur_char_ > 'z') {
118 cur_char_ = 'a';
119 }
120 return cur_char_;
121 }
122
123 Random random_;
124 unsigned int max_len_;
125 char cur_char_;
126 };
127
128 class Utf8IntDataGenerator : public DataGenerator<std::string> {
129 public:
130 Utf8IntDataGenerator() {}
131
132 std::string GenerateData() { return std::to_string(random_.next()); }
133
134 private:
135 Random random_;
136 };
137
138 class Utf8FloatDataGenerator : public DataGenerator<std::string> {
139 public:
140 Utf8FloatDataGenerator() {}
141
142 std::string GenerateData() {
143 return std::to_string(
144 static_cast<float>(random_.next()) /
145 static_cast<float>(RAND_MAX / 100)); // random float between 0.0 to 100.0
146 }
147
148 private:
149 Random random_;
150 };
151
152 } // namespace gandiva