// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
22 #include "arrow/util/decimal.h"
23 #include "arrow/util/io_util.h"
29 template <typename C_TYPE
>
32 virtual ~DataGenerator() = default;
34 virtual C_TYPE
GenerateData() = 0;
39 Random() : gen_(::arrow::internal::GetRandomSeed()) {}
40 explicit Random(uint64_t seed
) : gen_(seed
) {}
42 int32_t next() { return gen_(); }
45 std::default_random_engine gen_
;
48 class Int32DataGenerator
: public DataGenerator
<int32_t> {
50 Int32DataGenerator() {}
52 int32_t GenerateData() { return random_
.next(); }
58 class BoundedInt32DataGenerator
: public Int32DataGenerator
{
60 explicit BoundedInt32DataGenerator(uint32_t upperBound
)
61 : Int32DataGenerator(), upperBound_(upperBound
) {}
63 int32_t GenerateData() {
64 int32_t value
= (random_
.next() % upperBound_
);
72 class Int64DataGenerator
: public DataGenerator
<int64_t> {
74 Int64DataGenerator() {}
76 int64_t GenerateData() { return random_
.next(); }
82 class Decimal128DataGenerator
: public DataGenerator
<arrow::Decimal128
> {
84 explicit Decimal128DataGenerator(bool large
) : large_(large
) {}
86 arrow::Decimal128
GenerateData() {
87 uint64_t low
= random_
.next();
88 int64_t high
= random_
.next();
92 return arrow::Decimal128(high
, low
);
100 class FastUtf8DataGenerator
: public DataGenerator
<std::string
> {
102 explicit FastUtf8DataGenerator(int max_len
) : max_len_(max_len
), cur_char_('a') {}
104 std::string
GenerateData() {
105 std::string generated_str
;
107 int slen
= random_
.next() % max_len_
;
108 for (int i
= 0; i
< slen
; ++i
) {
109 generated_str
+= generate_next_char();
111 return generated_str
;
115 char generate_next_char() {
117 if (cur_char_
> 'z') {
124 unsigned int max_len_
;
128 class Utf8IntDataGenerator
: public DataGenerator
<std::string
> {
130 Utf8IntDataGenerator() {}
132 std::string
GenerateData() { return std::to_string(random_
.next()); }
138 class Utf8FloatDataGenerator
: public DataGenerator
<std::string
> {
140 Utf8FloatDataGenerator() {}
142 std::string
GenerateData() {
143 return std::to_string(
144 static_cast<float>(random_
.next()) /
145 static_cast<float>(RAND_MAX
/ 100)); // random float between 0.0 to 100.0
152 } // namespace gandiva