1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
16 # under the License.
# Benchmark data setup.
# NOTE(review): this view is fragmentary. The integer at the start of each
# line is a leftover line number, and gaps in those numbers mark lines that
# are not shown here (e.g. where `n_rows` and `n_columns` get their values,
# and the closing lines of the multi-line calls below).
#
# Prepend "ext/arrow" and then "lib" (both resolved against the current
# working directory) to the load path; "lib" therefore ends up first.
21 $LOAD_PATH.unshift(File.expand_path("ext/arrow"))
22 $LOAD_PATH.unshift(File.expand_path("lib"))
# Seed Faker's random source from FAKER_RANDOM_SEED, falling back to 17 when
# the variable is unset -- presumably so generated data is repeatable.
27 state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i
28 Faker::Config.random = Random.new(state)
# Dictionary data type built from :int8 and :string.
# NOTE(review): the trailing `true` is presumably the "ordered" flag --
# confirm against the Arrow::DictionaryDataType constructor.
32 type = Arrow::DictionaryDataType.new(:int8, :string, true)
# Map :column_0, :column_1, ... to the same dictionary type, then pass the
# resulting hash to the schema constructor as keyword arguments.
34 fields = n_columns.times.map {|i| ["column_#{i}".to_sym, type] }.to_h
35 schema = Arrow::Schema.new(**fields)
# Dictionary values: 100 Faker book genres, de-duplicated and sorted, so the
# dictionary holds at most 100 entries.
36 dictionary = Arrow::StringArray.new(
37 100.times.map { Faker::Book.genre }.uniq.sort
# Index values: the visible expression draws a number from the exclusive
# range 0...dictionary.length, i.e. a valid position in `dictionary`.
# NOTE(review): the enclosing iteration is not visible here; presumably one
# index is drawn per row -- confirm.
39 indices = Arrow::Int8Array.new(
41 Faker::Number.within(range: 0 ... dictionary.length)
# One Arrow::DictionaryArray per column; the constructor arguments are on
# lines not shown in this view.
44 arrays = n_columns.times.map do
45 Arrow::DictionaryArray.new(
# Assemble the record batch from the schema, the row count and the arrays.
51 record_batch = Arrow::RecordBatch.new(schema, n_rows, arrays)
# Plain-Ruby extraction of per-record values from a record batch.
#
# record_batch: an object responding to `n_rows`, `n_columns` and `columns`.
#
# NOTE(review): only part of this method is visible. The loop structure, the
# initialisation of `record`, `i` and `j`, the return value and the closing
# `end` are on lines not shown here.
53 def pure_ruby_raw_records(record_batch)
# Read the dimensions and the column list once into locals.
54 n_rows = record_batch.n_rows
55 n_columns = record_batch.n_columns
56 columns = record_batch.columns
# Append entry `i` of column `j`'s `data.indices` to the current record, so
# the record receives what `indices` holds rather than a dictionary lookup.
# NOTE(review): presumably `i` is the row position and `j` the column
# position -- confirm against the full method.
63 record << columns[j].data.indices[i]
# Two expressions evaluated against the same `record_batch`.
# NOTE(review): the structure that labels or times them is not visible in
# this view.
#
# The plain-Ruby helper defined in this file.
73 pure_ruby_raw_records(record_batch)
# The record batch's own `raw_records` method.
75 record_batch.raw_records