]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, | |
12 | # software distributed under the License is distributed on an | |
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | # KIND, either express or implied. See the License for the | |
15 | # specific language governing permissions and limitations | |
16 | # under the License. | |
17 | ||
18 | contexts: | |
19 | - name: master | |
20 | prelude: | | |
21 | $LOAD_PATH.unshift(File.expand_path("ext/arrow")) | |
22 | $LOAD_PATH.unshift(File.expand_path("lib")) | |
23 | prelude: |- | |
require "arrow"
require "faker"

# Seed Faker's random source so repeated runs generate the same data.
# The seed defaults to 17 and can be overridden with FAKER_RANDOM_SEED.
state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i
Faker::Config.random = Random.new(state)

n_rows = 1000
n_columns = 10
type = Arrow::TimestampDataType.new(:micro)
# Timestamps are drawn from a window of 30 days on either side of a
# randomly chosen base time.
base_timestamp = Time.at(Faker::Number.within(range: 0 ... 1_000_000_000))
thirty_days_in_sec = 30*24*3600
timestamp_range = {
  from: base_timestamp - thirty_days_in_sec,
  to: base_timestamp + thirty_days_in_sec,
}

# Build n_columns columns, each holding n_rows integer values computed as
# seconds * 1_000_000 + a random sub-second part in microseconds.
fields = {}
arrays = {}
n_columns.times do |i|
  column_name = "column_#{i}"
  fields[column_name] = type
  arrays[column_name] = n_rows.times.map do
    # Splat the hash so from:/to: are passed as keyword arguments; a bare
    # Hash is no longer converted to keywords on Ruby 3.0 and later.
    sec = Faker::Time.between(**timestamp_range).to_i
    micro = Faker::Number.within(range: 0 ... 1_000_000)
    sec * 1_000_000 + micro
  end
end
record_batch = Arrow::RecordBatch.new(fields, arrays)
52 | ||
# Converts a record batch into an array of rows using only Ruby-level
# element access (columns[column_index][row_index]).
#
# record_batch must respond to #n_rows, #n_columns and #columns, and each
# column must support integer indexing with #[].
#
# Returns an Array of n_rows Arrays, each holding n_columns values in
# column order.
#
# NOTE(review): the index-based while loops are kept as-is; this method is
# presumably the baseline being timed, so confirm before restructuring it.
def pure_ruby_raw_records(record_batch)
  row_count = record_batch.n_rows
  column_count = record_batch.n_columns
  column_list = record_batch.columns
  rows = []
  row_index = 0
  while row_index < row_count
    row = []
    column_index = 0
    while column_index < column_count
      row << column_list[column_index][row_index]
      column_index += 1
    end
    rows << row
    row_index += 1
  end
  rows
end
71 | benchmark: | |
72 | pure_ruby: |- | |
73 | pure_ruby_raw_records(record_batch) | |
74 | raw_records: |- | |
75 | record_batch.raw_records |