]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.algorithm.sort; | |
19 | ||
20 | import static org.junit.jupiter.api.Assertions.assertEquals; | |
21 | ||
22 | import java.lang.reflect.Array; | |
23 | import java.util.Arrays; | |
24 | import java.util.Random; | |
25 | import java.util.function.BiConsumer; | |
26 | import java.util.function.Supplier; | |
27 | ||
28 | import org.apache.arrow.vector.BigIntVector; | |
29 | import org.apache.arrow.vector.Float4Vector; | |
30 | import org.apache.arrow.vector.Float8Vector; | |
31 | import org.apache.arrow.vector.IntVector; | |
32 | import org.apache.arrow.vector.SmallIntVector; | |
33 | import org.apache.arrow.vector.TinyIntVector; | |
34 | import org.apache.arrow.vector.ValueVector; | |
35 | import org.apache.arrow.vector.VarCharVector; | |
36 | import org.apache.arrow.vector.testing.RandomDataGenerator; | |
37 | import org.apache.arrow.vector.testing.ValueVectorDataPopulator; | |
38 | ||
39 | /** | |
40 | * Utilities for sorting related utilities. | |
41 | */ | |
42 | public class TestSortingUtil { | |
43 | ||
44 | static final Random random = new Random(0); | |
45 | ||
46 | static final DataGenerator<TinyIntVector, Byte> TINY_INT_GENERATOR = new DataGenerator<>( | |
47 | RandomDataGenerator.TINY_INT_GENERATOR, | |
48 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class); | |
49 | ||
50 | static final DataGenerator<SmallIntVector, Short> SMALL_INT_GENERATOR = new DataGenerator<>( | |
51 | RandomDataGenerator.SMALL_INT_GENERATOR, | |
52 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class); | |
53 | ||
54 | static final DataGenerator<IntVector, Integer> INT_GENERATOR = new DataGenerator<>( | |
55 | RandomDataGenerator.INT_GENERATOR, | |
56 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class); | |
57 | ||
58 | static final DataGenerator<BigIntVector, Long> LONG_GENERATOR = new DataGenerator<>( | |
59 | RandomDataGenerator.LONG_GENERATOR, | |
60 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class); | |
61 | ||
62 | static final DataGenerator<Float4Vector, Float> FLOAT_GENERATOR = new DataGenerator<>( | |
63 | RandomDataGenerator.FLOAT_GENERATOR, | |
64 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class); | |
65 | ||
66 | static final DataGenerator<Float8Vector, Double> DOUBLE_GENERATOR = new DataGenerator<>( | |
67 | RandomDataGenerator.DOUBLE_GENERATOR, | |
68 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class); | |
69 | ||
70 | static final DataGenerator<VarCharVector, String> STRING_GENERATOR = new DataGenerator<>( | |
71 | () -> { | |
72 | int strLength = random.nextInt(20) + 1; | |
73 | return generateRandomString(strLength); | |
74 | }, | |
75 | (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class); | |
76 | ||
77 | private TestSortingUtil() { | |
78 | } | |
79 | ||
80 | /** | |
81 | * Verify that a vector is equal to an array. | |
82 | */ | |
83 | public static <V extends ValueVector, U> void verifyResults(V vector, U[] expected) { | |
84 | assertEquals(vector.getValueCount(), expected.length); | |
85 | for (int i = 0; i < expected.length; i++) { | |
86 | assertEquals(vector.getObject(i), expected[i]); | |
87 | } | |
88 | } | |
89 | ||
90 | /** | |
91 | * Sort an array with null values come first. | |
92 | */ | |
93 | public static <U extends Comparable<U>> void sortArray(U[] array) { | |
94 | Arrays.sort(array, (a, b) -> { | |
95 | if (a == null || b == null) { | |
96 | if (a == null && b == null) { | |
97 | return 0; | |
98 | } | |
99 | ||
100 | // exactly one is null | |
101 | if (a == null) { | |
102 | return -1; | |
103 | } else { | |
104 | return 1; | |
105 | } | |
106 | } | |
107 | return a.compareTo(b); | |
108 | }); | |
109 | } | |
110 | ||
111 | /** | |
112 | * Generate a string with alphabetic characters only. | |
113 | */ | |
114 | static String generateRandomString(int length) { | |
115 | byte[] str = new byte[length]; | |
116 | final int lower = 'a'; | |
117 | final int upper = 'z'; | |
118 | ||
119 | for (int i = 0; i < length; i++) { | |
120 | // make r non-negative | |
121 | int r = random.nextInt() & Integer.MAX_VALUE; | |
122 | str[i] = (byte) (r % (upper - lower + 1) + lower); | |
123 | } | |
124 | ||
125 | return new String(str); | |
126 | } | |
127 | ||
128 | /** | |
129 | * Utility to generate data for testing. | |
130 | * @param <V> vector type. | |
131 | * @param <U> data element type. | |
132 | */ | |
133 | static class DataGenerator<V extends ValueVector, U extends Comparable<U>> { | |
134 | ||
135 | final Supplier<U> dataGenerator; | |
136 | ||
137 | final BiConsumer<V, U[]> vectorPopulator; | |
138 | ||
139 | final Class<U> clazz; | |
140 | ||
141 | DataGenerator( | |
142 | Supplier<U> dataGenerator, BiConsumer<V, U[]> vectorPopulator, Class<U> clazz) { | |
143 | this.dataGenerator = dataGenerator; | |
144 | this.vectorPopulator = vectorPopulator; | |
145 | this.clazz = clazz; | |
146 | } | |
147 | ||
148 | /** | |
149 | * Populate the vector according to the specified parameters. | |
150 | * @param vector the vector to populate. | |
151 | * @param length vector length. | |
152 | * @param nullFraction the fraction of null values. | |
153 | * @return An array with the same data as the vector. | |
154 | */ | |
155 | U[] populate(V vector, int length, double nullFraction) { | |
156 | U[] array = (U[]) Array.newInstance(clazz, length); | |
157 | for (int i = 0; i < length; i++) { | |
158 | double r = Math.random(); | |
159 | U value = r < nullFraction ? null : dataGenerator.get(); | |
160 | array[i] = value; | |
161 | } | |
162 | vectorPopulator.accept(vector, array); | |
163 | return array; | |
164 | } | |
165 | } | |
166 | } |