]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.algorithm.rank; | |
19 | ||
20 | import java.util.stream.IntStream; | |
21 | ||
22 | import org.apache.arrow.algorithm.sort.IndexSorter; | |
23 | import org.apache.arrow.algorithm.sort.VectorValueComparator; | |
24 | import org.apache.arrow.memory.BufferAllocator; | |
25 | import org.apache.arrow.util.Preconditions; | |
26 | import org.apache.arrow.vector.IntVector; | |
27 | import org.apache.arrow.vector.ValueVector; | |
28 | ||
29 | /** | |
30 | * Utility for calculating ranks of vector elements. | |
31 | * @param <V> the vector type | |
32 | */ | |
33 | public class VectorRank<V extends ValueVector> { | |
34 | ||
35 | private VectorValueComparator<V> comparator; | |
36 | ||
37 | /** | |
38 | * Vector indices. | |
39 | */ | |
40 | private IntVector indices; | |
41 | ||
42 | private final BufferAllocator allocator; | |
43 | ||
44 | /** | |
45 | * Constructs a vector rank utility. | |
46 | * @param allocator the allocator to use. | |
47 | */ | |
48 | public VectorRank(BufferAllocator allocator) { | |
49 | this.allocator = allocator; | |
50 | } | |
51 | ||
52 | /** | |
53 | * Given a rank r, gets the index of the element that is the rth smallest in the vector. | |
54 | * The operation is performed without changing the vector, and takes O(n) time, | |
55 | * where n is the length of the vector. | |
56 | * @param vector the vector from which to get the element index. | |
57 | * @param comparator the criteria for vector element comparison. | |
58 | * @param rank the rank to determine. | |
59 | * @return the element index with the given rank. | |
60 | */ | |
61 | public int indexAtRank(V vector, VectorValueComparator<V> comparator, int rank) { | |
62 | Preconditions.checkArgument(rank >= 0 && rank < vector.getValueCount()); | |
63 | try { | |
64 | indices = new IntVector("index vector", allocator); | |
65 | indices.allocateNew(vector.getValueCount()); | |
66 | IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i)); | |
67 | ||
68 | comparator.attachVector(vector); | |
69 | this.comparator = comparator; | |
70 | ||
71 | int pos = getRank(0, vector.getValueCount() - 1, rank); | |
72 | return indices.get(pos); | |
73 | } finally { | |
74 | indices.close(); | |
75 | } | |
76 | } | |
77 | ||
78 | private int getRank(int low, int high, int rank) { | |
79 | int mid = IndexSorter.partition(low, high, indices, comparator); | |
80 | if (mid < rank) { | |
81 | return getRank(mid + 1, high, rank); | |
82 | } else if (mid > rank) { | |
83 | return getRank(low, mid - 1, rank); | |
84 | } else { | |
85 | // mid == rank | |
86 | return mid; | |
87 | } | |
88 | } | |
89 | } |