]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.algorithm.sort; | |
19 | ||
20 | import org.apache.arrow.memory.ArrowBuf; | |
21 | import org.apache.arrow.util.Preconditions; | |
22 | import org.apache.arrow.vector.BaseVariableWidthVector; | |
23 | import org.apache.arrow.vector.BitVectorHelper; | |
24 | import org.apache.arrow.vector.IntVector; | |
25 | ||
26 | import io.netty.util.internal.PlatformDependent; | |
27 | ||
28 | /** | |
29 | * Default sorter for variable-width vectors. | |
30 | * It is an out-of-place sort, with time complexity O(n*log(n)). | |
31 | * @param <V> vector type. | |
32 | */ | |
33 | public class VariableWidthOutOfPlaceVectorSorter<V extends BaseVariableWidthVector> | |
34 | implements OutOfPlaceVectorSorter<V> { | |
35 | ||
36 | protected IndexSorter<V> indexSorter = new IndexSorter<>(); | |
37 | ||
38 | @Override | |
39 | public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) { | |
40 | comparator.attachVector(srcVector); | |
41 | ||
42 | // buffers referenced in the sort | |
43 | ArrowBuf srcValueBuffer = srcVector.getDataBuffer(); | |
44 | ArrowBuf srcOffsetBuffer = srcVector.getOffsetBuffer(); | |
45 | ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer(); | |
46 | ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); | |
47 | ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); | |
48 | ||
49 | // check buffer size | |
50 | Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), | |
51 | "Not enough capacity for the validity buffer of the dst vector. " + | |
52 | "Expected capacity %s, actual capacity %s", | |
53 | (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); | |
54 | Preconditions.checkArgument( | |
55 | dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, | |
56 | "Not enough capacity for the offset buffer of the dst vector. " + | |
57 | "Expected capacity %s, actual capacity %s", | |
58 | (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity()); | |
59 | long dataSize = srcVector.getOffsetBuffer().getInt( | |
60 | srcVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); | |
61 | Preconditions.checkArgument( | |
62 | dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " + | |
63 | "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity()); | |
64 | ||
65 | // sort value indices | |
66 | try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { | |
67 | sortedIndices.allocateNew(srcVector.getValueCount()); | |
68 | sortedIndices.setValueCount(srcVector.getValueCount()); | |
69 | indexSorter.sort(srcVector, sortedIndices, comparator); | |
70 | ||
71 | int dstOffset = 0; | |
72 | dstOffsetBuffer.setInt(0, 0); | |
73 | ||
74 | // copy sorted values to the output vector | |
75 | for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) { | |
76 | int srcIndex = sortedIndices.get(dstIndex); | |
77 | if (srcVector.isNull(srcIndex)) { | |
78 | BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); | |
79 | } else { | |
80 | BitVectorHelper.setBit(dstValidityBuffer, dstIndex); | |
81 | int srcOffset = srcOffsetBuffer.getInt(srcIndex * BaseVariableWidthVector.OFFSET_WIDTH); | |
82 | int valueLength = srcOffsetBuffer.getInt((srcIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH) - srcOffset; | |
83 | PlatformDependent.copyMemory( | |
84 | srcValueBuffer.memoryAddress() + srcOffset, | |
85 | dstValueBuffer.memoryAddress() + dstOffset, | |
86 | valueLength); | |
87 | dstOffset += valueLength; | |
88 | } | |
89 | dstOffsetBuffer.setInt((dstIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffset); | |
90 | } | |
91 | } | |
92 | } | |
93 | } |