]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/python/examples/plasma/sorting/multimerge.pyx
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / python / examples / plasma / sorting / multimerge.pyx
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
16 # under the License.
17
18 # cython: profile=False
19 # distutils: language = c++
20 # cython: embedsignature = True
21
22 from libc.stdint cimport uintptr_t
23 from libcpp.vector cimport vector
24 from libcpp.pair cimport pair
25
26 import numpy as np
27
28 cimport numpy as np
29
30 cdef extern from "<queue>" namespace "std" nogil:
31 cdef cppclass priority_queue[T]:
32 priority_queue() except +
33 priority_queue(priority_queue&) except +
34 bint empty()
35 void pop()
36 void push(T&)
37 size_t size()
38 T& top()
39
40
41 def multimerge2d(*arrays):
42 """Merge a list of sorted 2d arrays into a sorted 2d array.
43
44 This assumes C style ordering for both input and output arrays. For
45 each input array we have array[i,0] <= array[i+1,0] and for the output
46 array the same will hold.
47
48 Ideally this code would be simpler and also support both C style
49 and Fortran style ordering.
50 """
51 cdef int num_arrays = len(arrays)
52 assert num_arrays > 0
53
54 cdef int num_cols = arrays[0].shape[1]
55
56 for i in range(num_arrays):
57 assert arrays[i].ndim == 2
58 assert arrays[i].dtype == np.float64
59 assert arrays[i].shape[1] == num_cols
60 assert not np.isfortran(arrays[i])
61
62 cdef vector[double*] data
63
64 # The indices vector keeps track of the index of the next row to process in
65 # each array.
66 cdef vector[int] indices = num_arrays * [0]
67
68 # The sizes vector stores the total number of elements that each array has.
69 cdef vector[int] sizes
70
71 cdef priority_queue[pair[double, int]] queue
72 cdef pair[double, int] top
73 cdef int num_rows = sum([array.shape[0] for array in arrays])
74 cdef np.ndarray[np.float64_t, ndim=2] result = np.zeros(
75 (num_rows, num_cols), dtype=np.float64)
76 cdef double* result_ptr = <double*> np.PyArray_DATA(result)
77 for i in range(num_arrays):
78 if arrays[i].size > 0:
79 sizes.push_back(arrays[i].size)
80 data.push_back(<double*> np.PyArray_DATA(arrays[i]))
81 queue.push(pair[double, int](-data[i][0], i))
82
83 cdef int curr_idx = 0
84 cdef int j
85 cdef int col = 0
86
87 for j in range(num_rows):
88 top = queue.top()
89 for col in range(num_cols):
90 result_ptr[curr_idx + col] = (
91 data[top.second][indices[top.second] + col])
92
93 indices[top.second] += num_cols
94 curr_idx += num_cols
95
96 queue.pop()
97 if indices[top.second] < sizes[top.second]:
98 queue.push(
99 pair[double, int](-data[top.second][indices[top.second]],
100 top.second))
101
102 return result