]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/julia/Arrow/src/arraytypes/fixedsizelist.jl
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / julia / Arrow / src / arraytypes / fixedsizelist.jl
CommitLineData
1d09f67e
TL
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
"""
    Arrow.FixedSizeList

An `ArrowVector` whose elements are each a "fixed size" list of some kind,
for example an `NTuple{N, T}`.

The values of all lists are held back-to-back in `data`; element `i` is read
from `data` starting just past offset `(i - 1) * N`, where `N` is the per-list
size reported by `ArrowTypes.getsize`. `validity` is consulted on reads when
the element type admits `missing`, and `ℓ` is the number of lists (the value
reported by `size`).
"""
struct FixedSizeList{T, A <: AbstractVector} <: ArrowVector{T}
    # reference to the arrow memory blob; it must be held by this object
    arrow::Vector{UInt8}
    # per-element validity bits (false => the element reads as `missing`)
    validity::ValidityBitmap
    # flattened list values
    data::A
    # number of fixed-size lists in the vector
    ℓ::Int
    # optional custom metadata attached to the column
    metadata::Union{Nothing, Dict{String, String}}
end
29
# A FixedSizeList is one-dimensional; its length is the stored list count `ℓ`.
function Base.size(l::FixedSizeList)
    return (l.ℓ,)
end
31
# Read the `i`-th list: gather the `N` consecutive entries of `l.data` that
# follow offset `(i - 1) * N` into a tuple and hand it to
# `ArrowTypes.arrowconvert(T, ...)`. When `T` admits `missing`, the validity
# bitmap decides whether `missing` is returned instead.
@propagate_inbounds function Base.getindex(l::FixedSizeList{T}, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    S = Base.nonmissingtype(T)
    N = ArrowTypes.getsize(S)
    off = (i - 1) * N
    # the bitmap is only consulted for element types that can be missing
    if S !== T && !l.validity[i]
        return missing
    end
    return ArrowTypes.arrowconvert(T, ntuple(j -> l.data[off + j], N))
end
42
# Store `v` as the `i`-th list and return `v`.
#
# * `v === missing`: only the validity bit for `i` is cleared; `l.data` is
#   left untouched.
# * otherwise: the `N` components `v[1] … v[N]` are written to `l.data`
#   starting just past offset `(i - 1) * N`, where `N` is
#   `ArrowTypes.getsize` of the non-missing element type. If the slot
#   currently reads as null, its validity bit is set so that a subsequent
#   `getindex` returns the stored value rather than `missing`.
@propagate_inbounds function Base.setindex!(l::FixedSizeList{T}, v::T, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    if v === missing
        @inbounds l.validity[i] = false
    else
        S = Base.nonmissingtype(T)
        if S !== T
            # Only touch the bitmap when the bit is actually recorded as null:
            # a slot that already reads as valid needs no update, and this
            # avoids writing to a bitmap that is not reporting any nulls.
            @inbounds isnull = !l.validity[i]
            if isnull
                @inbounds l.validity[i] = true
            end
        end
        N = ArrowTypes.getsize(S)
        off = (i - 1) * N
        for j in 1:N
            @inbounds l.data[off + j] = v[j]
        end
    end
    return v
end
56
# Lazy, equally spaced flattener: wraps a vector of lists and presents it as a
# single flat `AbstractVector{T}` without copying.
#   T - element type of the flattened view
#   N - number of flattened entries contributed by each wrapped element
#   A - type of the wrapped input
struct ToFixedSizeList{T, N, A} <: AbstractVector{T}
    # the wrapped input; A is AbstractVector of AbstractVector or AbstractString
    data::A
end
61
# Wrap `input` in a lazy flattener. The flattened element type and the
# per-element size are both derived from the non-missing element type of
# `input` via `ArrowTypes.gettype` and `ArrowTypes.getsize`.
function ToFixedSizeList(input)
    # typically NTuple{N, T}
    listtype = Base.nonmissingtype(eltype(input))
    T = ArrowTypes.gettype(listtype)
    N = ArrowTypes.getsize(listtype)
    return ToFixedSizeList{T, N, typeof(input)}(input)
end
66
# The flattened view is addressed with a single linear index.
function Base.IndexStyle(::Type{<:ToFixedSizeList})
    return Base.IndexLinear()
end
# Each wrapped element contributes exactly `N` flattened entries.
function Base.size(x::ToFixedSizeList{T, N}) where {T, N}
    return (N * length(x.data),)
end
69
# Flat index `i` maps to wrapped element `chunk` and position `pos` inside it
# (both 1-based, via `fldmod1`). A `missing` wrapped element yields
# `ArrowTypes.default(T)` for every one of its positions.
Base.@propagate_inbounds function Base.getindex(A::ToFixedSizeList{T, N}, i::Integer) where {T, N}
    @boundscheck checkbounds(A, i)
    chunk, pos = fldmod1(i, N)
    @inbounds list = A.data[chunk]
    if list === missing
        return ArrowTypes.default(T)
    end
    return @inbounds list[pos]
end
76
# Efficient iteration: the state carries the flat index `i`, the current
# wrapped element `chunk`, the position `chunk_i` inside it, and the total
# flat length `len`, so no division is needed per step. A `missing` wrapped
# element produces `ArrowTypes.default(T)` for each of its positions.
@inline function Base.iterate(A::ToFixedSizeList{T, N}, (i, chunk, chunk_i, len)=(1, 1, 1, length(A))) where {T, N}
    i > len && return nothing
    @inbounds list = A.data[chunk]
    @inbounds val = list === missing ? ArrowTypes.default(T) : list[chunk_i]
    # advance within the current element, or roll over to the next one
    nextchunk, nextpos = chunk_i == N ? (chunk + 1, 1) : (chunk, chunk_i + 1)
    return val, (i + 1, nextchunk, nextpos, len)
end
90
# Input that is already a FixedSizeList is returned unchanged.
function arrowvector(::FixedSizeListType, x::FixedSizeList, i, nl, fi, de, ded, meta; kw...)
    return x
end
92
# Build a FixedSizeList from `x`: compute its validity bitmap, flatten it
# lazily with `ToFixedSizeList`, and either keep the flat view as-is (when it
# yields `UInt8`) or convert it recursively with `arrowvector` at nesting
# level `nl + 1`. The child conversion receives `nothing` as metadata; `meta`
# is attached to the resulting FixedSizeList only.
function arrowvector(::FixedSizeListType, x, i, nl, fi, de, ded, meta; kw...)
    nlists = length(x)
    validity = ValidityBitmap(x)
    flat = ToFixedSizeList(x)
    data = eltype(flat) == UInt8 ? flat :
        arrowvector(flat, i, nl + 1, fi, de, ded, nothing; kw...)
    return FixedSizeList{eltype(x), typeof(data)}(UInt8[], validity, data, nlists, meta)
end
104
# Compress a FixedSizeList. The validity bitmap is always the first buffer;
# the compressed values go into `buffers` when the data's element type is
# `UInt8`, and into `children` otherwise.
function compress(Z::Meta.CompressionType, comp, x::FixedSizeList{T, A}) where {T, A}
    len = length(x)
    nc = nullcount(x)
    buffers = [compress(Z, comp, x.validity)]
    children = Compressed[]
    payload = compress(Z, comp, x.data)
    if eltype(A) == UInt8
        push!(buffers, payload)
    else
        push!(children, payload)
    end
    return Compressed{Z, typeof(x)}(x, buffers, len, nc, children)
end
118
# Append the field node and buffer descriptors for `col` to `fieldnodes` /
# `fieldbuffers`, starting at `bufferoffset`, and return the offset that
# follows everything described here.
#
# One validity buffer is always recorded (length 0 when there are no nulls).
# When the data's element type is `UInt8` a single values buffer follows;
# otherwise the description is delegated to the child `col.data`.
function makenodesbuffers!(col::FixedSizeList{T, A}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, A}
    nlists = length(col)
    nulls = nullcount(col)
    push!(fieldnodes, FieldNode(nlists, nulls))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap
    validitybytes = nulls == 0 ? 0 : bitpackedbytes(nlists, alignment)
    push!(fieldbuffers, Buffer(bufferoffset, validitybytes))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += validitybytes
    if eltype(A) !== UInt8
        # non-byte values: the child column describes its own nodes and buffers
        return makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end
    # byte values: one buffer of (per-list size) * (number of lists) bytes
    databytes = ArrowTypes.getsize(Base.nonmissingtype(T)) * nlists
    push!(fieldbuffers, Buffer(bufferoffset, databytes))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    return bufferoffset + padding(databytes, alignment)
end
139
# Write `col` to `io`: first the validity bitmap, then the values. Byte data
# is written directly and followed by zero padding computed from `alignment`;
# any other data is written by the child column's own `writebuffer`.
# Returns `nothing`.
function writebuffer(io, col::FixedSizeList{T, A}, alignment) where {T, A}
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    writebitmap(io, col, alignment)
    # write values array
    if eltype(A) !== UInt8
        writebuffer(io, col.data, alignment)
        return
    end
    n = writearray(io, UInt8, col.data)
    @debug 1 "writing array: col = $(typeof(col.data)), n = $n, padded = $(padding(n, alignment))"
    writezeros(io, paddinglength(n, alignment))
    return
end