]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, software | |
12 | # distributed under the License is distributed on an "AS IS" BASIS, | |
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | # See the License for the specific language governing permissions and | |
15 | # limitations under the License. | |
16 | ||
"""
    Arrow.FixedSizeList

An `ArrowVector` whose elements are each a "fixed size" list of some kind, for example an
`NTuple{N, T}`.

The components of every element live back to back in the flat `data` vector: element `i`
occupies positions `(i - 1) * N + 1` through `i * N`, where `N` is the per-element size
reported by `ArrowTypes.getsize`. For element types that admit `missing`, `validity`
decides whether slot `i` reads back as a value or as `missing`.
"""
struct FixedSizeList{T, A <: AbstractVector} <: ArrowVector{T}
    # need to hold a reference to arrow memory blob
    arrow::Vector{UInt8}
    # per-element validity bits, consulted when `T` allows `missing`
    validity::ValidityBitmap
    # flattened element components
    data::A
    # number of list elements (what `size` reports)
    ℓ::Int
    # optional string key/value metadata
    metadata::Union{Nothing, Dict{String, String}}
end
29 | ||
# One-dimensional: the column length is the stored element count `ℓ`.
function Base.size(l::FixedSizeList)
    return (l.ℓ,)
end
31 | ||
# Read element `i`: gather its `N` consecutive entries of the flat `data` vector into a
# tuple and convert that tuple to `T`. When `T` admits `missing`, the validity bitmap is
# checked first and `missing` is returned for an invalid slot without touching `data`.
@propagate_inbounds function Base.getindex(l::FixedSizeList{T}, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    S = Base.nonmissingtype(T)
    N = ArrowTypes.getsize(S)
    off = (i - 1) * N  # zero-based start of element `i` inside `data`
    if S !== T && !l.validity[i]
        return missing
    end
    return ArrowTypes.arrowconvert(T, ntuple(j -> l.data[off + j], N))
end
42 | ||
# Store `v` at element `i` and return `v`.
# A `missing` value only clears the validity bit; any other value has its `N` components
# copied into the flat `data` vector starting right after offset `(i - 1) * N`.
# NOTE(review): the non-missing path does not touch the validity bitmap, so a slot that was
# previously marked invalid stays invalid — confirm this is intended.
@propagate_inbounds function Base.setindex!(l::FixedSizeList{T}, v::T, i::Integer) where {T}
    @boundscheck checkbounds(l, i)
    if v === missing
        @inbounds l.validity[i] = false
        return v
    end
    N = ArrowTypes.getsize(Base.nonmissingtype(T))
    base = (i - 1) * N
    @inbounds for j in 1:N
        l.data[base + j] = v[j]
    end
    return v
end
56 | ||
# Lazy, equally spaced flattener: an `AbstractVector{T}` view over a collection whose
# elements each contribute `N` values, without materializing the flattened data.
struct ToFixedSizeList{T, N, A} <: AbstractVector{T}
    # `A` is an AbstractVector of AbstractVector or AbstractString
    data::A
end
61 | ||
# Wrap `input` in a `ToFixedSizeList`, deriving the flattened element type and the
# per-element size from the non-missing element type of `input` (typically `NTuple{N, T}`).
function ToFixedSizeList(input)
    elT = Base.nonmissingtype(eltype(input))
    T = ArrowTypes.gettype(elT)
    N = ArrowTypes.getsize(elT)
    return ToFixedSizeList{T, N, typeof(input)}(input)
end
66 | ||
# The flattened view is addressed by a single linear index.
function Base.IndexStyle(::Type{<:ToFixedSizeList})
    return Base.IndexLinear()
end
# Flattened length: `N` values for every element of the wrapped collection.
function Base.size(x::ToFixedSizeList{T, N}) where {T, N}
    return (N * length(x.data),)
end
69 | ||
# Translate flat index `i` into (element, position-within-element) via `fldmod1` and fetch
# that component. A `missing` element yields `ArrowTypes.default(T)` for each of its slots.
Base.@propagate_inbounds function Base.getindex(A::ToFixedSizeList{T, N}, i::Integer) where {T, N}
    @boundscheck checkbounds(A, i)
    chunk, within = fldmod1(i, N)
    @inbounds elem = A.data[chunk]
    if elem === missing
        return ArrowTypes.default(T)
    end
    return @inbounds elem[within]
end
76 | ||
# Efficient iteration: the state carries the flat index `i`, the current element `chunk`,
# the position `chunk_i` inside that element, and the total flattened length `len`, so no
# division is needed per step. `missing` elements produce `ArrowTypes.default(T)`.
@inline function Base.iterate(A::ToFixedSizeList{T, N}, (i, chunk, chunk_i, len)=(1, 1, 1, length(A))) where {T, N}
    if i > len
        return nothing
    end
    @inbounds elem = A.data[chunk]
    @inbounds val = elem === missing ? ArrowTypes.default(T) : elem[chunk_i]
    # after the last component of an element, move on to the start of the next one
    wrapped = chunk_i == N
    next_chunk = wrapped ? chunk + 1 : chunk
    next_pos = wrapped ? 1 : chunk_i + 1
    return val, (i + 1, next_chunk, next_pos, len)
end
90 | ||
# `x` is already a `FixedSizeList`: hand it back unchanged; the other arguments are unused.
function arrowvector(::FixedSizeListType, x::FixedSizeList, i, nl, fi, de, ded, meta; kw...)
    return x
end
92 | ||
# Build a `FixedSizeList` from `x`: compute its validity bitmap, lazily flatten it, and
# either keep the flattened view directly (when its element type is `UInt8`) or convert it
# into a nested arrow vector one nesting level deeper. The nested vector gets no metadata;
# `meta` is attached to the resulting `FixedSizeList` itself.
function arrowvector(::FixedSizeListType, x, i, nl, fi, de, ded, meta; kw...)
    n = length(x)
    bitmap = ValidityBitmap(x)
    flattened = ToFixedSizeList(x)
    child = eltype(flattened) == UInt8 ? flattened :
        arrowvector(flattened, i, nl + 1, fi, de, ded, nothing; kw...)
    return FixedSizeList{eltype(x), typeof(child)}(UInt8[], bitmap, child, n, meta)
end
104 | ||
# Compress a `FixedSizeList`: the validity bitmap is always the first buffer. The data is
# compressed as a second buffer when its element type is `UInt8`, and as a child otherwise.
function compress(Z::Meta.CompressionType, comp, x::FixedSizeList{T, A}) where {T, A}
    n = length(x)
    nulls = nullcount(x)
    buffers = [compress(Z, comp, x.validity)]
    children = Compressed[]
    dest = eltype(A) == UInt8 ? buffers : children
    push!(dest, compress(Z, comp, x.data))
    return Compressed{Z, typeof(x)}(x, buffers, n, nulls, children)
end
118 | ||
# Append the field node and buffer descriptors for `col` to `fieldnodes` / `fieldbuffers`
# and return the buffer offset that follows them.
# The validity buffer is recorded with length 0 when the column has no nulls. `UInt8` data
# is recorded as one more buffer on this column; any other data is delegated to the nested
# column's own `makenodesbuffers!`.
function makenodesbuffers!(col::FixedSizeList{T, A}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, A}
    n = length(col)
    nulls = nullcount(col)
    push!(fieldnodes, FieldNode(n, nulls))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap
    bitmapbytes = nulls == 0 ? 0 : bitpackedbytes(n, alignment)
    push!(fieldbuffers, Buffer(bufferoffset, bitmapbytes))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    bufferoffset += bitmapbytes
    if eltype(A) !== UInt8
        # nested child column describes its own nodes and buffers
        return makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end
    # raw bytes: the buffer records the unpadded length, the offset advances by the padded one
    databytes = ArrowTypes.getsize(Base.nonmissingtype(T)) * n
    push!(fieldbuffers, Buffer(bufferoffset, databytes))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    return bufferoffset + padding(databytes, alignment)
end
139 | ||
# Write `col` to `io`: first its validity bitmap, then its values. `UInt8` data is written
# as a raw byte array followed by zero padding; any other data is written by the nested
# column's own `writebuffer`. Returns `nothing`.
function writebuffer(io, col::FixedSizeList{T, A}, alignment) where {T, A}
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    writebitmap(io, col, alignment)
    # write values array
    if eltype(A) !== UInt8
        writebuffer(io, col.data, alignment)
        return
    end
    n = writearray(io, UInt8, col.data)
    @debug 1 "writing array: col = $(typeof(col.data)), n = $n, padded = $(padding(n, alignment))"
    writezeros(io, paddinglength(n, alignment))
    return
end