]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/julia/Arrow/src/arraytypes/struct.jl
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / julia / Arrow / src / arraytypes / struct.jl
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 """
18 Arrow.Struct
19
20 An `ArrowVector` where each element is a "struct" of some kind with ordered, named fields, like a `NamedTuple{names, types}` or regular julia `struct`.
21 """
struct Struct{T, S} <: ArrowVector{T}
    # Per-element validity flags. `getindex` consults this only when `T` admits
    # `missing`; `setindex!` writes `false` here when storing `missing`.
    validity::ValidityBitmap
    # One child vector per field of the element type, in field order;
    # element `i` of the struct is built from `data[j][i]` for every field `j`.
    data::S # Tuple of ArrowVector
    # Number of elements; this is what `Base.size` reports.
    ℓ::Int
    # Optional string-to-string metadata, or `nothing` when there is none.
    metadata::Union{Nothing, Dict{String, String}}
end
28
# A `Struct` is one-dimensional; its length is the stored element count `ℓ`.
function Base.size(s::Struct)
    return (s.ℓ,)
end
30
# Materialize element `i` by reading index `i` from every child vector.
#
# * If `T` admits `missing` and the validity flag for `i` is unset, `missing`
#   is returned without touching the children.
# * NamedTuple-like element types are constructed from the tuple of field
#   values; plain structs are constructed by splatting the field values into
#   the type's constructor.
# * Any other `ArrowTypes.structtype` yields `nothing`.
@propagate_inbounds function Base.getindex(s::Struct{T}, i::Integer) where {T}
    @boundscheck checkbounds(s, i)
    NT = Base.nonmissingtype(T)
    kind = ArrowTypes.structtype(NT)
    asnamedtuple = kind === ArrowTypes.NAMEDTUPLE
    if !asnamedtuple && kind !== ArrowTypes.STRUCT
        return nothing
    end
    # `NT !== T` means `T` is a `Union` with `Missing`, so validity matters.
    if NT !== T && !s.validity[i]
        return missing
    end
    fieldvalues = ntuple(j -> s.data[j][i], fieldcount(NT))
    return asnamedtuple ? NT(fieldvalues) : NT(fieldvalues...)
end
48
# Store `v` at position `i`.
#
# `missing` only clears the validity flag; any other value is scattered
# field-by-field into the child vectors. Returns `v`.
@propagate_inbounds function Base.setindex!(s::Struct{T}, v::T, i::Integer) where {T}
    @boundscheck checkbounds(s, i)
    if v === missing
        @inbounds s.validity[i] = false
        return v
    end
    for j in 1:fieldcount(Base.nonmissingtype(T))
        @inbounds s.data[j][i] = getfield(v, j)
    end
    return v
end
62
# Lazy vector view over field number `i` of every element of `data`.
# `T` is the type of that field and `A` the type of the wrapped collection.
struct ToStruct{T, i, A} <: AbstractVector{T}
    data::A # eltype is NamedTuple or some struct
end

# Wrap `x`, selecting field `j`; the field type is looked up on the
# non-missing part of `x`'s element type.
function ToStruct(x::A, j::Integer) where {A}
    rowtype = Base.nonmissingtype(eltype(A))
    return ToStruct{fieldtype(rowtype, j), j, A}(x)
end
68
# `ToStruct` is addressed with a single linear index.
function Base.IndexStyle(::Type{<:ToStruct})
    return Base.IndexLinear()
end
# One entry per element of the wrapped collection.
function Base.size(x::ToStruct)
    return (length(x.data),)
end
71
# Return field `j` of the `i`-th wrapped element. A `missing` element has no
# fields to read, so `ArrowTypes.default(T)` is returned in its place.
Base.@propagate_inbounds function Base.getindex(A::ToStruct{T, j}, i::Integer) where {T, j}
    @boundscheck checkbounds(A, i)
    @inbounds row = A.data[i]
    if row === missing
        return ArrowTypes.default(T)
    end
    return getfield(row, j)
end
77
# An input that is already a `Struct` is returned unchanged.
function arrowvector(::StructType, x::Struct, i, nl, fi, de, ded, meta; kw...)
    return x
end
79
# Build a `Struct` from a collection `x` of NamedTuples or structs.
#
# A validity bitmap is computed for `x`, and each field `j` of the element
# type is converted to its own child vector through a `ToStruct` view, with
# the nesting level bumped by one and `j` passed as the field index. For plain
# struct element types the type is registered with `ArrowTypes` and recorded
# in `meta` (a fresh `Dict` is created when `meta` is `nothing`).
function arrowvector(::StructType, x, i, nl, fi, de, ded, meta; kw...)
    len = length(x)
    validity = ValidityBitmap(x)
    T = Base.nonmissingtype(eltype(x))
    if ArrowTypes.structtype(T) === ArrowTypes.STRUCT
        if meta === nothing
            meta = Dict{String, String}()
        end
        ArrowTypes.registertype!(T, T)
        ArrowTypes.getarrowtype!(meta, T)
    end
    # Children never inherit the parent's metadata, hence the `nothing`.
    children = ntuple(fieldcount(T)) do j
        arrowvector(ToStruct(x, j), i, nl + 1, j, de, ded, nothing; kw...)
    end
    return Struct{eltype(x), typeof(children)}(validity, children, len, meta)
end
92
# Compress a `Struct`: the validity bitmap becomes the only buffer of the
# result, and every child vector is compressed recursively as a child.
function compress(Z::Meta.CompressionType, comp, x::A) where {A <: Struct}
    len = length(x)
    nc = nullcount(x)
    buffers = [compress(Z, comp, x.validity)]
    children = Compressed[compress(Z, comp, child) for child in x.data]
    return Compressed{Z, A}(x, buffers, len, nc, children)
end
104
# Append the field node and validity buffer entry for `col`, then recurse
# into every child vector, threading the running buffer offset through.
# Returns the offset after the last child.
function makenodesbuffers!(col::Struct{T}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T}
    nrows = length(col)
    nnull = nullcount(col)
    push!(fieldnodes, FieldNode(nrows, nnull))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap: a zero-length buffer is recorded when nothing is null
    if nnull == 0
        bitmapbytes = 0
    else
        bitmapbytes = bitpackedbytes(nrows, alignment)
    end
    push!(fieldbuffers, Buffer(bufferoffset, bitmapbytes))
    @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    return foldl(col.data; init = bufferoffset + bitmapbytes) do offset, child
        makenodesbuffers!(child, fieldnodes, fieldbuffers, offset, alignment)
    end
end
120
# Write `col` to `io`: first its validity bitmap, then each child vector's
# buffers in field order.
function writebuffer(io, col::Struct, alignment)
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    writebitmap(io, col, alignment)
    # write values arrays
    foreach(col.data) do child
        writebuffer(io, child, alignment)
    end
    return
end