]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/arrow/julia/Arrow/src/arraytypes/compressed.jl
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / julia / Arrow / src / arraytypes / compressed.jl
diff --git a/ceph/src/arrow/julia/Arrow/src/arraytypes/compressed.jl b/ceph/src/arrow/julia/Arrow/src/arraytypes/compressed.jl
new file mode 100644 (file)
index 0000000..5f8e67e
--- /dev/null
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+struct CompressedBuffer  # one buffer after compression, together with its size before compression
+    data::Vector{UInt8}  # compressed bytes; when empty, `writearray` writes nothing for this buffer
+    uncompressedlength::Int64  # byte length before compression; `writearray` writes it ahead of `data`
+end
+
+"""
+    Arrow.Compressed
+
+Represents the compressed version of an [`ArrowVector`](@ref).
+Holds a reference to the original column. May have `Compressed`
+children for nested array types.
+"""
+struct Compressed{Z, A}  # Z: value returned by `compressiontype`; A: type of the `data` field
+    data::A  # the wrapped column; `getmetadata` is forwarded to it
+    buffers::Vector{CompressedBuffer}  # this column's own buffers, iterated in order when writing
+    len::Int64  # element count; returned by `length` and stored in the field node
+    nullcount::Int64  # null count stored in the field node
+    children::Vector{Compressed}  # nested columns, visited after `buffers`
+end
+
+# Element count of the column, as stored in its `len` field.
+function Base.length(c::Compressed)
+    return c.len
+end
+# The element type comes from the type parameter `A` (the type of the `data` field).
+function Base.eltype(c::Compressed{Z, A}) where {Z, A}
+    return eltype(A)
+end
+# Metadata lookup is forwarded to the wrapped `data` field.
+function getmetadata(x::Compressed)
+    return getmetadata(x.data)
+end
+# The compression kind is carried as the first type parameter of `Compressed`.
+function compressiontype(c::Compressed{Z}) where {Z}
+    return Z
+end
+
+# Compress the memory backing `x` with `comp` and wrap the result in a
+# `CompressedBuffer` whose uncompressed length is the byte size of `x`.
+# `Z` is not used in the body of this method.
+function compress(Z::Meta.CompressionType, comp, x::Array)
+    nbytes = sizeof(x)
+    # `x` must stay rooted while its memory is aliased through a raw pointer.
+    compressed = GC.@preserve x begin
+        rawptr = convert(Ptr{UInt8}, pointer(x))
+        # Byte-level alias of `x`; no copy is made here.
+        rawbytes = unsafe_wrap(Array, rawptr, nbytes)
+        transcode(comp, rawbytes)
+    end
+    return CompressedBuffer(compressed, nbytes)
+end
+
+# Fallback for any other input: convert it to an `Array` first, then
+# compress it through the `Array` method.
+function compress(Z::Meta.CompressionType, comp, x)
+    dense = convert(Array, x)
+    return compress(Z, comp, dense)
+end
+
+# Validity bitmaps: when the null count `v.nc` is zero an empty buffer is
+# produced; otherwise the `cld(v.ℓ, 8)` bytes starting at `v.pos` are compressed.
+function compress(Z::Meta.CompressionType, comp, v::ValidityBitmap)
+    if v.nc == 0
+        return CompressedBuffer(UInt8[], 0)
+    end
+    nbytes = cld(v.ℓ, 8)
+    lastbyte = v.pos + nbytes - 1
+    bitmapbytes = view(v.bytes, v.pos:lastbyte)
+    return compress(Z, comp, bitmapbytes)
+end
+
+# Append the field node and buffer descriptors for `col` (and, recursively,
+# for its children) to `fieldnodes` / `fieldbuffers`.
+# Starts at `bufferoffset` and returns the offset following everything added.
+function makenodesbuffers!(col::Compressed, fieldnodes, fieldbuffers, bufferoffset, alignment)
+    node = FieldNode(col.len, col.nullcount)
+    push!(fieldnodes, node)
+    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
+    for cbuf in col.buffers
+        nbytes = length(cbuf.data)
+        # Non-empty buffers take 8 extra bytes (the length prefix emitted by
+        # `writearray`); empty buffers take no space at all.
+        storedlen = iszero(nbytes) ? 0 : nbytes + 8
+        push!(fieldbuffers, Buffer(bufferoffset, storedlen))
+        @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
+        bufferoffset += padding(storedlen, alignment)
+    end
+    # Thread the running offset through every child, in order.
+    return foldl(col.children; init=bufferoffset) do offset, child
+        makenodesbuffers!(child, fieldnodes, fieldbuffers, offset, alignment)
+    end
+end
+
+# Write one compressed buffer to `io`: first the uncompressed length, then the
+# compressed bytes. Returns the number of bytes written; an empty buffer
+# writes nothing and returns 0.
+function writearray(io, b::CompressedBuffer)
+    isempty(b.data) && return 0
+    written = Base.write(io, b.uncompressedlength)
+    @debug 1 "writing compressed buffer: uncompressedlength = $(b.uncompressedlength), n = $(length(b.data))"
+    @debug 2 b.data
+    written += Base.write(io, b.data)
+    return written
+end
+
+# Write every buffer of `col` to `io`, following each one with zero bytes as
+# computed by `paddinglength`, then do the same for each child column.
+function writebuffer(io, col::Compressed, alignment)
+    @debug 1 "writebuffer: col = $(typeof(col))"
+    @debug 2 col
+    foreach(col.buffers) do cbuf
+        nwritten = writearray(io, cbuf)
+        writezeros(io, paddinglength(nwritten, alignment))
+    end
+    foreach(child -> writebuffer(io, child, alignment), col.children)
+    return nothing
+end