1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "arrow/util/compression.h"
24 #include "arrow/result.h"
25 #include "arrow/status.h"
26 #include "arrow/util/compression_internal.h"
27 #include "arrow/util/logging.h"
// Validate that a compression level may be configured for codec `type`.
//
// Returns Status::Invalid with a fixed message when
// Codec::SupportsCompressionLevel(type) reports false.
// NOTE(review): the remainder of this function (the return taken when the
// codec does support levels, plus the closing braces) is not present in this
// copy of the file -- presumably it returns Status::OK(); confirm against
// upstream.
34 Status
CheckSupportsCompressionLevel(Compression::type type
) {
35 if (!Codec::SupportsCompressionLevel(type
)) {
36 return Status::Invalid(
37 "The specified codec does not support the compression level parameter");
44 int Codec::UseDefaultCompressionLevel() { return kUseDefaultCompressionLevel
; }
46 Status
Codec::Init() { return Status::OK(); }
// Return the textual name of compression type `t` as a const reference.
//
// The candidate names are function-local static strings, so a reference to
// any of them stays valid after the call returns.
// NOTE(review): the `switch` header, the per-case `return` statements and the
// fallback branch are missing from this copy of the file. Judging by
// GetCompressionType(), LZ4 presumably maps to "lz4_raw" and LZ4_FRAME to
// "lz4"; Create() compares the result against "unknown" for unrecognized
// values -- confirm against upstream.
48 const std::string
& Codec::GetCodecAsString(Compression::type t
) {
// One static string per codec name, plus "unknown".
49 static const std::string uncompressed
= "uncompressed", snappy
= "snappy",
50 gzip
= "gzip", lzo
= "lzo", brotli
= "brotli",
51 lz4_raw
= "lz4_raw", lz4
= "lz4", lz4_hadoop
= "lz4_hadoop",
52 zstd
= "zstd", bz2
= "bz2", unknown
= "unknown";
// One case label per enumerator that has a name above.
55 case Compression::UNCOMPRESSED
:
57 case Compression::SNAPPY
:
59 case Compression::GZIP
:
61 case Compression::LZO
:
63 case Compression::BROTLI
:
65 case Compression::LZ4
:
67 case Compression::LZ4_FRAME
:
69 case Compression::LZ4_HADOOP
:
71 case Compression::ZSTD
:
73 case Compression::BZ2
:
80 Result
<Compression::type
> Codec::GetCompressionType(const std::string
& name
) {
81 if (name
== "uncompressed") {
82 return Compression::UNCOMPRESSED
;
83 } else if (name
== "gzip") {
84 return Compression::GZIP
;
85 } else if (name
== "snappy") {
86 return Compression::SNAPPY
;
87 } else if (name
== "lzo") {
88 return Compression::LZO
;
89 } else if (name
== "brotli") {
90 return Compression::BROTLI
;
91 } else if (name
== "lz4_raw") {
92 return Compression::LZ4
;
93 } else if (name
== "lz4") {
94 return Compression::LZ4_FRAME
;
95 } else if (name
== "lz4_hadoop") {
96 return Compression::LZ4_HADOOP
;
97 } else if (name
== "zstd") {
98 return Compression::ZSTD
;
99 } else if (name
== "bz2") {
100 return Compression::BZ2
;
102 return Status::Invalid("Unrecognized compression type: ", name
);
// Report whether a compression level can be chosen for `codec`.
//
// The visible case labels single out GZIP, BROTLI, ZSTD and BZ2.
// NOTE(review): the `switch` header and every `return` statement are missing
// from this copy of the file; presumably the listed codecs yield true and all
// other values yield false -- confirm against upstream.
106 bool Codec::SupportsCompressionLevel(Compression::type codec
) {
108 case Compression::GZIP
:
109 case Compression::BROTLI
:
110 case Compression::ZSTD
:
111 case Compression::BZ2
:
118 Result
<int> Codec::MaximumCompressionLevel(Compression::type codec_type
) {
119 RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type
));
120 ARROW_ASSIGN_OR_RAISE(auto codec
, Codec::Create(codec_type
));
121 return codec
->maximum_compression_level();
124 Result
<int> Codec::MinimumCompressionLevel(Compression::type codec_type
) {
125 RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type
));
126 ARROW_ASSIGN_OR_RAISE(auto codec
, Codec::Create(codec_type
));
127 return codec
->minimum_compression_level();
130 Result
<int> Codec::DefaultCompressionLevel(Compression::type codec_type
) {
131 RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type
));
132 ARROW_ASSIGN_OR_RAISE(auto codec
, Codec::Create(codec_type
));
133 return codec
->default_compression_level();
// Construct and initialize a codec for `codec_type`.
//
// Failure modes visible in this function:
//  - NotImplemented("LZO codec not implemented") for LZO, checked after
//    IsAvailable(codec_type) has reported false;
//  - Invalid("Unrecognized codec") when GetCodecAsString() yields "unknown";
//  - NotImplemented("Support for codec '<name>..." for the remaining
//    unavailable codecs;
//  - Invalid when `compression_level` differs from
//    kUseDefaultCompressionLevel but SupportsCompressionLevel(codec_type)
//    is false;
//  - whatever error the new codec's Init() returns.
// Only the GZIP, BROTLI, ZSTD and BZ2 factories receive `compression_level`;
// the Snappy and LZ4-family factories are called without arguments.
// NOTE(review): several lines are missing from this copy of the file (closing
// braces, the `#endif`/`break` lines of each case, the body of the
// UNCOMPRESSED case, and the tail of the "Support for codec" message) --
// confirm against upstream before editing the code.
136 Result
<std::unique_ptr
<Codec
>> Codec::Create(Compression::type codec_type
,
137 int compression_level
) {
138 if (!IsAvailable(codec_type
)) {
139 if (codec_type
== Compression::LZO
) {
140 return Status::NotImplemented("LZO codec not implemented");
// NOTE(review): GetCodecAsString() returns a const reference, so `auto`
// makes a copy of the string here; `const auto&` would avoid it.
143 auto name
= GetCodecAsString(codec_type
);
144 if (name
== "unknown") {
145 return Status::Invalid("Unrecognized codec");
148 return Status::NotImplemented("Support for codec '", GetCodecAsString(codec_type
),
// An explicit level is accepted only for codecs that support one.
152 if (compression_level
!= kUseDefaultCompressionLevel
&&
153 !SupportsCompressionLevel(codec_type
)) {
154 return Status::Invalid("Codec '", GetCodecAsString(codec_type
),
155 "' doesn't support setting a compression level.");
// Dispatch to the factory for the requested codec; each factory call is
// guarded by the matching ARROW_WITH_* build flag.
158 std::unique_ptr
<Codec
> codec
;
159 switch (codec_type
) {
160 case Compression::UNCOMPRESSED
:
162 case Compression::SNAPPY
:
163 #ifdef ARROW_WITH_SNAPPY
164 codec
= internal::MakeSnappyCodec();
167 case Compression::GZIP
:
168 #ifdef ARROW_WITH_ZLIB
169 codec
= internal::MakeGZipCodec(compression_level
);
172 case Compression::BROTLI
:
173 #ifdef ARROW_WITH_BROTLI
174 codec
= internal::MakeBrotliCodec(compression_level
);
177 case Compression::LZ4
:
178 #ifdef ARROW_WITH_LZ4
179 codec
= internal::MakeLz4RawCodec();
182 case Compression::LZ4_FRAME
:
183 #ifdef ARROW_WITH_LZ4
184 codec
= internal::MakeLz4FrameCodec();
187 case Compression::LZ4_HADOOP
:
188 #ifdef ARROW_WITH_LZ4
189 codec
= internal::MakeLz4HadoopRawCodec();
192 case Compression::ZSTD
:
193 #ifdef ARROW_WITH_ZSTD
194 codec
= internal::MakeZSTDCodec(compression_level
);
197 case Compression::BZ2
:
198 #ifdef ARROW_WITH_BZ2
199 codec
= internal::MakeBZ2Codec(compression_level
);
// A codec is expected to have been assigned by this point; it is then
// initialized, and any Init() error is returned to the caller.
206 DCHECK_NE(codec
, nullptr);
207 RETURN_NOT_OK(codec
->Init());
// Ownership of the codec is moved into the returned Result.
208 return std::move(codec
);
211 bool Codec::IsAvailable(Compression::type codec_type
) {
212 switch (codec_type
) {
213 case Compression::UNCOMPRESSED
:
215 case Compression::SNAPPY
:
216 #ifdef ARROW_WITH_SNAPPY
221 case Compression::GZIP
:
222 #ifdef ARROW_WITH_ZLIB
227 case Compression::LZO
:
229 case Compression::BROTLI
:
230 #ifdef ARROW_WITH_BROTLI
235 case Compression::LZ4
:
236 case Compression::LZ4_FRAME
:
237 case Compression::LZ4_HADOOP
:
238 #ifdef ARROW_WITH_LZ4
243 case Compression::ZSTD
:
244 #ifdef ARROW_WITH_ZSTD
249 case Compression::BZ2
:
250 #ifdef ARROW_WITH_BZ2