1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "arrow/result.h"
19 #include "arrow/util/logging.h"
20 #include "arrow/util/string.h"
21 #include "arrow/util/string_view.h"
23 #include "parquet/encryption/crypto_factory.h"
24 #include "parquet/encryption/encryption_internal.h"
25 #include "parquet/encryption/file_key_material_store.h"
26 #include "parquet/encryption/file_key_unwrapper.h"
27 #include "parquet/encryption/key_toolkit_internal.h"
30 namespace encryption
{
32 void CryptoFactory::RegisterKmsClientFactory(
33 std::shared_ptr
<KmsClientFactory
> kms_client_factory
) {
34 key_toolkit_
.RegisterKmsClientFactory(kms_client_factory
);
37 std::shared_ptr
<FileEncryptionProperties
> CryptoFactory::GetFileEncryptionProperties(
38 const KmsConnectionConfig
& kms_connection_config
,
39 const EncryptionConfiguration
& encryption_config
) {
40 if (!encryption_config
.uniform_encryption
&& encryption_config
.column_keys
.empty()) {
41 throw ParquetException("Either column_keys or uniform_encryption must be set");
42 } else if (encryption_config
.uniform_encryption
&&
43 !encryption_config
.column_keys
.empty()) {
44 throw ParquetException("Cannot set both column_keys and uniform_encryption");
46 const std::string
& footer_key_id
= encryption_config
.footer_key
;
47 const std::string
& column_key_str
= encryption_config
.column_keys
;
49 std::shared_ptr
<FileKeyMaterialStore
> key_material_store
= NULL
;
50 if (!encryption_config
.internal_key_material
) {
51 // TODO: using external key material store with Hadoop file system
52 throw ParquetException("External key material store is not supported yet.");
55 FileKeyWrapper
key_wrapper(&key_toolkit_
, kms_connection_config
, key_material_store
,
56 encryption_config
.cache_lifetime_seconds
,
57 encryption_config
.double_wrapping
);
59 int32_t dek_length_bits
= encryption_config
.data_key_length_bits
;
60 if (!internal::ValidateKeyLength(dek_length_bits
)) {
61 std::ostringstream ss
;
62 ss
<< "Wrong data key length : " << dek_length_bits
;
63 throw ParquetException(ss
.str());
66 int dek_length
= dek_length_bits
/ 8;
68 std::string
footer_key(dek_length
, '\0');
69 RandBytes(reinterpret_cast<uint8_t*>(&footer_key
[0]),
70 static_cast<int>(footer_key
.size()));
72 std::string footer_key_metadata
=
73 key_wrapper
.GetEncryptionKeyMetadata(footer_key
, footer_key_id
, true);
75 FileEncryptionProperties::Builder properties_builder
=
76 FileEncryptionProperties::Builder(footer_key
);
77 properties_builder
.footer_key_metadata(footer_key_metadata
);
78 properties_builder
.algorithm(encryption_config
.encryption_algorithm
);
80 if (!encryption_config
.uniform_encryption
) {
81 ColumnPathToEncryptionPropertiesMap encrypted_columns
=
82 GetColumnEncryptionProperties(dek_length
, column_key_str
, &key_wrapper
);
83 properties_builder
.encrypted_columns(encrypted_columns
);
85 if (encryption_config
.plaintext_footer
) {
86 properties_builder
.set_plaintext_footer();
90 return properties_builder
.build();
93 ColumnPathToEncryptionPropertiesMap
CryptoFactory::GetColumnEncryptionProperties(
94 int dek_length
, const std::string
& column_keys
, FileKeyWrapper
* key_wrapper
) {
95 ColumnPathToEncryptionPropertiesMap encrypted_columns
;
97 std::vector
<::arrow::util::string_view
> key_to_columns
=
98 ::arrow::internal::SplitString(column_keys
, ';');
99 for (size_t i
= 0; i
< key_to_columns
.size(); ++i
) {
100 std::string cur_key_to_columns
=
101 ::arrow::internal::TrimString(std::string(key_to_columns
[i
]));
102 if (cur_key_to_columns
.empty()) {
106 std::vector
<::arrow::util::string_view
> parts
=
107 ::arrow::internal::SplitString(cur_key_to_columns
, ':');
108 if (parts
.size() != 2) {
109 std::ostringstream message
;
110 message
<< "Incorrect key to columns mapping in column keys property"
111 << ": [" << cur_key_to_columns
<< "]";
112 throw ParquetException(message
.str());
115 std::string column_key_id
= ::arrow::internal::TrimString(std::string(parts
[0]));
116 if (column_key_id
.empty()) {
117 throw ParquetException("Empty key name in column keys property.");
120 std::string column_names_str
= ::arrow::internal::TrimString(std::string(parts
[1]));
121 std::vector
<::arrow::util::string_view
> column_names
=
122 ::arrow::internal::SplitString(column_names_str
, ',');
123 if (0 == column_names
.size()) {
124 throw ParquetException("No columns to encrypt defined for key: " + column_key_id
);
127 for (size_t j
= 0; j
< column_names
.size(); ++j
) {
128 std::string column_name
=
129 ::arrow::internal::TrimString(std::string(column_names
[j
]));
130 if (column_name
.empty()) {
131 std::ostringstream message
;
132 message
<< "Empty column name in column keys property for key: " << column_key_id
;
133 throw ParquetException(message
.str());
136 if (encrypted_columns
.find(column_name
) != encrypted_columns
.end()) {
137 throw ParquetException("Multiple keys defined for the same column: " +
141 std::string
column_key(dek_length
, '\0');
142 RandBytes(reinterpret_cast<uint8_t*>(&column_key
[0]),
143 static_cast<int>(column_key
.size()));
144 std::string column_key_key_metadata
=
145 key_wrapper
->GetEncryptionKeyMetadata(column_key
, column_key_id
, false);
147 std::shared_ptr
<ColumnEncryptionProperties
> cmd
=
148 ColumnEncryptionProperties::Builder(column_name
)
150 ->key_metadata(column_key_key_metadata
)
152 encrypted_columns
.insert({column_name
, cmd
});
155 if (encrypted_columns
.empty()) {
156 throw ParquetException("No column keys configured in column keys property.");
159 return encrypted_columns
;
162 std::shared_ptr
<FileDecryptionProperties
> CryptoFactory::GetFileDecryptionProperties(
163 const KmsConnectionConfig
& kms_connection_config
,
164 const DecryptionConfiguration
& decryption_config
) {
165 std::shared_ptr
<DecryptionKeyRetriever
> key_retriever(new FileKeyUnwrapper(
166 &key_toolkit_
, kms_connection_config
, decryption_config
.cache_lifetime_seconds
));
168 return FileDecryptionProperties::Builder()
169 .key_retriever(key_retriever
)
170 ->plaintext_files_allowed()
174 } // namespace encryption
175 } // namespace parquet