]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/parquet/encryption/crypto_factory.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / parquet / encryption / crypto_factory.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/result.h"
19 #include "arrow/util/logging.h"
20 #include "arrow/util/string.h"
21 #include "arrow/util/string_view.h"
22
23 #include "parquet/encryption/crypto_factory.h"
24 #include "parquet/encryption/encryption_internal.h"
25 #include "parquet/encryption/file_key_material_store.h"
26 #include "parquet/encryption/file_key_unwrapper.h"
27 #include "parquet/encryption/key_toolkit_internal.h"
28
29 namespace parquet {
30 namespace encryption {
31
32 void CryptoFactory::RegisterKmsClientFactory(
33 std::shared_ptr<KmsClientFactory> kms_client_factory) {
34 key_toolkit_.RegisterKmsClientFactory(kms_client_factory);
35 }
36
37 std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties(
38 const KmsConnectionConfig& kms_connection_config,
39 const EncryptionConfiguration& encryption_config) {
40 if (!encryption_config.uniform_encryption && encryption_config.column_keys.empty()) {
41 throw ParquetException("Either column_keys or uniform_encryption must be set");
42 } else if (encryption_config.uniform_encryption &&
43 !encryption_config.column_keys.empty()) {
44 throw ParquetException("Cannot set both column_keys and uniform_encryption");
45 }
46 const std::string& footer_key_id = encryption_config.footer_key;
47 const std::string& column_key_str = encryption_config.column_keys;
48
49 std::shared_ptr<FileKeyMaterialStore> key_material_store = NULL;
50 if (!encryption_config.internal_key_material) {
51 // TODO: using external key material store with Hadoop file system
52 throw ParquetException("External key material store is not supported yet.");
53 }
54
55 FileKeyWrapper key_wrapper(&key_toolkit_, kms_connection_config, key_material_store,
56 encryption_config.cache_lifetime_seconds,
57 encryption_config.double_wrapping);
58
59 int32_t dek_length_bits = encryption_config.data_key_length_bits;
60 if (!internal::ValidateKeyLength(dek_length_bits)) {
61 std::ostringstream ss;
62 ss << "Wrong data key length : " << dek_length_bits;
63 throw ParquetException(ss.str());
64 }
65
66 int dek_length = dek_length_bits / 8;
67
68 std::string footer_key(dek_length, '\0');
69 RandBytes(reinterpret_cast<uint8_t*>(&footer_key[0]),
70 static_cast<int>(footer_key.size()));
71
72 std::string footer_key_metadata =
73 key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true);
74
75 FileEncryptionProperties::Builder properties_builder =
76 FileEncryptionProperties::Builder(footer_key);
77 properties_builder.footer_key_metadata(footer_key_metadata);
78 properties_builder.algorithm(encryption_config.encryption_algorithm);
79
80 if (!encryption_config.uniform_encryption) {
81 ColumnPathToEncryptionPropertiesMap encrypted_columns =
82 GetColumnEncryptionProperties(dek_length, column_key_str, &key_wrapper);
83 properties_builder.encrypted_columns(encrypted_columns);
84
85 if (encryption_config.plaintext_footer) {
86 properties_builder.set_plaintext_footer();
87 }
88 }
89
90 return properties_builder.build();
91 }
92
93 ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties(
94 int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) {
95 ColumnPathToEncryptionPropertiesMap encrypted_columns;
96
97 std::vector<::arrow::util::string_view> key_to_columns =
98 ::arrow::internal::SplitString(column_keys, ';');
99 for (size_t i = 0; i < key_to_columns.size(); ++i) {
100 std::string cur_key_to_columns =
101 ::arrow::internal::TrimString(std::string(key_to_columns[i]));
102 if (cur_key_to_columns.empty()) {
103 continue;
104 }
105
106 std::vector<::arrow::util::string_view> parts =
107 ::arrow::internal::SplitString(cur_key_to_columns, ':');
108 if (parts.size() != 2) {
109 std::ostringstream message;
110 message << "Incorrect key to columns mapping in column keys property"
111 << ": [" << cur_key_to_columns << "]";
112 throw ParquetException(message.str());
113 }
114
115 std::string column_key_id = ::arrow::internal::TrimString(std::string(parts[0]));
116 if (column_key_id.empty()) {
117 throw ParquetException("Empty key name in column keys property.");
118 }
119
120 std::string column_names_str = ::arrow::internal::TrimString(std::string(parts[1]));
121 std::vector<::arrow::util::string_view> column_names =
122 ::arrow::internal::SplitString(column_names_str, ',');
123 if (0 == column_names.size()) {
124 throw ParquetException("No columns to encrypt defined for key: " + column_key_id);
125 }
126
127 for (size_t j = 0; j < column_names.size(); ++j) {
128 std::string column_name =
129 ::arrow::internal::TrimString(std::string(column_names[j]));
130 if (column_name.empty()) {
131 std::ostringstream message;
132 message << "Empty column name in column keys property for key: " << column_key_id;
133 throw ParquetException(message.str());
134 }
135
136 if (encrypted_columns.find(column_name) != encrypted_columns.end()) {
137 throw ParquetException("Multiple keys defined for the same column: " +
138 column_name);
139 }
140
141 std::string column_key(dek_length, '\0');
142 RandBytes(reinterpret_cast<uint8_t*>(&column_key[0]),
143 static_cast<int>(column_key.size()));
144 std::string column_key_key_metadata =
145 key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false);
146
147 std::shared_ptr<ColumnEncryptionProperties> cmd =
148 ColumnEncryptionProperties::Builder(column_name)
149 .key(column_key)
150 ->key_metadata(column_key_key_metadata)
151 ->build();
152 encrypted_columns.insert({column_name, cmd});
153 }
154 }
155 if (encrypted_columns.empty()) {
156 throw ParquetException("No column keys configured in column keys property.");
157 }
158
159 return encrypted_columns;
160 }
161
162 std::shared_ptr<FileDecryptionProperties> CryptoFactory::GetFileDecryptionProperties(
163 const KmsConnectionConfig& kms_connection_config,
164 const DecryptionConfiguration& decryption_config) {
165 std::shared_ptr<DecryptionKeyRetriever> key_retriever(new FileKeyUnwrapper(
166 &key_toolkit_, kms_connection_config, decryption_config.cache_lifetime_seconds));
167
168 return FileDecryptionProperties::Builder()
169 .key_retriever(key_retriever)
170 ->plaintext_files_allowed()
171 ->build();
172 }
173
174 } // namespace encryption
175 } // namespace parquet