]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, software | |
12 | // distributed under the License is distributed on an "AS IS" BASIS, | |
13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | // See the License for the specific language governing permissions and | |
15 | // limitations under the License. | |
16 | ||
17 | // Package encryption contains the internal helpers for the parquet AES encryption/decryption handling. | |
18 | // | |
19 | // Testing for this package is done through integration tests in the top-level parquet package, which | |
20 | // read and write encrypted files with different configurations to match the test files in parquet-testing. | |
21 | package encryption | |
22 | ||
23 | import ( | |
24 | "bytes" | |
25 | "crypto/aes" | |
26 | "crypto/cipher" | |
27 | "crypto/rand" | |
28 | "encoding/binary" | |
29 | "io" | |
30 | ||
31 | "github.com/apache/arrow/go/v6/parquet" | |
32 | "golang.org/x/xerrors" | |
33 | ) | |
34 | ||
// Sizes, in bytes, of the pieces that surround an AES-encrypted buffer.
const (
	// GcmTagLength is the size of the authentication tag that GCM appends to the ciphertext.
	GcmTagLength = 16
	// NonceLength is the size of the nonce that is written in front of the ciphertext.
	NonceLength = 12
)

// Internal constants used by the encryptor and decryptor.
const (
	// gcmMode and ctrMode are the values stored in the mode field of the
	// encryptor/decryptor to select the AES mode of operation.
	gcmMode = 0
	ctrMode = 1

	// ctrIVLen is the size of the CTR initialization vector: the nonce followed by
	// the initial counter bytes.
	ctrIVLen = 16
	// bufferSizeLength is the size of the little endian length prefix that is
	// written before the nonce.
	bufferSizeLength = 4
)
45 | ||
// Module type identifiers that are written into the AAD bytes by CreateModuleAad.
//
// The values are assigned with iota, so the declaration order below defines the
// numeric value of every constant and must not be changed.
const (
	// FooterModule has the value 0.
	FooterModule = int8(iota)
	// ColumnMetaModule has the value 1.
	ColumnMetaModule
	// DataPageModule has the value 2.
	DataPageModule
	// DictPageModule has the value 3.
	DictPageModule
	// DataPageHeaderModule has the value 4.
	DataPageHeaderModule
	// DictPageHeaderModule has the value 5.
	DictPageHeaderModule
	// ColumnIndexModule has the value 6.
	ColumnIndexModule
	// OffsetIndexModule has the value 7.
	OffsetIndexModule
)
58 | ||
// aesEncryptor holds the configuration chosen by NewAesEncryptor.
//
// mode is either gcmMode or ctrMode. ciphertextSizeDelta is the number of bytes
// an encrypted buffer is larger than its plaintext: the length prefix plus the
// nonce, plus the GCM tag when mode is gcmMode.
type aesEncryptor struct {
	mode, ciphertextSizeDelta int
}
63 | ||
64 | // NewAesEncryptor constructs an encryptor for the passed in cipher and whether | |
65 | // or not it's being used to encrypt metadata. | |
66 | func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor { | |
67 | ret := &aesEncryptor{} | |
68 | ret.ciphertextSizeDelta = bufferSizeLength + NonceLength | |
69 | if metadata || alg == parquet.AesGcm { | |
70 | ret.mode = gcmMode | |
71 | ret.ciphertextSizeDelta += GcmTagLength | |
72 | } else { | |
73 | ret.mode = ctrMode | |
74 | } | |
75 | ||
76 | return ret | |
77 | } | |
78 | ||
79 | // CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data | |
80 | // above and beyond the plaintext value. | |
81 | func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } | |
82 | ||
83 | // SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce. | |
84 | // It returns the number of bytes that were written to w. | |
85 | func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int { | |
86 | if a.mode != gcmMode { | |
87 | panic("must use AES GCM (metadata) encryptor") | |
88 | } | |
89 | ||
90 | block, err := aes.NewCipher(key) | |
91 | if err != nil { | |
92 | panic(err) | |
93 | } | |
94 | ||
95 | aead, err := cipher.NewGCM(block) | |
96 | if err != nil { | |
97 | panic(err) | |
98 | } | |
99 | if aead.NonceSize() != NonceLength { | |
100 | panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) | |
101 | } | |
102 | if aead.Overhead() != GcmTagLength { | |
103 | panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) | |
104 | } | |
105 | ||
106 | ciphertext := aead.Seal(nil, nonce, footer, aad) | |
107 | bufferSize := uint32(len(ciphertext) + len(nonce)) | |
108 | // data is written with a prefix of the size written as a little endian 32bit int. | |
109 | if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil { | |
110 | panic(err) | |
111 | } | |
112 | w.Write(nonce) | |
113 | w.Write(ciphertext) | |
114 | return bufferSizeLength + int(bufferSize) | |
115 | } | |
116 | ||
117 | // Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w. | |
118 | // Returns the total number of bytes written. | |
119 | func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int { | |
120 | block, err := aes.NewCipher(key) | |
121 | if err != nil { | |
122 | panic(err) | |
123 | } | |
124 | ||
125 | nonce := make([]byte, NonceLength) | |
126 | rand.Read(nonce) | |
127 | ||
128 | if a.mode == gcmMode { | |
129 | aead, err := cipher.NewGCM(block) | |
130 | if err != nil { | |
131 | panic(err) | |
132 | } | |
133 | if aead.NonceSize() != NonceLength { | |
134 | panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) | |
135 | } | |
136 | if aead.Overhead() != GcmTagLength { | |
137 | panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) | |
138 | } | |
139 | ||
140 | ciphertext := aead.Seal(nil, nonce, src, aad) | |
141 | bufferSize := len(ciphertext) + len(nonce) | |
142 | // data is written with a prefix of the size written as a little endian 32bit int. | |
143 | if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { | |
144 | panic(err) | |
145 | } | |
146 | w.Write(nonce) | |
147 | w.Write(ciphertext) | |
148 | return bufferSizeLength + bufferSize | |
149 | } | |
150 | ||
151 | // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial | |
152 | // counter field. | |
153 | // The first 31 bits of the initial counter field are set to 0, the last bit | |
154 | // is set to 1. | |
155 | iv := make([]byte, ctrIVLen) | |
156 | copy(iv, nonce) | |
157 | iv[ctrIVLen-1] = 1 | |
158 | ||
159 | bufferSize := NonceLength + len(src) | |
160 | // data is written with a prefix of the size written as a little endian 32bit int. | |
161 | if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { | |
162 | panic(err) | |
163 | } | |
164 | w.Write(nonce) | |
165 | cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src) | |
166 | return bufferSizeLength + bufferSize | |
167 | } | |
168 | ||
// aesDecryptor holds the configuration chosen by newAesDecryptor.
//
// mode is either gcmMode or ctrMode. ciphertextSizeDelta is the number of bytes
// an encrypted buffer is larger than its plaintext: the length prefix plus the
// nonce, plus the GCM tag when mode is gcmMode.
type aesDecryptor struct {
	mode, ciphertextSizeDelta int
}
173 | ||
174 | // newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or | |
175 | // not it is intended to be used for decrypting metadata. | |
176 | func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor { | |
177 | ret := &aesDecryptor{} | |
178 | ret.ciphertextSizeDelta = bufferSizeLength + NonceLength | |
179 | if metadata || alg == parquet.AesGcm { | |
180 | ret.mode = gcmMode | |
181 | ret.ciphertextSizeDelta += GcmTagLength | |
182 | } else { | |
183 | ret.mode = ctrMode | |
184 | } | |
185 | ||
186 | return ret | |
187 | } | |
188 | ||
189 | // CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the | |
190 | // plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is | |
191 | // the length of the plaintext after decryption. | |
192 | func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } | |
193 | ||
194 | // Decrypt returns the plaintext version of the given ciphertext when decrypted | |
195 | // with the provided key and AAD security bytes. | |
196 | func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte { | |
197 | block, err := aes.NewCipher(key) | |
198 | if err != nil { | |
199 | panic(err) | |
200 | } | |
201 | ||
202 | writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText) | |
203 | cipherLen := writtenCiphertextLen + bufferSizeLength | |
204 | nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength] | |
205 | ||
206 | if a.mode == gcmMode { | |
207 | aead, err := cipher.NewGCM(block) | |
208 | if err != nil { | |
209 | panic(err) | |
210 | } | |
211 | ||
212 | plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad) | |
213 | if err != nil { | |
214 | panic(err) | |
215 | } | |
216 | return plain | |
217 | } | |
218 | ||
219 | // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial | |
220 | // counter field. | |
221 | // The first 31 bits of the initial counter field are set to 0, the last bit | |
222 | // is set to 1. | |
223 | iv := make([]byte, ctrIVLen) | |
224 | copy(iv, nonce) | |
225 | iv[ctrIVLen-1] = 1 | |
226 | ||
227 | stream := cipher.NewCTR(block, iv) | |
228 | dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength) | |
229 | stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:]) | |
230 | return dst | |
231 | } | |
232 | ||
233 | // CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page. | |
234 | // | |
235 | // This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes. | |
236 | func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string { | |
237 | buf := bytes.NewBuffer([]byte(fileAad)) | |
238 | buf.WriteByte(byte(moduleType)) | |
239 | ||
240 | if moduleType == FooterModule { | |
241 | return buf.String() | |
242 | } | |
243 | ||
244 | binary.Write(buf, binary.LittleEndian, rowGroupOrdinal) | |
245 | binary.Write(buf, binary.LittleEndian, columnOrdinal) | |
246 | if DataPageModule != moduleType && DataPageHeaderModule != moduleType { | |
247 | return buf.String() | |
248 | } | |
249 | ||
250 | binary.Write(buf, binary.LittleEndian, pageOrdinal) | |
251 | return buf.String() | |
252 | } | |
253 | ||
254 | // CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting | |
255 | // and decrypting the parquet footer bytes. | |
256 | func CreateFooterAad(aadPrefix string) string { | |
257 | return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1) | |
258 | } | |
259 | ||
// QuickUpdatePageAad overwrites the last two bytes of aad, in place, with
// newPageOrdinal encoded as a little endian 16-bit value.
//
// aad must be at least two bytes long, otherwise the slice expression panics.
func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
	ordinalStart := len(aad) - 2
	ordinalBytes := aad[ordinalStart:]
	binary.LittleEndian.PutUint16(ordinalBytes, uint16(newPageOrdinal))
}