1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 // Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
19 // Testing for this is done via integration testing at the top level parquet package via attempting to
20 // read and write encrypted files with different configurations to match test files in parquet-testing
31 "github.com/apache/arrow/go/v6/parquet"
32 "golang.org/x/xerrors"
35 // important constants for handling the aes encryption
46 // Module constants for constructing the AAD bytes. The order here is
47 // important because the constants are set via iota.
// FooterModule is the module type that CreateFooterAad passes to
// CreateModuleAad when building the footer AAD.
49 FooterModule int8 = iota
// aesEncryptor is the receiver type for CiphertextSizeDelta,
// SignedFooterEncrypt and Encrypt. Instances are created by NewAesEncryptor.
59 type aesEncryptor struct {
// ciphertextSizeDelta is the value returned by CiphertextSizeDelta.
// NewAesEncryptor sets it to bufferSizeLength + NonceLength, and adds
// GcmTagLength for metadata or AES-GCM encryptors.
61 ciphertextSizeDelta int
64 // NewAesEncryptor constructs an encryptor for the given cipher algorithm and
65 // for whether or not it is being used to encrypt metadata.
//
// The ciphertext size delta always includes bufferSizeLength + NonceLength.
// When metadata is true or alg is parquet.AesGcm, GcmTagLength is added on
// top of that.
66 func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
67 ret := &aesEncryptor{}
// Every encrypted buffer carries a size prefix and a nonce.
68 ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
69 if metadata || alg == parquet.AesGcm {
// Metadata and AES-GCM output additionally accounts for the GCM tag.
71 ret.ciphertextSizeDelta += GcmTagLength
79 // CiphertextSizeDelta returns the number of extra bytes that are part of the encrypted data
80 // above and beyond the plaintext value. The value is the one stored on the
// encryptor by NewAesEncryptor.
81 func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
83 // SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce.
84 // It returns the number of bytes that were written to w.
//
// The footer is sealed with AES-GCM using the caller-supplied nonce, with aad
// as the additional authenticated data. The returned count is
// bufferSizeLength plus the combined length of the nonce and the sealed output.
//
// It panics if the encryptor is not in GCM mode, or if the GCM nonce size or
// tag overhead does not match NonceLength or GcmTagLength.
85 func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
// Footer signing is only valid for a GCM-mode encryptor.
86 if a.mode != gcmMode {
87 panic("must use AES GCM (metadata) encryptor")
90 block, err := aes.NewCipher(key)
95 aead, err := cipher.NewGCM(block)
// Guard against the GCM implementation disagreeing with the sizes this
// package assumes for the nonce and the authentication tag.
99 if aead.NonceSize() != NonceLength {
100 panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
102 if aead.Overhead() != GcmTagLength {
103 panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
// A nil dst makes Seal allocate a fresh slice for the sealed output.
106 ciphertext := aead.Seal(nil, nonce, footer, aad)
// The size prefix counts the nonce as well as the sealed bytes.
107 bufferSize := uint32(len(ciphertext) + len(nonce))
108 // The data is prefixed with its size, written as a little-endian 32-bit integer.
109 if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
114 return bufferSizeLength + int(bufferSize)
117 // Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w.
118 // Returns the total number of bytes written.
//
// In GCM mode src is sealed with AES-GCM, with aad as the additional
// authenticated data, and the result is bufferSizeLength plus the combined
// length of the nonce and the sealed output. Otherwise src is encrypted with
// AES-CTR and the result is bufferSizeLength + NonceLength + len(src).
//
// In GCM mode it panics if the GCM nonce size or tag overhead does not match
// NonceLength or GcmTagLength.
119 func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
120 block, err := aes.NewCipher(key)
// Buffer for the nonce used by this call.
125 nonce := make([]byte, NonceLength)
128 if a.mode == gcmMode {
129 aead, err := cipher.NewGCM(block)
// Guard against the GCM implementation disagreeing with the sizes this
// package assumes for the nonce and the authentication tag.
133 if aead.NonceSize() != NonceLength {
134 panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
136 if aead.Overhead() != GcmTagLength {
137 panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
// A nil dst makes Seal allocate a fresh slice for the sealed output.
140 ciphertext := aead.Seal(nil, nonce, src, aad)
// The size prefix counts the nonce as well as the sealed bytes.
141 bufferSize := len(ciphertext) + len(nonce)
142 // The data is prefixed with its size, written as a little-endian 32-bit integer.
143 if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
148 return bufferSizeLength + bufferSize
151 // Parquet CTR IVs are composed of a 12-byte nonce and a 4-byte initial
// counter field.
153 // The first 31 bits of the initial counter field are set to 0, the last bit
// is set to 1.
155 iv := make([]byte, ctrIVLen)
// CTR output has the same length as src, so only the nonce adds to the size.
159 bufferSize := NonceLength + len(src)
160 // The data is prefixed with its size, written as a little-endian 32-bit integer.
161 if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
// NOTE(review): the (n, err) result of this Write is discarded, so a failure
// while writing the encrypted stream to w is not reported to the caller.
165 cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src)
166 return bufferSizeLength + bufferSize
// aesDecryptor is the receiver type for CiphertextSizeDelta and Decrypt.
// Instances are created by newAesDecryptor.
169 type aesDecryptor struct {
// ciphertextSizeDelta is the value returned by CiphertextSizeDelta.
// newAesDecryptor sets it to bufferSizeLength + NonceLength, and adds
// GcmTagLength for metadata or AES-GCM decryptors.
171 ciphertextSizeDelta int
174 // newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or
175 // not it is intended to be used for decrypting metadata.
//
// The ciphertext size delta always includes bufferSizeLength + NonceLength.
// When metadata is true or alg is parquet.AesGcm, GcmTagLength is added on
// top of that.
176 func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
177 ret := &aesDecryptor{}
// Every encrypted buffer carries a size prefix and a nonce.
178 ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
179 if metadata || alg == parquet.AesGcm {
// Metadata and AES-GCM input additionally accounts for the GCM tag.
181 ret.ciphertextSizeDelta += GcmTagLength
189 // CiphertextSizeDelta returns the number of bytes in the ciphertext that will not exist in the
190 // plaintext because they are only needed for decryption. The total size minus the
191 // CiphertextSizeDelta is the length of the plaintext after decryption.
192 func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
194 // Decrypt returns the plaintext version of the given ciphertext when decrypted
195 // with the provided key and AAD security bytes.
//
// cipherText is read as a little-endian 32-bit size prefix, followed by a
// NonceLength-byte nonce, followed by the encrypted payload. In GCM mode the
// payload is opened with aad as the additional authenticated data; otherwise
// it is decrypted with AES-CTR.
//
// NOTE(review): cipherText is indexed without a length check, so an input
// with fewer than bufferSizeLength + NonceLength bytes can panic.
196 func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
197 block, err := aes.NewCipher(key)
// The leading bytes hold the size prefix stored ahead of the nonce and payload.
202 writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
// End offset of the encrypted data within cipherText, counting the prefix itself.
203 cipherLen := writtenCiphertextLen + bufferSizeLength
204 nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
206 if a.mode == gcmMode {
207 aead, err := cipher.NewGCM(block)
// The GCM payload is bounded by the size prefix (cipherLen).
212 plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
219 // Parquet CTR IVs are composed of a 12-byte nonce and a 4-byte initial
// counter field.
221 // The first 31 bits of the initial counter field are set to 0, the last bit
// is set to 1.
223 iv := make([]byte, ctrIVLen)
227 stream := cipher.NewCTR(block, iv)
// NOTE(review): unlike the GCM path, the CTR path sizes its output from
// len(cipherText) rather than cipherLen, so every byte after the nonce is
// decrypted regardless of the size prefix.
228 dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
229 stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
233 // CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
//
// The buffer starts with the bytes of fileAad followed by moduleType as a
// single byte. The row group and column ordinals, and then the page ordinal,
// are appended as little-endian int16 values, subject to the module type
// checks in the body.
//
235 // The result should be passed to the encryptor and decryptor whenever AAD bytes are requested.
236 func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
237 buf := bytes.NewBuffer([]byte(fileAad))
238 buf.WriteByte(byte(moduleType))
// The footer module is special-cased before any ordinals are appended.
240 if moduleType == FooterModule {
// Each ordinal is an int16, so each write appends two bytes.
244 binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
245 binary.Write(buf, binary.LittleEndian, columnOrdinal)
// Data pages and data page headers are distinguished from every other
// module type here, ahead of the page ordinal write.
246 if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
250 binary.Write(buf, binary.LittleEndian, pageOrdinal)
254 // CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
255 // and decrypting the parquet footer bytes.
//
// It delegates to CreateModuleAad with FooterModule as the module type and -1
// for the row group, column and page ordinals.
256 func CreateFooterAad(aadPrefix string) string {
257 return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
260 // QuickUpdatePageAad updates aad in place with the new page ordinal, overwriting
261 // the last two bytes of aad with newPageOrdinal encoded as a little-endian uint16.
//
// aad must be at least two bytes long; a shorter slice makes the slice
// expression below panic.
262 func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
263 binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal))