/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) 2017-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
/// Zstandard educational decoder implementation
/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <stdint.h> // uint8_t, etc.
+#include <stdlib.h> // malloc, free, exit
+#include <stdio.h> // fprintf
+#include <string.h> // memset, memcpy
#include "zstd_decompress.h"
-/******* UTILITY MACROS AND TYPES *********************************************/
-// Max block size decompressed size is 128 KB and literal blocks can't be
-// larger than their block
-#define MAX_LITERALS_SIZE ((size_t)128 * 1024)
+/******* IMPORTANT CONSTANTS *********************************************/
+
+// Zstandard frame
+// "Magic_Number
+// 4 Bytes, little-endian format. Value : 0xFD2FB528"
+#define ZSTD_MAGIC_NUMBER 0xFD2FB528U
+
+// The size of `Block_Content` is limited by `Block_Maximum_Size`,
+#define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024)
+
+// literal blocks can't be larger than their block
+#define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX
+
+
+/******* UTILITY MACROS AND TYPES *********************************************/
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#if defined(ZDEC_NO_MESSAGE)
+#define MESSAGE(...)
+#else
+#define MESSAGE(...) fprintf(stderr, "" __VA_ARGS__)
+#endif
+
/// This decoder calls exit(1) when it encounters an error, however a production
/// library should propagate error codes
#define ERROR(s) \
do { \
- fprintf(stderr, "Error: %s\n", s); \
+ MESSAGE("Error: %s\n", s); \
exit(1); \
} while (0)
#define INP_SIZE() \
#define BAD_ALLOC() ERROR("Memory allocation error")
#define IMPOSSIBLE() ERROR("An impossibility has occurred")
-typedef uint8_t u8;
+typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
-typedef int8_t i8;
+typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
/// Free the malloc'ed parts of a decoding table
static void HUF_free_dtable(HUF_dtable *const dtable);
-
-/// Deep copy a decoding table, so that it can be used and free'd without
-/// impacting the source table.
-static void HUF_copy_dtable(HUF_dtable *const dst, const HUF_dtable *const src);
/*** END HUFFMAN PRIMITIVES ***********/
/*** FSE PRIMITIVES *******************/
/// Free the malloc'ed parts of a decoding table
static void FSE_free_dtable(FSE_dtable *const dtable);
-
-/// Deep copy a decoding table, so that it can be used and free'd without
-/// impacting the source table.
-static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src);
/*** END FSE PRIMITIVES ***************/
/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
const void *const src, const size_t src_len) {
- dictionary_t* uninit_dict = create_dictionary();
+ dictionary_t* const uninit_dict = create_dictionary();
size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
src_len, uninit_dict);
free_dictionary(uninit_dict);
/* this decoder assumes decompression of a single frame */
decode_frame(&out, &in, parsed_dict);
- return out.ptr - (u8 *)dst;
+ return (size_t)(out.ptr - (u8 *)dst);
}
/******* FRAME DECODING ******************************************************/
static void decode_frame(ostream_t *const out, istream_t *const in,
const dictionary_t *const dict) {
- const u32 magic_number = IO_read_bits(in, 32);
- // Zstandard frame
- //
- // "Magic_Number
- //
- // 4 Bytes, little-endian format. Value : 0xFD2FB528"
- if (magic_number == 0xFD2FB528U) {
+ const u32 magic_number = (u32)IO_read_bits(in, 32);
+ if (magic_number == ZSTD_MAGIC_NUMBER) {
// ZSTD frame
decode_data_frame(out, in, dict);
// 3 Reserved_bit
// 2 Content_Checksum_flag
// 1-0 Dictionary_ID_flag"
- const u8 descriptor = IO_read_bits(in, 8);
+ const u8 descriptor = (u8)IO_read_bits(in, 8);
// decode frame header descriptor into flags
const u8 frame_content_size_flag = descriptor >> 6;
//
// Bit numbers 7-3 2-0
// Field name Exponent Mantissa"
- u8 window_descriptor = IO_read_bits(in, 8);
+ u8 window_descriptor = (u8)IO_read_bits(in, 8);
u8 exponent = window_descriptor >> 3;
u8 mantissa = window_descriptor & 7;
const int bytes_array[] = {0, 1, 2, 4};
const int bytes = bytes_array[dictionary_id_flag];
- header->dictionary_id = IO_read_bits(in, bytes * 8);
+ header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
} else {
header->dictionary_id = 0;
}
}
}
-/// A dictionary acts as initializing values for the frame context before
-/// decompression, so we implement it by applying it's predetermined
-/// tables and content to the context before beginning decompression
-static void frame_context_apply_dict(frame_context_t *const ctx,
- const dictionary_t *const dict) {
- // If the content pointer is NULL then it must be an empty dict
- if (!dict || !dict->content)
- return;
-
- // If the requested dictionary_id is non-zero, the correct dictionary must
- // be present
- if (ctx->header.dictionary_id != 0 &&
- ctx->header.dictionary_id != dict->dictionary_id) {
- ERROR("Wrong dictionary provided");
- }
-
- // Copy the dict content to the context for references during sequence
- // execution
- ctx->dict_content = dict->content;
- ctx->dict_content_len = dict->content_size;
-
- // If it's a formatted dict copy the precomputed tables in so they can
- // be used in the table repeat modes
- if (dict->dictionary_id != 0) {
- // Deep copy the entropy tables so they can be freed independently of
- // the dictionary struct
- HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
- FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
- FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
- FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
-
- // Copy the repeated offsets
- memcpy(ctx->previous_offsets, dict->previous_offsets,
- sizeof(ctx->previous_offsets));
- }
-}
-
/// Decompress the data from a frame block by block
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
istream_t *const in) {
//
// The next 2 bits represent the Block_Type, while the remaining 21 bits
// represent the Block_Size. Format is little-endian."
- last_block = IO_read_bits(in, 1);
- const int block_type = IO_read_bits(in, 2);
+ last_block = (int)IO_read_bits(in, 1);
+ const int block_type = (int)IO_read_bits(in, 2);
const size_t block_len = IO_read_bits(in, 21);
switch (block_type) {
// types"
//
// size_format takes between 1 and 2 bits
- int block_type = IO_read_bits(in, 2);
- int size_format = IO_read_bits(in, 2);
+ int block_type = (int)IO_read_bits(in, 2);
+ int size_format = (int)IO_read_bits(in, 2);
if (block_type <= 1) {
// Raw or RLE literals block
// bits (0-1023)."
num_streams = 1;
// Fall through as it has the same size format
+ /* fallthrough */
case 1:
// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
// (0-1023)."
// Impossible
IMPOSSIBLE();
}
- if (regenerated_size > MAX_LITERALS_SIZE ||
- compressed_size >= regenerated_size) {
+ if (regenerated_size > MAX_LITERALS_SIZE) {
CORRUPTION();
}
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
+ 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
/// Offset decoding is simpler so we just need a maximum code value
-static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
static void decompress_sequences(frame_context_t *const ctx,
istream_t *const in,
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
const int padding = 8 - highest_set_bit(src[len - 1]);
// The offset starts at the end because FSE streams are read backwards
- i64 bit_offset = len * 8 - padding;
+ i64 bit_offset = (i64)(len * 8 - (size_t)padding);
// "The bitstream starts with initial state values, each using the required
// number of bits in their respective accuracy, decoded previously from
// get decompressed size from ZSTD frame header
{
- const u32 magic_number = IO_read_bits(&in, 32);
+ const u32 magic_number = (u32)IO_read_bits(&in, 32);
- if (magic_number == 0xFD2FB528U) {
+ if (magic_number == ZSTD_MAGIC_NUMBER) {
// ZSTD frame
frame_header_t header;
parse_frame_header(&header, &in);
if (header.frame_content_size == 0 && !header.single_segment_flag) {
// Content size not provided, we can't tell
- return -1;
+ return (size_t)-1;
}
return header.frame_content_size;
/******* END OUTPUT SIZE COUNTING *********************************************/
/******* DICTIONARY PARSING ***************************************************/
-#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
-#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
-
dictionary_t* create_dictionary() {
- dictionary_t* dict = calloc(1, sizeof(dictionary_t));
+ dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
if (!dict) {
BAD_ALLOC();
}
return dict;
}
+/// Free an allocated dictionary
+void free_dictionary(dictionary_t *const dict) {
+ HUF_free_dtable(&dict->literals_dtable);
+ FSE_free_dtable(&dict->ll_dtable);
+ FSE_free_dtable(&dict->of_dtable);
+ FSE_free_dtable(&dict->ml_dtable);
+
+ free(dict->content);
+
+ memset(dict, 0, sizeof(dictionary_t));
+
+ free(dict);
+}
+
+
+#if !defined(ZDEC_NO_DICTIONARY)
+#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
+#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
+
static void init_dictionary_content(dictionary_t *const dict,
istream_t *const in);
memcpy(dict->content, content, dict->content_size);
}
-/// Free an allocated dictionary
-void free_dictionary(dictionary_t *const dict) {
- HUF_free_dtable(&dict->literals_dtable);
- FSE_free_dtable(&dict->ll_dtable);
- FSE_free_dtable(&dict->of_dtable);
- FSE_free_dtable(&dict->ml_dtable);
+static void HUF_copy_dtable(HUF_dtable *const dst,
+ const HUF_dtable *const src) {
+ if (src->max_bits == 0) {
+ memset(dst, 0, sizeof(HUF_dtable));
+ return;
+ }
- free(dict->content);
+ const size_t size = (size_t)1 << src->max_bits;
+ dst->max_bits = src->max_bits;
- memset(dict, 0, sizeof(dictionary_t));
+ dst->symbols = malloc(size);
+ dst->num_bits = malloc(size);
+ if (!dst->symbols || !dst->num_bits) {
+ BAD_ALLOC();
+ }
- free(dict);
+ memcpy(dst->symbols, src->symbols, size);
+ memcpy(dst->num_bits, src->num_bits, size);
}
+
+static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
+ if (src->accuracy_log == 0) {
+ memset(dst, 0, sizeof(FSE_dtable));
+ return;
+ }
+
+ size_t size = (size_t)1 << src->accuracy_log;
+ dst->accuracy_log = src->accuracy_log;
+
+ dst->symbols = malloc(size);
+ dst->num_bits = malloc(size);
+ dst->new_state_base = malloc(size * sizeof(u16));
+ if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
+ BAD_ALLOC();
+ }
+
+ memcpy(dst->symbols, src->symbols, size);
+ memcpy(dst->num_bits, src->num_bits, size);
+ memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
+}
+
+/// A dictionary acts as initializing values for the frame context before
+/// decompression, so we implement it by applying it's predetermined
+/// tables and content to the context before beginning decompression
+static void frame_context_apply_dict(frame_context_t *const ctx,
+ const dictionary_t *const dict) {
+ // If the content pointer is NULL then it must be an empty dict
+ if (!dict || !dict->content)
+ return;
+
+ // If the requested dictionary_id is non-zero, the correct dictionary must
+ // be present
+ if (ctx->header.dictionary_id != 0 &&
+ ctx->header.dictionary_id != dict->dictionary_id) {
+ ERROR("Wrong dictionary provided");
+ }
+
+ // Copy the dict content to the context for references during sequence
+ // execution
+ ctx->dict_content = dict->content;
+ ctx->dict_content_len = dict->content_size;
+
+ // If it's a formatted dict copy the precomputed tables in so they can
+ // be used in the table repeat modes
+ if (dict->dictionary_id != 0) {
+ // Deep copy the entropy tables so they can be freed independently of
+ // the dictionary struct
+ HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
+ FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
+ FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
+ FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
+
+ // Copy the repeated offsets
+ memcpy(ctx->previous_offsets, dict->previous_offsets,
+ sizeof(ctx->previous_offsets));
+ }
+}
+
+#else // ZDEC_NO_DICTIONARY is defined
+
+static void frame_context_apply_dict(frame_context_t *const ctx,
+ const dictionary_t *const dict) {
+ (void)ctx;
+ if (dict && dict->content) ERROR("dictionary not supported");
+}
+
+#endif
/******* END DICTIONARY PARSING ***********************************************/
/******* IO STREAM OPERATIONS *************************************************/
-#define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
+
/// Reads `num` bits from a bitstream, and updates the internal offset
static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
if (num_bits > 64 || num_bits <= 0) {
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
const u8 *const ptr = in->ptr;
in->ptr += len;
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
in->ptr += len;
free(dtable->num_bits);
memset(dtable, 0, sizeof(HUF_dtable));
}
-
-static void HUF_copy_dtable(HUF_dtable *const dst,
- const HUF_dtable *const src) {
- if (src->max_bits == 0) {
- memset(dst, 0, sizeof(HUF_dtable));
- return;
- }
-
- const size_t size = (size_t)1 << src->max_bits;
- dst->max_bits = src->max_bits;
-
- dst->symbols = malloc(size);
- dst->num_bits = malloc(size);
- if (!dst->symbols || !dst->num_bits) {
- BAD_ALLOC();
- }
-
- memcpy(dst->symbols, src->symbols, size);
- memcpy(dst->num_bits, src->num_bits, size);
-}
/******* END HUFFMAN PRIMITIVES ***********************************************/
/******* FSE PRIMITIVES *******************************************************/
free(dtable->new_state_base);
memset(dtable, 0, sizeof(FSE_dtable));
}
-
-static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
- if (src->accuracy_log == 0) {
- memset(dst, 0, sizeof(FSE_dtable));
- return;
- }
-
- size_t size = (size_t)1 << src->accuracy_log;
- dst->accuracy_log = src->accuracy_log;
-
- dst->symbols = malloc(size);
- dst->num_bits = malloc(size);
- dst->new_state_base = malloc(size * sizeof(u16));
- if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
- BAD_ALLOC();
- }
-
- memcpy(dst->symbols, src->symbols, size);
- memcpy(dst->num_bits, src->num_bits, size);
- memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
-}
/******* END FSE PRIMITIVES ***************************************************/