You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
12 KiB
306 lines
12 KiB
// Copyright 2017 The Chromium OS Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#ifndef SRC_HUFFMAN_TABLE_H_
|
|
#define SRC_HUFFMAN_TABLE_H_
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "puffin/src/bit_reader.h"
|
|
#include "puffin/src/bit_writer.h"
|
|
#include "puffin/src/include/puffin/common.h"
|
|
#include "puffin/src/logging.h"
|
|
|
|
namespace puffin {
|
|
|
|
// Maximum Huffman code length based on RFC1951.
|
|
constexpr size_t kMaxHuffmanBits = 15;
|
|
|
|
// Permutations of input Huffman code lengths (used only to read
|
|
// |dynamic_code_lens_|).
|
|
extern const uint8_t kPermutations[];
|
|
|
|
// The bases of each alphabet which is added to the integer value of extra
|
|
// bits that comes after the Huffman code in the input to create the given
|
|
// length value. The last element is a guard.
|
|
extern const uint16_t kLengthBases[];
|
|
|
|
// Number of extra bits that comes after the associating Huffman code.
|
|
extern const uint8_t kLengthExtraBits[];
|
|
|
|
// same as |kLengthBases| except for the the distances instead of lengths.
|
|
// The last element is a guard.
|
|
extern const uint16_t kDistanceBases[];
|
|
|
|
// Same as |kLengthExtraBits| except for distances instead of lengths.
|
|
extern const uint8_t kDistanceExtraBits[];
|
|
|
|
class HuffmanTable {
|
|
public:
|
|
HuffmanTable();
|
|
virtual ~HuffmanTable() = default;
|
|
|
|
// Checks the lengths of Huffman length arrays for correctness
|
|
//
|
|
// |num_lit_len| IN The number of literal/lengths code lengths
|
|
// |num_distance| IN The number of distance code lengths
|
|
// |num_codes| IN The number of code lengths for reading Huffman table.
|
|
inline bool CheckHuffmanArrayLengths(size_t num_lit_len,
|
|
size_t num_distance,
|
|
size_t num_codes) {
|
|
if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) {
|
|
LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: "
|
|
<< "num_lit_len(" << num_lit_len << ") "
|
|
<< "num_distance(" << num_distance << ") "
|
|
<< "num_codes(" << num_codes << ")";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Returns the maximum number of bits used in the current literal/length
|
|
// Huffman codes.
|
|
inline size_t LitLenMaxBits() { return lit_len_max_bits_; }
|
|
|
|
// Returns the maximum number of bits used in the current distance Huffman
|
|
// codes.
|
|
inline size_t DistanceMaxBits() { return distance_max_bits_; }
|
|
|
|
// Returns the alphabet associated with the set of input bits for the code
|
|
// length array.
|
|
//
|
|
// |bits| IN The input Huffman bits read from the deflate stream.
|
|
// |alphabet| OUT The alphabet associated with the given |bits|.
|
|
// |nbits| OUT The number of bits in the Huffman code of alphabet.
|
|
// Returns true if there is an alphabet associated with |bits|.
|
|
inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
|
|
auto hc = code_hcodes_[bits];
|
|
TEST_AND_RETURN_FALSE(hc & 0x8000);
|
|
*alphabet = hc & 0x7FFF;
|
|
*nbits = code_lens_[*alphabet];
|
|
return true;
|
|
}
|
|
|
|
// Returns the alphabet associated with the set of input bits for the
|
|
// literal/length code length array.
|
|
//
|
|
// |bits| IN The input Huffman bits read from the deflate stream.
|
|
// |alphabet| OUT The alphabet associated with the given |bits|.
|
|
// |nbits| OUT The number of bits in the Huffman code of the |alphabet|.
|
|
// Returns true if there is an alphabet associated with |bits|.
|
|
inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
|
|
auto hc = lit_len_hcodes_[bits];
|
|
TEST_AND_RETURN_FALSE(hc & 0x8000);
|
|
*alphabet = hc & 0x7FFF;
|
|
*nbits = lit_len_lens_[*alphabet];
|
|
return true;
|
|
}
|
|
|
|
// Returns the alphabet associated with the set of input bits for the
|
|
// distance code length array.
|
|
//
|
|
// |bits| IN The input Huffman bits read from the deflate stream.
|
|
// |alphabet| OUT The alphabet associated with the given |bits|.
|
|
// |nbits| OUT The number of bits in the Huffman code of the |alphabet|.
|
|
// Returns true if there is an alphabet associated with |bits|.
|
|
inline bool DistanceAlphabet(uint32_t bits,
|
|
uint16_t* alphabet,
|
|
size_t* nbits) {
|
|
auto hc = distance_hcodes_[bits];
|
|
TEST_AND_RETURN_FALSE(hc & 0x8000);
|
|
*alphabet = hc & 0x7FFF;
|
|
*nbits = distance_lens_[*alphabet];
|
|
return true;
|
|
}
|
|
|
|
// Returns the Huffman code of a give alphabet for Huffman table codes.
|
|
//
|
|
// |alphabet| IN The alphabet.
|
|
// |huffman| OUT The Huffman code for |alphabet|.
|
|
// |nbits| OUT The maximum number of bits in the Huffman code of the
|
|
// |alphabet|.
|
|
inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) {
|
|
TEST_AND_RETURN_FALSE(alphabet < code_lens_.size());
|
|
*huffman = code_rcodes_[alphabet];
|
|
*nbits = code_lens_[alphabet];
|
|
return true;
|
|
}
|
|
|
|
// Returns the Huffman code of a give alphabet for literal/length codes.
|
|
//
|
|
// |alphabet| IN The alphabet.
|
|
// |huffman| OUT The Huffman code for |alphabet|.
|
|
// |nbits| OUT The maximum number of bits in the Huffman code of the
|
|
// |alphabet|.
|
|
inline bool LitLenHuffman(uint16_t alphabet,
|
|
uint16_t* huffman,
|
|
size_t* nbits) {
|
|
TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size());
|
|
*huffman = lit_len_rcodes_[alphabet];
|
|
*nbits = lit_len_lens_[alphabet];
|
|
return true;
|
|
}
|
|
|
|
inline bool EndOfBlockBitLength(size_t* nbits) {
|
|
TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size());
|
|
*nbits = lit_len_lens_[256];
|
|
return true;
|
|
}
|
|
|
|
// Returns the Huffman code of a give alphabet for distance codes.
|
|
//
|
|
// |alphabet| IN The alphabet.
|
|
// |huffman| OUT The Huffman code for |alphabet|.
|
|
// |nbits| OUT The maximum number of bits in the Huffman code of the
|
|
// |alphabet|.
|
|
inline bool DistanceHuffman(uint16_t alphabet,
|
|
uint16_t* huffman,
|
|
size_t* nbits) {
|
|
TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size());
|
|
*huffman = distance_rcodes_[alphabet];
|
|
*nbits = distance_lens_[alphabet];
|
|
return true;
|
|
}
|
|
|
|
// This populates the object with fixed huffman table parameters.
|
|
// TODO(ahassani): Revamp the use of this function to be initiliazed once in
|
|
// the lifetime of the program and only one instance needed.
|
|
bool BuildFixedHuffmanTable();
|
|
|
|
// This functions first reads the Huffman code length arrays from the input
|
|
// deflate stream, then builds both literal/length and distance Huffman
|
|
// code arrays. It also writes the Huffman table into the puffed stream.
|
|
//
|
|
// |br| IN The |BitReaderInterface| reading the deflate stream.
|
|
// |buffer| OUT The object to write the Huffman table.
|
|
// |length| IN/OUT The length available in the |buffer| and in return it
|
|
// will be the length of Huffman table data written into
|
|
// the |buffer|.
|
|
bool BuildDynamicHuffmanTable(BitReaderInterface* br,
|
|
uint8_t* buffer,
|
|
size_t* length);
|
|
|
|
// This functions first reads the Huffman code length arrays from the input
|
|
// puffed |buffer|, then builds both literal/length and distance Huffman code
|
|
// arrays. It also writes the coded Huffman table arrays into the deflate
|
|
// stream.
|
|
//
|
|
// |buffer| IN The array to read the Huffman table from.
|
|
// |length| IN The length available in the |buffer|.
|
|
// |bw| IN/OUT The |BitWriterInterface| for writing into the deflate
|
|
// stream.
|
|
bool BuildDynamicHuffmanTable(const uint8_t* buffer,
|
|
size_t length,
|
|
BitWriterInterface* bw);
|
|
|
|
protected:
|
|
// Initializes the Huffman codes from an array of lengths.
|
|
//
|
|
// |lens| IN The input array of code lengths.
|
|
// |max_bits| OUT The maximum number of bits used for the Huffman codes.
|
|
bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits);
|
|
|
|
// Creates the Huffman code to alphabet array.
|
|
// |lens| IN The input array of code lengths.
|
|
// |hcodes| OUT The Huffman to alphabet array.
|
|
// |max_bits| OUT The maximum number of bits used for the Huffman codes.
|
|
bool BuildHuffmanCodes(const Buffer& lens,
|
|
std::vector<uint16_t>* hcodes,
|
|
size_t* max_bits);
|
|
|
|
// Creates the alphabet to Huffman code array.
|
|
// |lens| IN The input array of code lengths.
|
|
// |rcodes| OUT The Huffman to Huffman array.
|
|
// |max_bits| OUT The maximum number of bits used for the Huffman codes.
|
|
bool BuildHuffmanReverseCodes(const Buffer& lens,
|
|
std::vector<uint16_t>* rcodes,
|
|
size_t* max_bits);
|
|
|
|
// Reads a specific Huffman code length array from input. At the same time
|
|
// writes the array into the puffed stream. The Huffman code length array is
|
|
// either the literal/lengths or distance codes.
|
|
//
|
|
// |br| IN The |BitReaderInterface| for reading the deflate
|
|
// stream.
|
|
// |buffer| OUT The array to write the Huffman table.
|
|
// |length| IN/OUT The length available in the |buffer| and in return it
|
|
// will be the length of data written into the |buffer|.
|
|
// |max_bits| IN The maximum number of bits in the Huffman codes.
|
|
// |num_codes| IN The size of the Huffman code length array in the input.
|
|
// |lens| OUT The resulting Huffman code length array.
|
|
bool BuildHuffmanCodeLengths(BitReaderInterface* br,
|
|
uint8_t* buffer,
|
|
size_t* length,
|
|
size_t max_bits,
|
|
size_t num_codes,
|
|
Buffer* lens);
|
|
|
|
// Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and
|
|
// writing into deflate stream.
|
|
//
|
|
// |buffer| IN The array to read the Huffman table from.
|
|
// |length| IN The length available in the |buffer|.
|
|
// |bw| IN/OUT The |BitWriterInterface| for writing into the deflate
|
|
// stream.
|
|
// |num_codes| IN Number of Huffman code lengths to read from the
|
|
// |buffer|.
|
|
// |lens| OUT The Huffman code lengths array.
|
|
bool BuildHuffmanCodeLengths(const uint8_t* buffer,
|
|
size_t* length,
|
|
BitWriterInterface* bw,
|
|
size_t num_codes,
|
|
Buffer* lens);
|
|
|
|
private:
|
|
// A utility struct used to create Huffman codes.
|
|
struct CodeIndexPair {
|
|
uint16_t code; // The Huffman code
|
|
uint16_t index; // The alphabet
|
|
};
|
|
// A vector with maximum size of 286 that is only uses as temporary space for
|
|
// building Huffman codes.
|
|
std::vector<CodeIndexPair> codeindexpairs_;
|
|
|
|
// Used in building Huffman codes for literals/lengths and distances.
|
|
std::vector<uint8_t> lit_len_lens_;
|
|
std::vector<uint16_t> lit_len_hcodes_;
|
|
std::vector<uint16_t> lit_len_rcodes_;
|
|
size_t lit_len_max_bits_;
|
|
std::vector<uint8_t> distance_lens_;
|
|
std::vector<uint16_t> distance_hcodes_;
|
|
std::vector<uint16_t> distance_rcodes_;
|
|
size_t distance_max_bits_;
|
|
|
|
// The reason for keeping a temporary buffer here is to avoid reallocing each
|
|
// time.
|
|
std::vector<uint8_t> tmp_lens_;
|
|
|
|
// Used in building Huffman codes for reading and decoding literal/length and
|
|
// distance Huffman code length arrays.
|
|
std::vector<uint8_t> code_lens_;
|
|
std::vector<uint16_t> code_hcodes_;
|
|
std::vector<uint16_t> code_rcodes_;
|
|
size_t code_max_bits_;
|
|
|
|
bool initialized_;
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(HuffmanTable);
|
|
};
|
|
|
|
// The type of a block in a deflate stream.
|
|
enum class BlockType : uint8_t {
|
|
kUncompressed = 0x00,
|
|
kFixed = 0x01,
|
|
kDynamic = 0x02,
|
|
};
|
|
|
|
std::string BlockTypeToString(BlockType type);
|
|
|
|
} // namespace puffin
|
|
|
|
#endif // SRC_HUFFMAN_TABLE_H_
|