/*
* LZMA2 decoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include "xz_private.h"
#include "xz_lzma2.h"
/*
* Range decoder initialization eats the first five bytes of each LZMA chunk.
*/
#define RC_INIT_BYTES 5
/*
* Minimum number of usable input buffer to safely decode one LZMA symbol.
* The worst case is that we decode 22 bits using probabilities and 26
* direct bits. This may decode at maximum of 20 bytes of input. However,
* lzma_main() does an extra normalization before returning, thus we
* need to put 21 here.
*/
#define LZMA_IN_REQUIRED 21
/*
* Dictionary (history buffer)
*
* These are always true:
* start <= pos <= full <= end
* pos <= limit <= end
*
* In multi-call mode, also these are true:
* end == size
* size <= size_max
* allocated <= size
*
* Most of these variables are size_t to support single-call mode,
* in which the dictionary variables address the actual output
* buffer directly.
*/
struct dictionary {
/* Beginning of the history buffer */
uint8_t *buf;
/* Old position in buf (before decoding more data) */
size_t start;
/* Position in buf */
size_t pos;
/*
* How full dictionary is. This is used to detect corrupt input that
* would read beyond the beginning of the uncompressed stream.
*/
size_t full;
/* Write limit; we don't write to buf[limit] or later bytes. */
size_t limit;
/*
* End of the dictionary buffer. In multi-call mode, this is
* the same as the dictionary size. In single-call mode, this
* indicates the size of the output buffer.
*/
size_t end;
/*
* Size of the dictionary as specified in Block Header. This is used
* together with "full" to detect corrupt input that would make us
* read beyond the beginning of the uncompressed stream.
*/
uint32_t size;
/*
* Maximum allowed dictionary size in multi-call mode.
* This is ignored in single-call mode.
*/
uint32_t size_max;
/*
* Amount of memory currently allocated for the dictionary.
* This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
* size_max is always the same as the allocated size.)
*/
uint32_t allocated;
/* Operation mode */
enum xz_mode mode;
};
/* Range decoder */
struct rc_dec {
uint32_t range;
uint32_t code;
/*
* Number of initializing bytes remaining to be read
* by rc_read_init().
*/
uint32_t init_bytes_left;
/*
* Buffer from which we read our input. It can be either
* temp.buf or the caller-provided input buffer.
*/
const uint8_t *in;
size_t in_pos;
size_t in_limit;
};
/* Probabilities for a length decoder. */
struct lzma_len_dec {
/* Probability of match length being at least 10 */
uint16_t choice;
/* Probability of match length being at least 18 */
uint16_t choice2;
/* Probabilities for match lengths 2-9 */
uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
/* Probabilities for match lengths 10-17 */
uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
/* Probabilities for match lengths 18-273 */
uint16_t high[LEN_HIGH_SYMBOLS];
};
struct lzma_dec {
/* Distances of latest four matches */
uint32_t rep0;
uint32_t rep1;
uint32_t rep2;
uint32_t rep3;
/* Types of the most recently seen LZMA symbols */
enum lzma_state state;
/*
* Length of a match. This is updated so that dict_repeat can
* be called again to finish repeating the whole match.
*/
uint32_t len;
/*
* LZMA properties or related bit masks (number of literal
* context bits, a mask dervied from the number of literal
* position bits, and a mask dervied from the number
* position bits)
*/
uint32_t lc;
uint32_t literal_pos_mask; /* (1 << lp) - 1 */
uint32_t pos_mask; /* (1 << pb) - 1 */
/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
uint16_t is_match[STATES][POS_STATES_MAX];
/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
uint16_t is_rep[STATES];
/*
* If 0, distance of a repeated match is rep0.
* Otherwise check is_rep1.
*/
uint16_t is_rep0[STATES];
/*
* If 0, distance of a repeated match is rep1.
* Otherwise check is_rep2.
*/
uint16_t is_rep1[STATES];
/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
uint16_t is_rep2[STATES];
/*
* If 1, the repeated match has length of one byte. Otherwise
* the length is decoded from rep_len_decoder.
*/
uint16_t is_rep0_long[STATES][POS_STATES_MAX];
/*
* Probability tree for the highest two bits of the match
* distance. There is a separate probability tree for match
* lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
*/
uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
/*
* Probility trees for additional bits for match distance
* when the distance is in the range [4, 127].
*/
uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
/*
* Probability tree for the lowest four bits of a match
* distance that is equal to or greater than 128.
*/
uint16_t dist_align[ALIGN_SIZE];
/* Length of a normal match */
struct lzma_len_dec match_len_dec;
/* Length of a repeated match */
struct lzma_len_dec rep_len_dec;
/* Probabilities of literals */
uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};
struct lzma2_dec {
/* Position in xz_dec_lzma2_run(). */
enum lzma2_seq {
SEQ_CONTROL,
SEQ_UNCOMPRESSED_1,
SEQ_UNCOMPRESSED_2,
SEQ_COMPRESSED_0,
SEQ_COMPRESSED_1,
SEQ_PROPERTIES