diff options
Diffstat (limited to 'lib/Support/Compressor.cpp')
-rw-r--r-- | lib/Support/Compressor.cpp | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/lib/Support/Compressor.cpp b/lib/Support/Compressor.cpp new file mode 100644 index 0000000000..1233cf4f0a --- /dev/null +++ b/lib/Support/Compressor.cpp @@ -0,0 +1,489 @@ +//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm::Compressor class, an abstraction for memory +// block compression. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/Support/Compressor.h" +#include "llvm/ADT/StringExtras.h" +#include <cassert> +#include <string> +#include <ostream> +#include "bzip2/bzlib.h" +using namespace llvm; + +enum CompressionTypes { + COMP_TYPE_NONE = '0', + COMP_TYPE_BZIP2 = '2', +}; + +static int getdata(char*& buffer, size_t &size, + llvm::Compressor::OutputDataCallback* cb, void* context) { + buffer = 0; + size = 0; + int result = (*cb)(buffer, size, context); + assert(buffer != 0 && "Invalid result from Compressor callback"); + assert(size != 0 && "Invalid result from Compressor callback"); + return result; +} + +static int getdata_uns(char*& buffer, unsigned &size, + llvm::Compressor::OutputDataCallback* cb, void* context) +{ + size_t SizeOut; + int Res = getdata(buffer, SizeOut, cb, context); + size = SizeOut; + return Res; +} + +//===----------------------------------------------------------------------===// +//=== NULLCOMP - a compression like set of routines that just copies data +//=== without doing any compression. This is provided so that if the +//=== configured environment doesn't have a compression library the +//=== program can still work, albeit using more data/memory. +//===----------------------------------------------------------------------===// + +struct NULLCOMP_stream { + // User provided fields + char* next_in; + size_t avail_in; + char* next_out; + size_t avail_out; + + // Information fields + size_t output_count; // Total count of output bytes +}; + +static void NULLCOMP_init(NULLCOMP_stream* s) { + s->output_count = 0; +} + +static bool NULLCOMP_compress(NULLCOMP_stream* s) { + assert(s && "Invalid NULLCOMP_stream"); + assert(s->next_in != 0); + assert(s->next_out != 0); + assert(s->avail_in >= 1); + assert(s->avail_out >= 1); + + if (s->avail_out >= s->avail_in) { + ::memcpy(s->next_out, s->next_in, s->avail_in); + s->output_count += s->avail_in; + s->avail_out -= s->avail_in; + s->next_in += s->avail_in; + s->avail_in = 0; + return true; + } else { + ::memcpy(s->next_out, s->next_in, s->avail_out); + s->output_count += s->avail_out; + s->avail_in -= s->avail_out; + s->next_in += s->avail_out; + s->avail_out = 0; + return false; + } +} + +static bool NULLCOMP_decompress(NULLCOMP_stream* s) { + assert(s && "Invalid NULLCOMP_stream"); + assert(s->next_in != 0); + assert(s->next_out != 0); + assert(s->avail_in >= 1); + assert(s->avail_out >= 1); + + if (s->avail_out >= s->avail_in) { + ::memcpy(s->next_out, s->next_in, s->avail_in); + s->output_count += s->avail_in; + s->avail_out -= s->avail_in; + s->next_in += s->avail_in; + s->avail_in = 0; + return true; + } else { + ::memcpy(s->next_out, s->next_in, s->avail_out); + s->output_count += s->avail_out; + s->avail_in -= s->avail_out; + s->next_in += s->avail_out; + s->avail_out = 0; + return false; + } +} + +static void NULLCOMP_end(NULLCOMP_stream* strm) { +} + +namespace { + +/// This structure is only used when a bytecode file is compressed. +/// As bytecode is being decompressed, the memory buffer might need +/// to be reallocated. The buffer allocation is handled in a callback +/// and this structure is needed to retain information across calls +/// to the callback. +/// @brief An internal buffer object used for handling decompression +struct BufferContext { + char* buff; + size_t size; + BufferContext(size_t compressedSize) { + // Null to indicate malloc of a new block + buff = 0; + + // Compute the initial length of the uncompression buffer. Note that this + // is twice the length of the compressed buffer and will be doubled again + // in the callback for an initial allocation of 4x compressedSize. This + // calculation is based on the typical compression ratio of bzip2 on LLVM + // bytecode files which typically ranges in the 50%-75% range. Since we + // typically get at least 50%, doubling is insufficient. By using a 4x + // multiplier on the first allocation, we minimize the impact of having to + // copy the buffer on reallocation. + size = compressedSize*2; + } + + /// trimTo - Reduce the size of the buffer down to the specified amount. This + /// is useful after have read in the bytecode file to discard extra unused + /// memory. + /// + void trimTo(size_t NewSize) { + buff = (char*)::realloc(buff, NewSize); + size = NewSize; + } + + /// This function handles allocation of the buffer used for decompression of + /// compressed bytecode files. It is called by Compressor::decompress which is + /// called by BytecodeReader::ParseBytecode. + static size_t callback(char*&buff, size_t &sz, void* ctxt){ + // Case the context variable to our BufferContext + BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt); + + // Compute the new, doubled, size of the block + size_t new_size = bc->size * 2; + + // Extend or allocate the block (realloc(0,n) == malloc(n)) + char* new_buff = (char*) ::realloc(bc->buff, new_size); + + // Figure out what to return to the Compressor. If this is the first call, + // then bc->buff will be null. In this case we want to return the entire + // buffer because there was no previous allocation. Otherwise, when the + // buffer is reallocated, we save the new base pointer in the + // BufferContext.buff field but return the address of only the extension, + // mid-way through the buffer (since its size was doubled). Furthermore, + // the sz result must be 1/2 the total size of the buffer. + if (bc->buff == 0 ) { + buff = bc->buff = new_buff; + sz = new_size; + } else { + bc->buff = new_buff; + buff = new_buff + bc->size; + sz = bc->size; + } + + // Retain the size of the allocated block + bc->size = new_size; + + // Make sure we fail (return 1) if we didn't get any memory. + return (bc->buff == 0 ? 1 : 0); + } +}; + +} // end anonymous namespace + + +namespace { + +// This structure retains the context when compressing the bytecode file. The +// WriteCompressedData function below uses it to keep track of the previously +// filled chunk of memory (which it writes) and how many bytes have been +// written. +struct WriterContext { + // Initialize the context + WriterContext(std::ostream*OS, size_t CS) + : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {} + + // Make sure we clean up memory + ~WriterContext() { + if (chunk) + delete [] chunk; + } + + // Write the chunk + void write(size_t size = 0) { + size_t write_size = (size == 0 ? sz : size); + Out->write(chunk,write_size); + written += write_size; + delete [] chunk; + chunk = 0; + sz = 0; + } + + // This function is a callback used by the Compressor::compress function to + // allocate memory for the compression buffer. This function fulfills that + // responsibility but also writes the previous (now filled) buffer out to the + // stream. + static size_t callback(char*& buffer, size_t &size, void* context) { + // Cast the context to the structure it must point to. + WriterContext* ctxt = reinterpret_cast<WriterContext*>(context); + + // If there's a previously allocated chunk, it must now be filled with + // compressed data, so we write it out and deallocate it. + if (ctxt->chunk != 0 && ctxt->sz > 0 ) { + ctxt->write(); + } + + // Compute the size of the next chunk to allocate. We attempt to allocate + // enough memory to handle the compression in a single memory allocation. In + // general, the worst we do on compression of bytecode is about 50% so we + // conservatively estimate compSize / 2 as the size needed for the + // compression buffer. compSize is the size of the compressed data, provided + // by WriteBytecodeToFile. + size = ctxt->sz = ctxt->compSize / 2; + + // Allocate the chunks + buffer = ctxt->chunk = new char [size]; + + // We must return 1 if the allocation failed so that the Compressor knows + // not to use the buffer pointer. + return (ctxt->chunk == 0 ? 1 : 0); + } + + char* chunk; // pointer to the chunk of memory filled by compression + size_t sz; // size of chunk + size_t written; // aggregate total of bytes written in all chunks + size_t compSize; // size of the uncompressed buffer + std::ostream* Out; // The stream we write the data to. +}; + +} // end anonymous namespace + +// Compress in one of three ways +size_t Compressor::compress(const char* in, size_t size, + OutputDataCallback* cb, void* context) { + assert(in && "Can't compress null buffer"); + assert(size && "Can't compress empty buffer"); + assert(cb && "Can't compress without a callback function"); + + size_t result = 0; + + // For small files, we just don't bother compressing. bzip2 isn't very good + // with tiny files and can actually make the file larger, so we just avoid + // it altogether. + if (size > 64*1024) { + // Set up the bz_stream + bz_stream bzdata; + bzdata.bzalloc = 0; + bzdata.bzfree = 0; + bzdata.opaque = 0; + bzdata.next_in = (char*)in; + bzdata.avail_in = size; + bzdata.next_out = 0; + bzdata.avail_out = 0; + switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) { + case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); + case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); + case BZ_MEM_ERROR: throw std::string("Out of memory"); + case BZ_OK: + default: + break; + } + + // Get a block of memory + if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { + BZ2_bzCompressEnd(&bzdata); + throw std::string("Can't allocate output buffer"); + } + + // Put compression code in first byte + (*bzdata.next_out++) = COMP_TYPE_BZIP2; + bzdata.avail_out--; + + // Compress it + int bzerr = BZ_FINISH_OK; + while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) { + if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { + BZ2_bzCompressEnd(&bzdata); + throw std::string("Can't allocate output buffer"); + } + } + switch (bzerr) { + case BZ_SEQUENCE_ERROR: + case BZ_PARAM_ERROR: throw std::string("Param/Sequence error"); + case BZ_FINISH_OK: + case BZ_STREAM_END: break; + default: throw std::string("Oops: ") + utostr(unsigned(bzerr)); + } + + // Finish + result = bzdata.total_out_lo32 + 1; + if (sizeof(size_t) == sizeof(uint64_t)) + result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32; + + BZ2_bzCompressEnd(&bzdata); + } else { + // Do null compression, for small files + NULLCOMP_stream sdata; + sdata.next_in = (char*)in; + sdata.avail_in = size; + NULLCOMP_init(&sdata); + + if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { + throw std::string("Can't allocate output buffer"); + } + + *(sdata.next_out++) = COMP_TYPE_NONE; + sdata.avail_out--; + + while (!NULLCOMP_compress(&sdata)) { + if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { + throw std::string("Can't allocate output buffer"); + } + } + + result = sdata.output_count + 1; + NULLCOMP_end(&sdata); + } + return result; +} + +size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) { + BufferContext bc(size); + size_t result = compress(in,size,BufferContext::callback,(void*)&bc); + bc.trimTo(result); + out = bc.buff; + return result; +} + +size_t +Compressor::compressToStream(const char*in, size_t size, std::ostream& out) { + // Set up the context and writer + WriterContext ctxt(&out, size / 2); + + // Compress everything after the magic number (which we'll alter). + size_t zipSize = Compressor::compress(in,size, + WriterContext::callback, (void*)&ctxt); + + if (ctxt.chunk) { + ctxt.write(zipSize - ctxt.written); + } + return zipSize; +} + +// Decompress in one of three ways +size_t Compressor::decompress(const char *in, size_t size, + OutputDataCallback* cb, void* context) { + assert(in && "Can't decompress null buffer"); + assert(size > 1 && "Can't decompress empty buffer"); + assert(cb && "Can't decompress without a callback function"); + + size_t result = 0; + + switch (*in++) { + case COMP_TYPE_BZIP2: { + // Set up the bz_stream + bz_stream bzdata; + bzdata.bzalloc = 0; + bzdata.bzfree = 0; + bzdata.opaque = 0; + bzdata.next_in = (char*)in; + bzdata.avail_in = size - 1; + bzdata.next_out = 0; + bzdata.avail_out = 0; + switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) { + case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); + case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); + case BZ_MEM_ERROR: throw std::string("Out of memory"); + case BZ_OK: + default: + break; + } + + // Get a block of memory + if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { + BZ2_bzDecompressEnd(&bzdata); + throw std::string("Can't allocate output buffer"); + } + + // Decompress it + int bzerr = BZ_OK; + while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) && + bzdata.avail_in != 0 ) { + if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { + BZ2_bzDecompressEnd(&bzdata); + throw std::string("Can't allocate output buffer"); + } + } + + switch (bzerr) { + case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); + case BZ_MEM_ERROR: throw std::string("Out of memory"); + case BZ_DATA_ERROR: throw std::string("Data integrity error"); + case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2"); + case BZ_OK: throw std::string("Insufficient input for bzip2"); + case BZ_STREAM_END: break; + default: throw("Ooops"); + } + + + // Finish + result = bzdata.total_out_lo32; + if (sizeof(size_t) == sizeof(uint64_t)) + result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32); + BZ2_bzDecompressEnd(&bzdata); + break; + } + + case COMP_TYPE_NONE: { + NULLCOMP_stream sdata; + sdata.next_in = (char*)in; + sdata.avail_in = size - 1; + NULLCOMP_init(&sdata); + + if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { + throw std::string("Can't allocate output buffer"); + } + + while (!NULLCOMP_decompress(&sdata)) { + if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { + throw std::string("Can't allocate output buffer"); + } + } + + result = sdata.output_count; + NULLCOMP_end(&sdata); + break; + } + + default: + throw std::string("Unknown type of compressed data"); + } + + return result; +} + +size_t +Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) { + BufferContext bc(size); + size_t result = decompress(in,size,BufferContext::callback,(void*)&bc); + out = bc.buff; + return result; +} + +size_t +Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){ + // Set up the context and writer + WriterContext ctxt(&out,size / 2); + + // Decompress everything after the magic number (which we'll alter) + size_t zipSize = Compressor::decompress(in,size, + WriterContext::callback, (void*)&ctxt); + + if (ctxt.chunk) { + ctxt.write(zipSize - ctxt.written); + } + return zipSize; +} + +// vim: sw=2 ai |