aboutsummaryrefslogtreecommitdiff
path: root/lib/Support/Compressor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Support/Compressor.cpp')
-rw-r--r--lib/Support/Compressor.cpp489
1 files changed, 489 insertions, 0 deletions
diff --git a/lib/Support/Compressor.cpp b/lib/Support/Compressor.cpp
new file mode 100644
index 0000000000..1233cf4f0a
--- /dev/null
+++ b/lib/Support/Compressor.cpp
@@ -0,0 +1,489 @@
+//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Reid Spencer and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::Compressor class, an abstraction for memory
+// block compression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compressor.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cassert>
+#include <string>
+#include <ostream>
+#include "bzip2/bzlib.h"
+using namespace llvm;
+
+enum CompressionTypes {
+ COMP_TYPE_NONE = '0',
+ COMP_TYPE_BZIP2 = '2',
+};
+
+static int getdata(char*& buffer, size_t &size,
+ llvm::Compressor::OutputDataCallback* cb, void* context) {
+ buffer = 0;
+ size = 0;
+ int result = (*cb)(buffer, size, context);
+ assert(buffer != 0 && "Invalid result from Compressor callback");
+ assert(size != 0 && "Invalid result from Compressor callback");
+ return result;
+}
+
+static int getdata_uns(char*& buffer, unsigned &size,
+ llvm::Compressor::OutputDataCallback* cb, void* context)
+{
+ size_t SizeOut;
+ int Res = getdata(buffer, SizeOut, cb, context);
+ size = SizeOut;
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+//=== NULLCOMP - a compression like set of routines that just copies data
+//=== without doing any compression. This is provided so that if the
+//=== configured environment doesn't have a compression library the
+//=== program can still work, albeit using more data/memory.
+//===----------------------------------------------------------------------===//
+
+struct NULLCOMP_stream {
+ // User provided fields
+ char* next_in;
+ size_t avail_in;
+ char* next_out;
+ size_t avail_out;
+
+ // Information fields
+ size_t output_count; // Total count of output bytes
+};
+
+static void NULLCOMP_init(NULLCOMP_stream* s) {
+ s->output_count = 0;
+}
+
+static bool NULLCOMP_compress(NULLCOMP_stream* s) {
+ assert(s && "Invalid NULLCOMP_stream");
+ assert(s->next_in != 0);
+ assert(s->next_out != 0);
+ assert(s->avail_in >= 1);
+ assert(s->avail_out >= 1);
+
+ if (s->avail_out >= s->avail_in) {
+ ::memcpy(s->next_out, s->next_in, s->avail_in);
+ s->output_count += s->avail_in;
+ s->avail_out -= s->avail_in;
+ s->next_in += s->avail_in;
+ s->avail_in = 0;
+ return true;
+ } else {
+ ::memcpy(s->next_out, s->next_in, s->avail_out);
+ s->output_count += s->avail_out;
+ s->avail_in -= s->avail_out;
+ s->next_in += s->avail_out;
+ s->avail_out = 0;
+ return false;
+ }
+}
+
+static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
+ assert(s && "Invalid NULLCOMP_stream");
+ assert(s->next_in != 0);
+ assert(s->next_out != 0);
+ assert(s->avail_in >= 1);
+ assert(s->avail_out >= 1);
+
+ if (s->avail_out >= s->avail_in) {
+ ::memcpy(s->next_out, s->next_in, s->avail_in);
+ s->output_count += s->avail_in;
+ s->avail_out -= s->avail_in;
+ s->next_in += s->avail_in;
+ s->avail_in = 0;
+ return true;
+ } else {
+ ::memcpy(s->next_out, s->next_in, s->avail_out);
+ s->output_count += s->avail_out;
+ s->avail_in -= s->avail_out;
+ s->next_in += s->avail_out;
+ s->avail_out = 0;
+ return false;
+ }
+}
+
+static void NULLCOMP_end(NULLCOMP_stream* strm) {
+}
+
+namespace {
+
+/// This structure is only used when a bytecode file is compressed.
+/// As bytecode is being decompressed, the memory buffer might need
+/// to be reallocated. The buffer allocation is handled in a callback
+/// and this structure is needed to retain information across calls
+/// to the callback.
+/// @brief An internal buffer object used for handling decompression
+struct BufferContext {
+ char* buff;
+ size_t size;
+ BufferContext(size_t compressedSize) {
+ // Null to indicate malloc of a new block
+ buff = 0;
+
+ // Compute the initial length of the uncompression buffer. Note that this
+ // is twice the length of the compressed buffer and will be doubled again
+ // in the callback for an initial allocation of 4x compressedSize. This
+ // calculation is based on the typical compression ratio of bzip2 on LLVM
+ // bytecode files which typically ranges in the 50%-75% range. Since we
+ // typically get at least 50%, doubling is insufficient. By using a 4x
+ // multiplier on the first allocation, we minimize the impact of having to
+ // copy the buffer on reallocation.
+ size = compressedSize*2;
+ }
+
+ /// trimTo - Reduce the size of the buffer down to the specified amount. This
+ /// is useful after have read in the bytecode file to discard extra unused
+ /// memory.
+ ///
+ void trimTo(size_t NewSize) {
+ buff = (char*)::realloc(buff, NewSize);
+ size = NewSize;
+ }
+
+ /// This function handles allocation of the buffer used for decompression of
+ /// compressed bytecode files. It is called by Compressor::decompress which is
+ /// called by BytecodeReader::ParseBytecode.
+ static size_t callback(char*&buff, size_t &sz, void* ctxt){
+ // Case the context variable to our BufferContext
+ BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
+
+ // Compute the new, doubled, size of the block
+ size_t new_size = bc->size * 2;
+
+ // Extend or allocate the block (realloc(0,n) == malloc(n))
+ char* new_buff = (char*) ::realloc(bc->buff, new_size);
+
+ // Figure out what to return to the Compressor. If this is the first call,
+ // then bc->buff will be null. In this case we want to return the entire
+ // buffer because there was no previous allocation. Otherwise, when the
+ // buffer is reallocated, we save the new base pointer in the
+ // BufferContext.buff field but return the address of only the extension,
+ // mid-way through the buffer (since its size was doubled). Furthermore,
+ // the sz result must be 1/2 the total size of the buffer.
+ if (bc->buff == 0 ) {
+ buff = bc->buff = new_buff;
+ sz = new_size;
+ } else {
+ bc->buff = new_buff;
+ buff = new_buff + bc->size;
+ sz = bc->size;
+ }
+
+ // Retain the size of the allocated block
+ bc->size = new_size;
+
+ // Make sure we fail (return 1) if we didn't get any memory.
+ return (bc->buff == 0 ? 1 : 0);
+ }
+};
+
+} // end anonymous namespace
+
+
+namespace {
+
+// This structure retains the context when compressing the bytecode file. The
+// WriteCompressedData function below uses it to keep track of the previously
+// filled chunk of memory (which it writes) and how many bytes have been
+// written.
+struct WriterContext {
+ // Initialize the context
+ WriterContext(std::ostream*OS, size_t CS)
+ : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
+
+ // Make sure we clean up memory
+ ~WriterContext() {
+ if (chunk)
+ delete [] chunk;
+ }
+
+ // Write the chunk
+ void write(size_t size = 0) {
+ size_t write_size = (size == 0 ? sz : size);
+ Out->write(chunk,write_size);
+ written += write_size;
+ delete [] chunk;
+ chunk = 0;
+ sz = 0;
+ }
+
+ // This function is a callback used by the Compressor::compress function to
+ // allocate memory for the compression buffer. This function fulfills that
+ // responsibility but also writes the previous (now filled) buffer out to the
+ // stream.
+ static size_t callback(char*& buffer, size_t &size, void* context) {
+ // Cast the context to the structure it must point to.
+ WriterContext* ctxt = reinterpret_cast<WriterContext*>(context);
+
+ // If there's a previously allocated chunk, it must now be filled with
+ // compressed data, so we write it out and deallocate it.
+ if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
+ ctxt->write();
+ }
+
+ // Compute the size of the next chunk to allocate. We attempt to allocate
+ // enough memory to handle the compression in a single memory allocation. In
+ // general, the worst we do on compression of bytecode is about 50% so we
+ // conservatively estimate compSize / 2 as the size needed for the
+ // compression buffer. compSize is the size of the compressed data, provided
+ // by WriteBytecodeToFile.
+ size = ctxt->sz = ctxt->compSize / 2;
+
+ // Allocate the chunks
+ buffer = ctxt->chunk = new char [size];
+
+ // We must return 1 if the allocation failed so that the Compressor knows
+ // not to use the buffer pointer.
+ return (ctxt->chunk == 0 ? 1 : 0);
+ }
+
+ char* chunk; // pointer to the chunk of memory filled by compression
+ size_t sz; // size of chunk
+ size_t written; // aggregate total of bytes written in all chunks
+ size_t compSize; // size of the uncompressed buffer
+ std::ostream* Out; // The stream we write the data to.
+};
+
+} // end anonymous namespace
+
+// Compress in one of three ways
+size_t Compressor::compress(const char* in, size_t size,
+ OutputDataCallback* cb, void* context) {
+ assert(in && "Can't compress null buffer");
+ assert(size && "Can't compress empty buffer");
+ assert(cb && "Can't compress without a callback function");
+
+ size_t result = 0;
+
+ // For small files, we just don't bother compressing. bzip2 isn't very good
+ // with tiny files and can actually make the file larger, so we just avoid
+ // it altogether.
+ if (size > 64*1024) {
+ // Set up the bz_stream
+ bz_stream bzdata;
+ bzdata.bzalloc = 0;
+ bzdata.bzfree = 0;
+ bzdata.opaque = 0;
+ bzdata.next_in = (char*)in;
+ bzdata.avail_in = size;
+ bzdata.next_out = 0;
+ bzdata.avail_out = 0;
+ switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
+ case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
+ case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
+ case BZ_MEM_ERROR: throw std::string("Out of memory");
+ case BZ_OK:
+ default:
+ break;
+ }
+
+ // Get a block of memory
+ if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
+ BZ2_bzCompressEnd(&bzdata);
+ throw std::string("Can't allocate output buffer");
+ }
+
+ // Put compression code in first byte
+ (*bzdata.next_out++) = COMP_TYPE_BZIP2;
+ bzdata.avail_out--;
+
+ // Compress it
+ int bzerr = BZ_FINISH_OK;
+ while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
+ if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
+ BZ2_bzCompressEnd(&bzdata);
+ throw std::string("Can't allocate output buffer");
+ }
+ }
+ switch (bzerr) {
+ case BZ_SEQUENCE_ERROR:
+ case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
+ case BZ_FINISH_OK:
+ case BZ_STREAM_END: break;
+ default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
+ }
+
+ // Finish
+ result = bzdata.total_out_lo32 + 1;
+ if (sizeof(size_t) == sizeof(uint64_t))
+ result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32;
+
+ BZ2_bzCompressEnd(&bzdata);
+ } else {
+ // Do null compression, for small files
+ NULLCOMP_stream sdata;
+ sdata.next_in = (char*)in;
+ sdata.avail_in = size;
+ NULLCOMP_init(&sdata);
+
+ if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
+ throw std::string("Can't allocate output buffer");
+ }
+
+ *(sdata.next_out++) = COMP_TYPE_NONE;
+ sdata.avail_out--;
+
+ while (!NULLCOMP_compress(&sdata)) {
+ if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
+ throw std::string("Can't allocate output buffer");
+ }
+ }
+
+ result = sdata.output_count + 1;
+ NULLCOMP_end(&sdata);
+ }
+ return result;
+}
+
+size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) {
+ BufferContext bc(size);
+ size_t result = compress(in,size,BufferContext::callback,(void*)&bc);
+ bc.trimTo(result);
+ out = bc.buff;
+ return result;
+}
+
+size_t
+Compressor::compressToStream(const char*in, size_t size, std::ostream& out) {
+ // Set up the context and writer
+ WriterContext ctxt(&out, size / 2);
+
+ // Compress everything after the magic number (which we'll alter).
+ size_t zipSize = Compressor::compress(in,size,
+ WriterContext::callback, (void*)&ctxt);
+
+ if (ctxt.chunk) {
+ ctxt.write(zipSize - ctxt.written);
+ }
+ return zipSize;
+}
+
+// Decompress in one of three ways
+size_t Compressor::decompress(const char *in, size_t size,
+ OutputDataCallback* cb, void* context) {
+ assert(in && "Can't decompress null buffer");
+ assert(size > 1 && "Can't decompress empty buffer");
+ assert(cb && "Can't decompress without a callback function");
+
+ size_t result = 0;
+
+ switch (*in++) {
+ case COMP_TYPE_BZIP2: {
+ // Set up the bz_stream
+ bz_stream bzdata;
+ bzdata.bzalloc = 0;
+ bzdata.bzfree = 0;
+ bzdata.opaque = 0;
+ bzdata.next_in = (char*)in;
+ bzdata.avail_in = size - 1;
+ bzdata.next_out = 0;
+ bzdata.avail_out = 0;
+ switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
+ case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
+ case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
+ case BZ_MEM_ERROR: throw std::string("Out of memory");
+ case BZ_OK:
+ default:
+ break;
+ }
+
+ // Get a block of memory
+ if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
+ BZ2_bzDecompressEnd(&bzdata);
+ throw std::string("Can't allocate output buffer");
+ }
+
+ // Decompress it
+ int bzerr = BZ_OK;
+ while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) &&
+ bzdata.avail_in != 0 ) {
+ if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
+ BZ2_bzDecompressEnd(&bzdata);
+ throw std::string("Can't allocate output buffer");
+ }
+ }
+
+ switch (bzerr) {
+ case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
+ case BZ_MEM_ERROR: throw std::string("Out of memory");
+ case BZ_DATA_ERROR: throw std::string("Data integrity error");
+ case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
+ case BZ_OK: throw std::string("Insufficient input for bzip2");
+ case BZ_STREAM_END: break;
+ default: throw("Ooops");
+ }
+
+
+ // Finish
+ result = bzdata.total_out_lo32;
+ if (sizeof(size_t) == sizeof(uint64_t))
+ result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32);
+ BZ2_bzDecompressEnd(&bzdata);
+ break;
+ }
+
+ case COMP_TYPE_NONE: {
+ NULLCOMP_stream sdata;
+ sdata.next_in = (char*)in;
+ sdata.avail_in = size - 1;
+ NULLCOMP_init(&sdata);
+
+ if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
+ throw std::string("Can't allocate output buffer");
+ }
+
+ while (!NULLCOMP_decompress(&sdata)) {
+ if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
+ throw std::string("Can't allocate output buffer");
+ }
+ }
+
+ result = sdata.output_count;
+ NULLCOMP_end(&sdata);
+ break;
+ }
+
+ default:
+ throw std::string("Unknown type of compressed data");
+ }
+
+ return result;
+}
+
+size_t
+Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) {
+ BufferContext bc(size);
+ size_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
+ out = bc.buff;
+ return result;
+}
+
+size_t
+Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){
+ // Set up the context and writer
+ WriterContext ctxt(&out,size / 2);
+
+ // Decompress everything after the magic number (which we'll alter)
+ size_t zipSize = Compressor::decompress(in,size,
+ WriterContext::callback, (void*)&ctxt);
+
+ if (ctxt.chunk) {
+ ctxt.write(zipSize - ctxt.written);
+ }
+ return zipSize;
+}
+
+// vim: sw=2 ai