/* Lzip - Data compressor based on the LZMA algorithm
Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Return values: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lzip to panic.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__MSVCRT__)
#include
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define SIGHUP SIGTERM
#define S_ISSOCK(x) 0
#define S_IRGRP 0
#define S_IWGRP 0
#define S_IROTH 0
#define S_IWOTH 0
#endif
#if defined(__OS2__)
#include
#endif
#include "lzip.h"
#include "decoder.h"
#if !DECODER_ONLY
#include "encoder.h"
#include "fast_encoder.h"
#endif
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
// Write the diagnostic `p` to stderr as one complete line.
// The trailing newline keeps consecutive diagnostics from running together.
// A null `p` prints nothing; callers in this file rely on that when they
// invoke pp() with no message right before composing their own fprintf line
// (the no-argument form needs a default declared outside this file).
void pp(const char *p) { if (p) fprintf(stderr, "%s\n", p); }
namespace {
// Program identity strings: Program_name heads the help and version texts,
// program_name prefixes diagnostics, program_year fills the copyright line.
const char * const Program_name = "Lzip";
const char * const program_name = "lzip";
const char * const program_year = "2011";
// Name the program was started as; main() assigns argv[0] to it.
const char * invocation_name = 0;
// Binary-mode flag: O_BINARY where the platform defines it, otherwise 0.
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
// Table of file-name suffix pairs (`from` -> `to`), ended by a { 0, 0 } entry.
// Presumably maps a compressed-file extension to the extension used for the
// decompressed name; nothing in the visible code reads it -- TODO confirm.
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".lz", "" },
{ ".tlz", ".tar" },
{ 0, 0 } };
// Encoder tuning parameters. main() picks one preset from its option_mapping
// table and hands it to compress(), which validates both fields.
struct Lzma_options
{
int dictionary_size; // 4KiB..512MiB
int match_len_limit; // 5..273
};
// Operating mode of the program.
enum Mode { m_compress, m_decompress, m_test };
// Descriptor the encoders/decoder write to; negative means "no output".
int outfd = -1;
// Message level: diagnostics are printed when >= 0, progress/statistics when
// >= 1. main() sets it to -1 for 'q' and raises it (up to 4) for each 'v'.
int verbosity = 0;
// Cleared by close_and_set_permissions(). compress()/fcompress() switch to a
// new volume, and main() calls close_and_set_permissions(), only while it is
// true. Presumably marks an output file to remove on interruption; no such
// handler is visible here -- TODO confirm.
bool delete_output_on_interrupt = false;
// Print the usage summary and the option list to stdout.
// Options that take a value show their argument placeholder (<n>, <file>),
// so lines such as "--output=<file> ... place the output into <file>" read as
// complete sentences.
void show_help()
  {
  printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name );
  printf( "<< Most of these are unsupported. Compressing/decompressing from stdin to stdout is the right way! >>\n" );
  printf( "\nUsage: %s [options] [files]\n", invocation_name );
  printf( "\nOptions:\n" );
  printf( " -h, --help display this help and exit\n" );
  printf( " -V, --version output version information and exit\n" );
  printf( " -b, --member-size=<n> set member size limit in bytes\n" );
  printf( " -c, --stdout send output to standard output\n" );
  printf( " -d, --decompress decompress\n" );
  printf( " -f, --force overwrite existing output files\n" );
  printf( " -F, --recompress force recompression of compressed files\n" );
  printf( " -k, --keep keep (don't delete) input files\n" );
  printf( " -m, --match-length=<n> set match length limit in bytes [36]\n" );
  printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
  printf( " -q, --quiet suppress all messages\n" );
  printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
  printf( " -S, --volume-size=<n> set volume size limit in bytes\n" );
  printf( " -t, --test test compressed file integrity\n" );
  printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" );
  printf( " -0 .. -9 set compression level [default 6]\n" );
  printf( " --fast alias for -0\n" );
  printf( " --best alias for -9\n" );
  printf( "If no file names are given, %s compresses or decompresses\n", program_name );
  printf( "from standard input to standard output.\n" );
  printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
  printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
  printf( "\nReport bugs to lzip-bug@nongnu.org\n" );
  printf( "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
  }
// Print the program name, version, copyright and licence notice to stdout.
// The licence line carries the URL of the licence text it refers to.
void show_version()
  {
  printf( "%s %s\n", Program_name, PROGVERSION );
  printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
  printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n" );
  printf( "This is free software: you are free to change and redistribute it.\n" );
  printf( "There is NO WARRANTY, to the extent permitted by law.\n" );
  }
// Format `num` for display using binary (1024-based) prefixes.
//
// The value is divided by 1024 while it has more than four digits, or while
// it is an exact multiple of 1024 that is at least 1024 in magnitude; the
// matching prefix ("Ki", "Mi", ...) is appended after a space. With no
// prefix the result still ends in that space (e.g. "0 ").
//
// Returns a pointer to a static buffer that the next call overwrites.
const char * format_num( long long num )
  {
  const char * const prefix[8] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  enum { buf_size = 16, factor = 1024 };
  static char buf[buf_size];
  const char *p = "";

  for( int i = 0; i < 8; ++i )
    {
    // Two-sided range tests replace llabs(), whose result is undefined for
    // LLONG_MIN; every long long value is now scaled correctly.
    const bool too_long = ( num > 9999 || num < -9999 );
    const bool exact_multiple =
      ( ( num >= factor || num <= -factor ) && num % factor == 0 );
    if( !too_long && !exact_multiple ) break;
    num /= factor;
    p = prefix[i];
    }
  snprintf( buf, buf_size, "%lld %s", num, p );
  return buf;
  }
// Placeholder for opening an output file: opens nothing and always reports
// failure. `force` is ignored.
bool open_outstream( const bool force )
  {
  (void)force;
  return false;
  }
// Refuse to exchange compressed data with a terminal.
// When compressing, the global `outfd` (if open) must not be a tty; when
// decompressing or testing, `infd` must not be a tty.
// Returns false after printing an error (with the --help hint) when the
// check fails, true otherwise.
bool check_tty( const int infd, const Mode program_mode )
  {
  switch( program_mode )
    {
    case m_compress:
      if( outfd >= 0 && isatty( outfd ) )
        {
        show_error( "I won't write compressed data to a terminal.", 0, true );
        return false;
        }
      break;
    case m_decompress:
    case m_test:
      if( isatty( infd ) )
        {
        show_error( "I won't read compressed data from a terminal.", 0, true );
        return false;
        }
      break;
    }
  return true;
  }
// Terminate the process immediately with exit status `retval`.
// Does not return.
void cleanup_and_fail( const int retval )
  { exit( retval ); }
// Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp )
{
bool error = false;
if( in_statsp )
{
if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
errno != EPERM ) ||
fchmod( outfd, in_statsp->st_mode ) != 0 )
error = true;
// fchown will in many cases return with EPERM, which can be safely ignored.
}
if( close( outfd ) == 0 ) outfd = -1;
else cleanup_and_fail( 1 );
delete_output_on_interrupt = false;
if( !in_statsp ) return;
if( !error )
{
struct utimbuf t;
t.actime = in_statsp->st_atime;
t.modtime = in_statsp->st_mtime;
//if( utime( output_filename.c_str(), &t ) != 0 ) error = true;
}
if( error )
{
show_error( "Can't change output file attributes." );
cleanup_and_fail( 1 );
}
}
// Placeholder for advancing to the next volume file name: there is never a
// next name, so it always returns false.
bool next_filename()
  { return false; }
#if !DECODER_ONLY
int compress( const long long member_size, const long long volume_size,
const Lzma_options & encoder_options, const int infd,
const struct stat * const in_statsp )
{
File_header header;
header.set_magic();
if( !header.dictionary_size( encoder_options.dictionary_size ) ||
encoder_options.match_len_limit < min_match_len_limit ||
encoder_options.match_len_limit > max_match_len )
internal_error( "invalid argument to encoder" );
int retval = 0;
Matchfinder matchfinder( header.dictionary_size(),
encoder_options.match_len_limit, infd );
header.dictionary_size( matchfinder.dictionary_size() );
long long in_size = 0, out_size = 0, partial_volume_size = 0;
while( true ) // encode one member per iteration
{
LZ_encoder encoder( matchfinder, header, outfd );
const long long size =
min( member_size, volume_size - partial_volume_size );
if( !encoder.encode_member( size ) )
{ pp( "Encoder error" ); retval = 1; break; }
in_size += matchfinder.data_position();
out_size += encoder.member_position();
if( matchfinder.finished() ) break;
partial_volume_size += encoder.member_position();
if( partial_volume_size >= volume_size - min_dictionary_size )
{
partial_volume_size = 0;
if( delete_output_on_interrupt )
{
close_and_set_permissions( in_statsp );
if( !next_filename() )
{ pp( "Too many volume files" ); retval = 1; break; }
if( !open_outstream( true ) ) { retval = 1; break; }
delete_output_on_interrupt = true;
}
}
matchfinder.reset();
}
if( retval == 0 && verbosity >= 1 )
{
if( in_size <= 0 || out_size <= 0 )
fprintf( stderr, "No data compressed.\n" );
else
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
"%5.2f%% saved, %lld in, %lld out.\n",
(double)in_size / out_size,
( 8.0 * out_size ) / in_size,
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
in_size, out_size );
}
return retval;
}
int fcompress( const long long member_size, const long long volume_size,
const int infd,
const struct stat * const in_statsp )
{
if( verbosity >= 1 ) pp();
File_header header;
header.set_magic();
int retval = 0;
Fmatchfinder fmatchfinder( infd );
header.dictionary_size( fmatchfinder.dictionary_size() );
long long in_size = 0, out_size = 0, partial_volume_size = 0;
while( true ) // encode one member per iteration
{
FLZ_encoder encoder( fmatchfinder, header, outfd );
const long long size =
min( member_size, volume_size - partial_volume_size );
if( !encoder.encode_member( size ) )
{ pp( "Encoder error" ); retval = 1; break; }
in_size += fmatchfinder.data_position();
out_size += encoder.member_position();
if( fmatchfinder.finished() ) break;
partial_volume_size += encoder.member_position();
if( partial_volume_size >= volume_size - min_dictionary_size )
{
partial_volume_size = 0;
if( delete_output_on_interrupt )
{
close_and_set_permissions( in_statsp );
if( !next_filename() )
{ pp( "Too many volume files" ); retval = 1; break; }
if( !open_outstream( true ) ) { retval = 1; break; }
delete_output_on_interrupt = true;
}
}
fmatchfinder.reset();
}
if( retval == 0 && verbosity >= 1 )
{
if( in_size <= 0 || out_size <= 0 )
fprintf( stderr, "No data compressed.\n" );
else
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
"%5.2f%% saved, %lld in, %lld out.\n",
(double)in_size / out_size,
( 8.0 * out_size ) / in_size,
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
in_size, out_size );
}
return retval;
}
#endif
int decompress( const int infd, const bool testing )
{
int retval = 0;
Range_decoder rdec( infd );
long long partial_file_pos = 0;
for( bool first_member = true; ; first_member = false )
{
File_header header;
int size;
rdec.reset_member_position();
for( size = 0; size < File_header::size && !rdec.finished(); ++size )
header.data[size] = rdec.get_byte();
if( rdec.finished() ) // End Of File
{
if( first_member )
{ pp( "Error reading member header" ); retval = 1; }
break;
}
if( !header.verify_magic() )
{
if( first_member )
{ pp( "Bad magic number (file not in lzip format)" ); retval = 2; }
break;
}
if( !header.verify_version() )
{
if( verbosity >= 0 )
{ pp();
fprintf( stderr, "Version %d member format not supported.\n",
header.version() ); }
retval = 2; break;
}
if( header.dictionary_size() < min_dictionary_size ||
header.dictionary_size() > max_dictionary_size )
{ pp( "Invalid dictionary size in member header" ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{
pp();
if( verbosity >= 2 )
fprintf( stderr, "version %d, dictionary size %7sB. ",
header.version(),
format_num( header.dictionary_size() ) );
}
LZ_decoder decoder( header, rdec, outfd );
const int result = decoder.decode_member();
partial_file_pos += rdec.member_position();
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
{
pp();
if( result == 2 )
fprintf( stderr, "File ends unexpectedly at pos %lld\n",
partial_file_pos );
else
fprintf( stderr, "Decoder error at pos %lld\n",
partial_file_pos );
}
retval = 2; break;
}
if( verbosity >= 2 )
{ if( testing ) fprintf( stderr, "ok\n" );
else fprintf( stderr, "done\n" ); }
}
if( verbosity == 1 && retval == 0 )
{ if( testing ) fprintf( stderr, "ok\n" );
else fprintf( stderr, "done\n" ); }
return retval;
}
} // end namespace
// Print a diagnostic to stderr unless verbosity is negative.
//
// msg:     text printed as "<program_name>: <msg>"; skipped when null or empty.
// errcode: when positive, ": <strerror(errcode)>" is appended to the message.
// help:    when true and the invocation name is known, a hint pointing at
//          --help is printed as well.
void show_error( const char * const msg, const int errcode, const bool help )
  {
  if( verbosity < 0 ) return;

  const bool have_msg = ( msg && msg[0] );
  if( have_msg )
    {
    fprintf( stderr, "%s: %s", program_name, msg );
    if( errcode > 0 )
      fprintf( stderr, ": %s", strerror( errcode ) );
    fprintf( stderr, "\n" );
    }

  const bool have_name = ( invocation_name && invocation_name[0] );
  if( help && have_name )
    fprintf( stderr, "Try `%s --help' for more information.\n",
             invocation_name );
  }
// Report an internal consistency failure and terminate with exit status 3.
// The message is suppressed when verbosity is negative. Does not return.
void internal_error( const char * const msg )
  {
  const bool quiet = ( verbosity < 0 );
  if( !quiet )
    fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
  exit( 3 );
  }
int main( const int argc, const char * const argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
const Lzma_options option_mapping[] =
{
{ 1 << 16, 16 }, // -0 entry values not used
{ 1 << 20, 5 }, // -1
{ 3 << 19, 6 }, // -2
{ 1 << 21, 8 }, // -3
{ 3 << 20, 12 }, // -4
{ 1 << 22, 20 }, // -5
{ 1 << 23, 36 }, // -6
{ 1 << 24, 68 }, // -7
{ 3 << 23, 132 }, // -8
{ 1 << 25, 273 } }; // -9
Lzma_options encoder_options = option_mapping[6]; // default = "-6"
long long member_size = LLONG_MAX;
long long volume_size = LLONG_MAX;
int infd = -1;
Mode program_mode = m_compress;
bool keep_input_files = false;
bool to_stdout = false;
bool zero = false;
invocation_name = argv[0];
// Greatly simplified argument parsing
int argind = 1;
for( ; argind < argc; ++argind )
{
const int code = argv[argind][1];
switch( code )
{
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'q': verbosity = -1; break;
zero = false; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
default : internal_error( "uncaught option" );
}
} // end process options
#if defined(__MSVCRT__) || defined(__OS2__)
_setmode( STDIN_FILENO, O_BINARY );
_setmode( STDOUT_FILENO, O_BINARY );
#endif
if( program_mode == m_test )
outfd = -1;
#if !DECODER_ONLY
else if( program_mode == m_compress )
{
dis_slots.init();
prob_prices.init();
}
#endif
int retval = 0;
{
struct stat in_stats;
infd = STDIN_FILENO;
outfd = STDOUT_FILENO;
if( !check_tty( infd, program_mode ) ) return 1;
const struct stat * const in_statsp = 0;
//pp.set_name( "-" );
int tmp = 0;
#if !DECODER_ONLY
if( program_mode == m_compress )
{
if( zero )
tmp = fcompress( member_size, volume_size, infd, in_statsp );
else
tmp = compress( member_size, volume_size, encoder_options, infd,
in_statsp );
}
else
#endif
tmp = decompress( infd, program_mode == m_test );
if( tmp > retval ) retval = tmp;
//if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
if( delete_output_on_interrupt )
close_and_set_permissions( in_statsp );
}
if( outfd >= 0 && close( outfd ) != 0 )
{
show_error( "Can't close stdout", errno );
if( retval < 1 ) retval = 1;
}
return retval;
}