#pragma region CPL License
/*
Nuclex Native Framework
Copyright (C) 2002-2013 Nuclex Development Labs
This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.
You should have received a copy of the IBM Common Public
License along with this library
*/
#pragma endregion // CPL License
// If the library is compiled as a DLL, this ensures symbols are exported
#define NUCLEX_STORAGE_SOURCE 1
#include "SevenZipReader.h"
#include "Decoder/MultiDecoderContext.h"
#include "Decoder/LzmaStreamProcessor.h"
#include "Decoder/Lzma2StreamProcessor.h"

#include <memory>
namespace {

  // ------------------------------------------------------------------------------------------- //

  /// Number of bytes in the buffer used when decompressing a zipped file
  const std::size_t DecompressionBufferSize = 8 * 1024;

  /// Fill level, in bytes, below which the buffer gets refilled
  ///
  /// On a refill, whatever is still left in the buffer has to be moved to the front
  /// of the buffer first, so a large threshold means needless copying of memory blocks.
  ///
  const std::size_t BufferRefillSize = 64;

  // ------------------------------------------------------------------------------------------- //

} // anonymous namespace
namespace {

  // ------------------------------------------------------------------------------------------- //

  namespace CodingMethod {

    /// Identifiers of the coding methods that can show up in a 7-Zip archive
    enum Enum {

      /// Data is stored as-is, without any compression
      Store = 0x00,
      /// Data is compressed with LZMA
      Lzma = 0x030101,
      /// Data is compressed with LZMA 2.0
      Lzma2 = 0x21,
      /// Data is compressed with PPMD
      Ppmd = 0x030401,
      /// Data is compressed with Deflate (ZLib)
      Deflate = 0x040108,

      /// Executable filter for Intel/AMD x86 code
      Bcj = 0x03030103,
      /// Executable filter for PowerPC code
      Ppc = 0x03030205,
      /// Executable filter for ARM code
      Arm = 0x03030501,
      /// Executable filter for ARM Thumb code
      ArmT = 0x03030701,
      /// Executable filter for Sparc code
      Sparc = 0x03030805,

      /// Improved executable filter for Intel/AMD x86 code
      ///
      /// The input is split into four streams by this filter:
      ///   s0: normal executable code (compressed)
      ///   s1: call addresses (compressed)
      ///   s2: jump addresses (compressed)
      ///   s3: BCJ2 data (uncompressed)
      /// A different dictionary size and method can be used for each stream.
      ///
      Bcj2 = 0x0303011B

    };

  } // namespace CodingMethod

  // ------------------------------------------------------------------------------------------- //

  /// Tells whether a coding method belongs to the compression methods
  ///
  /// codingMethod: coding method to classify
  /// Returns true for Store, Lzma, Lzma2, Ppmd and Deflate, false for anything else
  bool isCompressionMethod(CodingMethod::Enum codingMethod) {
    switch(codingMethod) {
      case CodingMethod::Store:
      case CodingMethod::Lzma:
      case CodingMethod::Lzma2:
      case CodingMethod::Ppmd:
      case CodingMethod::Deflate: {
        return true;
      }
      default: {
        return false;
      }
    }
  }

  // ------------------------------------------------------------------------------------------- //

  /// Tells whether a coding method belongs to the preprocessing filters
  ///
  /// codingMethod: coding method to classify
  /// Returns true for Bcj, Bcj2, Arm, ArmT, Ppc and Sparc, false for anything else
  bool isPreprocessingMethod(CodingMethod::Enum codingMethod) {
    switch(codingMethod) {
      case CodingMethod::Bcj:
      case CodingMethod::Bcj2:
      case CodingMethod::Arm:
      case CodingMethod::ArmT:
      case CodingMethod::Ppc:
      case CodingMethod::Sparc: {
        return true;
      }
      default: {
        return false;
      }
    }
  }

  // ------------------------------------------------------------------------------------------- //

} // anonymous namespace
namespace Nuclex { namespace Storage { namespace FileSystem { namespace SevenZip {
// ------------------------------------------------------------------------------------------- //
/// Initializes a new reader that fetches file contents out of a 7-Zip archive
///
/// sevenZipArchive: archive the reader will read from; the reader keeps a shared
/// reference to it in its sevenZipArchive member
//
// NOTE(review): std::shared_ptr had no template argument here, which is ill-formed.
// It has been restored as SevenZipArchive based on the member's name - confirm the
// exact type (including constness) against the declaration in SevenZipReader.h.
SevenZipReader::SevenZipReader(const std::shared_ptr<SevenZipArchive> &sevenZipArchive) :
  Cache(4), // presumably the number of decoder contexts the cache keeps - confirm in Cache
  sevenZipArchive(sevenZipArchive) {}
// ------------------------------------------------------------------------------------------- //
/// Reads a range of bytes from a file stored in the 7-Zip archive
///
/// metaData: describes the file to read (index, solid block index, uncompressed size)
/// location: offset within the uncompressed file at which reading starts
/// buffer:   memory that receives the data
/// count:    number of bytes to read
///
/// Throws std::out_of_range if the requested range extends past the end of the file.
void SevenZipReader::ReadAt(
  const SevenZippedFile::MetaData &metaData,
  std::uint64_t location, void *buffer, std::size_t count
) {
  // The range check is written so that it cannot wrap around: 'location + count' would
  // overflow for a very large location and then slip past a plain sum comparison.
  const std::uint64_t fileSize = metaData.UncompressedSize;
  if((location > fileSize) || (count > (fileSize - location))) {
    throw std::out_of_range("Attempted to read beyond the end of the file");
  }

  // From here on, 'location' is relative to the start of the solid block
  location += this->sevenZipArchive->GetFileOffsetInSolidBlock(metaData.Index);

  // If the data uses a single decoder and is uncompressed, we don't need to lock up
  // a context or do anything complicated - read directly from the source blob!
  const CSzFolder &solidBlock = this->sevenZipArchive->GetSolidBlock(
    metaData.SolidBlockIndex
  );
  if(solidBlock.NumCoders == 1) {
    if(solidBlock.Coders[0].MethodID == CodingMethod::Store) {
      // Stored data can be addressed directly, so translate the location into
      // a position inside the archive blob
      location += this->sevenZipArchive->GetStreamStartPosition(metaData.SolidBlockIndex);
      this->sevenZipArchive->GetArchiveBlob()->ReadAt(location, buffer, count);
      return;
    }
  }

  // If the data is compressed or has been processed by filters (or both),
  // we need to pick or create a decoder context and turn the solid block
  // back into its original state.
  Cache::ReadAt(metaData.SolidBlockIndex, location, buffer, count);
}
// ------------------------------------------------------------------------------------------- //
/// Creates a decoder context for the specified solid block
///
/// solidBlockIndex: index of the solid block a decoder context is needed for
///
/// Returns the new context for a solid block that uses exactly one coder which is
/// a compression method. Throws std::runtime_error for every other coder layout,
/// as the two- and four-coder layouts are not implemented yet.
SevenZipReader::Context *SevenZipReader::CreateContext(const std::size_t &solidBlockIndex) {
  const CSzFolder &solidBlock = this->sevenZipArchive->GetSolidBlock(solidBlockIndex);

  // Likely design:
  //
  // - Each solid block consists of one or more streams, called "PackStreams."
  //
  // - These serve as inputs to coders. For example, the BCJ2 coder (filters x86
  //   executables to increase compressibility) has 4 inputs (BCJ2 data,
  //   JMP addresses, CALL addresses and the remaining data of the executable)
  //
  // - (?) "BindPairs" define which "PackStreams" become inputs into a coder.
  //   But how are they associated with a coder? Are .OutIndex streams to be
  //   created? Why are .InIndex numbered 3,4,5 for standard BCJ2?

  switch(solidBlock.NumCoders) {

    // Straightforward compression of a single file using one coder
    case 1: {
      // MethodID is a plain integer in the archive structures, so it has to be
      // converted into the enumeration the classification helper expects
      const CodingMethod::Enum codingMethod = static_cast<CodingMethod::Enum>(
        solidBlock.Coders[0].MethodID
      );
      if(isCompressionMethod(codingMethod)) {
        return createSingleCoderContext(solidBlock.Coders[0], solidBlockIndex);
      }
      break;
    }

    // Pre-processed file. Typically ARM executables (not implemented yet)
    case 2: {
      break;
    }

    // Pre-processed file. Typically x86 executables (not implemented yet)
    case 4: {
      break;
    }

  }

  // Anything that did not return a context above cannot be decoded
  throw std::runtime_error("Unsupported compression scheme");
}
// ------------------------------------------------------------------------------------------- //
/// Creates a decoder context for a solid block that is processed by a single coder
///
/// coder:           coder the solid block's data has been compressed with
/// solidBlockIndex: index of the solid block the context is created for
///
/// Returns a newly allocated context with one stream processor for LZMA or LZMA 2.0.
/// Throws std::runtime_error for every other coding method.
//
// NOTE(review): the template arguments of std::make_shared were missing. They have been
// restored from the included headers (LzmaStreamProcessor.h, Lzma2StreamProcessor.h) -
// confirm the class names and namespaces.
SevenZipReader::Context *SevenZipReader::createSingleCoderContext(
  CSzCoderInfo &coder, std::size_t solidBlockIndex
) {
  // The context is held by a smart pointer while it is being set up so that it does not
  // leak when one of the branches below throws.
  std::unique_ptr<MultiDecoderContext> context(
    new MultiDecoderContext(this->sevenZipArchive, solidBlockIndex)
  );

  // Dispatch on the coder that was passed in, which is also the one handed to
  // the stream processor.
  switch(coder.MethodID) {

    // File is compressed using a single LZMA-compressed stream
    case CodingMethod::Lzma: {
      context->AddStreamProcessor(
        std::make_shared<LzmaStreamProcessor>(
          coder,
          &this->sevenZipArchive->GetAllocator(),
          this->sevenZipArchive->GetStreamSize(solidBlockIndex)
        )
      );
      break;
    }

    // File is compressed using a single LZMA 2.0-compressed stream
    case CodingMethod::Lzma2: {
      context->AddStreamProcessor(
        std::make_shared<Lzma2StreamProcessor>(
          coder,
          &this->sevenZipArchive->GetAllocator(),
          this->sevenZipArchive->GetStreamSize(solidBlockIndex)
        )
      );
      break;
    }

    // File is compressed using a single PPMd-compressed stream
    case CodingMethod::Ppmd: {
      throw std::runtime_error("PPMd compression is not supported yet");
    }

    // File is compressed using a single Deflate-compressed stream
    case CodingMethod::Deflate: {
      throw std::runtime_error("Deflate compression is not supported yet");
    }

    // Any other method, including Store, which has no stream processor here
    default: {
      throw std::runtime_error("Unsupported compression method");
    }

  }

  // Setup succeeded; hand the raw pointer to the caller as before
  // (presumably the cache takes ownership - confirm against Cache)
  return context.release();
}
// ------------------------------------------------------------------------------------------- //
}}}} // namespace Nuclex::Storage::FileSystem::SevenZip