#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2013 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License // If the library is compiled as a DLL, this ensures symbols are exported #define NUCLEX_STORAGE_SOURCE 1 #include "SevenZipReader.h" #include "Decoder/MultiDecoderContext.h" #include "Decoder/LzmaStreamProcessor.h" #include "Decoder/Lzma2StreamProcessor.h" namespace { // ------------------------------------------------------------------------------------------- // /// Size of the buffer used to decompress the zipped file const std::size_t DecompressionBufferSize = 8192; /// When the buffer gets below this size, it is refilled /// /// The remaining buffer contents need to be copied to the front of the buffer, /// so making this too large will cause needless copying of memory blocks. /// const std::size_t BufferRefillSize = 64; // ------------------------------------------------------------------------------------------- // } // anonymous namespace namespace { // ------------------------------------------------------------------------------------------- // namespace CodingMethod { /// Compression methods that may be used in a 7-Zip archive enum Enum { /// No compression (data is stored uncompressed) Store = 0, /// LZMA compression Lzma = 0x30101, /// LZMA 2.0 compression Lzma2 = 0x21, /// PPMD compression Ppmd = 0x30401, /// Deflate (ZLib) compression Deflate = 0x40108, /// Intel/AMD x86 executable filter Bcj = 0x03030103, /// PowerPC executable filter Ppc = 0x03030205, /// ARM executable filter Arm = 0x03030501, /// ARM Thumb executable filter ArmT = 0x03030701, /// Sparc executable filter Sparc = 0x03030805, /// Improved Intel/AMD x86 executable filter /// /// This filter splits the input into four streams: /// s0: normal executable code (compressed) /// s1: call addresses (compressed) /// s2: jump addresses (compressed) /// s3: BCJ2 data (uncompressed) /// Each stream can have a different dictionary size and method. /// Bcj2 = 0x0303011B }; } // namespace CodingMethod // ------------------------------------------------------------------------------------------- // /// Checks whether the specified coding method is a compression /// Coding method that will be checked /// True if the specified coding method is a compression bool isCompressionMethod(CodingMethod::Enum codingMethod) { return (codingMethod == CodingMethod::Store) || (codingMethod == CodingMethod::Lzma) || (codingMethod == CodingMethod::Lzma2) || (codingMethod == CodingMethod::Ppmd) || (codingMethod == CodingMethod::Deflate); } // ------------------------------------------------------------------------------------------- // /// Checks whether the specified coding method is a preprocessor /// Coding method that will be checked /// True if the specified coding method is a preprocessor bool isPreprocessingMethod(CodingMethod::Enum codingMethod) { return (codingMethod == CodingMethod::Bcj) || (codingMethod == CodingMethod::Bcj2) || (codingMethod == CodingMethod::Arm) || (codingMethod == CodingMethod::ArmT) || (codingMethod == CodingMethod::Ppc) || (codingMethod == CodingMethod::Sparc); } // ------------------------------------------------------------------------------------------- // } // anonymous namespace namespace Nuclex { namespace Storage { namespace FileSystem { namespace SevenZip { // ------------------------------------------------------------------------------------------- // SevenZipReader::SevenZipReader(const std::shared_ptr &sevenZipArchive) : Cache(4), sevenZipArchive(sevenZipArchive) {} // ------------------------------------------------------------------------------------------- // void SevenZipReader::ReadAt( const SevenZippedFile::MetaData &metaData, std::uint64_t location, void *buffer, std::size_t count ) { if(location + count > metaData.UncompressedSize) { throw std::out_of_range("Attempted to read beyond the end of the file"); } location += this->sevenZipArchive->GetFileOffsetInSolidBlock(metaData.Index); // If the data uses a single decoder and is uncompressed, we don't need to lock up // a context or do anything complicated - read directly from the source blob! const CSzFolder &solidBlock = this->sevenZipArchive->GetSolidBlock( metaData.SolidBlockIndex ); if(solidBlock.NumCoders == 1) { if(solidBlock.Coders[0].MethodID == CodingMethod::Store) { location += this->sevenZipArchive->GetStreamStartPosition(metaData.SolidBlockIndex); this->sevenZipArchive->GetArchiveBlob()->ReadAt(location, buffer, count); return; } } // If the data is compressed or has been processed by filters (or both), // we need to pick or create a decoder context and return the solid block // back into its original state. Cache::ReadAt(metaData.SolidBlockIndex, location, buffer, count); } // ------------------------------------------------------------------------------------------- // SevenZipReader::Context *SevenZipReader::CreateContext(const std::size_t &solidBlockIndex) { const CSzFolder &solidBlock = this->sevenZipArchive->GetSolidBlock(solidBlockIndex); // Likely design: // // - Each solid block consists of one or more streams, called "PackStreams." // // - These serve as inputs to coders. For example, the BCJ2 coder (filters x86 // executables to increase compressability) has 4 inputs (BCJ2 data, // JMP addresses, CALL addresses and the remaining data of the executable) // // - (?) "BindPairs" define which "PackStreams" become inputs into a coder. // But how are they associated with a coder? Are .OutIndex streams to be // created? Why are .InIndex numbered 3,4,5 for standard BCJ2? switch(solidBlock.NumCoders) { // Straight-forward compression of a single file using one coder case 1: { if(isCompressionMethod(static_cast(solidBlock.Coders[0].MethodID))) { return createSingleCoderContext(solidBlock.Coders[0], solidBlockIndex); } break; } // Pre-processed file. Typically ARM executables case 2: { break; } // Pre-processed file. Typically x86 executables. case 4: { break; } } throw std::runtime_error("Unsupported compression scheme"); } // ------------------------------------------------------------------------------------------- // SevenZipReader::Context *SevenZipReader::createSingleCoderContext( CSzCoderInfo &coder, std::size_t solidBlockIndex ) { const CSzFolder &solidBlock = this->sevenZipArchive->GetSolidBlock(solidBlockIndex); MultiDecoderContext *context = new MultiDecoderContext( this->sevenZipArchive, solidBlockIndex ); switch(solidBlock.Coders[0].MethodID) { // File is compressed using a single LZMA-compressed stream case CodingMethod::Lzma: { context->AddStreamProcessor( std::make_shared( coder, &this->sevenZipArchive->GetAllocator(), this->sevenZipArchive->GetStreamSize(solidBlockIndex) ) ); break; } // File is compressed using a single LZMA 2.0-compressed stream case CodingMethod::Lzma2: { context->AddStreamProcessor( std::make_shared( coder, &this->sevenZipArchive->GetAllocator(), this->sevenZipArchive->GetStreamSize(solidBlockIndex) ) ); break; } // File is compressed using a single PPMd-compressed stream case CodingMethod::Ppmd: { throw std::runtime_error("PPMd compression is not supported yet"); } // File is compressed using a single Deflate-compressed stream case CodingMethod::Deflate: { throw std::runtime_error("Deflate compression is not supported yet"); } // Unsupported compression method (this includes Deflate at the moment... change that?) default: { throw std::runtime_error("Unsupported compression method"); } } return context; } // ------------------------------------------------------------------------------------------- // }}}} // namespace Nuclex::Storage::FileSystem::SevenZip