#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2013 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License #ifndef NUCLEX_STORAGE_FILESYSTEM_SEVENZIP_SEVENZIPARCHIVE_H #define NUCLEX_STORAGE_FILESYSTEM_SEVENZIP_SEVENZIPARCHIVE_H #include "Nuclex/Storage/Config.h" #include "UncheckedAlloc.h" #include "BlobLookInStream.h" #include "lzma/7z.h" #include #include #include namespace Nuclex { namespace Storage { namespace FileSystem { namespace SevenZip { // ------------------------------------------------------------------------------------------- // /// Accesses a 7-Zip archive via adapters to the 7-Zip interfaces /// /// /// The documentation on 7-Zip is very sparse, so here's the likely structure of /// a 7-Zip archive: /// /// /// CSzArEx describes the metadata for a 7-Zip archive (directory structure, /// position of files, mapping to solid blocks and compression methods used). /// /// /// Solid blocks are compressed chunks of data that may contain one or multiple /// files. In 7-Zip terminology these are called "folders." To figure out which /// solid block a file is in, the table CSzArEx::FileIndexToFolderIndexMap[fileIndex] /// can be used. These are indices directly into the CSzArEx::Folders[folderIndex] /// table. Via SzFolder_GetUnpackSize() the uncompressed size of a solid block can /// be determined. /// /// /// To get the offset of a file within its containing solid block, first the index /// of the first file in a solid block needs to be retrieved using /// the CSzArEx::FolderStartFileIndex[solidBlockIndex] method. The file list in /// a 7-Zip archive is sorted first by solid block and then my file offset within /// the solid block, so by summing up the uncompressed file sizes up to the index /// of the file that should be accessed a file's position within its solid block /// can be determined. /// /// /// Each solid block is compressed and/or preprocessed with one or more coders. /// Coders can be straight-forward stream-processors (compressed data in, /// uncompressed data out), but they can also split a file into multiple streams /// (for example, the BCJ2 x86 executable preprocessor produces a metadata stream, /// a stream containing only CALL addresses, a stream with only JMP addresses and /// another stream with the remaining content of an executable. The latter three /// streams are then compressed by another coder. /// /// /// 7-Zip seems to call these streams "PackStreams" and their association with /// the coders might be saved in the "BindPairs." But I couldn't make sense of /// any values stored in these "BindPairs" and 7-Zip currently doesn't really /// evaluate them but only checks that they exactly match with one of three /// hardcoded layouts (one each for solid blocks with 1 coder, 2 coders and 4 coders). /// So at the moment it is not necessary for "BindPairs" to actually make sense. /// /// class SevenZipArchive { /// /// Index used to indicate that a file does not have an extraction block /// public: static const UInt32 InvalidSolidBlockIndex; #pragma region struct SolidBlock /// Block of extracted data used by 7-Zip /// /// 7-Zip stores groups files in solid blocks (called "folder") that are /// compressed together. When accessing a file, you need to decompress or /// read the right extraction block. /// public: struct SolidBlock { /// Index of the solid block in the archive public: UInt32 Index; /// Buffer containing the extracted data from the block public: Byte *Buffer; /// Size of the extraction buffer in bytes public: UInt32 BufferSize; }; #pragma endregion // struct SolidBlock /// Initializes a new 7-Zip accessor /// Blob containing the data of the 7-Zip archive public: SevenZipArchive(const std::shared_ptr &archiveBlob); /// Frees all memory used by the 7-Zip archive accessor public: ~SevenZipArchive(); /// Access the blob containing the archive's raw data /// The blob through which the archive's raw data can be accessed public: const std::shared_ptr &GetArchiveBlob() const { return this->archiveBlob; } /// Counts the number of files contained in the 7-Zip archive /// The numebr of files stored in the 7-Zip Archive public: std::size_t CountFiles() const; /// Retrieves the full path of the file with the specified index /// Index of the file whose path will be retrieved /// The full path of the file with the specified index public: std::string GetFilePath(std::size_t index) const; /// Checks whether the file at the specified index is a directory /// Index of the file that will be checked /// True if the file at the specified index is a directory public: bool IsDirectory(std::size_t index) const; /// Returns the uncompressed size of the file with the specified index /// Index of the file whose uncompressed size will be returned /// The uncompressed size of the file with the specified index public: std::uint64_t GetUncompressedSize(std::size_t index) const; /// /// Returns the time of last modification of the file with the specified index /// /// /// Index of the file whose last modification time will be returned /// /// The time the file with the specified index was last modified public: std::time_t GetLastModificationTime(std::size_t index) const; /// Looks up the index of the solid block the specified file is in /// Index of the file whose extraction block will be looked up /// The index of the extraction block containing the specified file public: std::size_t GetSolidBlockIndex(std::size_t index) const; /// Looks up the offset at which a file begins within its solid block /// Index of the file which will be looked up /// The offset of the file within the uncompressed solid block public: std::uint64_t GetFileOffsetInSolidBlock(std::size_t index) const; /// Returns the index of the first file in a solid block /// /// Index of the solid block whose first file index will be determined /// /// The index of the file file in the specified solid block public: std::size_t GetFirstFileIndexInSolidBlock(std::size_t solidBlockIndex) const; /// /// Determines the abolute position within the archive of the stream for /// the specified solid block /// /// /// Index of the solid block for one of whose streams the absolute start position /// will be calculated /// /// Stream whose start position will be calculated /// /// The absolute start position of the stream within the specified solid block /// public: std::uint64_t GetStreamStartPosition( std::size_t solidBlockIndex, std::size_t streamIndex = 0 ) const; /// Looks up the length of a compressed stream for a solid block /// /// Index of the solid block of which the stream length will be looked up /// /// Index of the stream whose size will be retrieved /// The size of the specified stream in its solid block public: std::uint64_t SevenZipArchive::GetStreamSize( std::size_t solidBlockIndex, std::size_t streamIndex = 0 ) const; /// Retrieves the solid block with the specified index /// /// Index of the solid block that will be retrieved /// /// The solid block with the specified index public: const CSzFolder &GetSolidBlock(std::size_t solidBlockIndex); /// Accesses the allocator used by 7-Zip /// The public: UncheckedAlloc &GetAllocator() { return this->allocator; } /// Initializes 7-Zip's global CRC table private: static void initializeCrcTable(); /// Set when the 7-Zip CRC table has been initialized once private: static std::once_flag sevenZipCrcTableInitialized; /// Blob through which the archive's raw data can be accessed private: std::shared_ptr archiveBlob; /// Implementation of 7-Zip's stream interface on top of a blob private: BlobLookInStream archiveBlobStream; /// Plain allocator wrapping C++' new/delete operators private: UncheckedAlloc allocator; /// Interface to the 7-Zip archive's contents private: CSzArEx archiveInterface; }; // ------------------------------------------------------------------------------------------- // }}}} // namespace Nuclex::Storage::FileSystem::SevenZip #endif // NUCLEX_STORAGE_FILESYSTEM_SEVENZIP_SEVENZIPARCHIVE_H