#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2013 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License // If the library is compiled as a DLL, this ensures symbols are exported #define NUCLEX_STORAGE_SOURCE 1 #include "ZipReader.h" #include "Nuclex/Storage/Blob.h" #include #include namespace { // ------------------------------------------------------------------------------------------- // /// Size of the buffer used to decompress the zipped file const std::size_t DecompressionBufferSize = 8192; /// When the buffer gets below this size, it is refilled /// /// The remaining buffer contents need to be copied to the front of the buffer, /// so making this too large will cause needless copying of memory blocks. /// const std::size_t BufferRefillSize = 64; // ------------------------------------------------------------------------------------------- // } // anonymous namespace namespace Nuclex { namespace Storage { namespace FileSystem { namespace Zip { // ------------------------------------------------------------------------------------------- // ZipReader::DeflateContext::DeflateContext( const std::shared_ptr &blob, const ZippedFile::MetaData &metaData ) : Context(metaData), blob(blob), compressedLocation(metaData.CompressedDataOffset), compressedRemaining(metaData.CompressedSize) { this->zlibStream.next_in = nullptr; this->zlibStream.avail_in = 0; this->zlibStream.zalloc = nullptr; this->zlibStream.zfree = nullptr; this->zlibStream.opaque = nullptr; // We must use inflateInit2() here because zip archives don't contain the ZLib // header (probably because the zip directory already contains the same data) int result = ::inflateInit2(&this->zlibStream, -MAX_WBITS); if(result != Z_OK) { throw std::runtime_error("Could not initialize ZLib inflate engine"); } // Pre-allocate the memory of the buffers used to hold compressed and uncompressed data. // Vectors will never shrink, thus, this guarantees that no additional memory allocations // will happen during extraction. this->compressedData.reserve(DecompressionBufferSize); this->uncompressedData.reserve(DecompressionBufferSize); } // ------------------------------------------------------------------------------------------- // ZipReader::DeflateContext::~DeflateContext() { using namespace std; int result = ::inflateEnd(&this->zlibStream); assert((result == Z_OK) && "ZLib inflate engine should terminate normally"); // ZLib's stream couldn't be destroyed, so at least get rid of the dangling pointers if(result != Z_OK) { this->zlibStream.next_in = nullptr; this->zlibStream.next_out = nullptr; } } // ------------------------------------------------------------------------------------------- // void ZipReader::DeflateContext::ReadAt( std::uint64_t location, void *buffer, std::size_t count ) { using namespace std; assert((location >= this->Location) && "Cache must be advanced in forward direction only"); assert( ((location + count) <= this->Identifier.UncompressedSize) && "Read must stay within the file" ); // Determine the offset the requested data has as seen from our current buffer. // This may be way beyond the buffer capacity, in which case we need to seek std::uint64_t bufferStartOffset = location - this->Location; // If the requested data is past the end of the data we have buffered currently, // skip ahead until we reach the desired area. if(bufferStartOffset > this->uncompressedData.size()) { skipTo(location); bufferStartOffset = location - this->Location; assert( (bufferStartOffset < this->uncompressedData.size()) && "Decompression buffer should be intersecting the requested data by now" ); } std::uint8_t *indexableBuffer = reinterpret_cast(buffer); // Copy any data the caller is interested in out of the current buffer. The buffer start // offset is guaranteed to fit within a normal size_t now. std::size_t bufferCount = std::min( this->uncompressedData.size() - static_cast(bufferStartOffset), count ); if(bufferCount > 0) { std::copy_n( &this->uncompressedData[static_cast(bufferStartOffset)], bufferCount, indexableBuffer ); // If this was all the data the caller wanted, we can exit early. if(bufferCount == count) { return; } // Advance the pointers and counters in accordance with the part of the buffer we have // already processed above indexableBuffer += bufferCount; count -= bufferCount; location += bufferCount; } // If the amount of data requested is more than the decompression buffer can hold, // there's no point in hitting the decompression buffer at all and we can directly // extract the decompressed data into the caller-provided buffer. if(count > DecompressionBufferSize) { count = extractDirectly(indexableBuffer, count); if(count > 0 ){ extractUsingBuffer(indexableBuffer, count); } } else { extractUsingBuffer(indexableBuffer, count); } } // ------------------------------------------------------------------------------------------- // void ZipReader::DeflateContext::skipTo(std::uint64_t location) { this->Location += this->uncompressedData.size(); this->uncompressedData.resize(this->uncompressedData.capacity()); for(;;) { if(this->zlibStream.avail_in <= BufferRefillSize) { refillInputBuffer(); } this->zlibStream.next_out = &this->uncompressedData[0]; this->zlibStream.avail_out = static_cast(this->uncompressedData.size()); int result = ::inflate(&this->zlibStream, Z_SYNC_FLUSH); if((result != Z_STREAM_END) && (result != Z_OK)) { throw std::runtime_error("Compressed data is corrupted"); } std::size_t bytesDecompressed = this->zlibStream.next_out - &this->uncompressedData[0]; if(this->Location + bytesDecompressed > location) { this->uncompressedData.resize(bytesDecompressed); return; } this->Location += bytesDecompressed; } } // ------------------------------------------------------------------------------------------- // std::size_t ZipReader::DeflateContext::extractDirectly( std::uint8_t *indexableBuffer, std::size_t count ) { using namespace std; this->Location += this->uncompressedData.size(); this->uncompressedData.resize(0); // When this method is called it is not known whether the input buffer is still // full or if it is running low. Only the output buffer is guaranteed to be empty. if(this->zlibStream.avail_in <= BufferRefillSize) { refillInputBuffer(); } // Keep extracting the input buffer into the user-provided memory block until we // have extracted everything the caller was interested in. for(;;) { this->zlibStream.next_out = indexableBuffer; this->zlibStream.avail_out = static_cast(count); int result = ::inflate(&this->zlibStream, Z_SYNC_FLUSH); if((result != Z_STREAM_END) && (result != Z_OK)) { throw std::runtime_error("Compressed data is corrupted"); } // Figure out how many bytes have been decompressed by comparing the target pointers. std::size_t bytesDecompressed = this->zlibStream.next_out - indexableBuffer; this->Location += bytesDecompressed; // If everything has been extracted in this go we can stop right here if(bytesDecompressed >= count) { assert( (bytesDecompressed == count) && "Inflate engine should only extract the amount of data it was instructed to" ); return 0; } indexableBuffer += bytesDecompressed; count -= bytesDecompressed; // There can only be one reason for reaching this point: the contents of the input buffer // have been completely consumed, so refill it. refillInputBuffer(); // If there is less than the low water mark of data remaining, extract the rest // through the buffer (we don't know if the decompressor supports point landings, // it might need to extract to the next multiple of something bytes). if(count < BufferRefillSize) { return count; } } } // ------------------------------------------------------------------------------------------- // void ZipReader::DeflateContext::extractUsingBuffer( std::uint8_t *indexableBuffer, std::size_t count ) { using namespace std; assert( (count <= DecompressionBufferSize) && "Buffered extraction should only be used if amount of data is less than buffer capacity" ); this->Location += this->uncompressedData.size(); this->uncompressedData.resize(this->uncompressedData.capacity()); // When this method is called it is not known whether the input buffer is still // full or if it is running low. Only the output buffer is guaranteed to be empty. if(this->zlibStream.avail_in <= BufferRefillSize) { refillInputBuffer(); } this->zlibStream.next_out = &this->uncompressedData[0]; this->zlibStream.avail_out = static_cast(this->uncompressedData.size()); for(;;) { int result = ::inflate(&this->zlibStream, Z_SYNC_FLUSH); if((result != Z_STREAM_END) && (result != Z_OK)) { throw std::runtime_error("Compressed data is corrupted"); } // Figure out how many bytes have been decompressed by comparing the target pointers. std::size_t bytesDecompressed = this->zlibStream.next_out - &this->uncompressedData[0]; // If everything has been extracted in this go we can stop right here if(bytesDecompressed >= count) { assert( (bytesDecompressed <= this->uncompressedData.size()) && "Inflate engine should only extract the amount of data it was instructed to" ); this->uncompressedData.resize(bytesDecompressed); std::copy_n(&this->uncompressedData[0], count, indexableBuffer); return; } // There can only be one reason for reaching this point: the contents of the input buffer // have been completely consumed, so refill it. refillInputBuffer(); } } // ------------------------------------------------------------------------------------------- // void ZipReader::DeflateContext::refillInputBuffer() { // First, move the remaining buffer contents to the start of the buffer so the new data // can be appended after it (a ring buffer would be cooler, but ZLib expects linear memory) if(this->zlibStream.avail_in > 0) { std::size_t dataStartOffset = this->compressedData.size() - this->zlibStream.avail_in; std::copy_n( &this->compressedData[dataStartOffset], this->zlibStream.avail_in, &this->compressedData[0] ); } // Find out how many bytes the buffer can be refilled with. Usually, the buffer is // refilled to capacity, but near the end of the compressed data blob, it may be less. std::size_t bytesToCopy = this->compressedData.capacity() - this->zlibStream.avail_in; if(this->compressedRemaining < bytesToCopy) { bytesToCopy = static_cast(this->compressedRemaining); } // Resize the buffer to the exact amount of data we're putting in it, then append // the data right after the leftover bits we moved to the beginning of the buffer. this->compressedData.resize(bytesToCopy + this->zlibStream.avail_in); this->blob->ReadAt( this->compressedLocation, &this->compressedData[this->zlibStream.avail_in], bytesToCopy ); this->compressedLocation += bytesToCopy; this->compressedRemaining -= bytesToCopy; // Update the ZLib stream pointers and counters this->zlibStream.next_in = &this->compressedData[0]; this->zlibStream.avail_in += static_cast(bytesToCopy); } // ------------------------------------------------------------------------------------------- // ZipReader::ZipReader(const std::shared_ptr &blob) : Cache(4), blob(blob) {} // ------------------------------------------------------------------------------------------- // void ZipReader::ReadAt( const ZippedFile::MetaData &metaData, std::uint64_t location, void *buffer, std::size_t count ) { if(location + count > metaData.UncompressedSize) { throw std::out_of_range("Attempted to read beyond the end of the file"); } switch(metaData.CompressionMethod) { // We don't need to lock up a context or initialize a deflate state if // the data is there, uncompressed, ready for the taking case CompressionMethod::Store: { this->blob->ReadAt(metaData.CompressedDataOffset + location, buffer, count); break; } // If the data is compressed, we need to pick or create a deflate context // that is // responsible for this file and nearest to the data that we want. case CompressionMethod::Deflate: { Cache::ReadAt(metaData, location, buffer, count); break; } // This seems to be a zip archive from the future using unknown // compression methods. Nothing we can do here. default: { throw std::runtime_error("Unsupported compression method"); } } } // ------------------------------------------------------------------------------------------- // ZipReader::Context *ZipReader::CreateContext(const ZippedFile::MetaData &metaData) { return new DeflateContext(this->blob, metaData); } // ------------------------------------------------------------------------------------------- // }}}} // namespace Nuclex::Storage::FileSystem::Zip