#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2021 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License // If the library is compiled as a DLL, this ensures symbols are exported #define NUCLEX_STORAGE_SOURCE 1 #include "CompressorTest.h" #include // for std::default_random_engine and std::uniform_int_distribution #include namespace { // ------------------------------------------------------------------------------------------- // /// Generates the first pseudo-random number following a fixed seed /// Seed value, same seeds produce same pseudo-random numbers /// The first random number that followed the specified seed /// /// In some implementations of the C++ standard library (*cough* MSVC *cough*), /// std::default_random_engine has a substantial setup and/or processing time, /// taking 30+ seconds on a modern CPU to generate 128 KiB of data. Since quality /// of random numbers is not important here, we use this fast "Xor-Shift" generator. /// std::uint8_t fastRandomByte(std::size_t seed) { seed ^= (seed << 21); seed ^= (seed >> 35); seed ^= (seed << 4); return static_cast(seed); // truncate } // ------------------------------------------------------------------------------------------- // /// Generates a pseudo-random pattern to test compression /// Buffer into which the generated bytes will be written /// Index in the pattern for which bytes will be generated /// Number of bytes that will be generated void generatePattern( std::vector &buffer, std::size_t patternIndex, std::size_t count ) { std::size_t patternEndIndex = patternIndex + count; for(; patternIndex < patternEndIndex; ++patternIndex) { if((patternIndex % 32) >= 16) { buffer.push_back(fastRandomByte(patternIndex)); } else { buffer.push_back(static_cast(patternIndex >> 3)); } } } // ------------------------------------------------------------------------------------------- // /// Shifts the contents of the input buffer backwards /// Buffer whose contents will be shifted /// Index that will be come the new index 0 void shiftInputBufferBack( std::vector &buffer, std::size_t newStartIndex ) { if(newStartIndex == 0) { return; } assert(buffer.size() >= newStartIndex); std::size_t byteCount = buffer.size() - newStartIndex; for(std::size_t index = 0; index < byteCount; ++index) { buffer[index] = buffer[index + newStartIndex]; } buffer.resize(byteCount); } // ------------------------------------------------------------------------------------------- // /// Verifies that the input buffer contains the generated data /// Buffer whose contents will be verified /// Index of the pattern present at buffer index 0 /// Number of bytes in the buffer to verify void expectPattern( const std::vector &buffer, std::size_t patternIndex, std::size_t count ) { std::size_t bufferIndex = 0; std::size_t patternEndIndex = patternIndex + count; for(; patternIndex < patternEndIndex; ++patternIndex) { if((patternIndex % 32) >= 16) { EXPECT_EQ(buffer[bufferIndex], fastRandomByte(patternIndex)); } else { EXPECT_EQ(buffer[bufferIndex], static_cast(patternIndex >> 3)); } ++bufferIndex; } } // ------------------------------------------------------------------------------------------- // } // anonymous namespace namespace Nuclex { namespace Storage { namespace Compression { // ------------------------------------------------------------------------------------------- // void checkStreamCompression( Compressor &compressor, std::vector &target, std::size_t totalByteCount ) { static const std::size_t chunkByteCount = 16384; // 16 KiB std::size_t targetStartIndex = 0; // Generate data using a test pattern and feed it to the compressor { std::default_random_engine randomEngine; std::uniform_int_distribution randomDistribution( chunkByteCount / 16, chunkByteCount + (chunkByteCount / 16) ); // We resize twice the chunk size in the assumption that even the worst compressor // will not blatantly double the size of its input (otherwise) std::vector inputBuffer; inputBuffer.reserve(chunkByteCount); std::size_t generatedByteCount = 0; for(;;) { // Refill the input buffer with our test pattern generatePattern( inputBuffer, generatedByteCount, std::min(chunkByteCount - inputBuffer.size(), totalByteCount - generatedByteCount) ); std::size_t inputByteCount = inputBuffer.size(); generatedByteCount += inputByteCount; // We provide the algorithm with a random amount of available space in // the output buffer so it will sometimes not be able to process all input std::size_t targetByteCount = randomDistribution(randomEngine); target.resize(targetStartIndex + targetByteCount); // Let the compressor do its thing. Return value is ignored because // we don't care if input buffer exhausted or output buffer full... compressor.Process( inputBuffer.data(), inputByteCount, target.data() + targetStartIndex, targetByteCount ); inputByteCount = inputBuffer.size() - inputByteCount; targetStartIndex += targetByteCount; target.resize(targetStartIndex); // Shift the input buffer back and make space for new data. shiftInputBufferBack(inputBuffer, inputByteCount); if(generatedByteCount >= totalByteCount) { break; } } // We have no more input data, now call Compressor::Finish() until all output // has been flushed into our target buffer for(;;) { std::size_t targetByteCount = randomDistribution(randomEngine); target.resize(targetStartIndex + targetByteCount); StopReason stopReason = compressor.Finish( target.data() + targetStartIndex, targetByteCount ); targetStartIndex += targetByteCount; target.resize(targetStartIndex); // When the compressor returns 'Finished' as the stop reason, the decompression // is complete and no more output follows. if(stopReason == StopReason::Finished) { break; } } } } // ------------------------------------------------------------------------------------------- // void checkStreamDecompression( Decompressor &decompressor, const std::vector &source, std::size_t totalByteCount ) { static const std::size_t chunkByteCount = 16384; // 16 KiB std::vector outputBuffer; outputBuffer.resize(chunkByteCount); std::size_t outputStartIndex = 0; // Pump compressed data through Decompressor::Process() until the entire compressed // data array has been consumed std::size_t sourceIndex = 0; for(;;) { std::size_t inputByteCount = std::min(chunkByteCount, source.size() - sourceIndex); // Let the decompressor do its thing std::size_t outputByteCount = outputBuffer.size(); decompressor.Process( source.data() + sourceIndex, inputByteCount, outputBuffer.data(), outputByteCount ); inputByteCount = std::min(chunkByteCount, source.size() - sourceIndex) - inputByteCount; // The compressed data, generated via checkStreamCompression(), is pseudo-random with // a perfectly reproducible pattern. This function generates that pattern one more time // and compares the decompressed data against it to ensure it's bit-perfect. expectPattern(outputBuffer, outputStartIndex, outputByteCount); outputStartIndex += outputByteCount; sourceIndex += inputByteCount; // If all compressed source data has been consumed, we switch to calling Finish() if(sourceIndex >= source.size()) { break; } } // Call Decompressor::Finish() until it has flushed all waiting output for(;;) { std::size_t outputByteCount = outputBuffer.size(); StopReason stopReason = decompressor.Finish( outputBuffer.data(), outputByteCount ); // As before, compare the decompressed data with the reproducible pattern we know // was given to the compressor as source. If it doesn't match, the test fails. expectPattern(outputBuffer, outputStartIndex, outputByteCount); outputStartIndex += outputByteCount; // When the decompressor returns 'Finished' as the stop reason, the decompression // is complete and no more output follows. if(stopReason == StopReason::Finished) { break; } } // Finally, check that the expected number of uncompressed bytes have been produced. // Without this check, the decompressor could return fewer bytes and would not fail // the test so long as those bytes conform to the generated pattern. std::size_t decompressedByteCount = outputStartIndex; EXPECT_EQ(decompressedByteCount, totalByteCount); } // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Storage::Compression