#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2013 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License #ifndef NUCLEX_STORAGE_HELPERS_CACHE_H #define NUCLEX_STORAGE_HELPERS_CACHE_H #include "Nuclex/Storage/Config.h" #include #include #include #include #include #include #include namespace Nuclex { namespace Storage { namespace Helpers { // ------------------------------------------------------------------------------------------- // /// Caches data read from disk or extracted from a compressed archive /// /// Identifier through which the cache differentiates which context handles which file /// /// /// /// This cache implements a simple buffering technique where each access is routed /// through a "context". Each context can buffer a varying amount of data. /// Recurring accesses pick the closest context before the data being accessed, /// ensuring that the smallest possible read is performed each time. Should two /// threads accesss the same location simultaneously, two contexts would be created /// (each thread takes a context, reads through it and then returns it). /// /// /// Caches are ideal to enable efficient random access on a sequential stream (usually /// encountered on compressed archives where the decompressor can only advance through /// the file in forward direction. /// /// /// It is designed for low to moderate concurrency situations where just a few threads /// contest the data being cached at a time (typically 2-8 threads). For heavily /// contested resources, a custom cache should be implemented. /// /// template < typename TIdentifier, typename TComparer = std::equal_to > class Cache { #pragma region struct Context /// Maintains the state of an individual decompressor or cache line /// /// A context performs the buffering of data in the cache. It is up to the context to /// decide on the size of its buffer (usually, a cache and context will be implemented /// together). Each context is guaranteed to be accessed by only 1 thread at a time, /// but this thread may be a different one each time. /// public: struct Context { /// Initializes a new cache context /// Identifier of the file the context is handling public: NUCLEX_STORAGE_API Context(const TIdentifier &identifier) : Location(0), Identifier(identifier) {} /// Frees all resources owned by the context public: NUCLEX_STORAGE_API virtual ~Context() {} /// Reads data from the context /// Location in the file from which to read data /// Buffer into which the data will be read /// Number of bytes that need to be read public: NUCLEX_STORAGE_API virtual void ReadAt( std::uint64_t location, void *buffer, std::size_t count ) = 0; /// Starting location of data this context has currently buffered public: std::uint64_t Location; /// Identifier that may be used for hashing or as unique id public: TIdentifier Identifier; }; #pragma endregion // struct Context #pragma region struct Indices /// Stores the results of a context search private: struct Indices { /// Initializes a new index set public: NUCLEX_STORAGE_API Indices() : Closest(static_cast(-1)), Oldest(static_cast(-1)) {} /// Index of the context closest to the requested location public: std::size_t Closest; /// Index of the least recently used context public: std::size_t Oldest; }; #pragma endregion // struct Indices #pragma region class ContextInsertionScope /// Inserts a context into the front of the cache private: class ContextInsertionScope { /// Initializes a new context inserter /// Cache into which the context will be inserted /// Method that will be called to insert the context /// Context that will be inserted into the cache public: NUCLEX_STORAGE_API ContextInsertionScope( Cache *cache, void (Cache::*insertAtFront)(Context *context), Context *context ) : cache(cache), insertAtFront(insertAtFront), context(context) {} /// Inserts the context into the cache public: NUCLEX_STORAGE_API ~ContextInsertionScope() { (this->cache->*this->insertAtFront)(this->context); } /// Cache into which the context will be inserted private: Cache *cache; /// Method that will be called to insert the context private: void (Cache::*insertAtFront)(Context *context); /// Context that will be inserted private: Context *context; }; #pragma endregion // class ContextInsertionScope /// Initializes the cache /// Number of contexts to keep alive at all times /// /// /// The cache is designed to handle a limited number of simultaneous reads. A new /// context will be created when no context exists that starts before the required /// position or when such contexts are being occupied by another thread at the time /// of the call. After reaching the specified, the least recently used one will be /// evicted each time a new context has to be created. /// /// /// There is one situation where more than the specified number of contexts may /// exist: if all of the contexts are occupied by other threads at the time of /// the call, a temporary context will be created for the calling thread. This means /// that for the affected thread, extraction will start from scratch, so it is wise /// to pick a context capacity high enough to prevent this situation. /// /// /// A typical value for the context capacity is 8, which should cover all normal /// use cases. Excessive capacities will waste memory and slow down context search. /// /// public: NUCLEX_STORAGE_API Cache(std::size_t contextCapacity) : contexts(nullptr), contextCount(0), contextCapacity(contextCapacity) { this->contexts = new Context *[contextCapacity]; std::fill(this->contexts, this->contexts + contextCapacity, nullptr); } /// Frees all resources owned by the instance public: NUCLEX_STORAGE_API virtual ~Cache() { for(std::size_t index = 0; index < this->contextCapacity; ++index) { delete this->contexts[index]; } delete []this->contexts; } /// Reads the specified number of bytes from the specified position /// Identifier of the file that will be accessed /// Absolute file position the data will be read from /// Buffer into which the data will be read /// Number of bytes that will be read public: NUCLEX_STORAGE_API void ReadAt( const TIdentifier &identifier, std::uint64_t position, void *buffer, std::size_t count ) { bool usePrivateContext; Context *context; { std::lock_guard scope(this->mutex); Indices indices = locate(identifier, position); // Was there a context we can reuse? if(indices.Closest != static_cast(-1)) { context = pick(indices.Closest); usePrivateContext = false; } else { // No, we need to add or replace a context context = nullptr; // Is there space left to create a new context? if(this->contextCount < this->contextCapacity) { ++this->contextCount; usePrivateContext = false; } else { // No, we are completely full // Is there a slot we can throw out for the new context we create? if(indices.Oldest != static_cast(-1)) { delete this->contexts[indices.Oldest]; remove(indices.Oldest); usePrivateContext = false; } else { // No, we have to resort to a temporary context usePrivateContext = true; } } } } // mutex scope // In case all slots were filled, a temporary context needs to be used. if(usePrivateContext) { std::unique_ptr temporaryContext(CreateContext(identifier)); temporaryContext->ReadAt(position, buffer, count); } else { // There were slots left, so take/create a context and return it afterwards if(context == nullptr) { context = CreateContext(identifier); } ContextInsertionScope insertionScope(this, &Cache::insertAtFront, context); { context->ReadAt(position, buffer, count); } } } /// Creates a new context storing the state of a cache line /// Identifies the file a context is required for /// The new context protected: virtual Context *CreateContext(const TIdentifier &identifier) = 0; /// Searches for the context closest to the specified position /// /// Identifier of the file for which a context will be located /// /// Position for which a context will be searched /// Indices of the closest, oldest and first free context private: Indices locate(const TIdentifier &identifier, std::uint64_t position) { Indices indices; std::uint64_t closestDistance = static_cast(-1); std::size_t closestIndex = static_cast(-1); for(std::size_t index = 0; index < this->contextCapacity; ++index) { Context *current = this->contexts[index]; if(current == nullptr) { break; } if(current->Location <= position) { if(this->identifierComparer(current->Identifier, identifier)) { if(closestIndex == static_cast(-1)) { closestDistance = position - current->Location; indices.Closest = index; } else { std::uint64_t currentDistance = position - current->Location; if(currentDistance < closestDistance) { closestDistance = currentDistance; indices.Closest = index; } } } } indices.Oldest = index; } return indices; } /// Picks a context to be used for reading by the current thread /// Index of the context that will be picked /// The context the thread should use to read /// /// This removes the context from the context list. It is up to the caller to ensure /// that it is returned whatever happens (otherwise the cache would be loosing contexts, /// resulting in memory leaks and eventually a maxed out contextCount without /// available contexts, causing performance to suffer dramatically). /// private: Context *pick(std::size_t index) { using namespace std; // If this triggers, there's an out-of-range index being calculated somewhere assert( (index < this->contextCapacity) && "Index must stay inside the context array" ); Context *picked = this->contexts[index]; remove(index); return picked; } /// Removes a context from the context array /// Index of the context that will be removed /// /// Shifts all contexts following after the index to the left by one. /// private: void remove(std::size_t index) { using namespace std; // If this triggers, there's an out-of-range index being calculated somewhere assert( (index < this->contextCapacity) && "Index must stay inside the context array" ); std::size_t lastIndex = this->contextCount - 1; for(; index < lastIndex; ++index) { if(this->contexts[index + 1] == nullptr) { break; } this->contexts[index] = this->contexts[index + 1]; } this->contexts[lastIndex] = nullptr; } /// Moves the specified context to the front of the context list /// Index of the context that will be moved to the front private: void insertAtFront(Context *context) { using namespace std; // If this should happen threads have attempted to return more contexts to the array // than have been picked up, which means that there's an error managing contexts somewhere. assert( (this->contexts[this->contextCapacity - 1] == nullptr) && "There must be open slots in the context array" ); // Normally, synchronization is up to the public methods and privates assume that // they're being called in an appropriate context. This method is being called from // a RAII scope and thus needs to perform its own synchronization. std::lock_guard scope(this->mutex); { // Skip until we encounter the first used slot std::size_t index = this->contextCount - 1; while(index > 0) { if(this->contexts[index] != nullptr) { break; } --index; } // Shift all slots to the right to make space at the beginning while(index > 0) { this->contexts[index + 1] = this->contexts[index]; --index; } this->contexts[0] = context; } // mutex scope } /// Synchronizes multi-threaded accesses to the cache private: std::mutex mutex; /// Currently active decompressor states private: Context **contexts; /// Maximum number of simultaneous decompression contexts private: std::size_t contextCapacity; /// Number of currently active contexts /// /// This must not be used to reason about the number of used slots in the context /// array. When a thread picks a context, it removes it from the context array, without /// decrementing this value. The context count is used to guarantee that there are /// no more than the specified number of contexts active at the same time. /// private: std::size_t contextCount; /// Compares identifiers to see if contexts can be reused private: TComparer identifierComparer; }; // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Storage::Helpers #endif // NUCLEX_STORAGE_HELPERS_CACHE_H