#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2013 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License #ifndef NUCLEX_STORAGE_HELPERS_CACHE_H #define NUCLEX_STORAGE_HELPERS_CACHE_H #include "Nuclex/Storage/Config.h" #include #include #include #include #include #include #include namespace Nuclex { namespace Storage { namespace Helpers { // ------------------------------------------------------------------------------------------- // ///

Caches data read from disk or extracted from a compressed archive

/// /// Identifier through which the cache differentiates which context handles which file /// /// /// /// This cache implements a simple buffering technique where each access is routed /// through a "context". Each context can buffer a varying amount of data. /// Recurring accesses pick the closest context before the data being accessed, /// ensuring that the smallest possible read is performed each time. Should two /// threads accesss the same location simultaneously, two contexts would be created /// (each thread takes a context, reads through it and then returns it). /// /// /// Caches are ideal to enable efficient random access on a sequential stream (usually /// encountered on compressed archives where the decompressor can only advance through /// the file in forward direction. /// /// /// It is designed for low to moderate concurrency situations where just a few threads /// contest the data being cached at a time (typically 2-8 threads). For heavily /// contested resources, a custom cache should be implemented. /// /// template < typename TIdentifier, typename TComparer = std::equal_to > class Cache { #pragma region struct Context ///

Maintains the state of an individual decompressor or cache line

/// /// A context performs the buffering of data in the cache. It is up to the context to /// decide on the size of its buffer (usually, a cache and context will be implemented /// together). Each context is guaranteed to be accessed by only 1 thread at a time, /// but this thread may be a different one each time. /// public: struct Context { ///

Initializes a new cache context

/// Identifier of the file the context is handling public: NUCLEX_STORAGE_API Context(const TIdentifier &identifier) : Location(0), Identifier(identifier) {} ///

Frees all resources owned by the context

public: NUCLEX_STORAGE_API virtual ~Context() {} ///

Reads data from the context

/// Location in the file from which to read data /// Buffer into which the data will be read /// Number of bytes that need to be read public: NUCLEX_STORAGE_API virtual void ReadAt( std::uint64_t location, void *buffer, std::size_t count ) = 0; ///

Starting location of data this context has currently buffered

public: std::uint64_t Location; ///

Identifier that may be used for hashing or as unique id

public: TIdentifier Identifier; }; #pragma endregion // struct Context #pragma region struct Indices ///

Stores the results of a context search

private: struct Indices { ///

Initializes a new index set

public: NUCLEX_STORAGE_API Indices() : Closest(static_cast(-1)), Oldest(static_cast(-1)) {} ///

Index of the context closest to the requested location

public: std::size_t Closest; ///

Index of the least recently used context

public: std::size_t Oldest; }; #pragma endregion // struct Indices #pragma region class ContextInsertionScope ///

Inserts a context into the front of the cache

private: class ContextInsertionScope { ///

Initializes a new context inserter

/// Cache into which the context will be inserted /// Method that will be called to insert the context /// Context that will be inserted into the cache public: NUCLEX_STORAGE_API ContextInsertionScope( Cache *cache, void (Cache::*insertAtFront)(Context *context), Context *context ) : cache(cache), insertAtFront(insertAtFront), context(context) {} ///

Inserts the context into the cache

public: NUCLEX_STORAGE_API ~ContextInsertionScope() { (this->cache->*this->insertAtFront)(this->context); } ///

Cache into which the context will be inserted

private: Cache *cache; ///

Method that will be called to insert the context

private: void (Cache::*insertAtFront)(Context *context); ///

Context that will be inserted

private: Context *context; }; #pragma endregion // class ContextInsertionScope ///

Initializes the cache

/// Number of contexts to keep alive at all times /// /// /// The cache is designed to handle a limited number of simultaneous reads. A new /// context will be created when no context exists that starts before the required /// position or when such contexts are being occupied by another thread at the time /// of the call. After reaching the specified, the least recently used one will be /// evicted each time a new context has to be created. /// /// /// There is one situation where more than the specified number of contexts may /// exist: if all of the contexts are occupied by other threads at the time of /// the call, a temporary context will be created for the calling thread. This means /// that for the affected thread, extraction will start from scratch, so it is wise /// to pick a context capacity high enough to prevent this situation. /// /// /// A typical value for the context capacity is 8, which should cover all normal /// use cases. Excessive capacities will waste memory and slow down context search. /// /// public: NUCLEX_STORAGE_API Cache(std::size_t contextCapacity) : contexts(nullptr), contextCount(0), contextCapacity(contextCapacity) { this->contexts = new Context *[contextCapacity]; std::fill(this->contexts, this->contexts + contextCapacity, nullptr); } ///

Frees all resources owned by the instance

public: NUCLEX_STORAGE_API virtual ~Cache() { for(std::size_t index = 0; index < this->contextCapacity; ++index) { delete this->contexts[index]; } delete []this->contexts; } ///

Reads the specified number of bytes from the specified position

/// Identifier of the file that will be accessed /// Absolute file position the data will be read from /// Buffer into which the data will be read /// Number of bytes that will be read public: NUCLEX_STORAGE_API void ReadAt( const TIdentifier &identifier, std::uint64_t position, void *buffer, std::size_t count ) { bool usePrivateContext; Context *context; { std::lock_guard scope(this->mutex); Indices indices = locate(identifier, position); // Was there a context we can reuse? if(indices.Closest != static_cast(-1)) { context = pick(indices.Closest); usePrivateContext = false; } else { // No, we need to add or replace a context context = nullptr; // Is there space left to create a new context? if(this->contextCount < this->contextCapacity) { ++this->contextCount; usePrivateContext = false; } else { // No, we are completely full // Is there a slot we can throw out for the new context we create? if(indices.Oldest != static_cast(-1)) { delete this->contexts[indices.Oldest]; remove(indices.Oldest); usePrivateContext = false; } else { // No, we have to resort to a temporary context usePrivateContext = true; } } } } // mutex scope // In case all slots were filled, a temporary context needs to be used. if(usePrivateContext) { std::unique_ptr temporaryContext(CreateContext(identifier)); temporaryContext->ReadAt(position, buffer, count); } else { // There were slots left, so take/create a context and return it afterwards if(context == nullptr) { context = CreateContext(identifier); } ContextInsertionScope insertionScope(this, &Cache::insertAtFront, context); { context->ReadAt(position, buffer, count); } } } ///

Creates a new context storing the state of a cache line

/// Identifies the file a context is required for /// The new context protected: virtual Context *CreateContext(const TIdentifier &identifier) = 0; ///

Searches for the context closest to the specified position

/// /// Identifier of the file for which a context will be located /// /// Position for which a context will be searched /// Indices of the closest, oldest and first free context private: Indices locate(const TIdentifier &identifier, std::uint64_t position) { Indices indices; std::uint64_t closestDistance = static_cast(-1); std::size_t closestIndex = static_cast(-1); for(std::size_t index = 0; index < this->contextCapacity; ++index) { Context *current = this->contexts[index]; if(current == nullptr) { break; } if(current->Location <= position) { if(this->identifierComparer(current->Identifier, identifier)) { if(closestIndex == static_cast(-1)) { closestDistance = position - current->Location; indices.Closest = index; } else { std::uint64_t currentDistance = position - current->Location; if(currentDistance < closestDistance) { closestDistance = currentDistance; indices.Closest = index; } } } } indices.Oldest = index; } return indices; } ///

Picks a context to be used for reading by the current thread

/// Index of the context that will be picked /// The context the thread should use to read /// /// This removes the context from the context list. It is up to the caller to ensure /// that it is returned whatever happens (otherwise the cache would be loosing contexts, /// resulting in memory leaks and eventually a maxed out contextCount without /// available contexts, causing performance to suffer dramatically). /// private: Context *pick(std::size_t index) { using namespace std; // If this triggers, there's an out-of-range index being calculated somewhere assert( (index < this->contextCapacity) && "Index must stay inside the context array" ); Context *picked = this->contexts[index]; remove(index); return picked; } ///

Removes a context from the context array

/// Index of the context that will be removed /// /// Shifts all contexts following after the index to the left by one. /// private: void remove(std::size_t index) { using namespace std; // If this triggers, there's an out-of-range index being calculated somewhere assert( (index < this->contextCapacity) && "Index must stay inside the context array" ); std::size_t lastIndex = this->contextCount - 1; for(; index < lastIndex; ++index) { if(this->contexts[index + 1] == nullptr) { break; } this->contexts[index] = this->contexts[index + 1]; } this->contexts[lastIndex] = nullptr; } ///

Moves the specified context to the front of the context list

/// Index of the context that will be moved to the front private: void insertAtFront(Context *context) { using namespace std; // If this should happen threads have attempted to return more contexts to the array // than have been picked up, which means that there's an error managing contexts somewhere. assert( (this->contexts[this->contextCapacity - 1] == nullptr) && "There must be open slots in the context array" ); // Normally, synchronization is up to the public methods and privates assume that // they're being called in an appropriate context. This method is being called from // a RAII scope and thus needs to perform its own synchronization. std::lock_guard scope(this->mutex); { // Skip until we encounter the first used slot std::size_t index = this->contextCount - 1; while(index > 0) { if(this->contexts[index] != nullptr) { break; } --index; } // Shift all slots to the right to make space at the beginning while(index > 0) { this->contexts[index + 1] = this->contexts[index]; --index; } this->contexts[0] = context; } // mutex scope } ///

Synchronizes multi-threaded accesses to the cache

private: std::mutex mutex; ///

Currently active decompressor states

private: Context **contexts; ///

Maximum number of simultaneous decompression contexts

private: std::size_t contextCapacity; ///

Number of currently active contexts

/// /// This must not be used to reason about the number of used slots in the context /// array. When a thread picks a context, it removes it from the context array, without /// decrementing this value. The context count is used to guarantee that there are /// no more than the specified number of contexts active at the same time. /// private: std::size_t contextCount; ///

Compares identifiers to see if contexts can be reused

private: TComparer identifierComparer; }; // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Storage::Helpers #endif // NUCLEX_STORAGE_HELPERS_CACHE_H