//========= Copyright Valve Corporation, All rights reserved. =================// // // Purpose: index-based hash map container well suited for large and growing // datasets. It uses less memory than other hash maps and incrementally // rehashes to reduce reallocation spikes. // //=============================================================================// #ifndef UTLHASHMAPLARGE_H #define UTLHASHMAPLARGE_H #ifdef _WIN32 #pragma once #endif #include "tier0/dbg.h" #include "bitvec.h" #include "tier1/murmurhash3.h" // fast mod for power of 2 numbers namespace basetypes { template inline bool IsPowerOf2(T n) { return n > 0 && (n & (n-1)) == 0; } template inline T2 ModPowerOf2(T1 a, T2 b) { return T2(a) & (b-1); } } // default comparison operator template class CDefEquals { public: CDefEquals() {} CDefEquals( int i ) {} inline bool operator()( const T &lhs, const T &rhs ) const { return ( lhs == rhs ); } inline bool operator!() const { return false; } }; // Specialization to compare pointers template class CDefEquals { public: CDefEquals() {} CDefEquals( int i ) {} inline bool operator()( const T *lhs, const T *rhs ) const { if ( lhs == rhs ) return true; else if ( NULL == lhs || NULL == rhs ) return false; else return ( *lhs == *rhs ); } inline bool operator!() const { return false; } }; // Hash specialization for CUtlStrings template<> struct MurmurHash3Functor { typedef uint32 TargetType ; TargetType operator()(const CUtlString &strKey) const { return MurmurHash3Functor()( strKey.String() ); } }; //hash 3 function for a general case sensitive string compares struct MurmurHash3ConstCharPtr { typedef uint32 TargetType ; TargetType operator()( const char* pszKey ) const { return MurmurHash3Functor()( pszKey ); } }; struct CaseSensitiveStrEquals { bool operator()( const char* pszLhs, const char* pszRhs ) const { return strcmp( pszLhs, pszRhs ) == 0; } }; //----------------------------------------------------------------------------- // // Purpose: An associative container. Pretty much identical to CUtlMap without the ability to walk in-order // This container is well suited for large and growing datasets. It uses less // memory than other hash maps and incrementally rehashes to reduce reallocation spikes. // However, it is slower (by about 20%) than CUtlHashTable // //----------------------------------------------------------------------------- template , typename H = MurmurHash3Functor > class CUtlHashMapLarge { public: // This enum exists so that FOR_EACH_MAP and FOR_EACH_MAP_FAST cannot accidentally // be used on a type that is not a CUtlMap. If the code compiles then all is well. // The check for IsUtlMap being true should be free. // Using an enum rather than a static const bool ensures that this trick works even // with optimizations disabled on gcc. enum CompileTimeCheck { IsUtlMap = 1 }; typedef K KeyType_t; typedef T ElemType_t; typedef int IndexType_t; typedef L EqualityFunc_t; typedef H HashFunc_t; CUtlHashMapLarge() { m_cElements = 0; m_nMaxElement = 0; m_nMinRehashedBucket = InvalidIndex(); m_nMaxRehashedBucket = InvalidIndex(); m_iNodeFreeListHead = InvalidIndex(); } CUtlHashMapLarge( int cElementsExpected ) { m_cElements = 0; m_nMaxElement = 0; m_nMinRehashedBucket = InvalidIndex(); m_nMaxRehashedBucket = InvalidIndex(); m_iNodeFreeListHead = InvalidIndex(); EnsureCapacity( cElementsExpected ); } ~CUtlHashMapLarge() { RemoveAll(); } // gets particular elements ElemType_t & Element( IndexType_t i ) { return m_memNodes.Element( i ).m_elem; } const ElemType_t & Element( IndexType_t i ) const { return m_memNodes.Element( i ).m_elem; } ElemType_t & operator[]( IndexType_t i ) { return m_memNodes.Element( i ).m_elem; } const ElemType_t & operator[]( IndexType_t i ) const { return m_memNodes.Element( i ).m_elem; } KeyType_t & Key( IndexType_t i ) { return m_memNodes.Element( i ).m_key; } const KeyType_t & Key( IndexType_t i ) const { return m_memNodes.Element( i ).m_key; } // Num elements IndexType_t Count() const { return m_cElements; } // Max "size" of the vector IndexType_t MaxElement() const { return m_nMaxElement; } // Checks if a node is valid and in the map bool IsValidIndex( IndexType_t i ) const { return i >= 0 && i < m_nMaxElement && !IsFreeNodeID( m_memNodes[i].m_iNextNode ); } // Invalid index static IndexType_t InvalidIndex() { return -1; } // Insert method IndexType_t Insert( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_UpdateExisting ); } IndexType_t Insert( const KeyType_t &key ) { return InsertInternal( key, ElemType_t(), eInsert_UpdateExisting ); } IndexType_t InsertWithDupes( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_CreateDupes ); } IndexType_t FindOrInsert( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_LeaveExisting ); } IndexType_t InsertOrReplace( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_UpdateExisting ); } // Finds an element IndexType_t Find( const KeyType_t &key ) const; // has an element bool HasElement( const KeyType_t &key ) const { return Find( key ) != InvalidIndex(); } void EnsureCapacity( int num ); void RemoveAt( IndexType_t i ); bool Remove( const KeyType_t &key ) { int iMap = Find( key ); if ( iMap != InvalidIndex() ) { RemoveAt( iMap ); return true; } return false; } void RemoveAll(); void Purge(); void PurgeAndDeleteElements(); void Swap( CUtlHashMapLarge &rhs ) { m_vecHashBuckets.Swap( rhs.m_vecHashBuckets ); V_swap( m_bitsMigratedBuckets, rhs.m_bitsMigratedBuckets ); m_memNodes.Swap( rhs.m_memNodes ); V_swap( m_iNodeFreeListHead, rhs.m_iNodeFreeListHead ); V_swap( m_cElements, rhs.m_cElements ); V_swap( m_nMaxElement, rhs.m_nMaxElement ); V_swap( m_nMinRehashedBucket, rhs.m_nMinRehashedBucket ); V_swap( m_nMaxRehashedBucket, rhs.m_nMaxRehashedBucket ); V_swap( m_EqualityFunc, rhs.m_EqualityFunc ); V_swap( m_HashFunc, rhs.m_HashFunc ); } private: enum EInsertPolicy { eInsert_UpdateExisting, eInsert_LeaveExisting, eInsert_CreateDupes }; IndexType_t InsertInternal( const KeyType_t &key, const ElemType_t &insert, EInsertPolicy ePolicy ); inline IndexType_t FreeNodeIDToIndex( IndexType_t i ) const { return (0-i)-3; } inline IndexType_t FreeNodeIndexToID( IndexType_t i ) const { return (-3)-i; } inline bool IsFreeNodeID( IndexType_t i ) const { return i < InvalidIndex(); } int FindInBucket( int iBucket, const KeyType_t &key ) const; int AllocNode(); void RehashNodesInBucket( int iBucket ); void LinkNodeIntoBucket( int iBucket, int iNewNode ); void UnlinkNodeFromBucket( int iBucket, int iNewNode ); bool RemoveNodeFromBucket( int iBucket, int iNodeToRemove ); void IncrementalRehash(); struct HashBucket_t { IndexType_t m_iNode; }; CUtlVector m_vecHashBuckets; CLargeVarBitVec m_bitsMigratedBuckets; struct Node_t { KeyType_t m_key; ElemType_t m_elem; int m_iNextNode; }; CUtlMemory m_memNodes; IndexType_t m_iNodeFreeListHead; IndexType_t m_cElements; IndexType_t m_nMaxElement; IndexType_t m_nMinRehashedBucket, m_nMaxRehashedBucket; EqualityFunc_t m_EqualityFunc; HashFunc_t m_HashFunc; }; //----------------------------------------------------------------------------- // Purpose: inserts an item into the map //----------------------------------------------------------------------------- template inline int CUtlHashMapLarge::InsertInternal( const KeyType_t &key, const ElemType_t &insert, EInsertPolicy ePolicy ) { // make sure we have room in the hash table if ( m_cElements >= m_vecHashBuckets.Count() ) EnsureCapacity( MAX( 16, m_vecHashBuckets.Count() * 2 ) ); if ( m_cElements >= m_memNodes.Count() ) m_memNodes.Grow( m_memNodes.Count() * 2 ); // rehash incrementally IncrementalRehash(); // hash the item uint32 hash = m_HashFunc( key ); // migrate data forward, if necessary int cBucketsToModAgainst = m_vecHashBuckets.Count() >> 1; int iBucket = basetypes::ModPowerOf2(hash, cBucketsToModAgainst); while ( iBucket >= m_nMinRehashedBucket && !m_bitsMigratedBuckets.Get( iBucket ) ) { RehashNodesInBucket( iBucket ); cBucketsToModAgainst >>= 1; iBucket = basetypes::ModPowerOf2(hash, cBucketsToModAgainst); } // prevent duplicates if necessary if ( ( ePolicy != eInsert_CreateDupes ) && m_cElements ) { // look in the bucket to see if we have a conflict int iBucket2 = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() ); IndexType_t iNode = FindInBucket( iBucket2, key ); if ( iNode != InvalidIndex() ) { // a duplicate - update in place (matching CUtlMap) if( ePolicy == eInsert_UpdateExisting ) { m_memNodes[iNode].m_elem = insert; } return iNode; } } // make an item int iNewNode = AllocNode(); m_memNodes[iNewNode].m_iNextNode = InvalidIndex(); CopyConstruct( &m_memNodes[iNewNode].m_key, key ); CopyConstruct( &m_memNodes[iNewNode].m_elem, insert ); iBucket = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() ); // link ourselves in // ::OutputDebugStr( CFmtStr( "insert %d into bucket %d\n", key, iBucket ).Access() ); LinkNodeIntoBucket( iBucket, iNewNode ); // return the new node return iNewNode; } //----------------------------------------------------------------------------- // Purpose: grows the map to fit the specified amount //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::EnsureCapacity( int amount ) { m_memNodes.EnsureCapacity( amount ); // ::OutputDebugStr( CFmtStr( "grown m_memNodes from %d to %d\n", m_cElements, m_memNodes.Count() ).Access() ); if ( amount <= m_vecHashBuckets.Count() ) return; int cBucketsNeeded = MAX( 16, m_vecHashBuckets.Count() ); while ( cBucketsNeeded < amount ) cBucketsNeeded *= 2; // ::OutputDebugStr( CFmtStr( "grown m_vecHashBuckets from %d to %d\n", m_vecHashBuckets.Count(), cBucketsNeeded ).Access() ); // grow the hash buckets int grow = cBucketsNeeded - m_vecHashBuckets.Count(); int iFirst = m_vecHashBuckets.AddMultipleToTail( grow ); // clear all the new data to invalid bits memset( &m_vecHashBuckets[iFirst], 0xFFFFFFFF, grow*sizeof(m_vecHashBuckets[iFirst]) ); Assert( basetypes::IsPowerOf2( m_vecHashBuckets.Count() ) ); // we'll have to rehash, all the buckets that existed before growth m_nMinRehashedBucket = 0; m_nMaxRehashedBucket = iFirst; if ( m_cElements > 0 ) { // remove all the current bits m_bitsMigratedBuckets.Resize( 0 ); // re-add new bits; these will all be reset to 0 m_bitsMigratedBuckets.Resize( m_vecHashBuckets.Count() ); } else { // no elements - no rehashing m_nMinRehashedBucket = m_vecHashBuckets.Count(); } } //----------------------------------------------------------------------------- // Purpose: gets a new node, from the free list if possible //----------------------------------------------------------------------------- template inline int CUtlHashMapLarge::AllocNode() { // if we're out of free elements, get the max if ( m_cElements == m_nMaxElement ) { m_cElements++; return m_nMaxElement++; } // pull from the free list Assert( m_iNodeFreeListHead != InvalidIndex() ); int iNewNode = m_iNodeFreeListHead; m_iNodeFreeListHead = FreeNodeIDToIndex( m_memNodes[iNewNode].m_iNextNode ); m_cElements++; return iNewNode; } //----------------------------------------------------------------------------- // Purpose: takes a bucket of nodes and re-hashes them into a more optimal bucket //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::RehashNodesInBucket( int iBucketSrc ) { // mark us as migrated m_bitsMigratedBuckets.Set( iBucketSrc ); // walk the list of items, re-hashing them IndexType_t iNode = m_vecHashBuckets[iBucketSrc].m_iNode; while ( iNode != InvalidIndex() ) { IndexType_t iNodeNext = m_memNodes[iNode].m_iNextNode; Assert( iNodeNext != iNode ); // work out where the node should go const KeyType_t &key = m_memNodes[iNode].m_key; uint32 hash = m_HashFunc( key ); int iBucketDest = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() ); // if the hash bucket has changed, move it if ( iBucketDest != iBucketSrc ) { // ::OutputDebugStr( CFmtStr( "moved key %d from bucket %d to %d\n", key, iBucketSrc, iBucketDest ).Access() ); // remove from this bucket list UnlinkNodeFromBucket( iBucketSrc, iNode ); // link into new bucket list LinkNodeIntoBucket( iBucketDest, iNode ); } iNode = iNodeNext; } } //----------------------------------------------------------------------------- // Purpose: searches for an item by key, returning the index handle //----------------------------------------------------------------------------- template inline int CUtlHashMapLarge::Find( const KeyType_t &key ) const { if ( m_cElements == 0 ) return InvalidIndex(); // hash the item uint32 hash = m_HashFunc( key ); // find the bucket int cBucketsToModAgainst = m_vecHashBuckets.Count(); int iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst ); // look in the bucket for the item int iNode = FindInBucket( iBucket, key ); if ( iNode != InvalidIndex() ) return iNode; // not found? we may have to look in older buckets cBucketsToModAgainst >>= 1; while ( cBucketsToModAgainst >= m_nMinRehashedBucket ) { iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst ); if ( !m_bitsMigratedBuckets.Get( iBucket ) ) { int iNode2 = FindInBucket( iBucket, key ); if ( iNode2 != InvalidIndex() ) return iNode2; } cBucketsToModAgainst >>= 1; } return InvalidIndex(); } //----------------------------------------------------------------------------- // Purpose: searches for an item by key, returning the index handle //----------------------------------------------------------------------------- template inline int CUtlHashMapLarge::FindInBucket( int iBucket, const KeyType_t &key ) const { if ( m_vecHashBuckets[iBucket].m_iNode != InvalidIndex() ) { IndexType_t iNode = m_vecHashBuckets[iBucket].m_iNode; Assert( iNode < m_nMaxElement ); while ( iNode != InvalidIndex() ) { // equality check if ( m_EqualityFunc( key, m_memNodes[iNode].m_key ) ) return iNode; iNode = m_memNodes[iNode].m_iNextNode; } } return InvalidIndex(); } //----------------------------------------------------------------------------- // Purpose: links a node into a bucket //----------------------------------------------------------------------------- template void CUtlHashMapLarge::LinkNodeIntoBucket( int iBucket, int iNewNode ) { // add into the start of the bucket's list m_memNodes[iNewNode].m_iNextNode = m_vecHashBuckets[iBucket].m_iNode; m_vecHashBuckets[iBucket].m_iNode = iNewNode; } //----------------------------------------------------------------------------- // Purpose: unlinks a node from the bucket //----------------------------------------------------------------------------- template void CUtlHashMapLarge::UnlinkNodeFromBucket( int iBucket, int iNodeToUnlink ) { int iNodeNext = m_memNodes[iNodeToUnlink].m_iNextNode; // if it's the first node, just update the bucket to point to the new place int iNode = m_vecHashBuckets[iBucket].m_iNode; if ( iNode == iNodeToUnlink ) { m_vecHashBuckets[iBucket].m_iNode = iNodeNext; return; } // walk the list to find where while ( iNode != InvalidIndex() ) { if ( m_memNodes[iNode].m_iNextNode == iNodeToUnlink ) { m_memNodes[iNode].m_iNextNode = iNodeNext; return; } iNode = m_memNodes[iNode].m_iNextNode; } // should always be valid to unlink Assert( false ); } //----------------------------------------------------------------------------- // Purpose: removes a single item from the map //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::RemoveAt( IndexType_t i ) { if ( !IsValidIndex( i ) ) { Assert( false ); return; } // unfortunately, we have to re-hash to find which bucket we're in uint32 hash = m_HashFunc( m_memNodes[i].m_key ); int cBucketsToModAgainst = m_vecHashBuckets.Count(); int iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst ); if ( RemoveNodeFromBucket( iBucket, i ) ) return; // wasn't found; look in older buckets cBucketsToModAgainst >>= 1; while ( cBucketsToModAgainst >= m_nMinRehashedBucket ) { iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst ); if ( !m_bitsMigratedBuckets.Get( iBucket ) ) { if ( RemoveNodeFromBucket( iBucket, i ) ) return; } cBucketsToModAgainst >>= 1; } // never found, container is busted Assert( false ); } //----------------------------------------------------------------------------- // Purpose: removes a node from the bucket, return true if it was found //----------------------------------------------------------------------------- template inline bool CUtlHashMapLarge::RemoveNodeFromBucket( IndexType_t iBucket, int iNodeToRemove ) { IndexType_t iNode = m_vecHashBuckets[iBucket].m_iNode; while ( iNode != InvalidIndex() ) { if ( iNodeToRemove == iNode ) { // found it, remove UnlinkNodeFromBucket( iBucket, iNodeToRemove ); Destruct( &m_memNodes[iNode].m_key ); Destruct( &m_memNodes[iNode].m_elem ); // link into free list m_memNodes[iNode].m_iNextNode = FreeNodeIndexToID( m_iNodeFreeListHead ); m_iNodeFreeListHead = iNode; m_cElements--; if ( m_cElements == 0 ) { m_nMinRehashedBucket = m_vecHashBuckets.Count(); } return true; } iNode = m_memNodes[iNode].m_iNextNode; } return false; } //----------------------------------------------------------------------------- // Purpose: removes all items from the hash map //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::RemoveAll() { FOR_EACH_MAP_FAST( *this, i ) { Destruct( &m_memNodes[i].m_key ); Destruct( &m_memNodes[i].m_elem ); } m_cElements = 0; m_nMaxElement = 0; m_iNodeFreeListHead = InvalidIndex(); m_nMinRehashedBucket = m_vecHashBuckets.Count(); m_nMaxRehashedBucket = InvalidIndex(); m_bitsMigratedBuckets.Resize( 0 ); memset( m_vecHashBuckets.Base(), 0xFF, m_vecHashBuckets.Count() * sizeof(HashBucket_t) ); } //----------------------------------------------------------------------------- // Purpose: removes all items from the hash map and releases memory //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::Purge() { FOR_EACH_MAP_FAST( *this, i ) { Destruct( &m_memNodes[i].m_key ); Destruct( &m_memNodes[i].m_elem ); } m_cElements = 0; m_nMaxElement = 0; m_iNodeFreeListHead = InvalidIndex(); m_nMinRehashedBucket = InvalidIndex(); m_nMaxRehashedBucket = InvalidIndex(); m_bitsMigratedBuckets.Resize( 0 ); m_memNodes.Purge(); m_vecHashBuckets.Purge(); } //----------------------------------------------------------------------------- // Purpose: removes and deletes all items from the hash map and releases memory //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::PurgeAndDeleteElements() { FOR_EACH_MAP_FAST( *this, i ) { delete this->Element( i ); } Purge(); } //----------------------------------------------------------------------------- // Purpose: rehashes buckets //----------------------------------------------------------------------------- template inline void CUtlHashMapLarge::IncrementalRehash() { if ( m_nMinRehashedBucket < m_nMaxRehashedBucket ) { while ( m_nMinRehashedBucket < m_nMaxRehashedBucket ) { // see if the bucket needs rehashing if ( m_vecHashBuckets[m_nMinRehashedBucket].m_iNode != InvalidIndex() && !m_bitsMigratedBuckets.Get(m_nMinRehashedBucket) ) { // rehash this bucket RehashNodesInBucket( m_nMinRehashedBucket ); // only actively do one - don't want to do it too fast since we may be on a rapid growth path ++m_nMinRehashedBucket; break; } // nothing to rehash in that bucket - increment and look again ++m_nMinRehashedBucket; } if ( m_nMinRehashedBucket >= m_nMaxRehashedBucket ) { // we're done; don't need any bits anymore m_nMinRehashedBucket = m_vecHashBuckets.Count(); m_nMaxRehashedBucket = InvalidIndex(); m_bitsMigratedBuckets.Resize( 0 ); } } } #endif // UTLHASHMAPLARGE_H