source-engine/gcsdk/steamextra/tier1/utlhashmaplarge.h

694 lines
21 KiB
C
Raw Permalink Normal View History

2020-04-22 12:56:21 -04:00
//========= Copyright Valve Corporation, All rights reserved. =================//
//
// Purpose: index-based hash map container well suited for large and growing
// datasets. It uses less memory than other hash maps and incrementally
// rehashes to reduce reallocation spikes.
//
//=============================================================================//
#ifndef UTLHASHMAPLARGE_H
#define UTLHASHMAPLARGE_H
#ifdef _WIN32
#pragma once
#endif
#include "tier0/dbg.h"
#include "bitvec.h"
#include "tier1/murmurhash3.h"
// fast mod for power of 2 numbers
namespace basetypes
{
template <class T>
inline bool IsPowerOf2(T n)
{
return n > 0 && (n & (n-1)) == 0;
}
template <class T1, class T2>
inline T2 ModPowerOf2(T1 a, T2 b)
{
return T2(a) & (b-1);
}
}
// default comparison operator
template <typename T>
class CDefEquals
{
public:
CDefEquals() {}
CDefEquals( int i ) {}
inline bool operator()( const T &lhs, const T &rhs ) const { return ( lhs == rhs ); }
inline bool operator!() const { return false; }
};
// Specialization to compare pointers
template <typename T>
class CDefEquals<T*>
{
public:
CDefEquals() {}
CDefEquals( int i ) {}
inline bool operator()( const T *lhs, const T *rhs ) const
{
if ( lhs == rhs )
return true;
else if ( NULL == lhs || NULL == rhs )
return false;
else
return ( *lhs == *rhs );
}
inline bool operator!() const { return false; }
};
// Hash specialization for CUtlStrings
template<>
struct MurmurHash3Functor<CUtlString>
{
typedef uint32 TargetType ;
TargetType operator()(const CUtlString &strKey) const
{
return MurmurHash3Functor<const char*>()( strKey.String() );
}
};
//hash 3 function for a general case sensitive string compares
struct MurmurHash3ConstCharPtr
{
typedef uint32 TargetType ;
TargetType operator()( const char* pszKey ) const { return MurmurHash3Functor<const char*>()( pszKey ); }
};
struct CaseSensitiveStrEquals
{
bool operator()( const char* pszLhs, const char* pszRhs ) const { return strcmp( pszLhs, pszRhs ) == 0; }
};
//-----------------------------------------------------------------------------
//
// Purpose: An associative container. Pretty much identical to CUtlMap without the ability to walk in-order
// This container is well suited for large and growing datasets. It uses less
// memory than other hash maps and incrementally rehashes to reduce reallocation spikes.
// However, it is slower (by about 20%) than CUtlHashTable
//
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L = CDefEquals<K>, typename H = MurmurHash3Functor<K> >
class CUtlHashMapLarge
{
public:
// This enum exists so that FOR_EACH_MAP and FOR_EACH_MAP_FAST cannot accidentally
// be used on a type that is not a CUtlMap. If the code compiles then all is well.
// The check for IsUtlMap being true should be free.
// Using an enum rather than a static const bool ensures that this trick works even
// with optimizations disabled on gcc.
enum CompileTimeCheck
{
IsUtlMap = 1
};
typedef K KeyType_t;
typedef T ElemType_t;
typedef int IndexType_t;
typedef L EqualityFunc_t;
typedef H HashFunc_t;
CUtlHashMapLarge()
{
m_cElements = 0;
m_nMaxElement = 0;
m_nMinRehashedBucket = InvalidIndex();
m_nMaxRehashedBucket = InvalidIndex();
m_iNodeFreeListHead = InvalidIndex();
}
CUtlHashMapLarge( int cElementsExpected )
{
m_cElements = 0;
m_nMaxElement = 0;
m_nMinRehashedBucket = InvalidIndex();
m_nMaxRehashedBucket = InvalidIndex();
m_iNodeFreeListHead = InvalidIndex();
EnsureCapacity( cElementsExpected );
}
~CUtlHashMapLarge()
{
RemoveAll();
}
// gets particular elements
ElemType_t & Element( IndexType_t i ) { return m_memNodes.Element( i ).m_elem; }
const ElemType_t & Element( IndexType_t i ) const { return m_memNodes.Element( i ).m_elem; }
ElemType_t & operator[]( IndexType_t i ) { return m_memNodes.Element( i ).m_elem; }
const ElemType_t & operator[]( IndexType_t i ) const { return m_memNodes.Element( i ).m_elem; }
KeyType_t & Key( IndexType_t i ) { return m_memNodes.Element( i ).m_key; }
const KeyType_t & Key( IndexType_t i ) const { return m_memNodes.Element( i ).m_key; }
// Num elements
IndexType_t Count() const { return m_cElements; }
// Max "size" of the vector
IndexType_t MaxElement() const { return m_nMaxElement; }
// Checks if a node is valid and in the map
bool IsValidIndex( IndexType_t i ) const { return i >= 0 && i < m_nMaxElement && !IsFreeNodeID( m_memNodes[i].m_iNextNode ); }
// Invalid index
static IndexType_t InvalidIndex() { return -1; }
// Insert method
IndexType_t Insert( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_UpdateExisting ); }
IndexType_t Insert( const KeyType_t &key ) { return InsertInternal( key, ElemType_t(), eInsert_UpdateExisting ); }
IndexType_t InsertWithDupes( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_CreateDupes ); }
IndexType_t FindOrInsert( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_LeaveExisting ); }
IndexType_t InsertOrReplace( const KeyType_t &key, const ElemType_t &insert ) { return InsertInternal( key, insert, eInsert_UpdateExisting ); }
// Finds an element
IndexType_t Find( const KeyType_t &key ) const;
// has an element
bool HasElement( const KeyType_t &key ) const
{
return Find( key ) != InvalidIndex();
}
void EnsureCapacity( int num );
void RemoveAt( IndexType_t i );
bool Remove( const KeyType_t &key )
{
int iMap = Find( key );
if ( iMap != InvalidIndex() )
{
RemoveAt( iMap );
return true;
}
return false;
}
void RemoveAll();
void Purge();
void PurgeAndDeleteElements();
void Swap( CUtlHashMapLarge<K,T,L,H> &rhs )
{
m_vecHashBuckets.Swap( rhs.m_vecHashBuckets );
V_swap( m_bitsMigratedBuckets, rhs.m_bitsMigratedBuckets );
m_memNodes.Swap( rhs.m_memNodes );
V_swap( m_iNodeFreeListHead, rhs.m_iNodeFreeListHead );
V_swap( m_cElements, rhs.m_cElements );
V_swap( m_nMaxElement, rhs.m_nMaxElement );
V_swap( m_nMinRehashedBucket, rhs.m_nMinRehashedBucket );
V_swap( m_nMaxRehashedBucket, rhs.m_nMaxRehashedBucket );
V_swap( m_EqualityFunc, rhs.m_EqualityFunc );
V_swap( m_HashFunc, rhs.m_HashFunc );
}
private:
enum EInsertPolicy { eInsert_UpdateExisting, eInsert_LeaveExisting, eInsert_CreateDupes };
IndexType_t InsertInternal( const KeyType_t &key, const ElemType_t &insert, EInsertPolicy ePolicy );
inline IndexType_t FreeNodeIDToIndex( IndexType_t i ) const { return (0-i)-3; }
inline IndexType_t FreeNodeIndexToID( IndexType_t i ) const { return (-3)-i; }
inline bool IsFreeNodeID( IndexType_t i ) const { return i < InvalidIndex(); }
int FindInBucket( int iBucket, const KeyType_t &key ) const;
int AllocNode();
void RehashNodesInBucket( int iBucket );
void LinkNodeIntoBucket( int iBucket, int iNewNode );
void UnlinkNodeFromBucket( int iBucket, int iNewNode );
bool RemoveNodeFromBucket( int iBucket, int iNodeToRemove );
void IncrementalRehash();
struct HashBucket_t
{
IndexType_t m_iNode;
};
CUtlVector<HashBucket_t> m_vecHashBuckets;
CLargeVarBitVec m_bitsMigratedBuckets;
struct Node_t
{
KeyType_t m_key;
ElemType_t m_elem;
int m_iNextNode;
};
CUtlMemory<Node_t> m_memNodes;
IndexType_t m_iNodeFreeListHead;
IndexType_t m_cElements;
IndexType_t m_nMaxElement;
IndexType_t m_nMinRehashedBucket, m_nMaxRehashedBucket;
EqualityFunc_t m_EqualityFunc;
HashFunc_t m_HashFunc;
};
//-----------------------------------------------------------------------------
// Purpose: inserts an item into the map
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline int CUtlHashMapLarge<K,T,L,H>::InsertInternal( const KeyType_t &key, const ElemType_t &insert, EInsertPolicy ePolicy )
{
// make sure we have room in the hash table
if ( m_cElements >= m_vecHashBuckets.Count() )
EnsureCapacity( MAX( 16, m_vecHashBuckets.Count() * 2 ) );
if ( m_cElements >= m_memNodes.Count() )
m_memNodes.Grow( m_memNodes.Count() * 2 );
// rehash incrementally
IncrementalRehash();
// hash the item
uint32 hash = m_HashFunc( key );
// migrate data forward, if necessary
int cBucketsToModAgainst = m_vecHashBuckets.Count() >> 1;
int iBucket = basetypes::ModPowerOf2(hash, cBucketsToModAgainst);
while ( iBucket >= m_nMinRehashedBucket
&& !m_bitsMigratedBuckets.Get( iBucket ) )
{
RehashNodesInBucket( iBucket );
cBucketsToModAgainst >>= 1;
iBucket = basetypes::ModPowerOf2(hash, cBucketsToModAgainst);
}
// prevent duplicates if necessary
if ( ( ePolicy != eInsert_CreateDupes ) && m_cElements )
{
// look in the bucket to see if we have a conflict
int iBucket2 = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() );
IndexType_t iNode = FindInBucket( iBucket2, key );
if ( iNode != InvalidIndex() )
{
// a duplicate - update in place (matching CUtlMap)
if( ePolicy == eInsert_UpdateExisting )
{
m_memNodes[iNode].m_elem = insert;
}
return iNode;
}
}
// make an item
int iNewNode = AllocNode();
m_memNodes[iNewNode].m_iNextNode = InvalidIndex();
CopyConstruct( &m_memNodes[iNewNode].m_key, key );
CopyConstruct( &m_memNodes[iNewNode].m_elem, insert );
iBucket = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() );
// link ourselves in
// ::OutputDebugStr( CFmtStr( "insert %d into bucket %d\n", key, iBucket ).Access() );
LinkNodeIntoBucket( iBucket, iNewNode );
// return the new node
return iNewNode;
}
//-----------------------------------------------------------------------------
// Purpose: grows the map to fit the specified amount
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::EnsureCapacity( int amount )
{
m_memNodes.EnsureCapacity( amount );
// ::OutputDebugStr( CFmtStr( "grown m_memNodes from %d to %d\n", m_cElements, m_memNodes.Count() ).Access() );
if ( amount <= m_vecHashBuckets.Count() )
return;
int cBucketsNeeded = MAX( 16, m_vecHashBuckets.Count() );
while ( cBucketsNeeded < amount )
cBucketsNeeded *= 2;
// ::OutputDebugStr( CFmtStr( "grown m_vecHashBuckets from %d to %d\n", m_vecHashBuckets.Count(), cBucketsNeeded ).Access() );
// grow the hash buckets
int grow = cBucketsNeeded - m_vecHashBuckets.Count();
int iFirst = m_vecHashBuckets.AddMultipleToTail( grow );
// clear all the new data to invalid bits
memset( &m_vecHashBuckets[iFirst], 0xFFFFFFFF, grow*sizeof(m_vecHashBuckets[iFirst]) );
Assert( basetypes::IsPowerOf2( m_vecHashBuckets.Count() ) );
// we'll have to rehash, all the buckets that existed before growth
m_nMinRehashedBucket = 0;
m_nMaxRehashedBucket = iFirst;
if ( m_cElements > 0 )
{
// remove all the current bits
m_bitsMigratedBuckets.Resize( 0 );
// re-add new bits; these will all be reset to 0
m_bitsMigratedBuckets.Resize( m_vecHashBuckets.Count() );
}
else
{
// no elements - no rehashing
m_nMinRehashedBucket = m_vecHashBuckets.Count();
}
}
//-----------------------------------------------------------------------------
// Purpose: gets a new node, from the free list if possible
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline int CUtlHashMapLarge<K,T,L,H>::AllocNode()
{
// if we're out of free elements, get the max
if ( m_cElements == m_nMaxElement )
{
m_cElements++;
return m_nMaxElement++;
}
// pull from the free list
Assert( m_iNodeFreeListHead != InvalidIndex() );
int iNewNode = m_iNodeFreeListHead;
m_iNodeFreeListHead = FreeNodeIDToIndex( m_memNodes[iNewNode].m_iNextNode );
m_cElements++;
return iNewNode;
}
//-----------------------------------------------------------------------------
// Purpose: takes a bucket of nodes and re-hashes them into a more optimal bucket
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::RehashNodesInBucket( int iBucketSrc )
{
// mark us as migrated
m_bitsMigratedBuckets.Set( iBucketSrc );
// walk the list of items, re-hashing them
IndexType_t iNode = m_vecHashBuckets[iBucketSrc].m_iNode;
while ( iNode != InvalidIndex() )
{
IndexType_t iNodeNext = m_memNodes[iNode].m_iNextNode;
Assert( iNodeNext != iNode );
// work out where the node should go
const KeyType_t &key = m_memNodes[iNode].m_key;
uint32 hash = m_HashFunc( key );
int iBucketDest = basetypes::ModPowerOf2( hash, m_vecHashBuckets.Count() );
// if the hash bucket has changed, move it
if ( iBucketDest != iBucketSrc )
{
// ::OutputDebugStr( CFmtStr( "moved key %d from bucket %d to %d\n", key, iBucketSrc, iBucketDest ).Access() );
// remove from this bucket list
UnlinkNodeFromBucket( iBucketSrc, iNode );
// link into new bucket list
LinkNodeIntoBucket( iBucketDest, iNode );
}
iNode = iNodeNext;
}
}
//-----------------------------------------------------------------------------
// Purpose: searches for an item by key, returning the index handle
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline int CUtlHashMapLarge<K,T,L,H>::Find( const KeyType_t &key ) const
{
if ( m_cElements == 0 )
return InvalidIndex();
// hash the item
uint32 hash = m_HashFunc( key );
// find the bucket
int cBucketsToModAgainst = m_vecHashBuckets.Count();
int iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst );
// look in the bucket for the item
int iNode = FindInBucket( iBucket, key );
if ( iNode != InvalidIndex() )
return iNode;
// not found? we may have to look in older buckets
cBucketsToModAgainst >>= 1;
while ( cBucketsToModAgainst >= m_nMinRehashedBucket )
{
iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst );
if ( !m_bitsMigratedBuckets.Get( iBucket ) )
{
int iNode2 = FindInBucket( iBucket, key );
if ( iNode2 != InvalidIndex() )
return iNode2;
}
cBucketsToModAgainst >>= 1;
}
return InvalidIndex();
}
//-----------------------------------------------------------------------------
// Purpose: searches for an item by key, returning the index handle
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline int CUtlHashMapLarge<K,T,L,H>::FindInBucket( int iBucket, const KeyType_t &key ) const
{
if ( m_vecHashBuckets[iBucket].m_iNode != InvalidIndex() )
{
IndexType_t iNode = m_vecHashBuckets[iBucket].m_iNode;
Assert( iNode < m_nMaxElement );
while ( iNode != InvalidIndex() )
{
// equality check
if ( m_EqualityFunc( key, m_memNodes[iNode].m_key ) )
return iNode;
iNode = m_memNodes[iNode].m_iNextNode;
}
}
return InvalidIndex();
}
//-----------------------------------------------------------------------------
// Purpose: links a node into a bucket
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
void CUtlHashMapLarge<K,T,L,H>::LinkNodeIntoBucket( int iBucket, int iNewNode )
{
// add into the start of the bucket's list
m_memNodes[iNewNode].m_iNextNode = m_vecHashBuckets[iBucket].m_iNode;
m_vecHashBuckets[iBucket].m_iNode = iNewNode;
}
//-----------------------------------------------------------------------------
// Purpose: unlinks a node from the bucket
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
void CUtlHashMapLarge<K,T,L,H>::UnlinkNodeFromBucket( int iBucket, int iNodeToUnlink )
{
int iNodeNext = m_memNodes[iNodeToUnlink].m_iNextNode;
// if it's the first node, just update the bucket to point to the new place
int iNode = m_vecHashBuckets[iBucket].m_iNode;
if ( iNode == iNodeToUnlink )
{
m_vecHashBuckets[iBucket].m_iNode = iNodeNext;
return;
}
// walk the list to find where
while ( iNode != InvalidIndex() )
{
if ( m_memNodes[iNode].m_iNextNode == iNodeToUnlink )
{
m_memNodes[iNode].m_iNextNode = iNodeNext;
return;
}
iNode = m_memNodes[iNode].m_iNextNode;
}
// should always be valid to unlink
Assert( false );
}
//-----------------------------------------------------------------------------
// Purpose: removes a single item from the map
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::RemoveAt( IndexType_t i )
{
if ( !IsValidIndex( i ) )
{
Assert( false );
return;
}
// unfortunately, we have to re-hash to find which bucket we're in
uint32 hash = m_HashFunc( m_memNodes[i].m_key );
int cBucketsToModAgainst = m_vecHashBuckets.Count();
int iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst );
if ( RemoveNodeFromBucket( iBucket, i ) )
return;
// wasn't found; look in older buckets
cBucketsToModAgainst >>= 1;
while ( cBucketsToModAgainst >= m_nMinRehashedBucket )
{
iBucket = basetypes::ModPowerOf2( hash, cBucketsToModAgainst );
if ( !m_bitsMigratedBuckets.Get( iBucket ) )
{
if ( RemoveNodeFromBucket( iBucket, i ) )
return;
}
cBucketsToModAgainst >>= 1;
}
// never found, container is busted
Assert( false );
}
//-----------------------------------------------------------------------------
// Purpose: removes a node from the bucket, return true if it was found
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline bool CUtlHashMapLarge<K,T,L,H>::RemoveNodeFromBucket( IndexType_t iBucket, int iNodeToRemove )
{
IndexType_t iNode = m_vecHashBuckets[iBucket].m_iNode;
while ( iNode != InvalidIndex() )
{
if ( iNodeToRemove == iNode )
{
// found it, remove
UnlinkNodeFromBucket( iBucket, iNodeToRemove );
Destruct( &m_memNodes[iNode].m_key );
Destruct( &m_memNodes[iNode].m_elem );
// link into free list
m_memNodes[iNode].m_iNextNode = FreeNodeIndexToID( m_iNodeFreeListHead );
m_iNodeFreeListHead = iNode;
m_cElements--;
if ( m_cElements == 0 )
{
m_nMinRehashedBucket = m_vecHashBuckets.Count();
}
return true;
}
iNode = m_memNodes[iNode].m_iNextNode;
}
return false;
}
//-----------------------------------------------------------------------------
// Purpose: removes all items from the hash map
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::RemoveAll()
{
FOR_EACH_MAP_FAST( *this, i )
{
Destruct( &m_memNodes[i].m_key );
Destruct( &m_memNodes[i].m_elem );
}
m_cElements = 0;
m_nMaxElement = 0;
m_iNodeFreeListHead = InvalidIndex();
m_nMinRehashedBucket = m_vecHashBuckets.Count();
m_nMaxRehashedBucket = InvalidIndex();
m_bitsMigratedBuckets.Resize( 0 );
memset( m_vecHashBuckets.Base(), 0xFF, m_vecHashBuckets.Count() * sizeof(HashBucket_t) );
}
//-----------------------------------------------------------------------------
// Purpose: removes all items from the hash map and releases memory
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::Purge()
{
FOR_EACH_MAP_FAST( *this, i )
{
Destruct( &m_memNodes[i].m_key );
Destruct( &m_memNodes[i].m_elem );
}
m_cElements = 0;
m_nMaxElement = 0;
m_iNodeFreeListHead = InvalidIndex();
m_nMinRehashedBucket = InvalidIndex();
m_nMaxRehashedBucket = InvalidIndex();
m_bitsMigratedBuckets.Resize( 0 );
m_memNodes.Purge();
m_vecHashBuckets.Purge();
}
//-----------------------------------------------------------------------------
// Purpose: removes and deletes all items from the hash map and releases memory
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::PurgeAndDeleteElements()
{
FOR_EACH_MAP_FAST( *this, i )
{
delete this->Element( i );
}
Purge();
}
//-----------------------------------------------------------------------------
// Purpose: rehashes buckets
//-----------------------------------------------------------------------------
template <typename K, typename T, typename L, typename H>
inline void CUtlHashMapLarge<K,T,L,H>::IncrementalRehash()
{
if ( m_nMinRehashedBucket < m_nMaxRehashedBucket )
{
while ( m_nMinRehashedBucket < m_nMaxRehashedBucket )
{
// see if the bucket needs rehashing
if ( m_vecHashBuckets[m_nMinRehashedBucket].m_iNode != InvalidIndex()
&& !m_bitsMigratedBuckets.Get(m_nMinRehashedBucket) )
{
// rehash this bucket
RehashNodesInBucket( m_nMinRehashedBucket );
// only actively do one - don't want to do it too fast since we may be on a rapid growth path
++m_nMinRehashedBucket;
break;
}
// nothing to rehash in that bucket - increment and look again
++m_nMinRehashedBucket;
}
if ( m_nMinRehashedBucket >= m_nMaxRehashedBucket )
{
// we're done; don't need any bits anymore
m_nMinRehashedBucket = m_vecHashBuckets.Count();
m_nMaxRehashedBucket = InvalidIndex();
m_bitsMigratedBuckets.Resize( 0 );
}
}
}
#endif // UTLHASHMAPLARGE_H