You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
499 lines
15 KiB
499 lines
15 KiB
//========= Copyright Valve Corporation, All rights reserved. ============// |
|
// |
|
// Purpose: Defines a large symbol table (intp sized handles, can store more than 64k strings) |
|
// |
|
// $Header: $ |
|
// $NoKeywords: $ |
|
//===========================================================================// |
|
|
|
#ifndef UTLSYMBOLLARGE_H |
|
#define UTLSYMBOLLARGE_H |
|
|
|
#ifdef _WIN32 |
|
#pragma once |
|
#endif |
|
|
|
#include "tier0/threadtools.h" |
|
#include "tier1/utltshash.h" |
|
#include "tier1/stringpool.h" |
|
#include "tier0/vprof.h" |
|
#include "tier1/utltshash.h" |
|
|
|
//----------------------------------------------------------------------------- |
|
// CUtlSymbolTableLarge: |
|
// description: |
|
// This class defines a symbol table, which allows us to perform mappings |
|
// of strings to symbols and back. |
|
// |
|
// This class stores the strings in a series of string pools. The returned CUtlSymbolLarge is just a pointer |
|
// to the string data, the hash precedes it in memory and is used to speed up searching, etc. |
|
//----------------------------------------------------------------------------- |
|
|
|
typedef intp UtlSymLargeId_t; |
|
|
|
#define UTL_INVAL_SYMBOL_LARGE ((UtlSymLargeId_t)~0) |
|
|
|
class CUtlSymbolLarge |
|
{ |
|
public: |
|
// constructor, destructor |
|
CUtlSymbolLarge() |
|
{ |
|
u.m_Id = UTL_INVAL_SYMBOL_LARGE; |
|
} |
|
|
|
CUtlSymbolLarge( UtlSymLargeId_t id ) |
|
{ |
|
u.m_Id = id; |
|
} |
|
CUtlSymbolLarge( CUtlSymbolLarge const& sym ) |
|
{ |
|
u.m_Id = sym.u.m_Id; |
|
} |
|
|
|
// operator= |
|
CUtlSymbolLarge& operator=( CUtlSymbolLarge const& src ) |
|
{ |
|
u.m_Id = src.u.m_Id; |
|
return *this; |
|
} |
|
|
|
// operator== |
|
bool operator==( CUtlSymbolLarge const& src ) const |
|
{ |
|
return u.m_Id == src.u.m_Id; |
|
} |
|
|
|
// operator== |
|
bool operator==( UtlSymLargeId_t const& src ) const |
|
{ |
|
return u.m_Id == src; |
|
} |
|
|
|
// operator== |
|
bool operator!=( CUtlSymbolLarge const& src ) const |
|
{ |
|
return u.m_Id != src.u.m_Id; |
|
} |
|
|
|
// operator== |
|
bool operator!=( UtlSymLargeId_t const& src ) const |
|
{ |
|
return u.m_Id != src; |
|
} |
|
|
|
// Gets at the symbol |
|
operator UtlSymLargeId_t const() const |
|
{ |
|
return u.m_Id; |
|
} |
|
|
|
// Gets the string associated with the symbol |
|
inline const char* String( ) const |
|
{ |
|
if ( u.m_Id == UTL_INVAL_SYMBOL_LARGE ) |
|
return ""; |
|
return u.m_pAsString; |
|
} |
|
|
|
inline bool IsValid() const |
|
{ |
|
return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false; |
|
} |
|
|
|
private: |
|
// Disallowed |
|
CUtlSymbolLarge( const char* pStr ); // they need to go through the table to assign the ptr |
|
bool operator==( const char* pStr ) const; // disallow since we don't know if the table this is from was case sensitive or not... maybe we don't care |
|
|
|
union |
|
{ |
|
UtlSymLargeId_t m_Id; |
|
char const *m_pAsString; |
|
} u; |
|
}; |
|
|
|
#define MIN_STRING_POOL_SIZE 2048 |
|
|
|
inline uint32 CUtlSymbolLarge_Hash( bool CASEINSENSITIVE, const char *pString, int len ) |
|
{ |
|
return ( CASEINSENSITIVE ? HashStringCaseless( pString ) : HashString( pString ) ); |
|
} |
|
|
|
typedef uint32 LargeSymbolTableHashDecoration_t; |
|
|
|
// The structure consists of the hash immediately followed by the string data |
|
struct CUtlSymbolTableLargeBaseTreeEntry_t |
|
{ |
|
LargeSymbolTableHashDecoration_t m_Hash; |
|
// Variable length string data |
|
char m_String[1]; |
|
|
|
bool IsEmpty() const |
|
{ |
|
return ( ( m_Hash == 0 ) && ( 0 == m_String[0] ) ); |
|
} |
|
|
|
char const *String() const |
|
{ |
|
return (const char *)&m_String[ 0 ]; |
|
} |
|
|
|
CUtlSymbolLarge ToSymbol() const |
|
{ |
|
return reinterpret_cast< UtlSymLargeId_t >( String() ); |
|
} |
|
|
|
LargeSymbolTableHashDecoration_t HashValue() const |
|
{ |
|
return m_Hash; |
|
} |
|
}; |
|
|
|
template< class TreeType, bool CASEINSENSITIVE > |
|
class CTreeEntryLess |
|
{ |
|
public: |
|
CTreeEntryLess( int ignored = 0 ) {} // permits default initialization to NULL in CUtlRBTree |
|
bool operator!() const { return false; } |
|
bool operator()( CUtlSymbolTableLargeBaseTreeEntry_t * const &left, CUtlSymbolTableLargeBaseTreeEntry_t * const &right ) const |
|
{ |
|
// compare the hashes |
|
if ( left->m_Hash == right->m_Hash ) |
|
{ |
|
// if the hashes match compare the strings |
|
if ( !CASEINSENSITIVE ) |
|
return strcmp( left->String(), right->String() ) < 0; |
|
else |
|
return V_stricmp( left->String(), right->String() ) < 0; |
|
} |
|
else |
|
{ |
|
return left->m_Hash < right->m_Hash; |
|
} |
|
} |
|
}; |
|
|
|
// For non-threaded versions, simply index into CUtlRBTree |
|
template< bool CASEINSENSITIVE > |
|
class CNonThreadsafeTree : public CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree< CASEINSENSITIVE >, CASEINSENSITIVE > > |
|
{ |
|
public: |
|
typedef CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree, CASEINSENSITIVE > > CNonThreadsafeTreeType; |
|
|
|
CNonThreadsafeTree() : |
|
CNonThreadsafeTreeType( 0, 16 ) |
|
{ |
|
} |
|
inline void Commit() |
|
{ |
|
// Nothing, only matters for thread-safe tables |
|
} |
|
inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) |
|
{ |
|
return CNonThreadsafeTreeType::Insert( entry ); |
|
} |
|
inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) const |
|
{ |
|
return CNonThreadsafeTreeType::Find( entry ); |
|
} |
|
inline int InvalidIndex() const |
|
{ |
|
return CNonThreadsafeTreeType::InvalidIndex(); |
|
} |
|
inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const |
|
{ |
|
CUtlVector< CUtlSymbolTableLargeBaseTreeEntry_t * > list; |
|
list.EnsureCount( nCount ); |
|
for ( int i = 0; i < nCount; ++i ) |
|
{ |
|
pElements[ i ] = CNonThreadsafeTreeType::Element( i )->ToSymbol(); |
|
} |
|
|
|
return nCount; |
|
} |
|
}; |
|
|
|
// Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash |
|
// contained inside of it, don't need to recompute a hash here |
|
template < int BUCKET_COUNT, class KEYTYPE, bool CASEINSENSITIVE > |
|
class CCThreadsafeTreeHashMethod |
|
{ |
|
public: |
|
static int Hash( const KEYTYPE &key, int nBucketMask ) |
|
{ |
|
uint32 nHash = key->HashValue(); |
|
return ( nHash & nBucketMask ); |
|
} |
|
|
|
static bool Compare( CUtlSymbolTableLargeBaseTreeEntry_t * const &lhs, CUtlSymbolTableLargeBaseTreeEntry_t * const &rhs ) |
|
{ |
|
if ( lhs->m_Hash != rhs->m_Hash ) |
|
return false; |
|
if ( !CASEINSENSITIVE ) |
|
{ |
|
return ( !Q_strcmp( lhs->String(), rhs->String() ) ? true : false ); |
|
} |
|
|
|
return ( !Q_stricmp( lhs->String(), rhs->String() ) ? true : false ); |
|
} |
|
}; |
|
|
|
/* |
|
NOTE: So the only crappy thing about using a CUtlTSHash here is that the KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the |
|
hash and the string since with strings there is a good chance of hash collision after you have a fair number of strings so we have to implement |
|
a Compare method (above) which falls back to strcmp/stricmp if the hashes are equal. This means that all of the data is in the KEYTYPE of the hash and the |
|
payload doesn't matter. So I made the payload also be a pointer to a CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more convenient |
|
|
|
TODO: If we have a CUtlTSHash that was all about the existence of the KEYTYPE and didn't require a payload (or template on 'void') then we could eliminate |
|
50% of the pointer overhead used for this data structure. |
|
*/ |
|
|
|
// Thread safe version is based on the |
|
template < bool CASEINSENSITIVE > |
|
class CThreadsafeTree : public CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > |
|
{ |
|
public: |
|
typedef CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > CThreadsafeTreeType; |
|
|
|
CThreadsafeTree() : |
|
CThreadsafeTreeType( 32 ) |
|
{ |
|
} |
|
inline void Commit() |
|
{ |
|
CThreadsafeTreeType::Commit(); |
|
} |
|
inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) |
|
{ |
|
return CThreadsafeTreeType::Insert( entry, entry ); |
|
} |
|
inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) |
|
{ |
|
return CThreadsafeTreeType::Find( entry ); |
|
} |
|
inline int InvalidIndex() const |
|
{ |
|
return CThreadsafeTreeType::InvalidHandle(); |
|
} |
|
inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const |
|
{ |
|
CUtlVector< UtlTSHashHandle_t > list; |
|
list.EnsureCount( nCount ); |
|
int c = CThreadsafeTreeType::GetElements( nFirstElement, nCount, list.Base() ); |
|
for ( int i = 0; i < c; ++i ) |
|
{ |
|
pElements[ i ] = CThreadsafeTreeType::Element( list[ i ] )->ToSymbol(); |
|
} |
|
|
|
return c; |
|
} |
|
}; |
|
|
|
// Base Class for threaded and non-threaded types |
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE = MIN_STRING_POOL_SIZE > |
|
class CUtlSymbolTableLargeBase |
|
{ |
|
public: |
|
// constructor, destructor |
|
CUtlSymbolTableLargeBase(); |
|
~CUtlSymbolTableLargeBase(); |
|
|
|
// Finds and/or creates a symbol based on the string |
|
CUtlSymbolLarge AddString( const char* pString ); |
|
|
|
// Finds the symbol for pString |
|
CUtlSymbolLarge Find( const char* pString ) const; |
|
|
|
// Remove all symbols in the table. |
|
void RemoveAll(); |
|
|
|
int GetNumStrings( void ) const |
|
{ |
|
return m_Lookup.Count(); |
|
} |
|
|
|
void Commit() |
|
{ |
|
m_Lookup.Commit(); |
|
} |
|
|
|
// Returns elements in the table |
|
int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const |
|
{ |
|
return m_Lookup.GetElements( nFirstElement, nCount, pElements ); |
|
} |
|
|
|
uint64 GetMemoryUsage() const |
|
{ |
|
uint64 unBytesUsed = 0u; |
|
|
|
for ( int i=0; i < m_StringPools.Count(); i++ ) |
|
{ |
|
StringPool_t *pPool = m_StringPools[i]; |
|
|
|
unBytesUsed += (uint64)pPool->m_TotalLen; |
|
} |
|
return unBytesUsed; |
|
} |
|
|
|
|
|
protected: |
|
|
|
struct StringPool_t |
|
{ |
|
int m_TotalLen; // How large is |
|
int m_SpaceUsed; |
|
char m_Data[1]; |
|
}; |
|
|
|
TreeType m_Lookup; |
|
|
|
// stores the string data |
|
CUtlVector< StringPool_t * > m_StringPools; |
|
|
|
private: |
|
int FindPoolWithSpace( int len ) const; |
|
}; |
|
|
|
//----------------------------------------------------------------------------- |
|
// constructor, destructor |
|
//----------------------------------------------------------------------------- |
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE >::CUtlSymbolTableLargeBase() : |
|
m_StringPools( 8 ) |
|
{ |
|
} |
|
|
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::~CUtlSymbolTableLargeBase() |
|
{ |
|
// Release the stringpool string data |
|
RemoveAll(); |
|
} |
|
|
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::Find( const char* pString ) const |
|
{ |
|
VPROF( "CUtlSymbolLarge::Find" ); |
|
if (!pString) |
|
return CUtlSymbolLarge(); |
|
|
|
// Passing this special invalid symbol makes the comparison function |
|
// use the string passed in the context |
|
int len = Q_strlen( pString ) + 1; |
|
|
|
CUtlSymbolTableLargeBaseTreeEntry_t *search = (CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca( len + sizeof( LargeSymbolTableHashDecoration_t ) ); |
|
search->m_Hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, len ); |
|
Q_memcpy( (char *)&search->m_String[ 0 ], pString, len ); |
|
|
|
int idx = const_cast< TreeType & >(m_Lookup).Find( search ); |
|
|
|
if ( idx == m_Lookup.InvalidIndex() ) |
|
return UTL_INVAL_SYMBOL_LARGE; |
|
|
|
const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[ idx ]; |
|
return entry->ToSymbol(); |
|
} |
|
|
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline int CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace( int len ) const |
|
{ |
|
for ( int i=0; i < m_StringPools.Count(); i++ ) |
|
{ |
|
StringPool_t *pPool = m_StringPools[i]; |
|
|
|
if ( (pPool->m_TotalLen - pPool->m_SpaceUsed) >= len ) |
|
{ |
|
return i; |
|
} |
|
} |
|
|
|
return -1; |
|
} |
|
|
|
//----------------------------------------------------------------------------- |
|
// Finds and/or creates a symbol based on the string |
|
//----------------------------------------------------------------------------- |
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::AddString( const char* pString ) |
|
{ |
|
VPROF("CUtlSymbolLarge::AddString"); |
|
if (!pString) |
|
return UTL_INVAL_SYMBOL_LARGE; |
|
|
|
CUtlSymbolLarge id = Find( pString ); |
|
if ( id != UTL_INVAL_SYMBOL_LARGE ) |
|
return id; |
|
|
|
int lenString = Q_strlen(pString) + 1; // length of just the string |
|
int lenDecorated = lenString + sizeof(LargeSymbolTableHashDecoration_t); // and with its hash decoration |
|
// make sure that all strings are aligned on 2-byte boundaries so the hashes will read correctly |
|
// This assert seems to be invalid because LargeSymbolTableHashDecoration_t is always |
|
// a uint32, by design. |
|
//COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) == sizeof(intp)); |
|
lenDecorated = ALIGN_VALUE(lenDecorated, sizeof( intp ) ); |
|
|
|
// Find a pool with space for this string, or allocate a new one. |
|
int iPool = FindPoolWithSpace( lenDecorated ); |
|
if ( iPool == -1 ) |
|
{ |
|
// Add a new pool. |
|
int newPoolSize = MAX( lenDecorated + sizeof( StringPool_t ), POOL_SIZE ); |
|
StringPool_t *pPool = (StringPool_t*)malloc( newPoolSize ); |
|
|
|
pPool->m_TotalLen = newPoolSize - sizeof( StringPool_t ); |
|
pPool->m_SpaceUsed = 0; |
|
iPool = m_StringPools.AddToTail( pPool ); |
|
} |
|
|
|
// Compute a hash |
|
LargeSymbolTableHashDecoration_t hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, lenString ); |
|
|
|
// Copy the string in. |
|
StringPool_t *pPool = m_StringPools[iPool]; |
|
// Assert( pPool->m_SpaceUsed < 0xFFFF ); // Pool could be bigger than 2k |
|
// This should never happen, because if we had a string > 64k, it |
|
// would have been given its entire own pool. |
|
|
|
CUtlSymbolTableLargeBaseTreeEntry_t *entry = ( CUtlSymbolTableLargeBaseTreeEntry_t * )&pPool->m_Data[ pPool->m_SpaceUsed ]; |
|
|
|
pPool->m_SpaceUsed += lenDecorated; |
|
|
|
entry->m_Hash = hash; |
|
char *pText = (char *)&entry->m_String [ 0 ]; |
|
Q_memcpy( pText, pString, lenString ); |
|
|
|
// insert the string into the database |
|
MEM_ALLOC_CREDIT(); |
|
int idx = m_Lookup.Insert( entry ); |
|
return m_Lookup.Element( idx )->ToSymbol(); |
|
} |
|
|
|
//----------------------------------------------------------------------------- |
|
// Remove all symbols in the table. |
|
//----------------------------------------------------------------------------- |
|
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > |
|
inline void CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::RemoveAll() |
|
{ |
|
m_Lookup.Purge(); |
|
|
|
for ( int i=0; i < m_StringPools.Count(); i++ ) |
|
{ |
|
StringPool_t * pString = m_StringPools[i]; |
|
free( pString ); |
|
} |
|
|
|
m_StringPools.RemoveAll(); |
|
} |
|
|
|
// Case-sensitive |
|
typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< false >, false > CUtlSymbolTableLarge; |
|
// Case-insensitive |
|
typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< true >, true > CUtlSymbolTableLarge_CI; |
|
// Multi-threaded case-sensitive |
|
typedef CUtlSymbolTableLargeBase< CThreadsafeTree< false >, false > CUtlSymbolTableLargeMT; |
|
// Multi-threaded case-insensitive |
|
typedef CUtlSymbolTableLargeBase< CThreadsafeTree< true >, true > CUtlSymbolTableLargeMT_CI; |
|
|
|
#endif // UTLSYMBOLLARGE_H
|
|
|