diff --git a/materialsystem/cmaterialsystem.cpp b/materialsystem/cmaterialsystem.cpp index a3c1c233..2293af42 100644 --- a/materialsystem/cmaterialsystem.cpp +++ b/materialsystem/cmaterialsystem.cpp @@ -1029,7 +1029,7 @@ bool CMaterialSystem::AllowThreading( bool bAllow, int nServiceThread ) bool bOldAllow = m_bAllowQueuedRendering; - if ( GetCPUInformation()->m_nPhysicalProcessors >= 2 ) + if ( GetCPUInformation()->m_nLogicalProcessors >= 2 ) { m_bAllowQueuedRendering = bAllow; bool bQueued = m_IdealThreadMode != MATERIAL_SINGLE_THREADED; @@ -1806,11 +1806,7 @@ static ConVar mat_normalmaps( "mat_normalmaps", "0", FCVAR_CHEAT ); static ConVar mat_measurefillrate( "mat_measurefillrate", "0", FCVAR_CHEAT ); static ConVar mat_fillrate( "mat_fillrate", "0", FCVAR_CHEAT ); static ConVar mat_reversedepth( "mat_reversedepth", "0", FCVAR_CHEAT ); -#ifdef DX_TO_GL_ABSTRACTION -static ConVar mat_bufferprimitives( "mat_bufferprimitives", "0" ); // I'm not seeing any benefit speed wise for buffered primitives on GLM/POSIX (checked via TF2 timedemo) - default to zero -#else static ConVar mat_bufferprimitives( "mat_bufferprimitives", "1" ); -#endif static ConVar mat_drawflat( "mat_drawflat","0", FCVAR_CHEAT ); static ConVar mat_softwarelighting( "mat_softwarelighting", "0", FCVAR_ALLOWED_IN_COMPETITIVE ); static ConVar mat_proxy( "mat_proxy", "0", FCVAR_CHEAT, "", MatProxyCallback ); @@ -2780,8 +2776,8 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha { // We need lower-case symbols for this to work int nLen = Q_strlen( pMaterialName ) + 1; - char *pFixedNameTemp = (char*)malloc( nLen ); - char *pTemp = (char*)malloc( nLen ); + char *pFixedNameTemp = (char*)stackalloc( nLen ); + char *pTemp = (char*)stackalloc( nLen ); Q_strncpy( pFixedNameTemp, pMaterialName, nLen ); Q_strlower( pFixedNameTemp ); #ifdef POSIX @@ -2883,9 +2879,6 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha } } - free(pTemp); - free(pFixedNameTemp); - return g_pErrorMaterial->GetRealTimeVersion(); } @@ -3103,20 +3096,12 @@ void CMaterialSystem::ResetTempHWMemory( bool bExitingLevel ) //----------------------------------------------------------------------------- void CMaterialSystem::CacheUsedMaterials( ) { + printf("Cache materials\n"); + g_pShaderAPI->EvictManagedResources(); - size_t count = 0; + for (MaterialHandle_t i = FirstMaterial(); i != InvalidMaterial(); i = NextMaterial(i) ) { - // Some (mac) drivers (amd) seem to keep extra resources around on uploads until the next frame swap. This - // injects pointless synthetic swaps (between already-static load frames) - if ( mat_texture_reload_frame_swap_workaround.GetBool() ) - { - if ( count++ % 20 == 0 ) - { - Flush(true); - SwapBuffers(); // Not the right thing to call - } - } IMaterialInternal* pMat = GetMaterialInternal(i); Assert( pMat->GetReferenceCount() >= 0 ); if( pMat->GetReferenceCount() > 0 ) @@ -3703,9 +3688,13 @@ void CMaterialSystem::EndFrame( void ) ThreadAcquire( true ); } + IThreadPool* pThreadPool = CreateMatQueueThreadPool(); + if ( m_pActiveAsyncJob && !m_pActiveAsyncJob->IsFinished() ) { - m_pActiveAsyncJob->WaitForFinish(); + m_pActiveAsyncJob->WaitForFinish(TT_INFINITE, pThreadPool); + + // Sync with GPU if we had a job for it, even if it finished early on CPU! if ( !IsPC() && g_config.ForceHWSync() ) { g_pShaderAPI->ForceHardwareSync(); @@ -3730,7 +3719,6 @@ void CMaterialSystem::EndFrame( void ) } } - IThreadPool *pThreadPool = CreateMatQueueThreadPool(); pThreadPool->AddJob( m_pActiveAsyncJob ); break; } @@ -4664,20 +4652,9 @@ void CMaterialSystem::BeginRenderTargetAllocation( void ) void CMaterialSystem::EndRenderTargetAllocation( void ) { - // Any GPU newer than 2005 doesn't need to do this, and it eats up ~40% of our level load time! - const bool cbRequiresRenderTargetAllocationFirst = mat_requires_rt_alloc_first.GetBool(); - g_pShaderAPI->FlushBufferedPrimitives(); m_bAllocatingRenderTargets = false; - if ( IsPC() && cbRequiresRenderTargetAllocationFirst && g_pShaderAPI->CanDownloadTextures() ) - { - // Simulate an Alt-Tab...will cause RTs to be allocated first - - g_pShaderDevice->ReleaseResources(); - g_pShaderDevice->ReacquireResources(); - } - TextureManager()->CacheExternalStandardRenderTargets(); } diff --git a/materialsystem/cmatqueuedrendercontext.cpp b/materialsystem/cmatqueuedrendercontext.cpp index 96d5bb0f..12e98858 100644 --- a/materialsystem/cmatqueuedrendercontext.cpp +++ b/materialsystem/cmatqueuedrendercontext.cpp @@ -455,14 +455,11 @@ public: } else { - ALIGN16 uint16 tempIndices[16]; + static ALIGN16 uint16 tempIndices[256]; + // original method int i = 0; - if ( (size_t)desc.m_pIndices % 4 == 2 ) - { - desc.m_pIndices[i] = pIndexData[i] + desc.m_nFirstVertex; - i++; - } + while ( i < nIndices ) { int nToCopy = min( (int)ARRAYSIZE(tempIndices), nIndices - i ); diff --git a/materialsystem/ctexture.cpp b/materialsystem/ctexture.cpp index 977d2deb..03998591 100644 --- a/materialsystem/ctexture.cpp +++ b/materialsystem/ctexture.cpp @@ -2458,15 +2458,8 @@ bool CTexture::AsyncReadTextureFromFile( IVTFTexture* pVTFTexture, unsigned int return false; } - if ( V_strstr( GetName(), "c_sniperrifle_scope" ) ) - { - int i = 0; - i = 3; - } - - tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - %s", __FUNCTION__, tmDynamicString( TELEMETRY_LEVEL0, pCacheFileName ) ); - + // OSX hackery int nPreserveFlags = nAdditionalCreationFlags; if ( m_nFlags & TEXTUREFLAGS_SRGB ) @@ -4189,12 +4182,6 @@ bool SLoadTextureBitsFromFile( IVTFTexture **ppOutVtfTexture, FileHandle_t hFile // NOTE! NOTE! NOTE! or by the streaming texture code! Assert( ppOutVtfTexture != NULL && *ppOutVtfTexture != NULL ); - if ( V_strstr( pName, "c_rocketlauncher/c_rocketlauncher" ) ) - { - int i = 0; - i = 3; - } - CUtlBuffer buf; { diff --git a/public/tier0/threadtools.h b/public/tier0/threadtools.h index 2b18dda7..b7216193 100644 --- a/public/tier0/threadtools.h +++ b/public/tier0/threadtools.h @@ -52,6 +52,12 @@ #pragma once #pragma warning(push) #pragma warning(disable:4251) + +extern "C" +{ + void __declspec(dllimport) __stdcall Sleep( unsigned long ); +} + #endif #ifdef COMPILER_MSVC64 @@ -194,8 +200,6 @@ PLATFORM_INTERFACE bool ReleaseThreadHandle( ThreadHandle_t ); //----------------------------------------------------------------------------- -PLATFORM_INTERFACE void ThreadSleep(unsigned duration = 0); -PLATFORM_INTERFACE void ThreadNanoSleep(unsigned ns); PLATFORM_INTERFACE ThreadId_t ThreadGetCurrentId(); PLATFORM_INTERFACE ThreadHandle_t ThreadGetCurrentHandle(); PLATFORM_INTERFACE int ThreadGetPriority( ThreadHandle_t hThread = NULL ); @@ -229,10 +233,10 @@ inline void ThreadPause() { #if defined( COMPILER_PS3 ) __db16cyc(); -#elif defined(__arm__) || defined(__aarch64__) - sched_yield(); -#elif defined( COMPILER_GCC ) +#elif defined( COMPILER_GCC ) && (defined( __i386__ ) || defined( __x86_64__ )) __asm __volatile( "pause" ); +#elif defined( POSIX ) + sched_yield(); #elif defined ( COMPILER_MSVC64 ) _mm_pause(); #elif defined( COMPILER_MSVC32 ) @@ -247,6 +251,36 @@ inline void ThreadPause() #endif } +inline void ThreadSleep(unsigned nMilliseconds = 0) +{ + if( nMilliseconds == 0 ) + { + ThreadPause(); + return; + } + +#ifdef _WIN32 + +#ifdef _WIN32_PC + static bool bInitialized = false; + if ( !bInitialized ) + { + bInitialized = true; + // Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or + // some other value depending on hardware and software) so that we can + // use Sleep( 1 ) to avoid wasting CPU time without missing our frame + // rate. + timeBeginPeriod( 1 ); + } +#endif + Sleep( nMilliseconds ); +#elif PS3 + sys_timer_usleep( nMilliseconds * 1000 ); +#elif defined(POSIX) + usleep( nMilliseconds * 1000 ); +#endif +} + PLATFORM_INTERFACE bool ThreadJoin( ThreadHandle_t, unsigned timeout = TT_INFINITE ); PLATFORM_INTERFACE void ThreadSetDebugName( ThreadHandle_t hThread, const char *pszName ); diff --git a/public/tier1/memhelpers.h b/public/tier1/memhelpers.h index 898cafb2..35cd513e 100644 --- a/public/tier1/memhelpers.h +++ b/public/tier1/memhelpers.h @@ -11,21 +11,15 @@ namespace memutils template inline void copy( T *dest, const T *src, size_t n ) { - do - { - --n; - *(dest+n) = *(src+n); - } while( n ); + for(; n; n--) + *(dest++) = *(src++); } template - inline void set( T *dest, T value, size_t n ) + inline void set( T *dest, const T& value, size_t n ) { - do - { - --n; - *(dest+n) = value; - } while( n ); + for(; n; n--) + *(dest++) = value; } } diff --git a/public/vstdlib/jobthread.h b/public/vstdlib/jobthread.h index 05798a2c..4e722a5d 100644 --- a/public/vstdlib/jobthread.h +++ b/public/vstdlib/jobthread.h @@ -492,8 +492,8 @@ public: //----------------------------------------------------- // Thread event support (safe for NULL this to simplify code ) //----------------------------------------------------- - bool WaitForFinish( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; return ( !IsFinished() ) ? g_pThreadPool->YieldWait( this, dwTimeout ) : true; } - bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; } + inline bool WaitForFinish( uint32 dwTimeout = TT_INFINITE, IThreadPool *pool = g_pThreadPool ) { if (!this) return true; return ( !IsFinished() ) ? pool->YieldWait( this, dwTimeout ) : true; } + inline bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; } CThreadEvent *AccessEvent() { return &m_CompleteEvent; } //----------------------------------------------------- diff --git a/tier0/threadtools.cpp b/tier0/threadtools.cpp index b0dbb645..06c8296f 100644 --- a/tier0/threadtools.cpp +++ b/tier0/threadtools.cpp @@ -485,59 +485,6 @@ bool ReleaseThreadHandle( ThreadHandle_t hThread ) // //----------------------------------------------------------------------------- -void ThreadSleep(unsigned nMilliseconds) -{ -#ifdef _WIN32 - -#ifdef _WIN32_PC - static bool bInitialized = false; - if ( !bInitialized ) - { - bInitialized = true; - // Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or - // some other value depending on hardware and software) so that we can - // use Sleep( 1 ) to avoid wasting CPU time without missing our frame - // rate. - timeBeginPeriod( 1 ); - } -#endif - - Sleep( nMilliseconds ); -#elif PS3 - if( nMilliseconds == 0 ) - { - // sys_ppu_thread_yield doesn't seem to function properly, so sleep instead. -// sys_timer_usleep( 60 ); - sys_ppu_thread_yield(); - } - else - { - sys_timer_usleep( nMilliseconds * 1000 ); - } -#elif defined(POSIX) - usleep( nMilliseconds * 1000 ); -#endif -} - -//----------------------------------------------------------------------------- -void ThreadNanoSleep(unsigned ns) -{ -#ifdef _WIN32 - // ceil - Sleep( ( ns + 999 ) / 1000 ); -#elif PS3 - sys_timer_usleep( ns ); -#elif defined(POSIX) - struct timespec tm; - tm.tv_sec = 0; - tm.tv_nsec = ns; - nanosleep( &tm, NULL ); -#endif -} - - -//----------------------------------------------------------------------------- - #ifndef ThreadGetCurrentId ThreadId_t ThreadGetCurrentId() { diff --git a/vstdlib/jobthread.cpp b/vstdlib/jobthread.cpp index 922b770f..634d5358 100644 --- a/vstdlib/jobthread.cpp +++ b/vstdlib/jobthread.cpp @@ -214,7 +214,11 @@ public: //----------------------------------------------------- virtual int YieldWait( CThreadEvent **pEvents, int nEvents, bool bWaitAll = true, unsigned timeout = TT_INFINITE ); virtual int YieldWait( CJob **, int nJobs, bool bWaitAll = true, unsigned timeout = TT_INFINITE ); - void Yield( unsigned timeout ); + inline void Yield( unsigned timeout ) + { + Assert( ThreadInMainThread() ); + ThreadSleep( timeout ); + } //----------------------------------------------------- // Add a native job to the queue (master thread) @@ -656,20 +660,6 @@ int CThreadPool::YieldWait( CJob **ppJobs, int nJobs, bool bWaitAll, unsigned ti return YieldWait( handles.Base(), handles.Count(), bWaitAll, timeout); } -//--------------------------------------------------------- - -void CThreadPool::Yield( unsigned timeout ) -{ - // @MULTICORE (toml 10/24/2006): not implemented - Assert( ThreadInMainThread() ); - if ( !ThreadInMainThread() ) - { - ThreadSleep( timeout ); - return; - } - ThreadSleep( timeout ); -} - //--------------------------------------------------------- // Add a job to the queue //---------------------------------------------------------