You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1098 lines
31 KiB
1098 lines
31 KiB
//========= Copyright Valve Corporation, All rights reserved. ============// |
|
// |
|
// Purpose: |
|
// |
|
// $NoKeywords: $ |
|
// |
|
//===========================================================================// |
|
|
|
#ifndef DYNAMICVB_H |
|
#define DYNAMICVB_H |
|
|
|
#ifdef _WIN32 |
|
#pragma once |
|
#endif |
|
|
|
#include "locald3dtypes.h" |
|
#include "recording.h" |
|
#include "shaderapidx8_global.h" |
|
#include "shaderapidx8.h" |
|
#include "imeshdx8.h" |
|
#include "materialsystem/ivballoctracker.h" |
|
#include "gpubufferallocator.h" |
|
#include "tier1/utllinkedlist.h" |
|
#include "tier0/dbg.h" |
|
#include "tier1/memstack.h" |
|
|
|
///////////////////////////// |
|
// D. Sim Dietrich Jr. |
|
// sim.dietrich@nvidia.com |
|
////////////////////// |
|
|
|
|
|
// Helper function to unbind an vertex buffer |
|
void Unbind( IDirect3DVertexBuffer9 *pVertexBuffer ); |
|
|
|
#define X360_VERTEX_BUFFER_SIZE_MULTIPLIER 2.0 //minimum of 1, only affects dynamic buffers |
|
//#define X360_BLOCK_ON_VB_FLUSH //uncomment to block until all data is consumed when a flush is requested. Otherwise we only block when absolutely necessary |
|
|
|
//#define SPEW_VERTEX_BUFFER_STALLS //uncomment to allow buffer stall spewing. |
|
|
|
|
|
class CVertexBuffer |
|
{ |
|
public: |
|
CVertexBuffer( IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF, int vertexSize, |
|
int theVertexCount, const char *pTextureBudgetName, bool bSoftwareVertexProcessing, bool dynamic = false ); |
|
|
|
#ifdef _X360 |
|
CVertexBuffer(); |
|
void Init( IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int theVertexCount ); |
|
#endif |
|
|
|
~CVertexBuffer(); |
|
|
|
LPDIRECT3DVERTEXBUFFER GetInterface() const |
|
{ |
|
// If this buffer still exists, then Late Creation didn't happen. Best case: we'll render the wrong image. Worst case: Crash. |
|
Assert( !m_pSysmemBuffer ); |
|
return m_pVB; |
|
} |
|
|
|
// Use at beginning of frame to force a flush of VB contents on first draw |
|
void FlushAtFrameStart() { m_bFlush = true; } |
|
|
|
// lock, unlock |
|
unsigned char* Lock( int numVerts, int& baseVertexIndex ); |
|
unsigned char* Modify( bool bReadOnly, int firstVertex, int numVerts ); |
|
void Unlock( int numVerts ); |
|
|
|
void HandleLateCreation( ); |
|
|
|
// Vertex size |
|
int VertexSize() const { return m_VertexSize; } |
|
|
|
// Vertex count |
|
int VertexCount() const { return m_VertexCount; } |
|
#ifdef _X360 |
|
// For some VBs, memory allocation is managed by CGPUBufferAllocator, via ShaderAPI |
|
const GPUBufferHandle_t *GetBufferAllocationHandle( void ); |
|
void SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle ); |
|
bool IsPooled( void ) { creturn m_GPUBufferHandle.IsValid(); } |
|
// Expose the data pointer for read-only CPU access to the data |
|
// (double-indirection supports relocation of the data by CGPUBufferAllocator) |
|
const byte **GetBufferDataPointerAddress( void ); |
|
#endif // _X360 |
|
|
|
static int BufferCount() |
|
{ |
|
#ifdef _DEBUG |
|
return s_BufferCount; |
|
#else |
|
return 0; |
|
#endif |
|
} |
|
|
|
// UID |
|
unsigned int UID() const |
|
{ |
|
#ifdef RECORDING |
|
return m_UID; |
|
#else |
|
return 0; |
|
#endif |
|
} |
|
|
|
void HandlePerFrameTextureStats( int frame ) |
|
{ |
|
#ifdef VPROF_ENABLED |
|
if ( m_Frame != frame && !m_bDynamic ) |
|
{ |
|
m_Frame = frame; |
|
m_pFrameCounter += m_nBufferSize; |
|
} |
|
#endif |
|
} |
|
|
|
// Do we have enough room without discarding? |
|
bool HasEnoughRoom( int numVertices ) const; |
|
|
|
// Is this dynamic? |
|
bool IsDynamic() const { return m_bDynamic; } |
|
bool IsExternal() const { return m_bExternalMemory; } |
|
|
|
// Block until this part of the vertex buffer is free |
|
void BlockUntilUnused( int nBufferSize ); |
|
|
|
// used to alter the characteristics after creation |
|
// allows one dynamic vb to be shared for multiple formats |
|
void ChangeConfiguration( int vertexSize, int totalSize ) |
|
{ |
|
Assert( m_bDynamic && !m_bLocked && vertexSize ); |
|
m_VertexSize = vertexSize; |
|
m_VertexCount = m_nBufferSize / vertexSize; |
|
} |
|
|
|
// Compute the next offset for the next lock |
|
int NextLockOffset( ) const; |
|
|
|
// Returns the allocated size |
|
int AllocationSize() const; |
|
|
|
// Returns the number of vertices we have enough room for |
|
int NumVerticesUntilFlush() const |
|
{ |
|
#if defined( _X360 ) |
|
if( m_AllocationRing.Count() ) |
|
{ |
|
//Cycle through the ring buffer and see what memory is free now |
|
int iNode = m_AllocationRing.Head(); |
|
while( m_AllocationRing.IsValidIndex( iNode ) ) |
|
{ |
|
if( Dx9Device()->IsFencePending( m_AllocationRing[iNode].m_Fence ) ) |
|
break; |
|
|
|
iNode = m_AllocationRing.Next( iNode ); |
|
} |
|
|
|
if( m_AllocationRing.IsValidIndex( iNode ) ) |
|
{ |
|
int iEndFreeOffset = m_AllocationRing[iNode].m_iEndOffset; |
|
if( iEndFreeOffset < m_Position ) |
|
{ |
|
//Wrapped. Making the arbitrary decision that the return value for this function *should* handle the singe giant allocation case which requires contiguous memory |
|
if( iEndFreeOffset > (m_iNextBlockingPosition - m_Position) ) |
|
return iEndFreeOffset / m_VertexSize; |
|
else |
|
return (m_iNextBlockingPosition - m_Position) / m_VertexSize; |
|
} |
|
} |
|
else |
|
{ |
|
//we didn't block on any fence |
|
return m_VertexCount; |
|
} |
|
} |
|
|
|
return m_VertexCount; |
|
#else |
|
return (m_nBufferSize - NextLockOffset()) / m_VertexSize; |
|
#endif |
|
} |
|
|
|
// Marks a fence indicating when this buffer was used |
|
void MarkUsedInRendering() |
|
{ |
|
#ifdef _X360 |
|
if ( m_bDynamic && m_pVB ) |
|
{ |
|
Assert( m_AllocationRing.Count() > 0 ); |
|
m_AllocationRing[m_AllocationRing.Tail()].m_Fence = Dx9Device()->GetCurrentFence(); |
|
} |
|
#endif |
|
} |
|
|
|
private: |
|
void Create( IDirect3DDevice9 *pD3D ); |
|
inline void ReallyUnlock( int unlockBytes ) |
|
{ |
|
#if DX_TO_GL_ABSTRACTION |
|
// Knowing how much data was actually written is critical for performance under OpenGL. |
|
m_pVB->UnlockActualSize( unlockBytes ); |
|
#else |
|
unlockBytes; // Unused here |
|
m_pVB->Unlock(); |
|
#endif |
|
} |
|
|
|
enum LOCK_FLAGS |
|
{ |
|
LOCKFLAGS_FLUSH = D3DLOCK_NOSYSLOCK | D3DLOCK_DISCARD, |
|
#if !defined( _X360 ) |
|
LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK | D3DLOCK_NOOVERWRITE |
|
#else |
|
// X360BUG: forcing all locks to gpu flush, otherwise bizarre mesh corruption on decals |
|
// Currently iterating with microsoft 360 support to track source of gpu corruption |
|
LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK |
|
#endif |
|
}; |
|
|
|
LPDIRECT3DVERTEXBUFFER m_pVB; |
|
|
|
#ifdef _X360 |
|
struct DynamicBufferAllocation_t |
|
{ |
|
DWORD m_Fence; //track whether this memory is safe to use again. |
|
int m_iStartOffset; |
|
int m_iEndOffset; |
|
unsigned int m_iZPassIdx; // The zpass during which this allocation was made |
|
}; |
|
|
|
int m_iNextBlockingPosition; // m_iNextBlockingPosition >= m_Position where another allocation is still in use. |
|
unsigned char *m_pAllocatedMemory; |
|
int m_iAllocationSize; //Total size of the ring buffer, usually more than what was asked for |
|
IDirect3DVertexBuffer9 m_D3DVertexBuffer; //Only need one shared D3D header for our usage patterns. |
|
CUtlLinkedList<DynamicBufferAllocation_t> m_AllocationRing; //tracks what chunks of our memory are potentially still in use by D3D |
|
|
|
GPUBufferHandle_t m_GPUBufferHandle; // Handle to a memory allocation within a shared physical memory pool (see CGPUBufferAllocator) |
|
#endif |
|
|
|
VertexFormat_t m_VertexBufferFormat; // yes, Vertex, only used for allocation tracking |
|
int m_nBufferSize; |
|
int m_Position; |
|
int m_VertexCount; |
|
int m_VertexSize; |
|
DWORD m_TheFVF; |
|
byte *m_pSysmemBuffer; |
|
int m_nSysmemBufferStartBytes; |
|
|
|
uint m_nLockCount; |
|
unsigned char m_bDynamic : 1; |
|
unsigned char m_bLocked : 1; |
|
unsigned char m_bFlush : 1; |
|
unsigned char m_bExternalMemory : 1; |
|
unsigned char m_bSoftwareVertexProcessing : 1; |
|
unsigned char m_bLateCreateShouldDiscard : 1; |
|
|
|
#ifdef VPROF_ENABLED |
|
int m_Frame; |
|
int *m_pFrameCounter; |
|
int *m_pGlobalCounter; |
|
#endif |
|
|
|
#ifdef _DEBUG |
|
static int s_BufferCount; |
|
#endif |
|
|
|
#ifdef RECORDING |
|
unsigned int m_UID; |
|
#endif |
|
}; |
|
|
|
#ifdef _X360
#include "utlmap.h"
// Tracking tag passed to MemAlloc_RegisterExternalAllocation/Deallocation for
// the physical memory this file allocates on 360.
MEMALLOC_DECLARE_EXTERNAL_TRACKING( XMem_CVertexBuffer );
#endif
|
|
|
//----------------------------------------------------------------------------- |
|
// constructor, destructor |
|
//----------------------------------------------------------------------------- |
|
//-----------------------------------------------------------------------------
// Main constructor.
//   pD3D                      - device used to create the buffer
//   fmt                       - vertex format, recorded for allocation tracking
//   theFVF                    - FVF passed to buffer creation
//   vertexSize / vertexCount  - the buffer is vertexSize * vertexCount bytes
//   pTextureBudgetName        - budget group used for memory credit / vprof counters
//   bSoftwareVertexProcessing - adds D3DUSAGE_SOFTWAREPROCESSING at creation
//   dynamic                   - creates a dynamic (append/discard) buffer
//
// If the render thread is not currently safe to use, only a system-memory
// staging buffer is allocated; HandleLateCreation() creates the real one later.
//
// Every data member is initialised here, in declaration order, so that nothing
// (e.g. m_nLockCount, the vprof counters) is ever read uninitialised.
//-----------------------------------------------------------------------------
inline CVertexBuffer::CVertexBuffer(IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF,
	int vertexSize, int vertexCount, const char *pTextureBudgetName,
	bool bSoftwareVertexProcessing, bool dynamic ) :
		m_pVB( 0 )
#ifdef _X360
		,m_iNextBlockingPosition( 0 )
		,m_pAllocatedMemory( NULL )
		,m_iAllocationSize( 0 )
#endif
		,m_VertexBufferFormat( fmt )
		,m_nBufferSize( vertexSize * vertexCount )
		,m_Position( 0 )
		,m_VertexCount( vertexCount )
		,m_VertexSize( vertexSize )
		,m_TheFVF( theFVF )
		,m_pSysmemBuffer( NULL )
		,m_nSysmemBufferStartBytes( 0 )
		,m_nLockCount( 0 )
		,m_bDynamic( dynamic )
		,m_bLocked( false )
		,m_bFlush( true )
		,m_bExternalMemory( false )
		,m_bSoftwareVertexProcessing( bSoftwareVertexProcessing )
		,m_bLateCreateShouldDiscard( false )
#ifdef VPROF_ENABLED
		,m_Frame( -1 )
		,m_pFrameCounter( NULL )
		,m_pGlobalCounter( NULL )
#endif
{
	MEM_ALLOC_CREDIT_( pTextureBudgetName );

#ifdef RECORDING
	// assign a UID
	static unsigned int uid = 0;
	m_UID = uid++;
#endif

#ifdef _DEBUG
	++s_BufferCount;
#endif

#ifdef VPROF_ENABLED
	if ( !m_bDynamic )
	{
		// Static buffers are charged to counters named after their budget group
		char name[256];
		V_strcpy_safe( name, "TexGroup_global_" );
		V_strcat_safe( name, pTextureBudgetName, sizeof(name) );
		m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_GLOBAL );

		V_strcpy_safe( name, "TexGroup_frame_" );
		V_strcat_safe( name, pTextureBudgetName, sizeof(name) );
		m_pFrameCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_PER_FRAME );
	}
	else
	{
		// Dynamic buffers share one global counter and have no per-frame counter
		m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( "TexGroup_global_" TEXTURE_GROUP_DYNAMIC_VERTEX_BUFFER, COUNTER_GROUP_TEXTURE_GLOBAL );
	}
#endif

	if ( !g_pShaderUtil->IsRenderThreadSafe() )
	{
		// Can't talk to the device from here: stage writes in system memory for now
		m_pSysmemBuffer = ( byte * )MemAlloc_AllocAligned( m_nBufferSize, 16 );
		m_nSysmemBufferStartBytes = 0;
	}
	else
	{
		m_pSysmemBuffer = NULL;
		Create( pD3D );
	}

#ifdef VPROF_ENABLED
	if ( IsX360() || !m_bDynamic )
	{
		Assert( m_pGlobalCounter );
		*m_pGlobalCounter += m_nBufferSize;
	}
#endif
}
|
|
|
|
|
//-----------------------------------------------------------------------------
// Allocates the underlying vertex buffer.
//
// PC:  creates a D3D vertex buffer (default pool for dynamic buffers or when
//      running on D3D9Ex, managed pool otherwise). On an out-of-memory failure
//      managed resources are evicted and creation is attempted once more.
// 360: allocates the backing memory directly (ring buffer for dynamic buffers,
//      pooled or standalone physical allocation for static ones).
//
// Marked inline because this definition lives in a header; every other
// out-of-class member in this file is inline for the same reason.
//-----------------------------------------------------------------------------
inline void CVertexBuffer::Create( IDirect3DDevice9 *pD3D )
{
	D3DVERTEXBUFFER_DESC desc;
	memset( &desc, 0x00, sizeof( desc ) );
	desc.Format = D3DFMT_VERTEXDATA;
	desc.Size = m_nBufferSize;
	desc.Type = D3DRTYPE_VERTEXBUFFER;
	desc.Pool = m_bDynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;
	desc.FVF = m_TheFVF;

#if defined(IS_WINDOWS_PC) && defined(SHADERAPIDX9)
	extern bool g_ShaderDeviceUsingD3D9Ex;
	if ( g_ShaderDeviceUsingD3D9Ex )
	{
		desc.Pool = D3DPOOL_DEFAULT;
	}
#endif

	desc.Usage = D3DUSAGE_WRITEONLY;
	if ( m_bDynamic )
	{
		desc.Usage |= D3DUSAGE_DYNAMIC;
		// Dynamic meshes should never be compressed (slows down writing to them)
		Assert( CompressionType( m_TheFVF ) == VERTEX_COMPRESSION_NONE );
	}
	if ( m_bSoftwareVertexProcessing )
	{
		desc.Usage |= D3DUSAGE_SOFTWAREPROCESSING;
	}

#if !defined( _X360 )
	RECORD_COMMAND( DX8_CREATE_VERTEX_BUFFER, 6 );
	RECORD_INT( m_UID );
	RECORD_INT( m_nBufferSize );
	RECORD_INT( desc.Usage );
	RECORD_INT( desc.FVF );
	RECORD_INT( desc.Pool );
	RECORD_INT( m_bDynamic );

	HRESULT hr = pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL );

	if ( hr == D3DERR_OUTOFVIDEOMEMORY || hr == E_OUTOFMEMORY )
	{
		// Don't have the memory for this.  Try flushing all managed resources
		// out of vid mem and try again.
		// FIXME: need to record this
		pD3D->EvictManagedResources();
		// Keep the retry's result so the diagnostics below describe the final
		// outcome rather than the first (already handled) failure.
		hr = pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL );
	}

#ifdef _DEBUG
	if ( hr != D3D_OK )
	{
		switch ( hr )
		{
		case D3DERR_INVALIDCALL:
			Assert( !"D3DERR_INVALIDCALL" );
			break;
		case D3DERR_OUTOFVIDEOMEMORY:
			Assert( !"D3DERR_OUTOFVIDEOMEMORY" );
			break;
		case E_OUTOFMEMORY:
			Assert( !"E_OUTOFMEMORY" );
			break;
		default:
			Assert( 0 );
			break;
		}
	}
#endif

	Assert( m_pVB );
#else
	// _X360
	if ( m_bDynamic )
	{
		// Dynamic buffers get a larger ring so writes can overlap with GPU reads
		m_iAllocationSize = m_nBufferSize * X360_VERTEX_BUFFER_SIZE_MULTIPLIER;
		Assert( m_iAllocationSize >= m_nBufferSize );
		m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE | MEM_LARGE_PAGES | PAGE_WRITECOMBINE );
	}
	// NOTE(review): pTextureBudgetName is not a parameter or member visible in this
	// function; confirm where the 360 build is expected to get it from.
	else if ( MeshMgr()->AllocatePooledVB( this, m_nBufferSize, pTextureBudgetName ) )
	{
		// Successfully allocated in a shared ShaderAPI memory pool (SetBufferAllocationHandle will have been called to set the pointer and stream offset)
		m_iAllocationSize = m_nBufferSize;
		Assert( m_pAllocatedMemory );
	}
	else
	{
		// Fall back to allocating a standalone VB
		// NOTE: write-combining (PAGE_WRITECOMBINE) is deliberately not used, since it slows down CPU access to the data (decals+defragmentation)
		m_iAllocationSize = m_nBufferSize;
		m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE );
	}

	if ( m_pAllocatedMemory && !IsPooled() )
	{
		MemAlloc_RegisterExternalAllocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) );
		if ( !m_bDynamic )
		{
			// Track non-pooled physallocs, to help tune CGPUBufferAllocator usage
			g_SizeIndividualVBPhysAllocs += XPhysicalSize( m_pAllocatedMemory );
			g_NumIndividualVBPhysAllocs++;
		}
	}

	// Nothing is in flight yet, so the whole allocation is available
	m_iNextBlockingPosition = m_iAllocationSize;
#endif

#ifdef MEASURE_DRIVER_ALLOCATIONS
	int nMemUsed = 1024;
	VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, 1 );
	VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, nMemUsed );
	VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, nMemUsed );
#endif

	// Track VB allocations
#if !defined( _X360 )
	g_VBAllocTracker->CountVB( m_pVB, m_bDynamic, m_nBufferSize, m_VertexSize, m_VertexBufferFormat );
#else
	g_VBAllocTracker->CountVB( this, m_bDynamic, m_iAllocationSize, m_VertexSize, m_VertexBufferFormat );
#endif
}
|
|
|
|
|
#ifdef _X360 |
|
void *AllocateTempBuffer( size_t nSizeInBytes ); |
|
|
|
//----------------------------------------------------------------------------- |
|
// This variant is for when we already have the data in physical memory |
|
//----------------------------------------------------------------------------- |
|
inline CVertexBuffer::CVertexBuffer( ) : |
|
m_pVB( 0 ), |
|
m_Position( 0 ), |
|
m_VertexSize( 0 ), |
|
m_VertexCount( 0 ), |
|
m_bFlush( false ), |
|
m_bLocked( false ), |
|
m_bExternalMemory( true ), |
|
m_nBufferSize( 0 ), |
|
m_bDynamic( false ) |
|
#ifdef VPROF_ENABLED |
|
,m_Frame( -1 ) |
|
#endif |
|
{ |
|
m_iAllocationSize = 0; |
|
m_pAllocatedMemory = 0; |
|
m_iNextBlockingPosition = 0; |
|
} |
|
|
|
#include "tier0/memdbgoff.h" |
|
|
|
//-----------------------------------------------------------------------------
// Points a default-constructed buffer at vertex data that already exists.
//   pD3D        - unused
//   fmt         - vertex format, stored for allocation tracking
//   theFVF      - FVF, stored alongside the format
//   pVertexData - the existing vertex data (not copied)
//   vertexSize / vertexCount - layout of that data
// A D3D vertex buffer header is built in temp-buffer storage and aimed at pVertexData.
//-----------------------------------------------------------------------------
inline void CVertexBuffer::Init( IDirect3DDevice9 *pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int vertexCount )
{
	(void)pD3D;

	// Record the format info; the default constructor has no way to know it.
	m_VertexBufferFormat = fmt;
	m_TheFVF = theFVF;

	m_nBufferSize = vertexSize * vertexCount;
	// m_Position is deliberately left as-is (the original code only self-assigned it).
	m_VertexSize = vertexSize;
	m_VertexCount = vertexCount;
	m_iAllocationSize = m_nBufferSize;
	m_pAllocatedMemory = pVertexData;
	m_iNextBlockingPosition = m_iAllocationSize;

	// Placement-new the header into temp storage, then describe and aim it at the data
	m_pVB = new( AllocateTempBuffer( sizeof( IDirect3DVertexBuffer9 ) ) ) IDirect3DVertexBuffer9;
	XGSetVertexBufferHeader( m_nBufferSize, 0, 0, 0, m_pVB );
	XGOffsetResourceAddress( m_pVB, pVertexData );
}
|
|
|
#include "tier0/memdbgon.h" |
|
|
|
#endif // _X360 |
|
|
|
//-----------------------------------------------------------------------------
// Destructor: removes the buffer from allocation tracking and stats, unlocks
// it if still locked, frees any staging memory, then releases the buffer
// (PC) or the physical/pooled memory behind it (360).
//-----------------------------------------------------------------------------
inline CVertexBuffer::~CVertexBuffer()
{
	// Track VB allocations (even if pooled)
#if defined( _X360 )
	// Make sure the device is no longer using this buffer
	if ( m_pVB && m_pVB->IsSet( Dx9Device() ) )
	{
		Unbind( m_pVB );
	}

	if ( !m_bExternalMemory )
	{
		g_VBAllocTracker->UnCountVB( this );
	}
#else
	if ( m_pVB )
	{
		g_VBAllocTracker->UnCountVB( m_pVB );
	}
#endif

	// Undo the bookkeeping done at construction; external-memory buffers never did any
	if ( !m_bExternalMemory )
	{
#ifdef MEASURE_DRIVER_ALLOCATIONS
		int nDriverBytes = 1024;
		VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, -1 );
		VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, -nDriverBytes );
		VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, -nDriverBytes );
#endif

#ifdef VPROF_ENABLED
		if ( IsX360() || !m_bDynamic )
		{
			Assert( m_pGlobalCounter );
			*m_pGlobalCounter -= m_nBufferSize;
		}
#endif

#ifdef _DEBUG
		--s_BufferCount;
#endif
	}

	// No-op unless the buffer is still locked
	Unlock( 0 );

	// Drop staging memory that was never uploaded
	if ( m_pSysmemBuffer )
	{
		MemAlloc_FreeAligned( m_pSysmemBuffer );
		m_pSysmemBuffer = NULL;
	}

#if defined( _X360 )
	if ( m_pAllocatedMemory && !m_bExternalMemory )
	{
		if ( !IsPooled() )
		{
			// Standalone physical allocation: untrack it, then free it
			MemAlloc_RegisterExternalDeallocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) );
			if ( !m_bDynamic )
			{
				// Track non-pooled physallocs, to help tune CGPUBufferAllocator usage
				g_SizeIndividualVBPhysAllocs -= XPhysicalSize( m_pAllocatedMemory );
				g_NumIndividualVBPhysAllocs--;
			}
			XPhysicalFree( m_pAllocatedMemory );
		}
		else
		{
			// Memory belongs to the shared pool; hand it back
			MeshMgr()->DeallocatePooledVB( this );
		}
	}

	m_pAllocatedMemory = NULL;
	m_pVB = NULL;
#else
	if ( m_pVB )
	{
		RECORD_COMMAND( DX8_DESTROY_VERTEX_BUFFER, 1 );
		RECORD_INT( m_UID );

		m_pVB->Release();
	}
#endif // _X360
}
|
#ifdef _X360 |
|
//----------------------------------------------------------------------------- |
|
// Get memory allocation data |
|
//----------------------------------------------------------------------------- |
|
// Returns the pooled-allocation handle, or NULL (asserting in debug) when this
// buffer is not pooled.
inline const GPUBufferHandle_t *CVertexBuffer::GetBufferAllocationHandle( void )
{
	Assert( IsPooled() );
	if ( !IsPooled() )
	{
		return NULL;
	}
	return &m_GPUBufferHandle;
}
|
|
|
//----------------------------------------------------------------------------- |
|
// Update memory allocation data |
|
//----------------------------------------------------------------------------- |
|
inline void CVertexBuffer::SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle ) |
|
{ |
|
// This VB's memory has been reallocated or freed, update our cached pointer and the D3D header |
|
// NOTE: this should never be called while any rendering is in flight! |
|
Assert( ( m_pAllocatedMemory == NULL ) || IsPooled() ); |
|
if ( ( m_pAllocatedMemory == NULL ) || IsPooled() ) |
|
{ |
|
m_GPUBufferHandle = bufferAllocationHandle; |
|
m_pAllocatedMemory = m_GPUBufferHandle.pMemory; |
|
if ( m_pVB ) |
|
{ |
|
XGSetVertexBufferHeader( m_nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); |
|
XGOffsetResourceAddress( m_pVB, m_pAllocatedMemory ); |
|
} |
|
} |
|
} |
|
|
|
//----------------------------------------------------------------------------- |
|
// Expose the data pointer for read-only CPU access to the data |
|
//----------------------------------------------------------------------------- |
|
// Returns the address of the member holding the data pointer, for static
// buffers only; dynamic buffers yield NULL.
inline const byte **CVertexBuffer::GetBufferDataPointerAddress( void )
{
	// FIXME: external-memory buffers (m_bExternalMemory) should arguably be refused too
	if ( !m_bDynamic )
	{
		return (const byte **)&m_pAllocatedMemory;
	}
	return NULL;
}
|
#endif // _X360 |
|
|
|
//----------------------------------------------------------------------------- |
|
// Compute the next offset for the next lock |
|
//----------------------------------------------------------------------------- |
|
inline int CVertexBuffer::NextLockOffset( ) const |
|
{ |
|
#if !defined( _X360 ) |
|
int nNextOffset = ( m_Position + m_VertexSize - 1 ) / m_VertexSize; |
|
nNextOffset *= m_VertexSize; |
|
return nNextOffset; |
|
#else |
|
return m_Position; //position is already aligned properly on unlocks for 360. |
|
#endif |
|
} |
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// Do we have enough room without discarding? |
|
//----------------------------------------------------------------------------- |
|
inline bool CVertexBuffer::HasEnoughRoom( int numVertices ) const |
|
{ |
|
#if defined( _X360 ) |
|
return numVertices <= m_VertexCount; //the ring buffer will free room as needed |
|
#else |
|
return (NextLockOffset() + (numVertices * m_VertexSize)) <= m_nBufferSize; |
|
#endif |
|
} |
|
|
|
//----------------------------------------------------------------------------- |
|
// Block until this part of the vertex buffer is free
|
//----------------------------------------------------------------------------- |
|
inline void CVertexBuffer::BlockUntilUnused( int nBufferSize ) |
|
{ |
|
Assert( nBufferSize <= m_nBufferSize ); |
|
|
|
#ifdef _X360 |
|
int nLockOffset = NextLockOffset(); |
|
Assert( (m_AllocationRing.Count() != 0) || ((m_Position == 0) && (m_iNextBlockingPosition == m_iAllocationSize)) ); |
|
|
|
if ( (m_iNextBlockingPosition - nLockOffset) >= nBufferSize ) |
|
return; |
|
|
|
Assert( (m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset == 0) || ((m_iNextBlockingPosition == m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset) && (m_Position <= m_iNextBlockingPosition)) ); |
|
|
|
int iMinBlockPosition = nLockOffset + nBufferSize; |
|
if( iMinBlockPosition > m_iAllocationSize ) |
|
{ |
|
//Allocation requires us to wrap |
|
iMinBlockPosition = nBufferSize; |
|
m_Position = 0; |
|
|
|
//modify the last allocation so that it uses up the whole tail end of the buffer. Makes other code simpler |
|
Assert( m_AllocationRing.Count() != 0 ); |
|
m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset = m_iAllocationSize; |
|
|
|
//treat all allocations between the current position and the tail end of the ring as freed since they will be before we unblock |
|
while( m_AllocationRing.Count() ) |
|
{ |
|
unsigned int head = m_AllocationRing.Head(); |
|
if( m_AllocationRing[head].m_iStartOffset == 0 ) |
|
break; |
|
|
|
m_AllocationRing.Remove( head ); |
|
} |
|
} |
|
|
|
//now we go through the allocations until we find the last fence we care about. Treat everything up until that fence as freed. |
|
DWORD FinalFence = 0; |
|
unsigned int iFinalAllocationZPassIdx = 0; |
|
while( m_AllocationRing.Count() ) |
|
{ |
|
unsigned int head = m_AllocationRing.Head(); |
|
|
|
if( m_AllocationRing[head].m_iEndOffset >= iMinBlockPosition ) |
|
{ |
|
//When this frees, we'll finally have enough space for the allocation |
|
FinalFence = m_AllocationRing[head].m_Fence; |
|
iFinalAllocationZPassIdx = m_AllocationRing[head].m_iZPassIdx; |
|
m_iNextBlockingPosition = m_AllocationRing[head].m_iEndOffset; |
|
m_AllocationRing.Remove( head ); |
|
break; |
|
} |
|
m_AllocationRing.Remove( head ); |
|
} |
|
Assert( FinalFence != 0 ); |
|
|
|
if( Dx9Device()->IsFencePending( FinalFence ) ) |
|
{ |
|
#ifdef SPEW_VERTEX_BUFFER_STALLS |
|
float st = Plat_FloatTime(); |
|
#endif |
|
|
|
if ( ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) && |
|
( iFinalAllocationZPassIdx == ShaderAPI()->Get360ZPassCounter() ) ) |
|
{ |
|
// We're about to overrun our VB ringbuffer in a single Z prepass. To avoid rendering corruption, close out the |
|
// Z prepass and continue. This will reduce early-Z rejection efficiency and could cause a momentary framerate drop, |
|
// but it's better than rendering corruption. |
|
Warning( "Dynamic VB ring buffer overrun in Z Prepass. Tell Thorsten.\n" ); |
|
|
|
ShaderAPI()->End360ZPass(); |
|
} |
|
|
|
Dx9Device()->BlockOnFence( FinalFence ); |
|
|
|
#ifdef SPEW_VERTEX_BUFFER_STALLS |
|
float dt = Plat_FloatTime() - st; |
|
Warning( "Blocked locking dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); |
|
#endif |
|
} |
|
|
|
#endif |
|
} |
|
|
|
|
|
//----------------------------------------------------------------------------- |
|
// lock, unlock |
|
//----------------------------------------------------------------------------- |
|
inline unsigned char* CVertexBuffer::Lock( int numVerts, int& baseVertexIndex ) |
|
{ |
|
#if defined( _X360 ) |
|
if ( m_pVB && m_pVB->IsSet( Dx9Device() ) ) |
|
{ |
|
Unbind( m_pVB ); |
|
} |
|
#endif |
|
|
|
m_nLockCount = numVerts; |
|
|
|
unsigned char* pLockedData = 0; |
|
baseVertexIndex = 0; |
|
int nBufferSize = numVerts * m_VertexSize; |
|
|
|
Assert( IsPC() || ( IsX360() && !m_bLocked ) ); |
|
|
|
// Ensure there is enough space in the VB for this data |
|
if ( numVerts > m_VertexCount ) |
|
{ |
|
Assert( 0 ); |
|
return 0; |
|
} |
|
|
|
if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) |
|
return 0; |
|
|
|
DWORD dwFlags; |
|
if ( m_bDynamic ) |
|
{ |
|
dwFlags = LOCKFLAGS_APPEND; |
|
|
|
#if !defined( _X360 ) |
|
// If either the user forced us to flush, |
|
// or there is not enough space for the vertex data, |
|
// then flush the buffer contents |
|
if ( !m_Position || m_bFlush || !HasEnoughRoom(numVerts) ) |
|
{ |
|
if ( m_pSysmemBuffer || !g_pShaderUtil->IsRenderThreadSafe() ) |
|
m_bLateCreateShouldDiscard = true; |
|
m_bFlush = false; |
|
m_Position = 0; |
|
|
|
dwFlags = LOCKFLAGS_FLUSH; |
|
} |
|
#else |
|
if( m_bFlush ) |
|
{ |
|
# if ( defined( X360_BLOCK_ON_VB_FLUSH ) ) |
|
{ |
|
if( m_AllocationRing.Count() ) |
|
{ |
|
DWORD FinalFence = m_AllocationRing[m_AllocationRing.Tail()].m_Fence; |
|
|
|
m_AllocationRing.RemoveAll(); |
|
m_Position = 0; |
|
m_iNextBlockingPosition = m_iAllocationSize; |
|
|
|
# if ( defined( SPEW_VERTEX_BUFFER_STALLS ) ) |
|
if( Dx9Device()->IsFencePending( FinalFence ) ) |
|
{ |
|
float st = Plat_FloatTime(); |
|
# endif |
|
Dx9Device()->BlockOnFence( FinalFence ); |
|
# if ( defined ( SPEW_VERTEX_BUFFER_STALLS ) ) |
|
float dt = Plat_FloatTime() - st; |
|
Warning( "Blocked FLUSHING dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); |
|
} |
|
# endif |
|
} |
|
} |
|
# endif |
|
m_bFlush = false; |
|
} |
|
#endif |
|
} |
|
else |
|
{ |
|
// Since we are a static VB, always lock the beginning of the buffer. |
|
dwFlags = D3DLOCK_NOSYSLOCK; |
|
m_Position = 0; |
|
} |
|
|
|
if ( IsX360() && m_bDynamic ) |
|
{ |
|
// Block until we have enough room in the buffer, this affects the result of NextLockOffset() in wrap conditions. |
|
BlockUntilUnused( nBufferSize ); |
|
m_pVB = NULL; |
|
} |
|
|
|
int nLockOffset = NextLockOffset( ); |
|
RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); |
|
RECORD_INT( m_UID ); |
|
RECORD_INT( nLockOffset ); |
|
RECORD_INT( nBufferSize ); |
|
RECORD_INT( dwFlags ); |
|
|
|
#if !defined( _X360 ) |
|
// If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from |
|
// the non-current thread. |
|
if ( !m_pSysmemBuffer && !g_pShaderUtil->IsRenderThreadSafe() ) |
|
{ |
|
m_pSysmemBuffer = ( byte * )MemAlloc_AllocAligned( m_nBufferSize, 16 ); |
|
m_nSysmemBufferStartBytes = nLockOffset; |
|
Assert( ( m_nSysmemBufferStartBytes % m_VertexSize ) == 0 ); |
|
} |
|
|
|
if ( m_pSysmemBuffer != NULL ) |
|
{ |
|
// Ensure that we're never moving backwards in a buffer--this code would need to be rewritten if so. |
|
// We theorize this can happen if you hit the end of a buffer and then wrap before drawing--but |
|
// this would probably break in other places as well. |
|
Assert( nLockOffset >= m_nSysmemBufferStartBytes ); |
|
pLockedData = m_pSysmemBuffer + nLockOffset; |
|
} |
|
else |
|
{ |
|
m_pVB->Lock( nLockOffset, |
|
nBufferSize, |
|
reinterpret_cast< void** >( &pLockedData ), |
|
dwFlags ); |
|
} |
|
#else |
|
pLockedData = m_pAllocatedMemory + nLockOffset; |
|
#endif |
|
|
|
Assert( pLockedData != 0 ); |
|
m_bLocked = true; |
|
if ( !IsX360() ) |
|
{ |
|
baseVertexIndex = nLockOffset / m_VertexSize; |
|
} |
|
else |
|
{ |
|
baseVertexIndex = 0; |
|
} |
|
return pLockedData; |
|
} |
|
|
|
inline unsigned char* CVertexBuffer::Modify( bool bReadOnly, int firstVertex, int numVerts ) |
|
{ |
|
unsigned char* pLockedData = 0; |
|
|
|
// D3D still returns a pointer when you call lock with 0 verts, so just in |
|
// case it's actually doing something, don't even try to lock the buffer with 0 verts. |
|
if ( numVerts == 0 ) |
|
return NULL; |
|
|
|
m_nLockCount = numVerts; |
|
|
|
// If this hits, m_pSysmemBuffer logic needs to be added to this code. |
|
Assert( g_pShaderUtil->IsRenderThreadSafe() ); |
|
Assert( !m_pSysmemBuffer ); // if this hits, then we need to add code to handle it |
|
|
|
Assert( m_pVB && !m_bDynamic ); |
|
|
|
if ( firstVertex + numVerts > m_VertexCount ) |
|
{ |
|
Assert( 0 ); |
|
return NULL; |
|
} |
|
|
|
DWORD dwFlags = D3DLOCK_NOSYSLOCK; |
|
if ( bReadOnly ) |
|
{ |
|
dwFlags |= D3DLOCK_READONLY; |
|
} |
|
|
|
RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); |
|
RECORD_INT( m_UID ); |
|
RECORD_INT( firstVertex * m_VertexSize ); |
|
RECORD_INT( numVerts * m_VertexSize ); |
|
RECORD_INT( dwFlags ); |
|
|
|
// mmw: for forcing all dynamic... LOCKFLAGS_FLUSH ); |
|
#if !defined( _X360 ) |
|
m_pVB->Lock( |
|
firstVertex * m_VertexSize, |
|
numVerts * m_VertexSize, |
|
reinterpret_cast< void** >( &pLockedData ), |
|
dwFlags ); |
|
#else |
|
if ( m_pVB->IsSet( Dx9Device() ) ) |
|
{ |
|
Unbind( m_pVB ); |
|
} |
|
pLockedData = m_pAllocatedMemory + (firstVertex * m_VertexSize); |
|
#endif |
|
|
|
m_Position = firstVertex * m_VertexSize; |
|
Assert( pLockedData != 0 ); |
|
m_bLocked = true; |
|
|
|
return pLockedData; |
|
} |
|
|
|
inline void CVertexBuffer::Unlock( int numVerts ) |
|
{ |
|
if ( !m_bLocked ) |
|
return; |
|
|
|
if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) |
|
return; |
|
|
|
int nLockOffset = NextLockOffset(); |
|
int nBufferSize = numVerts * m_VertexSize; |
|
|
|
RECORD_COMMAND( DX8_UNLOCK_VERTEX_BUFFER, 1 ); |
|
RECORD_INT( m_UID ); |
|
|
|
#if !defined( _X360 ) |
|
if ( m_pSysmemBuffer != NULL ) |
|
{ |
|
} |
|
else |
|
{ |
|
#if DX_TO_GL_ABSTRACTION |
|
Assert( numVerts <= (int)m_nLockCount ); |
|
int unlockBytes = ( m_bDynamic ? nBufferSize : ( m_nLockCount * m_VertexSize ) ); |
|
#else |
|
int unlockBytes = 0; |
|
#endif |
|
|
|
ReallyUnlock( unlockBytes ); |
|
} |
|
m_Position = nLockOffset + nBufferSize; |
|
#else |
|
if ( m_bDynamic ) |
|
{ |
|
if ( numVerts > 0 ) |
|
{ |
|
DynamicBufferAllocation_t LockData; |
|
LockData.m_Fence = Dx9Device()->GetCurrentFence(); //This isn't the correct fence, but it's all we have access to for now and it'll provide marginal safety if something goes really wrong. |
|
LockData.m_iStartOffset = nLockOffset; |
|
LockData.m_iEndOffset = LockData.m_iStartOffset + nBufferSize; |
|
LockData.m_iZPassIdx = ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) ? ShaderAPI()->Get360ZPassCounter() : 0; |
|
|
|
// Round dynamic locks to 4k boundaries for GPU cache reasons |
|
LockData.m_iEndOffset = ALIGN_VALUE( LockData.m_iEndOffset, 4096 ); |
|
if( LockData.m_iEndOffset > m_iAllocationSize ) |
|
LockData.m_iEndOffset = m_iAllocationSize; |
|
|
|
m_AllocationRing.AddToTail( LockData ); |
|
m_Position = LockData.m_iEndOffset; |
|
|
|
void* pLockedData = m_pAllocatedMemory + LockData.m_iStartOffset; |
|
|
|
//Always re-use the same vertex buffer header based on the assumption that D3D copies it off in the draw calls. |
|
m_pVB = &m_D3DVertexBuffer; |
|
XGSetVertexBufferHeader( nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); |
|
XGOffsetResourceAddress( m_pVB, pLockedData ); |
|
|
|
// Invalidate the GPU caches for this memory. |
|
Dx9Device()->InvalidateGpuCache( pLockedData, nBufferSize, 0 ); |
|
} |
|
} |
|
else |
|
{ |
|
if ( !m_pVB ) |
|
{ |
|
m_pVB = &m_D3DVertexBuffer; |
|
XGSetVertexBufferHeader( m_nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); |
|
XGOffsetResourceAddress( m_pVB, m_pAllocatedMemory ); |
|
} |
|
m_Position = nLockOffset + nBufferSize; |
|
|
|
// Invalidate the GPU caches for this memory. |
|
Dx9Device()->InvalidateGpuCache( m_pAllocatedMemory, m_nBufferSize, 0 ); |
|
} |
|
#endif |
|
|
|
m_bLocked = false; |
|
} |
|
|
|
|
|
inline void CVertexBuffer::HandleLateCreation( ) |
|
{ |
|
if ( !m_pSysmemBuffer ) |
|
{ |
|
return; |
|
} |
|
|
|
if( !m_pVB ) |
|
{ |
|
bool bPrior = g_VBAllocTracker->TrackMeshAllocations( "HandleLateCreation" ); |
|
Create( Dx9Device() ); |
|
if ( !bPrior ) |
|
{ |
|
g_VBAllocTracker->TrackMeshAllocations( NULL ); |
|
} |
|
} |
|
|
|
void* pWritePtr = NULL; |
|
const int dataToWriteBytes = m_bDynamic ? ( m_Position - m_nSysmemBufferStartBytes ) : ( m_nLockCount * m_VertexSize ); |
|
DWORD dwFlags = D3DLOCK_NOSYSLOCK; |
|
if ( m_bDynamic ) |
|
{ |
|
dwFlags |= ( m_bLateCreateShouldDiscard ? D3DLOCK_DISCARD : D3DLOCK_NOOVERWRITE ); |
|
} |
|
|
|
// Always clear this. |
|
m_bLateCreateShouldDiscard = false; |
|
|
|
// Don't use the Lock function, it does a bunch of stuff we don't want. |
|
HRESULT hr = m_pVB->Lock( m_nSysmemBufferStartBytes, |
|
dataToWriteBytes, |
|
&pWritePtr, |
|
dwFlags); |
|
|
|
// If this fails we're about to crash. Consider skipping the update and leaving |
|
// m_pSysmemBuffer around to try again later. (For example in case of device loss) |
|
Assert( SUCCEEDED( hr ) ); hr; |
|
memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes ); |
|
ReallyUnlock( dataToWriteBytes ); |
|
|
|
MemAlloc_FreeAligned( m_pSysmemBuffer ); |
|
m_pSysmemBuffer = NULL; |
|
} |
|
|
|
|
|
// Returns the allocated size |
|
inline int CVertexBuffer::AllocationSize() const |
|
{ |
|
#ifdef _X360 |
|
return m_iAllocationSize; |
|
#else |
|
return m_VertexCount * m_VertexSize; |
|
#endif |
|
} |
|
|
|
|
|
#endif // DYNAMICVB_H |
|
|
|
|