Merge pull request #248 from nillerusr/mathlib-optimize

Mathlib optimize
This commit is contained in:
nillerusr 2023-05-05 16:17:29 +00:00 committed by GitHub
commit 697a9f34f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1484 additions and 1737 deletions

2
.gitignore vendored
View File

@ -37,5 +37,3 @@ waf3*/
.vscode/ .vscode/
.depproj/ .depproj/
source-engine.sln source-engine.sln
hl2/

View File

@ -33,6 +33,7 @@ CAI_PolicingBehavior::CAI_PolicingBehavior( void )
m_bEnabled = false; m_bEnabled = false;
m_nNumWarnings = 0; m_nNumWarnings = 0;
m_bTargetIsHostile = false; m_bTargetIsHostile = false;
m_hPoliceGoal = NULL;
} }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------

View File

@ -31,9 +31,9 @@ public:
{ {
MEM_ALLOC_CREDIT_( "CMatCallQueue.m_Allocator" ); MEM_ALLOC_CREDIT_( "CMatCallQueue.m_Allocator" );
#ifdef SWDS #ifdef SWDS
m_Allocator.Init( 2*1024, 0, 0, 4 ); m_Allocator.Init( 2*1024, 0, 0, 16 );
#else #else
m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 4 ); m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 16 );
#endif #endif
m_FunctorFactory.SetAllocator( &m_Allocator ); m_FunctorFactory.SetAllocator( &m_Allocator );
m_pHead = m_pTail = NULL; m_pHead = m_pTail = NULL;

View File

@ -420,13 +420,6 @@ void MatrixGetColumn( const matrix3x4_t& in, int column, Vector &out )
out.z = in[2][column]; out.z = in[2][column];
} }
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
{
out[0][column] = in.x;
out[1][column] = in.y;
out[2][column] = in.z;
}
void MatrixScaleBy ( const float flScale, matrix3x4_t &out ) void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
{ {
out[0][0] *= flScale; out[0][0] *= flScale;
@ -1092,57 +1085,6 @@ void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst )
dst[2][0] = 0.0f; dst[2][1] = 0.0f; dst[2][2] = z; dst[2][3] = 0.0f; dst[2][0] = 0.0f; dst[2][1] = 0.0f; dst[2][2] = z; dst[2][3] = 0.0f;
} }
//-----------------------------------------------------------------------------
// Purpose: Builds the matrix for a counterclockwise rotation about an arbitrary axis.
//          The 3x3 rotation part of dst is filled from the formula below and the
//          fourth (translation) column is set to zero.
//
//		   | ax2 + (1 - ax2)cosQ		axay(1 - cosQ) - azsinQ		azax(1 - cosQ) + aysinQ |
// Ra(Q) = | axay(1 - cosQ) + azsinQ	ay2 + (1 - ay2)cosQ			ayaz(1 - cosQ) - axsinQ |
//		   | azax(1 - cosQ) - aysinQ	ayaz(1 - cosQ) + axsinQ		az2 + (1 - az2)cosQ     |
//
// Input  : vAxisOfRot   - axis to rotate about. It is used as given; nothing here
//                         normalizes it (NOTE(review): the formula presumably
//                         expects a unit-length axis - confirm with callers).
//          angleDegrees - rotation angle in degrees; converted to radians internally.
//          dst          - receives the resulting matrix.
//-----------------------------------------------------------------------------
void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst )
{
	const float flRadians = angleDegrees * ( M_PI / 180.0 );
	const float flSin = sin( flRadians );
	const float flCos = cos( flRadians );

	const float ax = vAxisOfRot[0];
	const float ay = vAxisOfRot[1];
	const float az = vAxisOfRot[2];

	const float axSqr = ax * ax;
	const float aySqr = ay * ay;
	const float azSqr = az * az;

	// Row 0
	dst[0][0] = axSqr + (1 - axSqr) * flCos;
	dst[0][1] = ax * ay * (1 - flCos) - az * flSin;
	dst[0][2] = az * ax * (1 - flCos) + ay * flSin;
	dst[0][3] = 0;

	// Row 1
	dst[1][0] = ax * ay * (1 - flCos) + az * flSin;
	dst[1][1] = aySqr + (1 - aySqr) * flCos;
	dst[1][2] = ay * az * (1 - flCos) - ax * flSin;
	dst[1][3] = 0;

	// Row 2
	dst[2][0] = az * ax * (1 - flCos) - ay * flSin;
	dst[2][1] = ay * az * (1 - flCos) + ax * flSin;
	dst[2][2] = azSqr + (1 - azSqr) * flCos;
	dst[2][3] = 0;
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Computes the transpose // Computes the transpose
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
@ -1450,33 +1392,6 @@ void VectorYawRotate( const Vector &in, float flYaw, Vector &out)
out.z = in.z; out.z = in.z;
} }
// Bias: remaps x through a power curve, pow( x, log(biasAmt) / log(0.5) ),
// so a biasAmt of 0.5 gives an exponent of ~1 and returns x unchanged.
//
// The exponent is derived from biasAmt on every call. An earlier version kept
// it in function-local statics as a one-entry cache, but the cache key was
// never stored, so the exponent was recomputed on every call anyway while the
// write to the shared static made the function unsafe to call from several
// threads at once. Computing it locally gives the same result for valid input
// and leaves no shared state behind.
//
// Input  : x       - value to remap
//          biasAmt - bias amount; must be positive for log() to be defined
// Output : the biased value (asserted not to be NaN in builds where Assert is active)
float Bias( float x, float biasAmt )
{
	const float flExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
	const float fRet = pow( x, flExponent );
	Assert ( !IS_NAN( fRet ) );
	return fRet;
}
// Gain: applies Bias to each side of 0.5 separately, using (1 - biasAmt) as
// the bias amount. Values below 0.5 are scaled up to Bias's input range and
// the result is halved; all other values are mirrored around 1 first and the
// halved result is subtracted from 1.
//
// Thread safety is whatever Bias provides, since this only forwards to it.
float Gain( float x, float biasAmt )
{
	const float flInvBias = 1-biasAmt;

	if( x < 0.5 )
	{
		return 0.5f * Bias( 2*x, flInvBias );
	}

	return 1 - 0.5f * Bias( 2 - 2*x, flInvBias );
}
float SmoothCurve( float x ) float SmoothCurve( float x )
{ {
// Actual smooth curve. Visualization: // Actual smooth curve. Visualization:

File diff suppressed because it is too large Load Diff

View File

@ -22,10 +22,16 @@ extern float (*pfFastCos)(float x);
// The following are not declared as macros because they are often used in limiting situations, // The following are not declared as macros because they are often used in limiting situations,
// and sometimes the compiler simply refuses to inline them for some reason // and sometimes the compiler simply refuses to inline them for some reason
#define FastSqrt(x) (*pfSqrt)(x) #define FastSqrt(x) sqrtf(x)
#define FastRSqrt(x) (*pfRSqrt)(x) #define FastRSqrt(x) (1.f/sqrtf(x))
#define FastRSqrtFast(x) (*pfRSqrtFast)(x) #define FastRSqrtFast(x) (1.f/sqrtf(x))
#ifdef _WIN32
#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c) #define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
#else
#define FastSinCos(x,s,c) sincosf(x,s,c)
#endif
#define FastCos(x) (*pfFastCos)(x) #define FastCos(x) (*pfFastCos)(x)
#if defined(__i386__) || defined(_M_IX86) #if defined(__i386__) || defined(_M_IX86)

View File

@ -30,7 +30,6 @@
// FP exception clean so this not a turnkey operation. // FP exception clean so this not a turnkey operation.
//#define FP_EXCEPTIONS_ENABLED //#define FP_EXCEPTIONS_ENABLED
#ifdef FP_EXCEPTIONS_ENABLED #ifdef FP_EXCEPTIONS_ENABLED
#include <float.h> // For _clearfp and _controlfp_s #include <float.h> // For _clearfp and _controlfp_s
#endif #endif
@ -93,37 +92,11 @@ private:
FPExceptionEnabler& operator=(const FPExceptionEnabler&); FPExceptionEnabler& operator=(const FPExceptionEnabler&);
}; };
inline float clamp( const float val, const float minVal, const float maxVal )
#ifdef DEBUG // stop crashing edit-and-continue
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{ {
if ( maxVal < minVal ) const float t = val < minVal ? minVal : val;
return maxVal; return t > maxVal ? maxVal : t;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
} }
#else // DEBUG
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{
#if defined(__i386__) || defined(_M_IX86)
_mm_store_ss( &val,
_mm_min_ss(
_mm_max_ss(
_mm_load_ss(&val),
_mm_load_ss(&minVal) ),
_mm_load_ss(&maxVal) ) );
#else
val = fpmax(minVal, val);
val = fpmin(maxVal, val);
#endif
return val;
}
#endif // DEBUG
// //
// Returns a clamped value in the range [min, max]. // Returns a clamped value in the range [min, max].
@ -131,17 +104,10 @@ FORCEINLINE float clamp( float val, float minVal, float maxVal )
template< class T > template< class T >
inline T clamp( T const &val, T const &minVal, T const &maxVal ) inline T clamp( T const &val, T const &minVal, T const &maxVal )
{ {
if ( maxVal < minVal ) const T t = val< minVal ? minVal : val;
return maxVal; return t > maxVal ? maxVal : t;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
} }
// plane_t structure // plane_t structure
// !!! if this is changed, it must be changed in asm code too !!! // !!! if this is changed, it must be changed in asm code too !!!
// FIXME: does the asm code even exist anymore? // FIXME: does the asm code even exist anymore?
@ -237,8 +203,8 @@ bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t
struct matrix3x4_t struct matrix3x4_t
{ {
matrix3x4_t() = default; inline matrix3x4_t() = default;
matrix3x4_t( inline matrix3x4_t(
float m00, float m01, float m02, float m03, float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13, float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23 ) float m20, float m21, float m22, float m23 )
@ -252,7 +218,7 @@ struct matrix3x4_t
// Creates a matrix where the X axis = forward // Creates a matrix where the X axis = forward
// the Y axis = left, and the Z axis = up // the Y axis = left, and the Z axis = up
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) inline void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
{ {
m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x; m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y; m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
@ -263,26 +229,23 @@ struct matrix3x4_t
// Creates a matrix where the X axis = forward // Creates a matrix where the X axis = forward
// the Y axis = left, and the Z axis = up // the Y axis = left, and the Z axis = up
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) inline matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
{ {
Init( xAxis, yAxis, zAxis, vecOrigin ); Init( xAxis, yAxis, zAxis, vecOrigin );
} }
inline void Invalidate( void ) inline void Invalidate( void )
{ {
for (int i = 0; i < 3; i++) for( int i=0; i < 12; i++ )
{ {
for (int j = 0; j < 4; j++) ((float*)m_flMatVal)[i] = VEC_T_NAN;
{
m_flMatVal[i][j] = VEC_T_NAN;
}
} }
} }
float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } inline float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } inline const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
float *Base() { return &m_flMatVal[0][0]; } inline float *Base() { return &m_flMatVal[0][0]; }
const float *Base() const { return &m_flMatVal[0][0]; } inline const float *Base() const { return &m_flMatVal[0][0]; }
float m_flMatVal[3][4]; float m_flMatVal[3][4];
}; };
@ -565,7 +528,13 @@ void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 ); bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out ); void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
inline void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
{
out[0][column] = in.x;
out[1][column] = in.y;
out[2][column] = in.z;
}
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out ) inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
{ {
@ -1079,7 +1048,19 @@ void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
// 0 1 // 0 1
// //
// With a biasAmt of 0.5, Bias returns X. // With a biasAmt of 0.5, Bias returns X.
// The exponent is derived from biasAmt on every call. An earlier version kept
// it in function-local statics as a one-entry cache, but the cache key was
// never stored, so the exponent was recomputed on every call anyway while the
// write to the shared static made the function unsafe to call from several
// threads at once. Computing it locally gives the same result for valid input
// and leaves no shared state behind.
//
// Input  : x       - value to remap
//          biasAmt - bias amount; must be positive for log() to be defined
// Output : pow( x, log(biasAmt) / log(0.5) ), asserted not to be NaN in
//          builds where Assert is active
inline float Bias( float x, float biasAmt )
{
	const float flExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
	const float fRet = pow( x, flExponent );
	Assert ( !IS_NAN( fRet ) );
	return fRet;
}
// Gain is similar to Bias, but biasAmt biases towards or away from 0.5. // Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
@ -1111,9 +1092,14 @@ float Bias( float x, float biasAmt );
// |***** // |*****
// |___________________ // |___________________
// 0 1 // 0 1
// Implementation: Bias is applied to each side of 0.5 separately, using
// (1 - biasAmt) as the bias amount. Values below 0.5 are scaled up to Bias's
// input range and the result is halved; all other values are mirrored around
// 1 first and the halved result is subtracted from 1.
//
// Thread safety is whatever Bias provides, since this only forwards to it.
inline float Gain( float x, float biasAmt )
{
	const float flInvBias = 1-biasAmt;

	if( x < 0.5 )
	{
		return 0.5f * Bias( 2*x, flInvBias );
	}

	return 1 - 0.5f * Bias( 2 - 2*x, flInvBias );
}
// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave // SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for // where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
// any fadein/fadeout effect where it should start and end smoothly. // any fadein/fadeout effect where it should start and end smoothly.

View File

@ -35,7 +35,7 @@ class Vector2D;
// 4D Vector4D // 4D Vector4D
//========================================================= //=========================================================
class Vector4D class alignas(16) Vector4D
{ {
public: public:
// Members // Members

File diff suppressed because it is too large Load Diff

View File

@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
} D3DSHADER_PARAM_REGISTER_TYPE; } D3DSHADER_PARAM_REGISTER_TYPE;
struct D3DMATRIX struct alignas(16) D3DMATRIX
{ {
union union
{ {

View File

@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
} D3DSHADER_PARAM_REGISTER_TYPE; } D3DSHADER_PARAM_REGISTER_TYPE;
struct D3DMATRIX struct alignas(16) D3DMATRIX
{ {
union union
{ {