Fix shader compilation; replace some OpenGL functions with their OpenGL ES analogs

This commit is contained in:
nillerusr 2021-11-26 20:51:33 +03:00
parent 1aa234af4d
commit 24b2f5892c
16 changed files with 3462 additions and 859 deletions

View File

@ -263,6 +263,7 @@ public:
GLint m_locVertexParams; // "vc" per dx9asmtogl2 convention
GLint m_locVertexBoneParams; // "vcbones"
GLint m_locVertexInteger0; // "i0"
GLint m_locAlphaRef; // "alpha_ref"
enum { cMaxVertexShaderBoolUniforms = 4, cMaxFragmentShaderBoolUniforms = 1 };

View File

@ -30,18 +30,16 @@ GL_FUNC_VOID(OpenGL,true,glAlphaFunc,(GLenum a,GLclampf b),(a,b))
GL_FUNC_VOID(OpenGL,true,glAttachShader,(GLuint a, GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glBindAttribLocation,(GLuint a,GLuint b,const GLchar *c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glBindBuffer,(GLenum a,GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glBindProgram,(GLenum a,GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glBindTexture,(GLenum a,GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glBlendColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glBlendEquation,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glBlendFunc,(GLenum a,GLenum b),(a,b))
GL_FUNC_VOID(OpenGL,true,glBufferData,(GLenum a, GLsizeiptr b, const GLvoid *c,GLenum d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glClear,(GLbitfield a),(a))
GL_FUNC_VOID(OpenGL,true,glClearDepthf,(GLfloat a),(a))
GL_FUNC_VOID(OpenGL,true,glClearColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glClearDepth,(GLclampd a),(a))
GL_FUNC_VOID(OpenGL,true,glReadPixels, (GLint a, GLint b, GLsizei c, GLsizei d, GLenum e, GLenum f, void * g), (a,b,c,d,e,f,g))
GL_FUNC_VOID(OpenGL,true,glClearStencil,(GLint a),(a))
GL_FUNC_VOID(OpenGL,true,glClipPlane,(GLenum a,const GLdouble *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glColorMask,(GLboolean a,GLboolean b,GLboolean c,GLboolean d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glCompileShader,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glGetShaderiv,(GLuint a, GLenum b, GLint *c),(a,b,c))
@ -52,20 +50,17 @@ GL_FUNC_VOID(OpenGL,true,glCompressedTexImage3D,(GLenum a,GLint b,GLenum c,GLsiz
GL_FUNC(OpenGL,true,GLuint,glCreateProgram,(void),())
GL_FUNC(OpenGL,true,GLuint,glCreateShader,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glDeleteBuffers,(GLsizei a,const GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDeleteObject,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glDeletePrograms,(GLsizei a,const GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDeleteProgram,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glDeleteShader,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glDeleteTextures,(GLsizei a,const GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDepthFunc,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glDepthMask,(GLboolean a),(a))
GL_FUNC_VOID(OpenGL,true,glDepthRangef,(GLfloat a,GLfloat b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDepthRange,(GLclampd a,GLclampd b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDetachObject,(GLuint a,GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDisable,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glDisableVertexAttribArray,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glDrawArrays,(GLenum a,GLint b,GLsizei c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glDrawBuffer,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glDrawBuffers,(GLsizei a,const GLenum *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDetachShader,(GLuint a,GLuint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glDrawRangeElements,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f),(a,b,c,d,e,f))
#ifndef OSX // 10.6/GL 2.1 compatability
GL_FUNC_VOID(OpenGL,true,glDrawRangeElementsBaseVertex,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f, GLenum g),(a,b,c,d,e,f,g))
@ -76,35 +71,27 @@ GL_FUNC_VOID(OpenGL,true,glFinish,(void),())
GL_FUNC_VOID(OpenGL,true,glFlush,(void),())
GL_FUNC_VOID(OpenGL,true,glFrontFace,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glGenBuffers,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGenPrograms,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGenTextures,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGetBooleanv,(GLenum a,GLboolean *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGetCompressedTexImage,(GLenum a,GLint b,GLvoid *c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glGetDoublev,(GLenum a,GLdouble *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGetFloatv,(GLenum a,GLfloat *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
//GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glGetIntegerv,(GLenum a,GLint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGetObjectParameteriv,(GLuint a,GLenum b,GLint *c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glGetProgramiv,(GLenum a,GLenum b,GLint *c),(a,b,c))
GL_FUNC(OpenGL,true,const GLubyte *,glGetString,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glGetTexImage,(GLenum a,GLint b,GLenum c,GLenum d,GLvoid *e),(a,b,c,d,e))
GL_FUNC(OpenGL,true,GLint,glGetUniformLocation,(GLuint a,const GLchar *b),(a,b))
GL_FUNC(OpenGL,true,GLboolean,glIsEnabled,(GLenum a),(a))
GL_FUNC(OpenGL,true,GLboolean,glIsTexture,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glLinkProgram,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
//GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
GL_FUNC_VOID(OpenGL,true,glPixelStorei,(GLenum a,GLint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
//GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
GL_FUNC_VOID(OpenGL,true,glPolygonOffset,(GLfloat a,GLfloat b),(a,b))
GL_FUNC_VOID(OpenGL,true,glPopAttrib,(void),())
GL_FUNC_VOID(OpenGL,true,glPushAttrib,(GLbitfield a),(a))
GL_FUNC_VOID(OpenGL,true,glReadBuffer,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glScissor,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glShaderSource,(GLuint a,GLsizei b,const GLchar **c,const GLint *d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glStencilFunc,(GLenum a,GLint b,GLuint c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glStencilMask,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glStencilOp,(GLenum a,GLenum b,GLenum c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glTexCoord2f,(GLfloat a,GLfloat b),(a,b))
GL_FUNC_VOID(OpenGL,true,glTexImage2D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLint f,GLenum g,GLenum h,const GLvoid *i),(a,b,c,d,e,f,g,h,i))
GL_FUNC_VOID(OpenGL,true,glTexImage3D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLsizei f,GLint g,GLenum h,GLenum i,const GLvoid *j),(a,b,c,d,e,f,g,h,i,j))
GL_FUNC_VOID(OpenGL,true,glTexParameterfv,(GLenum a,GLenum b,const GLfloat *c),(a,b,c))
@ -115,16 +102,9 @@ GL_FUNC_VOID(OpenGL,true,glUniform1i,(GLint a,GLint b),(a,b))
GL_FUNC_VOID(OpenGL,true,glUniform4fv,(GLint a,GLsizei b,const GLfloat *c),(a,b,c))
GL_FUNC(OpenGL,true,GLboolean,glUnmapBuffer,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glUseProgram,(GLuint a),(a))
GL_FUNC_VOID(OpenGL,true,glVertex3f,(GLfloat a,GLfloat b,GLfloat c),(a,b,c))
GL_FUNC_VOID(OpenGL,true,glVertexAttribPointer,(GLuint a,GLint b,GLenum c,GLboolean d,GLsizei e,const GLvoid *f),(a,b,c,d,e,f))
GL_FUNC_VOID(OpenGL,true,glViewport,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glEnableClientState,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glDisableClientState,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glClientActiveTexture,(GLenum a),(a))
GL_FUNC_VOID(OpenGL,true,glVertexPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glTexCoordPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glProgramEnvParameters4fvEXT,(GLenum a,GLuint b,GLsizei c,const GLfloat *d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glColor4sv,(const GLshort *a),(a))
GL_FUNC_VOID(OpenGL,true,glStencilOpSeparate,(GLenum a,GLenum b,GLenum c,GLenum d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glStencilFuncSeparate,(GLenum a,GLenum b,GLint c,GLuint d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,true,glGetTexLevelParameteriv,(GLenum a,GLint b,GLenum c,GLint *d),(a,b,c,d))
@ -164,11 +144,6 @@ GL_FUNC_VOID(GL_ARB_sync,false,glWaitSync,(GLsync a, GLbitfield b, GLuint64 c),(
GL_FUNC_VOID(GL_ARB_sync,false,glDeleteSync,(GLsync a),(a))
GL_FUNC(GL_ARB_sync,false,GLsync,glFenceSync,(GLenum a, GLbitfield b),(a,b))
#endif
GL_EXT(GL_EXT_draw_buffers2,-1,-1)
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glColorMaskIndexedEXT,(GLuint a,GLboolean b,GLboolean c,GLboolean d,GLboolean e),(a,b,c,d,e))
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glEnableIndexedEXT,(GLenum a,GLuint b),(a,b))
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glDisableIndexedEXT,(GLenum a,GLuint b),(a,b))
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glGetBooleanIndexedvEXT,(GLenum a,GLuint b,GLboolean *c),(a,b,c))
GL_EXT(GL_EXT_bindable_uniform,-1,-1)
GL_FUNC_VOID(GL_EXT_bindable_uniform,false,glUniformBufferEXT,(GLuint a,GLint b,GLuint c),(a,b,c))
GL_FUNC(GL_EXT_bindable_uniform,false,int,glGetUniformBufferSizeEXT,(GLenum a, GLenum b),(a,b))
@ -212,7 +187,7 @@ GL_FUNC_VOID(OpenGL,false,glDeleteRenderbuffers,(GLsizei a,const GLuint *b),(a,b
GL_FUNC_VOID(OpenGL,false,glFramebufferRenderbuffer,(GLenum a,GLenum b,GLenum c,GLuint d),(a,b,c,d))
GL_FUNC_VOID(OpenGL,false,glFramebufferTexture2D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e),(a,b,c,d,e))
GL_FUNC_VOID(OpenGL,false,glFramebufferTexture3D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e,GLint f),(a,b,c,d,e,f))
GL_FUNC_VOID(OpenGL,false,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,true,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,false,glGenRenderbuffers,(GLsizei a,GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,false,glDeleteFramebuffers,(GLsizei a,const GLuint *b),(a,b))
GL_FUNC_VOID(OpenGL,false,glBlitFramebuffer,(GLint a,GLint b,GLint c,GLint d,GLint e,GLint f,GLint g,GLint h,GLbitfield i,GLenum j),(a,b,c,d,e,f,g,h,i,j))
@ -248,9 +223,6 @@ GL_FUNC_VOID(OpenGL,true,glGenQueries,(GLsizei n, GLuint *ids), (n, ids))
GL_FUNC_VOID(OpenGL,true,glDeleteQueries,(GLsizei n, const GLuint *ids),(n, ids))
GL_FUNC_VOID(OpenGL,true,glBeginQuery,(GLenum target, GLuint id), (target, id))
GL_FUNC_VOID(OpenGL,true,glEndQuery,(GLenum target), (target))
GL_FUNC_VOID(OpenGL,true,glQueryCounter,(GLuint id, GLenum target), (id, target))
GL_FUNC_VOID(OpenGL,true,glGetQueryObjectiv,(GLuint id, GLenum pname, GLint *params), (id, pname, params))
GL_FUNC_VOID(OpenGL,true,glGetQueryObjectui64v,(GLuint id, GLenum pname, GLuint64 *params), (id, pname, params))
GL_FUNC_VOID(OpenGL,true,glCopyBufferSubData,(GLenum readtarget, GLenum writetarget, GLintptr readoffset, GLintptr writeoffset, GLsizeiptr size),(readtarget, writetarget, readoffset, writeoffset, size))
#endif // !OSX
@ -264,8 +236,6 @@ GL_FUNC_VOID(OpenGL,true,glBindVertexArray,(GLuint a),(a))
#endif // !OSX
GL_EXT(GL_EXT_texture_sRGB_decode,-1,-1)
GL_FUNC_VOID(OpenGL,true,glPushClientAttrib,(GLbitfield a),(a))
GL_FUNC_VOID(OpenGL,true,glPopClientAttrib,(void),())
GL_EXT(GL_NVX_gpu_memory_info,-1,-1)
GL_EXT(GL_ATI_meminfo,-1,-1)
GL_EXT(GL_EXT_texture_compression_s3tc,-1,-1)

View File

@ -198,9 +198,11 @@ FORCEINLINE void glGetEnumv( GLenum which, GLenum *dst )
// shorthand macros
#define EQ(fff) ( (src.fff) == (fff) )
//rasterizer
struct GLAlphaTestEnable_t { GLint enable; inline bool operator==(const GLAlphaTestEnable_t& src) const { return EQ(enable); } };
struct GLAlphaTestFunc_t { GLenum func; GLclampf ref; inline bool operator==(const GLAlphaTestFunc_t& src) const { return EQ(func) && EQ(ref); } };
struct GLAlphaTest_t { GLint enable; GLenum func; GLclampf ref; };
struct GLCullFaceEnable_t { GLint enable; inline bool operator==(const GLCullFaceEnable_t& src) const { return EQ(enable); } };
struct GLCullFrontFace_t { GLenum value; inline bool operator==(const GLCullFrontFace_t& src) const { return EQ(value); } };
struct GLPolygonMode_t { GLenum values[2]; inline bool operator==(const GLPolygonMode_t& src) const { return EQ(values[0]) && EQ(values[1]); } };
@ -209,7 +211,7 @@ struct GLScissorEnable_t { GLint enable; inline bool operator==(co
struct GLScissorBox_t { GLint x,y; GLsizei width, height; inline bool operator==(const GLScissorBox_t& src) const { return EQ(x) && EQ(y) && EQ(width) && EQ(height); } };
struct GLAlphaToCoverageEnable_t{ GLint enable; inline bool operator==(const GLAlphaToCoverageEnable_t& src) const { return EQ(enable); } };
struct GLViewportBox_t { GLint x,y; GLsizei width, height; uint widthheight; inline bool operator==(const GLViewportBox_t& src) const { return EQ(x) && EQ(y) && EQ(width) && EQ(height); } };
struct GLViewportDepthRange_t { GLdouble flNear,flFar; inline bool operator==(const GLViewportDepthRange_t& src) const { return EQ(flNear) && EQ(flFar); } };
struct GLViewportDepthRange_t { GLfloat flNear,flFar; inline bool operator==(const GLViewportDepthRange_t& src) const { return EQ(flNear) && EQ(flFar); } };
struct GLClipPlaneEnable_t { GLint enable; inline bool operator==(const GLClipPlaneEnable_t& src) const { return EQ(enable); } };
struct GLClipPlaneEquation_t { GLfloat x,y,z,w; inline bool operator==(const GLClipPlaneEquation_t& src) const { return EQ(x) && EQ(y) && EQ(z) && EQ(w); } };
@ -235,7 +237,7 @@ struct GLStencilWriteMask_t { GLint mask; inline bool operator==(c
//clearing
struct GLClearColor_t { GLfloat r,g,b,a; inline bool operator==(const GLClearColor_t& src) const { return EQ(r) && EQ(g) && EQ(b) && EQ(a); } };
struct GLClearDepth_t { GLdouble d; inline bool operator==(const GLClearDepth_t& src) const { return EQ(d); } };
struct GLClearDepth_t { GLfloat d; inline bool operator==(const GLClearDepth_t& src) const { return EQ(d); } };
struct GLClearStencil_t { GLint s; inline bool operator==(const GLClearStencil_t& src) const { return EQ(s); } };
#undef EQ
@ -306,15 +308,20 @@ template<typename T> void GLContextGetDefaultIndexed( T *dst, int index );
//===============================================================================
// template specializations for each type of state
static GLAlphaTest_t g_alpha_test;
// --- GLAlphaTestEnable ---
FORCEINLINE void GLContextSet( GLAlphaTestEnable_t *src )
{
glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
// glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
g_alpha_test.enable = src->enable;
}
FORCEINLINE void GLContextGet( GLAlphaTestEnable_t *dst )
{
dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
// dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
dst->enable = g_alpha_test.enable;
}
FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
@ -326,12 +333,16 @@ FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
FORCEINLINE void GLContextSet( GLAlphaTestFunc_t *src )
{
// gGL->glAlphaFunc( src->func, src->ref );
g_alpha_test.func = src->func;
g_alpha_test.ref = src->ref;
}
FORCEINLINE void GLContextGet( GLAlphaTestFunc_t *dst )
{
glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
// glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
// gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
dst->func = g_alpha_test.func;
dst->ref = g_alpha_test.ref;
}
FORCEINLINE void GLContextGetDefault( GLAlphaTestFunc_t *dst )
@ -502,7 +513,7 @@ FORCEINLINE void GLContextSet( GLViewportDepthRange_t *src )
FORCEINLINE void GLContextGet( GLViewportDepthRange_t *dst )
{
gGL->glGetDoublev ( GL_DEPTH_RANGE, &dst->flNear );
gGL->glGetFloatv( GL_DEPTH_RANGE, &dst->flNear );
}
FORCEINLINE void GLContextGetDefault( GLViewportDepthRange_t *dst )
@ -584,12 +595,26 @@ FORCEINLINE void GLContextGetDefault( GLColorMaskSingle_t *dst )
// --- GLColorMaskMultiple ---
FORCEINLINE void GLContextSetIndexed( GLColorMaskMultiple_t *src, int index )
{
gGL->glColorMaskIndexedEXT ( index, src->r, src->g, src->b, src->a );
GLint Rfbo = 0, Dfbo = 0;
gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
gGL->glBindFramebuffer( target, index );
gGL->glColorMask ( src->r, src->g, src->b, src->a );
gGL->glBindFramebuffer( target, Dfbo );
}
FORCEINLINE void GLContextGetIndexed( GLColorMaskMultiple_t *dst, int index )
{
gGL->glGetBooleanIndexedvEXT ( GL_COLOR_WRITEMASK, index, (GLboolean*)&dst->r );
GLint Rfbo = 0, Dfbo = 0;
gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
gGL->glBindFramebuffer( target, index );
gGL->glGetBooleanv( GL_COLOR_WRITEMASK, (GLboolean*)&dst->r );
gGL->glBindFramebuffer( target, Dfbo );
}
FORCEINLINE void GLContextGetDefaultIndexed( GLColorMaskMultiple_t *dst, int index )
@ -698,7 +723,7 @@ FORCEINLINE void GLContextSet( GLBlendEnableSRGB_t *src )
FORCEINLINE void GLContextGet( GLBlendEnableSRGB_t *dst )
{
//dst->enable = glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
// dst->enable = gGL->glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
dst->enable = true; // wtf ?
}
@ -864,13 +889,12 @@ FORCEINLINE void GLContextGetDefault( GLClearColor_t *dst )
// --- GLClearDepth ---
FORCEINLINE void GLContextSet( GLClearDepth_t *src )
{
// TOFUCK: wut
// gGL->glClearDepth ( src->d );
gGL->glClearDepthf( src->d );
}
FORCEINLINE void GLContextGet( GLClearDepth_t *dst )
{
gGL->glGetDoublev ( GL_DEPTH_CLEAR_VALUE, &dst->d );
gGL->glGetFloatv( GL_DEPTH_CLEAR_VALUE, &dst->d );
}
FORCEINLINE void GLContextGetDefault( GLClearDepth_t *dst )

View File

@ -718,7 +718,7 @@ CGLMShaderPair::~CGLMShaderPair( )
{
if (m_program)
{
gGL->glDeleteObject( m_program );
gGL->glDeleteProgram( m_program );
m_program = 0;
}
}
@ -746,7 +746,7 @@ bool CGLMShaderPair::ValidateProgramPair()
// check for success
GLint result = GL_TRUE;
gGL->glGetObjectParameteriv( m_program, GL_OBJECT_LINK_STATUS_ARB, &result ); // want GL_TRUE
gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &result);
m_bCheckLinkStatus = false;
if (result == GL_TRUE)
@ -762,12 +762,6 @@ bool CGLMShaderPair::ValidateProgramPair()
GLint laux = 0;
// do some digging
gGL->glGetObjectParameteriv( m_program, GL_OBJECT_INFO_LOG_LENGTH_ARB, &length );
GLchar *logString = (GLchar *)malloc( length * sizeof(GLchar) );
gGL->glGetInfoLog( m_program, length, &laux, logString );
GLMPRINTF( ("-D- ----- GLSL link failed: \n %s ", logString) );
#if !GLM_FREE_SHADER_TEXT
char *vtemp = strdup( m_vertexProg->m_text );
vtemp[m_vertexProg->m_descs[kGLMGLSL].m_textOffset + m_vertexProg->m_descs[kGLMGLSL].m_textLength] = 0;
@ -784,8 +778,6 @@ bool CGLMShaderPair::ValidateProgramPair()
free( ftemp );
free( vtemp );
#endif
free( logString );
GLMPRINTF( ("-D- -----end-----") );
}
@ -793,11 +785,15 @@ bool CGLMShaderPair::ValidateProgramPair()
{
gGL->glUseProgram( m_program );
printf("Sample text\n");
m_ctx->NewLinkedProgram();
m_locVertexParams = gGL->glGetUniformLocation( m_program, "vc" );
m_locVertexBoneParams = gGL->glGetUniformLocation( m_program, "vcbones" );
m_locVertexScreenParams = gGL->glGetUniformLocation( m_program, "vcscreen" );
m_locAlphaRef = gGL->glGetUniformLocation( m_program, "alpha_ref" );
m_nScreenWidthHeight = 0xFFFFFFFF;
m_locVertexInteger0 = gGL->glGetUniformLocation( m_program, "i0" );
@ -940,13 +936,13 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
// attempt link. but first, detach any previously attached programs
if (m_vertexProg)
{
gGL->glDetachObject(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
gGL->glDetachShader(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
m_vertexProg = NULL;
}
if (m_fragmentProg)
{
gGL->glDetachObject(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
gGL->glDetachShader(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
m_fragmentProg = NULL;
}
@ -987,7 +983,7 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
gGL->glLinkProgram( m_program );
GLint isLinked = 0;
gGL->glGetShaderiv(m_program, GL_LINK_STATUS, &isLinked);
gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &isLinked);
if(isLinked == GL_FALSE)
{
GLint maxLength = 0;
@ -997,8 +993,8 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
gGL->glGetProgramInfoLog( m_program, sizeof(log), &maxLength, log );
if( maxLength )
{
printf("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
printf("shader %d link log: %s\n", m_program, log);
Msg("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
Msg("shader %d link log: %s\n", m_program, log);
}
}

View File

@ -239,8 +239,8 @@ bool CGLMQuery::IsDone( void )
{
// prepare to pay a big price on drivers prior to 10.6.4+SLGU
GLint available = 0;
gGL->glGetQueryObjectiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
GLuint available = 0;
gGL->glGetQueryObjectuiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
m_done = (available != 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,341 @@
#include <stdint.h>
#include <stddef.h>
/*
DXT1/DXT3/DXT5 texture decompression
The original code is from Benjamin Dobell, see below for details. Compared to
the original this one adds DXT3 decompression, is valid C89, and is x64
compatible as it uses fixed size integers everywhere. It also uses a different
PackRGBA order.
---
Copyright (c) 2012, Matthäus G. "Anteru" Chajdas (http://anteru.net)
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
Copyright (C) 2009 Benjamin Dobell, Glass Echidna
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
*/
/*
PackRGBA(): Packs four 8-bit channels into one 32-bit pixel value.
Layout of the result: red in bits 0-7, green in bits 8-15, blue in bits 16-23
and alpha in bits 24-31.

Every channel is widened to uint32_t before it is shifted. A uint8_t operand
is otherwise promoted to (signed) int, and shifting an alpha value >= 0x80
left by 24 bits overflows int, which is undefined behaviour.
*/
static uint32_t PackRGBA (uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
	return (uint32_t)r
		| ((uint32_t)g << 8)
		| ((uint32_t)b << 16)
		| ((uint32_t)a << 24);
}
/*
DecompressBlockDXT1Internal(): Decodes the 8-byte colour part of one DXT block
into a 4x4 tile of packed pixels (see PackRGBA for the channel layout).

const uint8_t *block:        the 8 colour bytes: two 16-bit 5:6:5 colours followed
                             by 32 bits of 2-bit per-texel selectors.
uint32_t *output:            first (top-left) pixel of the destination tile.
uint32_t outputStride:       distance between two tile rows, in pixels.
int transparent0:            when non-zero, selector 3 in three-colour mode
                             (color0 <= color1) yields alpha 0.
int *simpleAlpha:            set to 1 when any decoded texel has alpha 0.
int *complexAlpha:           set to 1 when any decoded texel has alpha 1..254.
const uint8_t *alphaValues:  16 per-texel alpha values, row-major (row*4 + column).

The flags are only ever raised, never cleared; the caller initialises them.

The block is read byte by byte in little-endian order instead of through
uint16_t / uint32_t pointer casts, so the result does not depend on the
alignment of 'block' or on the host byte order.
*/
static void DecompressBlockDXT1Internal (const uint8_t* block,
	uint32_t* output,
	uint32_t outputStride,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	const uint8_t* alphaValues)
{
	uint32_t temp, code;
	uint16_t color0, color1;
	uint8_t r0, g0, b0, r1, g1, b1;
	int fourColorMode;
	int i, j;

	color0 = (uint16_t)(block[0] | (block[1] << 8));
	color1 = (uint16_t)(block[2] | (block[3] << 8));

	/* Scale the 5:6:5 channels of both endpoint colours to the 0..255 range. */
	temp = (color0 >> 11) * 255 + 16;
	r0 = (uint8_t)((temp/32 + temp)/32);
	temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
	g0 = (uint8_t)((temp/64 + temp)/64);
	temp = (color0 & 0x001F) * 255 + 16;
	b0 = (uint8_t)((temp/32 + temp)/32);

	temp = (color1 >> 11) * 255 + 16;
	r1 = (uint8_t)((temp/32 + temp)/32);
	temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
	g1 = (uint8_t)((temp/64 + temp)/64);
	temp = (color1 & 0x001F) * 255 + 16;
	b1 = (uint8_t)((temp/32 + temp)/32);

	code = (uint32_t)block[4]
		| ((uint32_t)block[5] << 8)
		| ((uint32_t)block[6] << 16)
		| ((uint32_t)block[7] << 24);

	/* color0 > color1 selects the four-colour palette, otherwise the
	   three-colour palette whose fourth entry is black. */
	fourColorMode = color0 > color1;

	for (j = 0; j < 4; ++j) {
		for (i = 0; i < 4; ++i) {
			uint32_t finalColor, positionCode;
			uint8_t alpha;

			alpha = alphaValues [j*4+i];
			finalColor = 0;
			positionCode = (code >> 2*(4*j+i)) & 0x03;

			switch (positionCode) {
			case 0:
				finalColor = PackRGBA(r0, g0, b0, alpha);
				break;
			case 1:
				finalColor = PackRGBA(r1, g1, b1, alpha);
				break;
			case 2:
				if (fourColorMode)
					finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, alpha);
				else
					finalColor = PackRGBA((r0+r1)/2, (g0+g1)/2, (b0+b1)/2, alpha);
				break;
			case 3:
				if (fourColorMode) {
					finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, alpha);
				} else {
					if(transparent0) alpha=0;
					finalColor = PackRGBA(0, 0, 0, alpha);
				}
				break;
			}

			/* Report which kinds of transparency were seen in this tile. */
			if(!alpha)
				*simpleAlpha = 1;
			else if(alpha<0xff)
				*complexAlpha = 1;

			output [j*outputStride + i] = finalColor;
		}
	}
}
/*
void DecompressBlockDXT1(): Decodes a single DXT1 block and writes its 4x4 pixels into 'image'.
uint32_t x: x-coordinate of the block's first pixel.
uint32_t y: y-coordinate of the block's first pixel.
uint32_t width: width of the destination image in pixels (used as the row stride).
const uint8_t *blockStorage: pointer to the block to decode.
int transparent0, int *simpleAlpha, int *complexAlpha: forwarded unchanged to DecompressBlockDXT1Internal().
uint32_t *image: destination image the decoded pixels are stored in.
*/
void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image)
{
	/* A DXT1 block has no separate alpha data: every texel starts out opaque. */
	static const uint8_t opaque [16] = {
		255, 255, 255, 255, 255, 255, 255, 255,
		255, 255, 255, 255, 255, 255, 255, 255
	};
	uint32_t* firstPixel = image + x + (y * width);

	DecompressBlockDXT1Internal (blockStorage, firstPixel, width,
		transparent0, simpleAlpha, complexAlpha, opaque);
}
/*
void DecompressBlockDXT5(): Decompresses one block of a DXT5 texture and stores the resulting pixels at the appropriate offset in 'image'.
uint32_t x: x-coordinate of the first pixel in the block.
uint32_t y: y-coordinate of the first pixel in the block.
uint32_t width: width of the texture being decompressed.
const uint8_t *blockStorage: pointer to the 16-byte block to decompress
                             (2 alpha endpoints, 6 bytes of 3-bit alpha
                             selectors, then 8 bytes of colour data).
int transparent0: not used by this function; colours are always decoded
                  with the four-colour palette.
int *simpleAlpha: set to 1 when any decoded pixel has alpha 0.
int *complexAlpha: set to 1 when any decoded pixel has alpha 1..254.
uint32_t *image: pointer to image where the decompressed pixel data should be stored.

The flags are only ever raised, never cleared; the caller initialises them.

All multi-byte fields are assembled byte by byte in little-endian order.
This avoids misaligned / aliasing reads through uint16_t and uint32_t
pointer casts, and avoids left-shifting a byte promoted to signed int by
24 bits (undefined behaviour for values >= 0x80).
*/
void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image)
{
	uint8_t alpha0, alpha1;
	const uint8_t* bits;
	uint32_t alphaCode1;
	uint16_t alphaCode2;
	uint16_t color0, color1;
	uint8_t r0, g0, b0, r1, g1, b1;
	int i, j;
	uint32_t temp, code;

	(void)transparent0;

	alpha0 = blockStorage[0];
	alpha1 = blockStorage[1];

	/* 48 bits of alpha selectors: the low 16 bits go to alphaCode2,
	   the remaining 32 bits to alphaCode1. */
	bits = blockStorage + 2;
	alphaCode1 = (uint32_t)bits[2]
		| ((uint32_t)bits[3] << 8)
		| ((uint32_t)bits[4] << 16)
		| ((uint32_t)bits[5] << 24);
	alphaCode2 = (uint16_t)(bits[0] | (bits[1] << 8));

	color0 = (uint16_t)(blockStorage[8] | (blockStorage[9] << 8));
	color1 = (uint16_t)(blockStorage[10] | (blockStorage[11] << 8));

	/* Scale the 5:6:5 channels of both endpoint colours to the 0..255 range. */
	temp = (color0 >> 11) * 255 + 16;
	r0 = (uint8_t)((temp/32 + temp)/32);
	temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
	g0 = (uint8_t)((temp/64 + temp)/64);
	temp = (color0 & 0x001F) * 255 + 16;
	b0 = (uint8_t)((temp/32 + temp)/32);

	temp = (color1 >> 11) * 255 + 16;
	r1 = (uint8_t)((temp/32 + temp)/32);
	temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
	g1 = (uint8_t)((temp/64 + temp)/64);
	temp = (color1 & 0x001F) * 255 + 16;
	b1 = (uint8_t)((temp/32 + temp)/32);

	code = (uint32_t)blockStorage[12]
		| ((uint32_t)blockStorage[13] << 8)
		| ((uint32_t)blockStorage[14] << 16)
		| ((uint32_t)blockStorage[15] << 24);

	for (j = 0; j < 4; j++) {
		for (i = 0; i < 4; i++) {
			uint8_t finalAlpha;
			int alphaCode, alphaCodeIndex;
			uint8_t colorCode;
			uint32_t finalColor;

			/* Fetch this pixel's 3-bit alpha selector; the selector at bit 15
			   straddles alphaCode2 and alphaCode1. */
			alphaCodeIndex = 3*(4*j+i);
			if (alphaCodeIndex <= 12) {
				alphaCode = (alphaCode2 >> alphaCodeIndex) & 0x07;
			} else if (alphaCodeIndex == 15) {
				alphaCode = (alphaCode2 >> 15) | ((alphaCode1 << 1) & 0x06);
			} else /* alphaCodeIndex >= 18 && alphaCodeIndex <= 45 */ {
				alphaCode = (alphaCode1 >> (alphaCodeIndex - 16)) & 0x07;
			}

			if (alphaCode == 0) {
				finalAlpha = alpha0;
			} else if (alphaCode == 1) {
				finalAlpha = alpha1;
			} else {
				if (alpha0 > alpha1) {
					/* Eight-value mode: six interpolated steps between the endpoints. */
					finalAlpha = (uint8_t)(((8-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/7);
				} else {
					/* Six-value mode: four interpolated steps plus explicit 0 and 255. */
					if (alphaCode == 6) {
						finalAlpha = 0;
					} else if (alphaCode == 7) {
						finalAlpha = 255;
					} else {
						finalAlpha = (uint8_t)(((6-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/5);
					}
				}
			}

			colorCode = (code >> 2*(4*j+i)) & 0x03;
			finalColor = 0;
			switch (colorCode) {
			case 0:
				finalColor = PackRGBA(r0, g0, b0, finalAlpha);
				break;
			case 1:
				finalColor = PackRGBA(r1, g1, b1, finalAlpha);
				break;
			case 2:
				finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, finalAlpha);
				break;
			case 3:
				finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, finalAlpha);
				break;
			}

			/* Report which kinds of transparency were seen in this block. */
			if(finalAlpha==0) *simpleAlpha = 1;
			else if(finalAlpha<0xff) *complexAlpha = 1;

			image [i + x + (width* (y+j))] = finalColor;
		}
	}
}
/*
void DecompressBlockDXT3(): Decompresses one block of a DXT3 texture and stores the resulting pixels at the appropriate offset in 'image'.
uint32_t x: x-coordinate of the first pixel in the block.
uint32_t y: y-coordinate of the first pixel in the block.
uint32_t width: width of the texture being decompressed.
const uint8_t *blockStorage: pointer to the 16-byte block to decompress
                             (8 bytes of 4-bit alpha values followed by
                             8 bytes of colour data).
int transparent0, int *simpleAlpha, int *complexAlpha: forwarded unchanged to DecompressBlockDXT1Internal().
uint32_t *image: pointer to image where the decompressed pixel data should be stored.

The alpha rows are read byte by byte (little-endian) rather than through a
uint16_t pointer cast, so the result does not depend on the alignment of
'blockStorage' or on the host byte order.

NOTE(review): the colour part is decoded by DecompressBlockDXT1Internal(),
which switches to the three-colour palette when color0 <= color1. The S3TC
specification appears to require the four-colour palette for DXT3 in all
cases - confirm before relying on this for such blocks.
*/
void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image)
{
	int i;
	uint8_t alphaValues [16] = { 0 };

	/* Each row of four pixels stores its alpha in 16 bits, 4 bits per pixel,
	   lowest nibble first; multiplying by 17 maps 0..15 onto 0..255. */
	for (i = 0; i < 4; ++i) {
		uint8_t lowByte = blockStorage[0];
		uint8_t highByte = blockStorage[1];

		alphaValues [i*4 + 0] = (uint8_t)((lowByte & 0xF) * 17);
		alphaValues [i*4 + 1] = (uint8_t)(((lowByte >> 4) & 0xF) * 17);
		alphaValues [i*4 + 2] = (uint8_t)((highByte & 0xF) * 17);
		alphaValues [i*4 + 3] = (uint8_t)(((highByte >> 4) & 0xF) * 17);
		blockStorage += 2;
	}

	/* blockStorage now points at the 8-byte colour part of the block. */
	DecompressBlockDXT1Internal (blockStorage,
		image + x + (y * width), width, transparent0, simpleAlpha, complexAlpha, alphaValues);
}
// Texture DXT1 / DXT5 compression
// Uses the STB single-file ("one file") library stb_dxt.
// See https://github.com/nothings/stb
// for more details and other libraries.
#define STB_DXT_IMPLEMENTATION
#include "stb_dxt_104.h"

View File

@ -0,0 +1,19 @@
#ifndef _GL4ES_DECOMPRESS_H_
#define _GL4ES_DECOMPRESS_H_

/* The prototypes below use uint8_t / uint32_t; include their definitions so
   this header can be included on its own. */
#include <stdint.h>

/*
Block decoders for DXT1 / DXT3 / DXT5 compressed textures. All three share
one signature:

uint32_t x, y:               coordinates of the block's first pixel in 'image'.
uint32_t width:              width of the destination image in pixels.
const uint8_t *blockStorage: pointer to the compressed block to decode.
int transparent0:            forwarded to the DXT1 colour decoder by the
                             DXT1 and DXT3 variants.
int *simpleAlpha:            set to 1 when a decoded pixel has alpha 0.
int *complexAlpha:           set to 1 when a decoded pixel has alpha 1..254.
uint32_t *image:             destination image receiving the 4x4 decoded pixels.
*/
void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image);
void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image);
void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
	const uint8_t* blockStorage,
	int transparent0, int* simpleAlpha, int *complexAlpha,
	uint32_t* image);
#endif // _GL4ES_DECOMPRESS_H_

Binary file not shown.

View File

@ -69,13 +69,13 @@ static char g_szShadow2D[] =
"vec2 p2 = suv.xy+vec2(0.0,invSize);\n"
"vec2 p3 = suv.xy+vec2(invSize,0.0);\n"
"vec2 p4 = suv.xy+vec2(invSize);\n"
"float d = texture2D(u_depthTex,p1).r;\n"
"float d = texture(u_depthTex,p1).r;\n"
"float r = float(d>suv.z);\n"
"d = texture2D(u_depthTex,p2).r;\n"
"d = texture(u_depthTex,p2).r;\n"
"float r2 = float(d>suv.z);\n"
"d = texture2D(u_depthTex,p3).r;\n"
"d = texture(u_depthTex,p3).r;\n"
"float r3 = float(d>suv.z);\n"
"d = texture2D(u_depthTex,p4).r;\n"
"d = texture(u_depthTex,p4).r;\n"
"float r4 = float(d>suv.z);\n"
"p1*=size;\n"
"float a = p1.y-floor(p1.y);\n"
@ -955,7 +955,7 @@ void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageInd
// if ( fSemanticFlags & SEMANTIC_OUTPUT )
// V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_BackColor" : "gl_FrontColor" );
// else
V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_SecondaryColor" : "_gl_Color" );
V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
break;
case D3DDECLUSAGE_FOG:
TranslationError();
@ -1220,7 +1220,7 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
}
else
{
V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_Color" : "_gl_SecondaryColor" );
V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_FrontColor" : "_gl_FrontSecondaryColor" );
}
strcat_s( pRegisterName, nBufLen, buff );
}
@ -1456,7 +1456,6 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
m_dwConstIntUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this integer constant
break;
case D3DSPR_COLOROUT:
// TODO(nillerusr): go fck urself
if( dwRegNum+1 > m_iFragDataCount )
m_iFragDataCount = dwRegNum+1;
@ -2546,7 +2545,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
V_snprintf( szExtra, sizeof( szExtra ), ".%c", GetSwizzleComponent( pSrc0Reg, 3 ) );
V_strncat( szLOD, szExtra, sizeof( szLOD ) );
PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "texture2DLod", pSrc1Reg, sCoordVar.String(), szLOD );
PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "textureLod", pSrc1Reg, sCoordVar.String(), szLOD );
}
else if ( bIsShadowSampler )
{
@ -2564,12 +2563,12 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
// We use the vec4 variant of texture2DProj() intentionally here, since it lines up well with Direct3D.
CUtlString s4DProjCoords = EnsureNumSwizzleComponents( pSrc0Reg, 4 ); // Ensure vec4 variant
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2DProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
PrintToBufWithIndents( *m_pBufALUCode, "%s = textureProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
}
else
{
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 );
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
}
}
else if ( nSamplerType == SAMPLER_TYPE_3D )
@ -2580,7 +2579,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
}
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture3D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
}
else if ( nSamplerType == SAMPLER_TYPE_CUBE )
{
@ -2590,7 +2589,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
}
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
PrintToBufWithIndents( *m_pBufALUCode, "%s = textureCube( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
}
else
{
@ -3049,7 +3048,7 @@ void D3DToGL::WriteGLSLInputVariableAssignments()
if ( dwUsage == D3DDECLUSAGE_COLOR )
{
PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_SecondaryColor" : "_gl_Color" );
PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
}
else if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
{
@ -3182,6 +3181,10 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
m_bPutHexCodesAfterLines = (options & D3DToGL_PutHexCommentsAfterLines) != 0;
m_bGeneratingDebugText = (options & D3DToGL_GeneratingDebugText) != 0;
m_bGenerateSRGBWriteSuffix = (options & D3DToGL_OptionSRGBWriteSuffix) != 0;
// m_bGenerateSRGBWriteSuffix = true;
if( debugLabel && ( V_strstr( debugLabel ,"vertexlit_and_unlit_generic_bump_ps") ))
m_bGenerateSRGBWriteSuffix = true;
m_NumIndentTabs = 1; // start code indented one tab
m_nLoopDepth = 0;
@ -3675,6 +3678,7 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
}
// Control bit for sRGB Write suffix
if ( m_bGenerateSRGBWriteSuffix )
{
// R500 Hookup
@ -3889,16 +3893,18 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
{
StrcatToHeaderCode( g_szShadow2D );
StrcatToHeaderCode( g_szShadow2DProj );
}
else if( FindSubcode("shadow2D") )
StrcatToHeaderCode( g_szShadow2D );
if( FindSubcode("_gl_Color") )
StrcatToHeaderCode( "vec4 _gl_Color;\n" );
if( FindSubcode("_gl_FrontColor") && !m_bFrontColor )
StrcatToHeaderCode( "in vec4 _gl_FrontColor;\n" );
if( FindSubcode("_gl_SecondaryColor") )
StrcatToHeaderCode( "vec4 _gl_SecondaryColor;\n" );
if( FindSubcode("_gl_FrontSecondaryColor") && !m_bFrontSecondaryColor )
StrcatToHeaderCode( "in vec4 _gl_FrontSecondaryColor;\n" );
if( m_iFragDataCount && bVertexShader )
StrcatToHeaderCode( "\nuniform float alpha_ref;\n" );
StrcatToHeaderCode( "\nvoid main()\n{\n" );
if ( m_bUsedAtomicTempVar )
@ -3909,13 +3915,16 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
// sRGB Write suffix
if ( m_bGenerateSRGBWriteSuffix )
{
StrcatToALUCode( "vec3 sRGBFragData;\n" );
StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.454545f, 0.454545f, 0.454545f );\n" );
StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
StrcatToALUCode( "gl_FragData[0].xyz = mix( gl_FragData[0].xyz, sRGBFragData, flSRGBWrite );\n" );
// StrcatToALUCode( "vec3 sRGBFragData;\n" );
// StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
// StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.754545f, 0.754545f, 0.754545f );\n" );
// StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
StrcatToALUCode( "gl_FragData[0].xyz = pow(gl_FragData[0].xyz, vec3(1.0/2.2));\n" );
}
if( m_iFragDataCount && bVertexShader )
StrcatToALUCode( "if( gl_FragData[0].a < alpha_ref ) { discard; };\n" );
strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), "}\n" );
// Put all of the strings together for final program ( pHeaderCode + pAttribCode + pParamCode + pALUCode )

View File

@ -1201,8 +1201,8 @@ static void FillD3DCaps9( const GLMRendererInfoFields &glmRendererInfo, D3DCAPS9
pCaps->MaxPixelShader30InstructionSlots = 0;
#if DX_TO_GL_ABSTRACTION
pCaps->FakeSRGBWrite = !glmRendererInfo.m_hasGammaWrites;
pCaps->CanDoSRGBReadFromRTs = !glmRendererInfo.m_cantAttachSRGB;
pCaps->FakeSRGBWrite = true;//!glmRendererInfo.m_hasGammaWrites;
pCaps->CanDoSRGBReadFromRTs = true;//!glmRendererInfo.m_cantAttachSRGB;
pCaps->MixedSizeTargets = glmRendererInfo.m_hasMixedAttachmentSizes;
#endif
}

View File

@ -389,6 +389,14 @@ COpenGLEntryPoints::COpenGLEntryPoints()
m_bHave_GL_EXT_framebuffer_blit = true;
m_bHave_GL_EXT_framebuffer_multisample = true;
m_bHave_GL_ARB_occlusion_query = true;
m_bHave_GL_ARB_map_buffer_range = true;
m_bHave_GL_ARB_vertex_buffer_object = true;
m_bHave_GL_ARB_vertex_array_bgra = true;
m_bHave_GL_EXT_vertex_array_bgra = true;
m_bHave_GL_ARB_debug_output = true;
m_bHave_GL_EXT_direct_state_access = false;
m_bHave_GL_EXT_framebuffer_multisample_blit_scaled = true;
m_bHave_GL_EXT_texture_sRGB_decode = true;
glBindFramebuffer.Force(glBindFramebuffer.Pointer());
glBindRenderbuffer.Force(glBindRenderbuffer.Pointer());
@ -456,12 +464,6 @@ COpenGLEntryPoints::COpenGLEntryPoints()
printf( "GL_EXT_buffer_storage: %s\n", m_bHave_GL_EXT_buffer_storage ? "AVAILABLE" : "NOT AVAILABLE" );
printf( "GL_EXT_texture_sRGB_decode: %s\n", m_bHave_GL_EXT_texture_sRGB_decode ? "AVAILABLE" : "NOT AVAILABLE" );
bool bGLCanDecodeS3TCTextures = m_bHave_GL_EXT_texture_compression_s3tc || ( m_bHave_GL_EXT_texture_compression_dxt1 && m_bHave_GL_ANGLE_texture_compression_dxt3 && m_bHave_GL_ANGLE_texture_compression_dxt5 );
if ( !bGLCanDecodeS3TCTextures )
{
Error( "This application requires either the GL_EXT_texture_compression_s3tc, or the GL_EXT_texture_compression_dxt1 + GL_ANGLE_texture_compression_dxt3 + GL_ANGLE_texture_compression_dxt5 OpenGL extensions. Please install S3TC texture support.\n" );
}
#ifdef OSX
if ( CommandLine()->FindParm( "-glmnosrgbdecode" ) )
{

View File

@ -116,7 +116,7 @@ char g_preload2DTexFragmentProgramText[] =
"void main() \n"
"{ \n"
"vec4 r0; \n"
"r0 = texture2D( sampler15, otex.xy ); \n"
"r0 = texture( sampler15, otex.xy ); \n"
"_gl_FragColor = r0; //discard; \n"
"} \n"
};
@ -137,7 +137,7 @@ char g_preload3DTexFragmentProgramText[] =
"void main() \n"
"{ \n"
"vec4 r0; \n"
"r0 = texture3D( sampler15, otex.xyz ); \n"
"r0 = texture( sampler15, otex.xyz ); \n"
"_gl_FragColor = vec4(0,0,0,0); //discard; \n"
"} \n"
};
@ -157,7 +157,7 @@ char g_preloadCubeTexFragmentProgramText[] =
"void main() \n"
"{ \n"
"vec4 r0; \n"
"r0 = textureCube( sampler15, otex.xyz ); \n"
"r0 = texture( sampler15, otex.xyz ); \n"
"_gl_FragColor = r0; //discard; \n"
"} \n"
};
@ -449,6 +449,20 @@ GLMgr::~GLMgr()
{
}
extern void CompressedTexImage2D(GLenum target, GLint level, GLenum internalformat,
GLsizei width, GLsizei height, GLint border,
GLsizei imageSize, const GLvoid *data);
extern void TexImage2D(GLenum target,
GLint level,
GLint internalformat,
GLsizei width,
GLsizei height,
GLint border,
GLenum format,
GLenum type,
const void * data);
//===============================================================================
GLMContext *GLMgr::NewContext( IDirect3DDevice9 *pDevice, GLMDisplayParams *params )
@ -953,14 +967,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
bool srcGamma = srcTex && ((srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);
bool dstGamma = dstTex && ((dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);
bool doPushPop = (srcGamma != dstGamma) && gl_radar7954721_workaround_mixed.GetInt() && m_caps.m_nv; // workaround for cross gamma blit problems on NV
// ^^ need to re-check this on some post-10.6.3 build on NV to see if it was fixed
if (doPushPop)
{
gGL->glPushAttrib( 0 );
}
//----------------------------------------------------------------- figure out the plan
bool blitTwoStep = false; // think positive
@ -1053,7 +1059,7 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
// set read and draw buffers appropriately
gGL->glReadBuffer( glAttachFromClass[formatClass] );
gGL->glDrawBuffer ( glAttachFromClass[formatClass] );
gGL->glDrawBuffers( 1, &glAttachFromClass[formatClass] );
// blit#1 - to resolve to scratch
// implicitly means no scaling, thus will be done with NEAREST sampling
@ -1119,7 +1125,9 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
// backbuffer is special - FBO0 is left out (either scrubbed already, or not used)
BindFBOToCtx( NULL, GL_DRAW_FRAMEBUFFER );
gGL->glDrawBuffer ( GL_BACK );
GLenum bufs = GL_BACK;
gGL->glDrawBuffers( 1, &bufs );
yflip = true;
}
@ -1201,12 +1209,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
// restore GLM drawing FBO
BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
if (doPushPop)
{
gGL->glPopAttrib( );
}
//----------------------------------------------------------------- restore old scissor state
if (oldsciss.enable)
{
@ -1258,39 +1260,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
GLMPRINTF(( "-D- dst tex layout is %s", dstTex->m_layout->m_layoutSummary ));
}
int pushed = 0;
uint pushmask = gl_radar7954721_workaround_maskval.GetInt();
//GL_COLOR_BUFFER_BIT
//| GL_CURRENT_BIT
//| GL_ENABLE_BIT
//| GL_FOG_BIT
//| GL_PIXEL_MODE_BIT
//| GL_SCISSOR_BIT
//| GL_STENCIL_BUFFER_BIT
//| GL_TEXTURE_BIT
//GL_VIEWPORT_BIT
//;
if (gl_radar7954721_workaround_all.GetInt()!=0)
{
gGL->glPushAttrib( pushmask );
pushed++;
}
else
{
bool srcGamma = (srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
bool dstGamma = (dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
if (srcGamma != dstGamma)
{
if (gl_radar7954721_workaround_mixed.GetInt())
{
gGL->glPushAttrib( pushmask );
pushed++;
}
}
}
if (useBlitFB)
{
// state we need to save
@ -1354,8 +1323,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
attparams.m_zslice = 0;
m_blitReadFBO->TexAttach( &attparams, attachIndex, GL_READ_FRAMEBUFFER );
gGL->glReadBuffer( attachIndexGL );
gGL->glDrawBuffers( 1, &attachIndexGL );
// set the write fb and buffer, and attach write tex
BindFBOToCtx( m_blitDrawFBO, GL_DRAW_FRAMEBUFFER );
@ -1366,7 +1334,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
attparams.m_zslice = 0;
m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );
gGL->glDrawBuffer( attachIndexGL );
gGL->glDrawBuffers( 1, &attachIndexGL );
// do the blit
gGL->glBlitFramebuffer( srcRect->xmin, srcRect->ymin, srcRect->xmax, srcRect->ymax,
@ -1425,7 +1393,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
attparams.m_zslice = 0;
m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );
gGL->glDrawBuffer( attachIndexGL );
gGL->glDrawBuffers( 1, &attachIndexGL );
// attempt to just set states directly the way we want them, then use the latched states to repair them afterward.
NullProgram(); // out of program mode
@ -1456,22 +1424,21 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
// immediate mode is fine
#if 0 // Does it needed?
const float topv = 1.0;
const float botv = 0.0;
const float verts[] = {-1.f, -1.f, 1.f, -1.f, 1.f, 1.f, -1.f, 1.f};
const float verts_tex[] = {0.f, botv, 1.f, botv, 1.f, topv, 0.f, topv};
gGL->glEnableClientState(GL_VERTEX_ARRAY);
gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
gGL->glVertexPointer(2, GL_FLOAT, 0, verts);
gGL->glTexCoordPointer(2, GL_FLOAT, 0, verts_tex);
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
gGL->glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
gGL->glDisableClientState(GL_VERTEX_ARRAY);
gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
#endif
gGL->glBindTexture( GL_TEXTURE_2D, 0 );
@ -1509,12 +1476,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
}
while(pushed)
{
gGL->glPopAttrib();
pushed--;
}
RestoreSavedColorMask();
}
@ -1632,7 +1593,7 @@ void GLMContext::ResolveTex( CGLMTex *tex, bool forceDirty )
gGL->glFramebufferTexture2D( GL_DRAW_FRAMEBUFFER, attachIndexGL, GL_TEXTURE_2D, tex->m_texName, 0 );
}
gGL->glDrawBuffer( attachIndexGL );
gGL->glDrawBuffers( 1, &attachIndexGL );
//-----------------------------------------------------------------------------------
@ -2369,10 +2330,6 @@ void GLMContext::Present( CGLMTex *tex )
tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_EXCLAMATION, "VS Uniform Calls: %u, VS Uniforms: %u|VS Uniform Bone Calls: %u, VS Bone Uniforms: %u|PS Uniform Calls: %u, PS Uniforms: %u", m_nTotalVSUniformCalls, m_nTotalVSUniformsSet, m_nTotalVSUniformBoneCalls, m_nTotalVSUniformsBoneSet, m_nTotalPSUniformCalls, m_nTotalPSUniformsSet );
m_nTotalVSUniformCalls = 0, m_nTotalVSUniformBoneCalls = 0, m_nTotalVSUniformsSet = 0, m_nTotalVSUniformsBoneSet = 0, m_nTotalPSUniformCalls = 0, m_nTotalPSUniformsSet = 0;
#endif
#ifndef OSX
GLMGPUTimestampManagerTick();
#endif
}
//===============================================================================
@ -2845,7 +2802,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
if ( !pTex )
{
gGL->glBindTexture( GL_TEXTURE_1D, 0 );
gGL->glBindTexture( GL_TEXTURE_2D, 0 );
gGL->glBindTexture( GL_TEXTURE_3D, 0 );
gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
@ -2853,7 +2809,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
else
{
const GLenum texGLTarget = pTex->m_texGLTarget;
if ( texGLTarget != GL_TEXTURE_1D ) gGL->glBindTexture( GL_TEXTURE_1D, 0 );
if ( texGLTarget != GL_TEXTURE_2D ) gGL->glBindTexture( GL_TEXTURE_2D, 0 );
if ( texGLTarget != GL_TEXTURE_3D ) gGL->glBindTexture( GL_TEXTURE_3D, 0 );
if ( texGLTarget != GL_TEXTURE_CUBE_MAP ) gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
@ -3006,11 +2961,11 @@ void GLMContext::CleanupTex( GLenum texBind, GLMTexLayout* pLayout, GLuint tex )
const int dataSize = ( chunks * chunks ) * pLayout->m_format->m_bytesPerSquareChunk;
Assert( dataSize <= ( sizeof( uint32) * ARRAYSIZE( g_garbageTextureBits ) ) );
gGL->glCompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
CompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
}
else
{
gGL->glTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
TexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
}
}
@ -4412,8 +4367,7 @@ void GLMContext::DebugHook( GLMDebugHookInfo *info )
break;
case 2:
short fakecolor[4] = { 0, 0, 0, 0 };
gGL->glColor4sv( fakecolor ); // break to OGLP
// What the fuck?
break;
}
// re-flush all GLM states so you can fiddle with them in the debugger. then run the batch again and spin..
@ -4766,36 +4720,11 @@ void GLMContext::DrawDebugText( float x, float y, float z, float drawCharWidth,
gGL->glEnable(GL_TEXTURE_2D);
if (0)
{
gGL->glEnableClientState(GL_VERTEX_ARRAY);
gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
gGL->glVertexPointer( 3, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].x );
gGL->glClientActiveTexture(GL_TEXTURE0);
gGL->glTexCoordPointer( 2, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].u );
}
else
{
SetVertexAttributes( &vertSetup );
}
gGL->glDrawArrays( GL_QUADS, 0, stringlen * 4 );
// disable all the input streams
if (0)
{
gGL->glDisableClientState(GL_VERTEX_ARRAY);
gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
else
{
SetVertexAttributes( NULL );
}
gGL->glDisable(GL_TEXTURE_2D);
@ -5288,7 +5217,7 @@ void GLMTester::StdSetup( void )
gGL->glScissor( 0,0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
CheckGLError("stdsetup scissor");
gGL->glOrtho( -1,1, -1,1, -1,1 );
//gGL->glOrtho( -1,1, -1,1, -1,1 );
CheckGLError("stdsetup ortho");
// activate debug font
@ -5331,7 +5260,7 @@ void GLMTester::Clear( void )
gGL->glViewport(0, 0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
gGL->glScissor( 0,0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
gGL->glOrtho( -1,1, -1,1, -1,1 );
//gGL->glOrtho( -1,1, -1,1, -1,1 );
CheckGLError("clearing viewport");
// clear to black
@ -6000,7 +5929,9 @@ void GLMTester::Test3( void )
void GLMTriggerDebuggerBreak()
{
	// Debugger hook: glColor4sv is an obscure entry point that, per the original
	// note, is known to be breakpointed in the OGLP function list, so issuing
	// it hands control to the profiler/debugger. The argument values are irrelevant.
	static signed short s_breakArgs[] = { -1, -1, -1, -1 };
	gGL->glColor4sv( s_breakArgs );
}
#endif

View File

@ -1,5 +1,7 @@
// BE VERY VERY CAREFUL what you do in these function. They are extremely hot, and calling the wrong GL API's in here will crush perf. (especially on NVidia threaded drivers).
#include "togles/linuxwin/glmgr.h"
FORCEINLINE uint32 bitmix32(uint32 a)
{
a -= (a<<6);
@ -433,7 +435,6 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
}
}
// see if VS uses i0, b0, b1, b2, b3.
// use a glUniform1i to set any one of these if active. skip all of them if no dirties reported.
// my kingdom for the UBO extension!
@ -478,6 +479,15 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
}
}
if( m_pBoundPair->m_locAlphaRef )
{
if( !m_AlphaTestEnable.GetData().enable )
gGL->glUniform1f( m_pBoundPair->m_locAlphaRef, 0.0 );
else
gGL->glUniform1f( m_pBoundPair->m_locAlphaRef, m_AlphaTestFunc.GetData().ref );
}
Assert( ( m_pDevice->m_streams[0].m_vtxBuffer && ( m_pDevice->m_streams[0].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[0] ) ) || ( ( !m_pDevice->m_streams[0].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[0] == m_pDevice->m_pDummy_vtx_buffer ) ) );
Assert( ( m_pDevice->m_streams[1].m_vtxBuffer && ( m_pDevice->m_streams[1].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[1] ) ) || ( ( !m_pDevice->m_streams[1].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[1] == m_pDevice->m_pDummy_vtx_buffer ) ) );
Assert( ( m_pDevice->m_streams[2].m_vtxBuffer && ( m_pDevice->m_streams[2].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[2] ) ) || ( ( !m_pDevice->m_streams[2].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[2] == m_pDevice->m_pDummy_vtx_buffer ) ) );

View File

@ -3116,624 +3116,13 @@ void GLMSetIndent( int indent )
char sg_pPIXName[128];
#ifndef OSX
ConVar gl_telemetry_gpu_pipeline_flushing( "gl_telemetry_gpu_pipeline_flushing", "0" );
class CGPUTimestampManager
{
CGPUTimestampManager( const CGPUTimestampManager & );
CGPUTimestampManager& operator= ( CGPUTimestampManager & );
public:
CGPUTimestampManager() :
m_bInitialized( false ),
m_nCurFrame( 0 ),
m_flGPUToCPUOffsetInS( 0 ),
m_flGPUToS( 0 ),
m_flRdtscToS( 0 ),
m_flSToRdtsc( 0 ),
m_nFreeQueryPoolSize( 0 ),
m_nOutstandingQueriesHead( 0 ),
m_nOutstandingQueriesTail( 0 ),
m_nNumOutstandingQueryZones( 0 ),
m_nQueryZoneStackSize( 0 ),
m_nNumFinishedZones( 0 ),
m_nTotalSpanWorkCount( 0 )
{
memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
}
~CGPUTimestampManager()
{
Deinit();
}
inline bool IsInitialized() const { return m_bInitialized; }
inline uint GetCurFrame() const { return m_nCurFrame; }
void Init()
{
Deinit();
memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
InitRdtsc();
m_nCurFrame = 0;
gGL->glGenQueries( cFreeQueryPoolSize, m_FreeQueryPool );
m_nFreeQueryPoolSize = cFreeQueryPoolSize;
m_nOutstandingQueriesHead = 0;
m_nOutstandingQueriesTail = 0;
m_nNumOutstandingQueryZones = 0;
m_nQueryZoneStackSize = 0;
m_nNumFinishedZones = 0;
m_bInitialized = true;
m_nTotalSpanWorkCount = 0;
Calibrate();
}
void Calibrate()
{
if ( !m_bInitialized )
return;
PipelineFlush();
m_flGPUToS = 1.0 / 1000000000.0;
//0.99997541250006794;
//0.99997530000006662;
// Correction factor to prevent excessive drift, only calibrated on my system, we need a better way of computing/recording this.
double flGPURatio = 0.99997425000007034000;
const uint NT = 1;
for ( uint nTrial = 0; nTrial < NT; nTrial++ )
{
const uint R = 16;
double flClockOffsetsInS[R];
for ( uint q = 0; q < R; q++)
{
uint64 nBestTotalCPUTimestamp = (uint64)-1;
uint64 nBestCPUTimestamp = 0;
GLuint64 nBestGPUTimestamp = 0;
for ( uint i = 0; i < 10; i++)
{
const uint64 nStartCPUTimestamp = Plat_Rdtsc();
gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);
PipelineFlush();
const uint64 nEndCPUTimestamp = Plat_Rdtsc();
GLint nAvailable;
do
{
gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable );
} while ( !nAvailable );
GLuint64 nGPUTimestamp;
gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTimestamp );
const uint64 nTotalCPUTimestamp = nEndCPUTimestamp - nStartCPUTimestamp;
if ( nTotalCPUTimestamp < nBestTotalCPUTimestamp )
{
nBestTotalCPUTimestamp = nTotalCPUTimestamp;
nBestCPUTimestamp = nStartCPUTimestamp;
nBestGPUTimestamp = nGPUTimestamp;
}
}
double flCPUTimestampTimeInSeconds = nBestCPUTimestamp * m_flRdtscToS;
double flGPUTimestampTimeInSeconds = nBestGPUTimestamp * m_flGPUToS * flGPURatio;
flClockOffsetsInS[q] = flCPUTimestampTimeInSeconds - flGPUTimestampTimeInSeconds;
ThreadSleep(100);
DbgPrintf("%f %f %1.20f\n", flCPUTimestampTimeInSeconds, flGPUTimestampTimeInSeconds, flClockOffsetsInS[q] );
}
m_flGPUToCPUOffsetInS = 0.0f;
for ( uint i = 0; i < R; i++ )
m_flGPUToCPUOffsetInS += flClockOffsetsInS[i];
m_flGPUToCPUOffsetInS /= R;
if ( NT > 1 )
{
DbgPrintf("------- Ratio: %2.20f\n", flGPURatio );
double flDelta = flClockOffsetsInS[0] - flClockOffsetsInS[R - 1];
DbgPrintf("------- %1.20f\n", flDelta );
#if 1
if ( flDelta < 0.0000005f )
{
flGPURatio += .000000125f;
}
else if ( flDelta > 0.0000005f )
{
flGPURatio -= .000000125f;
}
#else
if ( flDelta < 0.0000005f )
{
flGPURatio += .0000000125f;
}
else if ( flDelta > 0.0000005f )
{
flGPURatio -= .0000000125f;
}
#endif
}
}
m_flGPUToS *= flGPURatio;
#if 0
// dump drift over time to debugger output
double flLatency = 0;
for ( ; ; )
{
// test
const uint64 nStartCPUTime = Plat_Rdtsc();
gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);
PipelineFlush();
GLint nAvailable;
do
{
gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable );
} while ( !nAvailable );
GLuint64 nGPUTime;
gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTime );
double flStartGPUTime = ( ( nGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS );
flLatency = flStartGPUTime - nStartCPUTime * m_flRdtscToS;
DbgPrintf("%f\n", flLatency );
}
#endif
}
void Deinit()
{
if ( !m_bInitialized )
return;
if ( m_nFreeQueryPoolSize )
{
gGL->glDeleteQueries( m_nFreeQueryPoolSize, m_FreeQueryPool );
}
m_nFreeQueryPoolSize = 0;
for ( uint i = 0; i < m_nNumOutstandingQueryZones; i++ )
{
QueryZone_t &query = m_OutstandingQueryZones[ ( m_nOutstandingQueriesHead + i ) % cMaxQueryZones ];
if ( query.m_nBeginQuery )
{
gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
}
if ( query.m_nEndQuery )
{
gGL->glDeleteQueries( 1, &query.m_nEndQuery );
}
}
m_nOutstandingQueriesHead = 0;
m_nOutstandingQueriesTail = 0;
m_nNumOutstandingQueryZones = 0;
for ( uint i = 0; i < m_nQueryZoneStackSize; i++ )
{
QueryZone_t &query = m_QueryZoneStack[i];
if ( query.m_nBeginQuery )
{
gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
}
if ( query.m_nEndQuery )
{
gGL->glDeleteQueries( 1, &query.m_nEndQuery );
}
}
m_nQueryZoneStackSize = 0;
m_flGPUToCPUOffsetInS = 0;
m_flGPUToS = 0;
m_flRdtscToS = 0;
m_flSToRdtsc = 0;
m_bInitialized = false;
}
// pName is assumed to be a telemetry dynamic string!
void BeginZone( const char *pName )
{
if ( !m_bInitialized )
return;
if ( m_nQueryZoneStackSize >= cMaxQueryZoneStackSize )
{
Panic( "Increase cMaxQueryZoneStackSize!" );
}
QueryZone_t &zone = m_QueryZoneStack[m_nQueryZoneStackSize];
zone.m_pName = pName;
zone.m_nBeginQuery = AllocQueryHandle();
zone.m_nEndQuery = 0;
zone.m_nStackLevel = m_nQueryZoneStackSize;
zone.m_nTotalGPUWorkCount = g_nTotalDrawsOrClears;
#if GL_TELEMETRY_GPU_ZONES
zone.m_nTotalGPUWorkCount += g_TelemetryGPUStats.GetTotal();
#endif
gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nBeginQuery, GL_TIMESTAMP );
m_nQueryZoneStackSize++;
}
void EndZone()
{
if ( !m_bInitialized )
return;
if ( ( !m_nQueryZoneStackSize ) || ( m_nNumOutstandingQueryZones == cMaxQueryZones ) )
{
Panic( "Query zone error!" );
}
m_nQueryZoneStackSize--;
uint nCurGPUWorkCount = g_nTotalDrawsOrClears;
#if GL_TELEMETRY_GPU_ZONES
nCurGPUWorkCount += g_TelemetryGPUStats.GetTotal();
#endif
uint nTotalDraws = nCurGPUWorkCount - m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount;
m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery = AllocQueryHandle();
gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery, GL_TIMESTAMP );
m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount = nTotalDraws;
m_OutstandingQueryZones[m_nOutstandingQueriesHead] = m_QueryZoneStack[m_nQueryZoneStackSize];
m_nOutstandingQueriesHead = ( m_nOutstandingQueriesHead + 1 ) % cMaxQueryZones;
m_nNumOutstandingQueryZones++;
COMPILE_TIME_ASSERT( ( int )cMaxQueryZones > ( int )cMaxQueryZoneStackSize );
if ( m_nNumOutstandingQueryZones >= ( cMaxQueryZones - cMaxQueryZoneStackSize ) )
{
tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_NOTE | TMMF_SEVERITY_WARNING, "CGPUTimestampManager::EndZone: Too many outstanding query zones - forcing a pipeline flush! This is probably expensive." );
FlushOutstandingQueries( true );
}
if ( gl_telemetry_gpu_pipeline_flushing.GetBool() )
{
PipelineFlush();
}
}
void Tick()
{
m_nCurFrame++;
if ( !m_bInitialized )
return;
if ( m_nQueryZoneStackSize > 0 )
{
Panic( "Zone stack is not empty!" );
}
FlushOutstandingQueries( false );
tmMessage( TELEMETRY_LEVEL2, 0, "Total PIX timespan GPU work count: %u", m_nTotalSpanWorkCount );
m_nTotalSpanWorkCount = 0;
}
void FlushOutstandingQueries( bool bForce )
{
tmZone( TELEMETRY_LEVEL2, 0, "FlushOutstandingQueries: %u", m_nNumOutstandingQueryZones );
if ( bForce )
{
PipelineFlush();
}
while ( m_nNumOutstandingQueryZones )
{
QueryZone_t &zone = m_OutstandingQueryZones[m_nOutstandingQueriesTail];
GLint nEndAvailable = 0;
do
{
gGL->glGetQueryObjectiv( zone.m_nEndQuery, GL_QUERY_RESULT_AVAILABLE, &nEndAvailable );
} while ( ( bForce ) && ( nEndAvailable == 0 ) );
if ( !nEndAvailable )
{
if ( bForce )
{
Panic( "Query results not available after a full pipeline flush!" );
}
break;
}
GLuint64 nBeginGPUTime, nEndGPUTime;
gGL->glGetQueryObjectui64v( zone.m_nBeginQuery, GL_QUERY_RESULT, &nBeginGPUTime );
gGL->glGetQueryObjectui64v( zone.m_nEndQuery, GL_QUERY_RESULT, &nEndGPUTime );
ReleaseQueryHandle( zone.m_nBeginQuery );
zone.m_nBeginQuery = 0;
ReleaseQueryHandle( zone.m_nEndQuery );
zone.m_nEndQuery = 0;
if ( m_nNumFinishedZones >= cMaxQueryZones )
{
Panic( "Too many finished zones!" );
}
FinishedQueryZone_t &finishedZone = m_FinishedZones[m_nNumFinishedZones];
finishedZone.m_pName = zone.m_pName;
finishedZone.m_nBeginGPUTime = nBeginGPUTime;
finishedZone.m_nEndGPUTime = nEndGPUTime;
finishedZone.m_nStackLevel = zone.m_nStackLevel;
finishedZone.m_nTotalGPUWorkCount = zone.m_nTotalGPUWorkCount;
m_nNumFinishedZones++;
if ( !zone.m_nStackLevel )
{
std::sort( m_FinishedZones, m_FinishedZones + m_nNumFinishedZones );
FlushFinishedZones();
m_nNumFinishedZones = 0;
}
m_nOutstandingQueriesTail = ( m_nOutstandingQueriesTail + 1 ) % cMaxQueryZones;
m_nNumOutstandingQueryZones--;
}
}
private:
bool m_bInitialized;
uint m_nCurFrame;
double m_flGPUToCPUOffsetInS;
double m_flGPUToS;
double m_flRdtscToS;
double m_flSToRdtsc;
enum { cMaxQueryZones = 4096, cFreeQueryPoolSize = cMaxQueryZones * 2 };
GLuint m_FreeQueryPool[cFreeQueryPoolSize ];
uint m_nFreeQueryPoolSize;
GLuint AllocQueryHandle()
{
if ( !m_nFreeQueryPoolSize )
{
Panic( "Out of query handles!");
}
return m_FreeQueryPool[--m_nFreeQueryPoolSize];
}
void ReleaseQueryHandle( GLuint nHandle )
{
if ( m_nFreeQueryPoolSize >= cFreeQueryPoolSize )
{
Panic( "Query handle error!" );
}
m_FreeQueryPool[m_nFreeQueryPoolSize++] = nHandle;
}
struct QueryZone_t
{
const char *m_pName;
GLuint m_nBeginQuery;
GLuint m_nEndQuery;
uint m_nStackLevel;
uint m_nTotalGPUWorkCount;
};
QueryZone_t m_OutstandingQueryZones[cMaxQueryZones];
uint m_nOutstandingQueriesHead; // index of first outstanding query (oldest)
uint m_nOutstandingQueriesTail; // index where next query goes (newest)
uint m_nNumOutstandingQueryZones;
enum { cMaxQueryZoneStackSize = 256 };
QueryZone_t m_QueryZoneStack[cMaxQueryZoneStackSize];
uint m_nQueryZoneStackSize;
struct FinishedQueryZone_t
{
const char *m_pName;
GLuint64 m_nBeginGPUTime;
GLuint64 m_nEndGPUTime;
uint m_nStackLevel;
uint m_nTotalGPUWorkCount;
inline bool operator< ( const FinishedQueryZone_t &rhs ) const
{
if ( m_nBeginGPUTime == rhs.m_nBeginGPUTime)
return m_nStackLevel < rhs.m_nStackLevel;
return m_nBeginGPUTime < rhs.m_nBeginGPUTime;
}
};
FinishedQueryZone_t m_FinishedZones[cMaxQueryZones];
uint m_nNumFinishedZones;
uint m_nTotalSpanWorkCount;
void InitRdtsc()
{
m_flRdtscToS = 0.0f;
m_flSToRdtsc = 0.0f;
for ( uint i = 0; i < 10; i++ )
{
uint64 t0 = Plat_Rdtsc();
double d0 = Plat_FloatTime();
ThreadSleep( 250 );
uint64 t1 = Plat_Rdtsc();
double d1 = Plat_FloatTime();
double flRdtscToS = ( d1 - d0 ) / ( t1 - t0 );
double flSToRdtsc = ( t1 - t0 ) / ( d1 - d0 );
if ( flSToRdtsc > m_flSToRdtsc )
{
m_flRdtscToS = flRdtscToS;
m_flSToRdtsc = flSToRdtsc;
}
}
}
// Inserts a fence into the GL command stream and waits for it on the CPU.
// The wait is bounded by the timeout passed to glClientWaitSync (300000000000, which
// the OpenGL reference defines in nanoseconds). Compiles to a no-op when
// HAVE_GL_ARB_SYNC is not defined.
void PipelineFlush()
{
#ifdef HAVE_GL_ARB_SYNC
	GLsync nFence = gGL->glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
	if ( !nFence )
		return;

	gGL->glClientWaitSync( nFence, GL_SYNC_FLUSH_COMMANDS_BIT, 300000000000ULL );
	gGL->glDeleteSync( nFence );
#endif
}
// Emits one telemetry time span labelled "<pName> [C:<nTotalDraws>]".
// nStartGPUTime / nEndGPUTime are passed straight to tmBeginTimeSpanAt / tmEndTimeSpanAt;
// the caller in this class supplies them already converted to rdtsc ticks.
// A NULL pName is replaced by an empty string.
inline void NewTimeSpan( uint64 nStartGPUTime, uint64 nEndGPUTime, const char *pName, uint nTotalDraws )
{
// apparently we must use level0 for timespans?
tmBeginTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nStartGPUTime, "%s [C:%u]", pName ? pName : "", nTotalDraws );
tmEndTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nEndGPUTime, "%s [C:%u]", pName ? pName : "", nTotalDraws );
}
void FlushFinishedZones()
{
for ( uint i = 0; i < m_nNumFinishedZones; i++ )
{
FinishedQueryZone_t &zone = m_FinishedZones[i];
if ( !zone.m_nTotalGPUWorkCount )
continue;
bool bEmit = false;
if ( i == ( m_nNumFinishedZones - 1 ) )
bEmit = true;
else
{
FinishedQueryZone_t &nextZone = m_FinishedZones[i + 1];
bEmit = zone.m_nEndGPUTime <= nextZone.m_nBeginGPUTime;
}
if ( bEmit )
{
uint64 nStartGPUTime = ( ( zone.m_nBeginGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
uint64 nEndGPUTime = ( ( zone.m_nEndGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
NewTimeSpan( nStartGPUTime, nEndGPUTime, zone.m_pName, zone.m_nTotalGPUWorkCount );
m_nTotalSpanWorkCount += zone.m_nTotalGPUWorkCount;
}
}
}
// Error path for the timestamp manager: triggers DXABSTRACT_BREAK_ON_ERROR() and then
// reports pMsg through Error(). pMsg is passed as an argument to a "%s" format, so it
// is never interpreted as a format string itself.
void Panic( const char *pMsg )
{
DXABSTRACT_BREAK_ON_ERROR();
Error( "%s", pMsg );
}
// printf-style debug output.
// The message is formatted into a 1024-byte stack buffer with V_vsnprintf and then
// sent to OutputDebugStringA on WIN32, or to stdout via printf elsewhere.
static void DbgPrintf( const char *pFmt, ... )
{
	char szMessage[1024];

	va_list args;
	va_start( args, pFmt );
	V_vsnprintf( szMessage, sizeof( szMessage ), pFmt, args );
	va_end( args );

#ifdef WIN32
	OutputDebugStringA( szMessage );
#else
	printf( "%s", szMessage );
#endif
}
};
static CGPUTimestampManager g_GPUTimestampManager;
// Forwards to Init() on the file-scope g_GPUTimestampManager instance.
void GLMGPUTimestampManagerInit()
{
g_GPUTimestampManager.Init();
}
// Forwards to Deinit() on the file-scope g_GPUTimestampManager instance.
void GLMGPUTimestampManagerDeinit()
{
g_GPUTimestampManager.Deinit();
}
ConVar gl_telemetry_gpu( "gl_telemetry_gpu", "0" );
static bool g_bPrevTelemetryGPU;
// Per-tick driver for the GPU timestamp manager.
// Watches the gl_telemetry_gpu convar: when its value changes, the manager is
// deinitialized (convar turned off) or initialized (convar turned on). If the build
// lacks PIX_ENABLE or GL_TELEMETRY_GPU_ZONES, turning the convar on only prints a
// console message. The manager's Tick() runs on every call regardless.
void GLMGPUTimestampManagerTick()
{
	// Read the convar once so the comparison and the stored state agree.
	const bool bWantTelemetry = gl_telemetry_gpu.GetBool();

	if ( g_bPrevTelemetryGPU != bWantTelemetry )
	{
		if ( !bWantTelemetry )
		{
			g_GPUTimestampManager.Deinit();
		}
		else
		{
#if !PIX_ENABLE || !GL_TELEMETRY_GPU_ZONES
			// Terminate the line so the next console message does not run on from this one.
			ConMsg( "Must define PIX_ENABLE and GL_TELEMETRY_GPU_ZONES to use this feature\n" );
#else
			g_GPUTimestampManager.Init();
#endif
		}

		g_bPrevTelemetryGPU = bWantTelemetry;
	}

	g_GPUTimestampManager.Tick();
}
#endif // !OSX
static uint g_nPIXEventIndex;
void GLMBeginPIXEvent( const char *str )
{
#ifndef OSX
char szName[1024];
V_snprintf( szName, sizeof( szName ), "[ID:%u FR:%u] %s", g_nPIXEventIndex, g_GPUTimestampManager.GetCurFrame(), str );
const char *p = tmDynamicString( TELEMETRY_LEVEL2, szName ); //p can be null if tm is getting shut down
tmEnter( TELEMETRY_LEVEL2, TMZF_NONE, "PIX %s", p ? p : "" );
g_nPIXEventIndex++;
g_GPUTimestampManager.BeginZone( p );
#endif // !OSX
V_strncpy( sg_pPIXName, str, 128 );
#if defined( OSX ) && defined( CGLPROFILER_ENABLE )
@ -3748,10 +3137,6 @@ void GLMBeginPIXEvent( const char *str )
void GLMEndPIXEvent( void )
{
#ifndef OSX
g_GPUTimestampManager.EndZone();
#endif
#if defined( OSX ) && defined( CGLPROFILER_ENABLE )
CGLSetOption( kCGLGOComment, (GLint)sg_pPIXName );
#endif

View File

@ -0,0 +1,624 @@
// stb_dxt.h - v1.04 - DXT1/DXT5 compressor - public domain
// original by fabian "ryg" giesen - ported to C by stb
// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
//
// USAGE:
// call stb_compress_dxt_block() for every block (you must pad)
// source should be a 4x4 block of RGBA data in row-major order;
// A is ignored if you specify alpha=0; you can turn on dithering
// and "high quality" using mode.
//
// version history:
// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
// single color match fix (allow for inexact color interpolation);
// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
// v1.03 - (stb) endianness support
// v1.02 - (stb) fix alpha encoding bug
// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
// v1.00 - (stb) first release
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H
// compression mode (bitflags)
#define STB_DXT_NORMAL 0
#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like!
#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode);
#define STB_COMPRESS_DXT_BLOCK
#ifdef STB_DXT_IMPLEMENTATION
// configuration options for DXT encoder. set them in the project/makefile or just define
// them at the top.
// STB_DXT_USE_ROUNDING_BIAS
// use a rounding bias during color interpolation. this is closer to what "ideal"
// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
// implicitly had this turned on.
//
// in case you're targeting a specific type of hardware (e.g. console programmers):
// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
// you also see "(a*5 + b*3) / 8" on some old GPU designs.
// #define STB_DXT_USE_ROUNDING_BIAS
#include <stdlib.h>
#include <math.h>
#include <string.h> // memset
// Lookup tables; all of them are filled in by stb__InitDXT().
static unsigned char stb__Expand5[32]; // 5-bit channel value -> 8-bit value, (i<<3)|(i>>2)
static unsigned char stb__Expand6[64]; // 6-bit channel value -> 8-bit value, (i<<2)|(i>>4)
static unsigned char stb__OMatch5[256][2]; // per 8-bit value: 5-bit endpoint pair ([0] used for max16, [1] for min16) from stb__PrepareOptTable
static unsigned char stb__OMatch6[256][2]; // same as stb__OMatch5, for the 6-bit (green) channel
static unsigned char stb__QuantRBTab[256+16]; // entry i holds clamp(i-8, 0, 255) quantized to 5 bits and expanded back to 8 bits
static unsigned char stb__QuantGTab[256+16]; // entry i holds clamp(i-8, 0, 255) quantized to 6 bits and expanded back to 8 bits
// Division-free approximation of a*b/255 with rounding:
// adds a bias of 128, then folds the high byte back in before the final shift.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int carry = biased >> 8;
   return (biased + carry) >> 8;
}
// Unpacks a 5:6:5 color into four bytes at `out`:
// out[0..2] receive the red/green/blue fields expanded to 8 bits through the
// stb__Expand5 / stb__Expand6 tables, out[3] is set to 0.
static void stb__From16Bit(unsigned char *out, unsigned short v)
{
   const int r5 = (v >> 11) & 0x1f;
   const int g6 = (v >> 5) & 0x3f;
   const int b5 = v & 0x1f;

   out[0] = stb__Expand5[r5];
   out[1] = stb__Expand6[g6];
   out[2] = stb__Expand5[b5];
   out[3] = 0;
}
// Packs an 8-bit-per-channel color into 5:6:5. Each channel is rescaled with
// stb__Mul8Bit (to 0..31 for red/blue, 0..63 for green) before being shifted into place.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r,31);
   const int g6 = stb__Mul8Bit(g,63);
   const int b5 = stb__Mul8Bit(b,31);
   return (unsigned short) ((r5 << 11) + (g6 << 5) + b5);
}
// Linear interpolation at the 1/3 point between a and b (result is 2/3 a + 1/3 b).
// The rounding behavior is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // with rounding bias: a plus roughly one third (0x55/255) of the distance to b
   const int delta = b - a;
   return a + stb__Mul8Bit(delta, 0x55);
#else
   // without rounding bias: plain truncating integer division.
   // ("/ 3" can be replaced by "* 0xaaab) >> 17" if the division is too slow.)
   const int weighted = a + a + b;
   return weighted / 3;
#endif
}
// Applies stb__Lerp13 to the first three bytes (R, G, B) of p1 and p2 and stores the
// results in out[0..2]. A fourth byte, if present, is left untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = stb__Lerp13(p1[ch], p2[ch]);
}
/****************************************************************************/
// Builds the table used to reproduce constant colors as accurately as possible.
// For every 8-bit target value, all (min,max) pairs of quantized levels are tried and
// the pair whose 1/3 interpolation lands closest to the target wins.
//   Table  - receives 256 pairs: Table[2*v+0] = max index, Table[2*v+1] = min index
//   expand - maps a quantized level to its 8-bit value
//   size   - number of quantized levels (entries in `expand`)
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;
   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowest = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];
         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];
            int cost = abs(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of the "correct"
            // result, so the endpoint spread is added as an error term. (The spec
            // does not promise the error is unbiased - better safe than sorry.)
            cost += abs(hiVal - loVal) * 3 / 100;

            // Strict comparison: on ties the first pair found is kept.
            if (cost >= lowest)
               continue;

            entry[0] = hi;
            entry[1] = lo;
            lowest = cost;
         }
      }
   }
}
// Expands two 5:6:5 endpoints into the 4-entry palette stored in `color`
// (4 bytes per entry):
//   entry 0 = c0, entry 1 = c1,
//   entry 2 = 1/3 of the way from c0 to c1, entry 3 = 1/3 of the way from c1 to c0.
// Only the RGB bytes of entries 2 and 3 are written.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *end0 = color;
   unsigned char *end1 = color + 4;

   stb__From16Bit(end0, c0);
   stb__From16Bit(end1, c1);
   stb__Lerp13RGB(color + 8, end0, end1);
   stb__Lerp13RGB(color + 12, end1, end0);
}
// Block dithering function. Simply dithers a block to 565 RGB.
// (Floyd-Steinberg)
//   dest  - receives the dithered 4x4 block, 4 bytes per pixel; only the R, G and B
//           bytes are written (the loop covers channels 0..2)
//   block - source 4x4 block, 4 bytes per pixel, row-major
static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
{
// err holds two rows of per-pixel quantization error: ep1 = row being written,
// ep2 = row written on the previous iteration.
int err[8],*ep1 = err,*ep2 = err+4, *et;
int ch,y;
// process channels separately
for (ch=0; ch<3; ++ch) {
unsigned char *bp = block+ch, *dp = dest+ch;
// The tables are indexed with an offset of 8 so that the diffused error term can move
// the index somewhat outside 0..255 (the tables have 256+16 entries).
unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
memset(err, 0, sizeof(err));
for(y=0; y<4; ++y) {
// Each pixel: quantize (value + weighted neighbor errors / 16), then record the new error.
// Weights: 7 = left neighbor, 3/5/1 = neighbors in the previous row.
dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
ep1[0] = bp[ 0] - dp[ 0];
dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
ep1[1] = bp[ 4] - dp[ 4];
dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
ep1[2] = bp[ 8] - dp[ 8];
dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
ep1[3] = bp[12] - dp[12];
// advance to the next row of 4 pixels
bp += 16;
dp += 16;
et = ep1, ep1 = ep2, ep2 = et; // swap
}
}
}
// The color matching function.
// Assigns each of the 16 pixels in `block` (4 bytes per pixel, RGB used) to one of the
// 4 palette entries in `color` (4 bytes per entry, as produced by stb__EvalColors).
//   dither - non-zero enables Floyd-Steinberg error diffusion of the projected values
// Returns the 32-bit index mask: 2 bits per pixel, pixel 0 in the least significant bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   // project every pixel and every palette entry onto the c0 - c1 axis
   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if(!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--) {
         int dot = dots[i];
         mask <<= 2;

         if(dot < halfPoint)
            mask |= (dot < c0Point) ? 1 : 3;
         else
            mask |= (dot < c3Point) ? 2 : 0;
      }
   } else {
      // with floyd-steinberg dithering
      int err[8],*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // The projections can be negative, and left-shifting a negative int is undefined
      // behavior, so the fixed-point scaling by 16 is done with a multiply.
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;
      for(i=0;i<8;i++)
         err[i] = 0;

      for(y=0;y<4;y++)
      {
         // The row mask is unsigned: for y == 3 it is shifted up to bit 31, which would
         // overflow a signed int.
         unsigned int lmask;
         int dot,step;

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int) step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int) step<<2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int) step<<4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int) step<<6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
// The color optimization function. (Clever code, part 1)
// Picks two 5:6:5 endpoint colors for a 4x4 block (4 bytes per pixel, RGB used):
// estimates the principal axis of the pixel colors, projects every pixel onto it and
// takes the two extreme pixels. Results are written to *pmax16 and *pmin16.
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
// mind/maxd start at sentinels beyond any achievable projection, so the first pixel
// visited in the final loop always assigns both minp and maxp.
int mind = 0x7fffffff,maxd = -0x7fffffff;
unsigned char *minp, *maxp;
double magn;
int v_r,v_g,v_b;
static const int nIterPower = 4;
float covf[6],vfr,vfg,vfb;
// determine color distribution
int cov[6];
int mu[3],min[3],max[3];
int ch,i,iter;
// per channel: rounded mean of the 16 samples plus their min and max
for(ch=0;ch<3;ch++)
{
const unsigned char *bp = ((const unsigned char *) block) + ch;
int muv,minv,maxv;
muv = minv = maxv = bp[0];
for(i=4;i<64;i+=4)
{
muv += bp[i];
if (bp[i] < minv) minv = bp[i];
else if (bp[i] > maxv) maxv = bp[i];
}
mu[ch] = (muv + 8) >> 4;
min[ch] = minv;
max[ch] = maxv;
}
// determine covariance matrix
// (symmetric 3x3, stored as 6 values: rr, rg, rb, gg, gb, bb)
for (i=0;i<6;i++)
cov[i] = 0;
for (i=0;i<16;i++)
{
int r = block[i*4+0] - mu[0];
int g = block[i*4+1] - mu[1];
int b = block[i*4+2] - mu[2];
cov[0] += r*r;
cov[1] += r*g;
cov[2] += r*b;
cov[3] += g*g;
cov[4] += g*b;
cov[5] += b*b;
}
// convert covariance matrix to float, find principal axis via power iter
for(i=0;i<6;i++)
covf[i] = cov[i] / 255.0f;
// start vector: the per-channel range (max - min)
vfr = (float) (max[0] - min[0]);
vfg = (float) (max[1] - min[1]);
vfb = (float) (max[2] - min[2]);
for(iter=0;iter<nIterPower;iter++)
{
float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
vfr = r;
vfg = g;
vfb = b;
}
// magn = largest absolute component of the iterated vector
magn = fabs(vfr);
if (fabs(vfg) > magn) magn = fabs(vfg);
if (fabs(vfb) > magn) magn = fabs(vfb);
if(magn < 4.0f) { // too small, default to luminance
v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
v_g = 587;
v_b = 114;
} else {
// rescale so that the largest component has magnitude 512, then truncate to int
magn = 512.0 / magn;
v_r = (int) (vfr * magn);
v_g = (int) (vfg * magn);
v_b = (int) (vfb * magn);
}
// Pick colors at extreme points
for(i=0;i<16;i++)
{
int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;
if (dot < mind) {
mind = dot;
minp = block+i*4;
}
if (dot > maxd) {
maxd = dot;
maxp = block+i*4;
}
}
*pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
*pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
// Converts y to int (truncating toward zero) and clamps the result to [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int truncated = (int) y;
   if (truncated < p0)
      return p0;
   return (truncated > p1) ? p1 : truncated;
}
// The refinement function. (Clever code, part 2)
// Tries to optimize colors to suit block contents better.
// (By solving a least squares system via normal equations+Cramer's rule)
//   block          - 4x4 pixel block, 4 bytes per pixel (RGB used)
//   pmax16/pmin16  - in: current 5:6:5 endpoints; out: refined endpoints
//   mask           - current 2-bit-per-pixel index mask, pixel 0 in the low bits
// Returns non-zero when either endpoint changed.
static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
{
// w1Tab: weight (in thirds) of the max endpoint for each palette index.
static const int w1Tab[4] = { 3,0,2,1 };
// prods: for each index, (w1*w1)<<16 | (w2*w2)<<8 | (w1*w2) with w2 = 3 - w1.
static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
// ^some magic to save a lot of multiplies in the accumulating loop...
// (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
float frb,fg;
unsigned short oldMin, oldMax, min16, max16;
int i, akku = 0, xx,xy,yy;
int At1_r,At1_g,At1_b;
int At2_r,At2_g,At2_b;
unsigned int cm = mask;
oldMin = *pmin16;
oldMax = *pmax16;
if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
{
// yes, linear system would be singular; solve using optimal
// single-color match on average color
// (the accumulators start at 8 so that the >> 4 below rounds to nearest)
int r = 8, g = 8, b = 8;
for (i=0;i<16;++i) {
r += block[i*4+0];
g += block[i*4+1];
b += block[i*4+2];
}
r >>= 4; g >>= 4; b >>= 4;
max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
} else {
// At1_* accumulates w1 * color, At2_* accumulates color (turned into w2 * color below)
At1_r = At1_g = At1_b = 0;
At2_r = At2_g = At2_b = 0;
for (i=0;i<16;++i,cm>>=2) {
int step = cm&3;
int w1 = w1Tab[step];
int r = block[i*4+0];
int g = block[i*4+1];
int b = block[i*4+2];
akku += prods[step];
At1_r += w1*r;
At1_g += w1*g;
At1_b += w1*b;
At2_r += r;
At2_g += g;
At2_b += b;
}
// sum(w2 * c) = sum((3 - w1) * c) = 3*sum(c) - sum(w1 * c)
At2_r = 3*At2_r - At1_r;
At2_g = 3*At2_g - At1_g;
At2_b = 3*At2_b - At1_b;
// extract solutions and decide solvability
xx = akku >> 16;
yy = (akku >> 8) & 0xff;
xy = (akku >> 0) & 0xff;
// scale factors: 1/determinant combined with the conversion from thirds of 0..255
// to 0..31 (red/blue) and 0..63 (green)
frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
fg = frb * 63.0f / 31.0f;
// solve.
max16 = stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
max16 |= stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
max16 |= stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
min16 = stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
min16 |= stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
min16 |= stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
}
*pmin16 = min16;
*pmax16 = max16;
return oldMin != min16 || oldMax != max16;
}
// Color block compression.
// Encodes the RGB part of a 4x4 block into the 8-byte DXT color block at `dest`:
// two little-endian 5:6:5 endpoints followed by the 32-bit index mask.
//   block - 16 pixels, 4 bytes per pixel, row-major
//   mode  - bitflags: STB_DXT_DITHER enables dithering, STB_DXT_HIGHQUAL runs two
//           refinement passes instead of one
static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
{
   unsigned int mask;
   int i;
   int dither;
   int refinecount;
   unsigned short max16, min16;
   unsigned char dblock[16*4],color[4*4];

   dither = mode & STB_DXT_DITHER;
   refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;

   // check if block is constant.
   // Pixels are compared bytewise (all 4 bytes): reading the byte buffer through an
   // unsigned int pointer would violate strict aliasing and may be a misaligned access.
   for (i=1;i<16;i++)
      if (memcmp(block + i*4, block, 4) != 0)
         break;

   if(i == 16) { // constant color
      int r = block[0], g = block[1], b = block[2];
      mask  = 0xaaaaaaaa;
      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
   } else {
      // first step: compute dithered version for PCA if desired
      if(dither)
         stb__DitherBlock(dblock,block);

      // second step: pca+map along principal axis
      stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
      if (max16 != min16) {
         stb__EvalColors(color,max16,min16);
         mask = stb__MatchColorsBlock(block,color,dither);
      } else
         mask = 0;

      // third step: refine (multiple times if requested)
      for (i=0;i<refinecount;i++) {
         unsigned int lastmask = mask;

         if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
            if (max16 != min16) {
               stb__EvalColors(color,max16,min16);
               mask = stb__MatchColorsBlock(block,color,dither);
            } else {
               mask = 0;
               break;
            }
         }

         // stop early once the indices no longer change
         if(mask == lastmask)
            break;
      }
   }

   // write the color block.
   // The larger 16-bit value is stored first; swapping the endpoints swaps palette
   // entries 0<->1 and 2<->3, i.e. flips the low bit of every 2-bit index.
   if(max16 < min16)
   {
      unsigned short t = min16;
      min16 = max16;
      max16 = t;
      mask ^= 0x55555555;
   }

   dest[0] = (unsigned char) (max16);
   dest[1] = (unsigned char) (max16 >> 8);
   dest[2] = (unsigned char) (min16);
   dest[3] = (unsigned char) (min16 >> 8);
   dest[4] = (unsigned char) (mask);
   dest[5] = (unsigned char) (mask >> 8);
   dest[6] = (unsigned char) (mask >> 16);
   dest[7] = (unsigned char) (mask >> 24);
}
// Alpha block compression.
// Encodes the alpha channel (byte 3 of each of the 16 RGBA pixels in `src`) into the
// 8-byte DXT5 alpha block at `dest`: max alpha, min alpha, then 16 3-bit indices packed
// least-significant-bit first. `mode` is not used.
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src,int mode)
{
   int lo, hi, px;
   int range, range2, range4, offset;
   int pending = 0, nbits = 0;

   // find the smallest and largest alpha in the block
   lo = hi = src[3];
   for (px = 1; px < 16; ++px) {
      const int alpha = src[px*4+3];
      if (alpha < lo) lo = alpha;
      else if (alpha > hi) hi = alpha;
   }

   // the two endpoints come first
   dest[0] = hi;
   dest[1] = lo;
   dest += 2;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range*4;
   range2 = range*2;
   offset = (range < 8) ? (range - 1) : (range/2 + 2);
   offset -= lo * 7;

   for (px = 0; px < 16; ++px) {
      int scaled = src[px*4+3]*7 + offset;
      int idx = 0;

      // "linear scale" position between 0 (value == min) and 7 (value == max),
      // built one bit at a time
      if (scaled >= range4) { idx = 4;  scaled -= range4; }
      if (scaled >= range2) { idx += 2; scaled -= range2; }
      if (scaled >= range)  { idx += 1; }

      // turn linear scale into DXT index (0/1 are the extremal points)
      idx = -idx & 7;
      idx ^= (2 > idx);

      // append the 3-bit index and flush every completed byte
      pending |= idx << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = pending;
         pending >>= 8;
         nbits -= 8;
      }
   }
}
// Fills every lookup table used by the encoder:
//  - stb__Expand5 / stb__Expand6: 5- and 6-bit values expanded to 8 bits by replicating
//    the top bits into the low bits
//  - stb__QuantRBTab / stb__QuantGTab: entry i is clamp(i-8, 0, 255) quantized to
//    5 / 6 bits and expanded back to 8 bits
//  - stb__OMatch5 / stb__OMatch6: endpoint pairs for constant colors
static void stb__InitDXT()
{
   int i;

   for (i = 0; i < 32; ++i)
      stb__Expand5[i] = (i<<3)|(i>>2);

   for (i = 0; i < 64; ++i)
      stb__Expand6[i] = (i<<2)|(i>>4);

   for (i = 0; i < 256+16; ++i) {
      // the tables carry 8 guard entries on each side; clamp those to 0 and 255
      int v = i - 8;
      if (v < 0)
         v = 0;
      else if (v > 255)
         v = 255;

      stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
      stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
   }

   stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
   stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
}
// Compresses one 4x4 block of RGBA pixels (row-major, 4 bytes per pixel).
//   dest  - output; receives 8 bytes (color block only) when alpha == 0, or
//           16 bytes (8-byte alpha block followed by the 8-byte color block) otherwise
//   src   - the 16 source pixels; not modified by the visible code paths' writes to dest
//   alpha - non-zero to also encode the alpha channel
//   mode  - STB_DXT_* bitflags
// The lookup tables are built on the first call, guarded by a function-local flag.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   static int tablesPending=1;
   unsigned char *pixels = (unsigned char*) src;
   unsigned char *out = dest;

   if (tablesPending) {
      stb__InitDXT();
      tablesPending=0;
   }

   if (alpha) {
      stb__CompressAlphaBlock(out,pixels,mode);
      out += 8;
   }

   stb__CompressColorBlock(out,pixels,mode);
}
#endif // STB_DXT_IMPLEMENTATION
#endif // STB_INCLUDE_STB_DXT_H