Fix shaders compilation, replace some opengl functions with opengles analogs

3 years ago · 24b2f5892c
16 changed files with 3461 additions and 858 deletions
--- a/public/togles/linuxwin/cglmprogram.h
+++ b/public/togles/linuxwin/cglmprogram.h
@ -263,7 +263,8 @@ public:
				@@ -263,7 +263,8 @@ public:
 	GLint					m_locVertexParams;		// "vc" per dx9asmtogl2 convention
 	GLint					m_locVertexBoneParams;	// "vcbones"
 	GLint					m_locVertexInteger0;	// "i0"
-			
+	GLint					m_locAlphaRef; // "alpha_ref"		
+	
 	enum { cMaxVertexShaderBoolUniforms = 4, cMaxFragmentShaderBoolUniforms = 1 };

 	GLint					m_locVertexBool[cMaxVertexShaderBoolUniforms];		// "b0", etc.
--- a/public/togles/linuxwin/glfuncs.h
+++ b/public/togles/linuxwin/glfuncs.h
@ -30,18 +30,16 @@ GL_FUNC_VOID(OpenGL,true,glAlphaFunc,(GLenum a,GLclampf b),(a,b))
				@@ -30,18 +30,16 @@ GL_FUNC_VOID(OpenGL,true,glAlphaFunc,(GLenum a,GLclampf b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glAttachShader,(GLuint a, GLuint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glBindAttribLocation,(GLuint a,GLuint b,const GLchar *c),(a,b,c))
 GL_FUNC_VOID(OpenGL,true,glBindBuffer,(GLenum a,GLuint b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glBindProgram,(GLenum a,GLuint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glBindTexture,(GLenum a,GLuint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glBlendColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glBlendEquation,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glBlendFunc,(GLenum a,GLenum b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glBufferData,(GLenum a, GLsizeiptr b, const GLvoid *c,GLenum d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glClear,(GLbitfield a),(a))
+GL_FUNC_VOID(OpenGL,true,glClearDepthf,(GLfloat a),(a))
 GL_FUNC_VOID(OpenGL,true,glClearColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
-GL_FUNC_VOID(OpenGL,true,glClearDepth,(GLclampd a),(a))
 GL_FUNC_VOID(OpenGL,true,glReadPixels, (GLint a, GLint b, GLsizei c, GLsizei d, GLenum e, GLenum f, void * g), (a,b,c,d,e,f,g))
 GL_FUNC_VOID(OpenGL,true,glClearStencil,(GLint a),(a))
-GL_FUNC_VOID(OpenGL,true,glClipPlane,(GLenum a,const GLdouble *b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glColorMask,(GLboolean a,GLboolean b,GLboolean c,GLboolean d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glCompileShader,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glGetShaderiv,(GLuint a, GLenum b, GLint *c),(a,b,c))
@ -52,20 +50,17 @@ GL_FUNC_VOID(OpenGL,true,glCompressedTexImage3D,(GLenum a,GLint b,GLenum c,GLsiz
				@@ -52,20 +50,17 @@ GL_FUNC_VOID(OpenGL,true,glCompressedTexImage3D,(GLenum a,GLint b,GLenum c,GLsiz
 GL_FUNC(OpenGL,true,GLuint,glCreateProgram,(void),())
 GL_FUNC(OpenGL,true,GLuint,glCreateShader,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glDeleteBuffers,(GLsizei a,const GLuint *b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glDeleteObject,(GLuint a),(a))
-GL_FUNC_VOID(OpenGL,true,glDeletePrograms,(GLsizei a,const GLuint *b),(a,b))
+GL_FUNC_VOID(OpenGL,true,glDeleteProgram,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glDeleteShader,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glDeleteTextures,(GLsizei a,const GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glDepthFunc,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glDepthMask,(GLboolean a),(a))
 GL_FUNC_VOID(OpenGL,true,glDepthRangef,(GLfloat a,GLfloat b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glDepthRange,(GLclampd a,GLclampd b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glDetachObject,(GLuint a,GLuint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glDisable,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glDisableVertexAttribArray,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glDrawArrays,(GLenum a,GLint b,GLsizei c),(a,b,c))
-GL_FUNC_VOID(OpenGL,true,glDrawBuffer,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glDrawBuffers,(GLsizei a,const GLenum *b),(a,b))
+GL_FUNC_VOID(OpenGL,true,glDetachShader,(GLuint a,GLuint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glDrawRangeElements,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f),(a,b,c,d,e,f))
 #ifndef OSX // 10.6/GL 2.1 compatability
 GL_FUNC_VOID(OpenGL,true,glDrawRangeElementsBaseVertex,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f, GLenum g),(a,b,c,d,e,f,g))
@ -76,35 +71,27 @@ GL_FUNC_VOID(OpenGL,true,glFinish,(void),())
				@@ -76,35 +71,27 @@ GL_FUNC_VOID(OpenGL,true,glFinish,(void),())
 GL_FUNC_VOID(OpenGL,true,glFlush,(void),())
 GL_FUNC_VOID(OpenGL,true,glFrontFace,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glGenBuffers,(GLsizei a,GLuint *b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glGenPrograms,(GLsizei a,GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glGenTextures,(GLsizei a,GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glGetBooleanv,(GLenum a,GLboolean *b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glGetCompressedTexImage,(GLenum a,GLint b,GLvoid *c),(a,b,c))
-GL_FUNC_VOID(OpenGL,true,glGetDoublev,(GLenum a,GLdouble *b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glGetFloatv,(GLenum a,GLfloat *b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
+//GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glGetIntegerv,(GLenum a,GLint *b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glGetObjectParameteriv,(GLuint a,GLenum b,GLint *c),(a,b,c))
 GL_FUNC_VOID(OpenGL,true,glGetProgramiv,(GLenum a,GLenum b,GLint *c),(a,b,c))
 GL_FUNC(OpenGL,true,const GLubyte *,glGetString,(GLenum a),(a))
-GL_FUNC_VOID(OpenGL,true,glGetTexImage,(GLenum a,GLint b,GLenum c,GLenum d,GLvoid *e),(a,b,c,d,e))
 GL_FUNC(OpenGL,true,GLint,glGetUniformLocation,(GLuint a,const GLchar *b),(a,b))
 GL_FUNC(OpenGL,true,GLboolean,glIsEnabled,(GLenum a),(a))
 GL_FUNC(OpenGL,true,GLboolean,glIsTexture,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glLinkProgram,(GLuint a),(a))
-GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
+//GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
 GL_FUNC_VOID(OpenGL,true,glPixelStorei,(GLenum a,GLint b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
+//GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glPolygonOffset,(GLfloat a,GLfloat b),(a,b))
-GL_FUNC_VOID(OpenGL,true,glPopAttrib,(void),())
-GL_FUNC_VOID(OpenGL,true,glPushAttrib,(GLbitfield a),(a))
 GL_FUNC_VOID(OpenGL,true,glReadBuffer,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glScissor,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glShaderSource,(GLuint a,GLsizei b,const GLchar **c,const GLint *d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glStencilFunc,(GLenum a,GLint b,GLuint c),(a,b,c))
 GL_FUNC_VOID(OpenGL,true,glStencilMask,(GLuint a),(a))
 GL_FUNC_VOID(OpenGL,true,glStencilOp,(GLenum a,GLenum b,GLenum c),(a,b,c))
-GL_FUNC_VOID(OpenGL,true,glTexCoord2f,(GLfloat a,GLfloat b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glTexImage2D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLint f,GLenum g,GLenum h,const GLvoid *i),(a,b,c,d,e,f,g,h,i))
 GL_FUNC_VOID(OpenGL,true,glTexImage3D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLsizei f,GLint g,GLenum h,GLenum i,const GLvoid *j),(a,b,c,d,e,f,g,h,i,j))
 GL_FUNC_VOID(OpenGL,true,glTexParameterfv,(GLenum a,GLenum b,const GLfloat *c),(a,b,c))
@ -115,16 +102,9 @@ GL_FUNC_VOID(OpenGL,true,glUniform1i,(GLint a,GLint b),(a,b))
				@@ -115,16 +102,9 @@ GL_FUNC_VOID(OpenGL,true,glUniform1i,(GLint a,GLint b),(a,b))
 GL_FUNC_VOID(OpenGL,true,glUniform4fv,(GLint a,GLsizei b,const GLfloat *c),(a,b,c))
 GL_FUNC(OpenGL,true,GLboolean,glUnmapBuffer,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glUseProgram,(GLuint a),(a))
-GL_FUNC_VOID(OpenGL,true,glVertex3f,(GLfloat a,GLfloat b,GLfloat c),(a,b,c))
 GL_FUNC_VOID(OpenGL,true,glVertexAttribPointer,(GLuint a,GLint b,GLenum c,GLboolean d,GLsizei e,const GLvoid *f),(a,b,c,d,e,f))
 GL_FUNC_VOID(OpenGL,true,glViewport,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
-GL_FUNC_VOID(OpenGL,true,glEnableClientState,(GLenum a),(a))
-GL_FUNC_VOID(OpenGL,true,glDisableClientState,(GLenum a),(a))
 GL_FUNC_VOID(OpenGL,true,glClientActiveTexture,(GLenum a),(a))
-GL_FUNC_VOID(OpenGL,true,glVertexPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
-GL_FUNC_VOID(OpenGL,true,glTexCoordPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
-GL_FUNC_VOID(OpenGL,true,glProgramEnvParameters4fvEXT,(GLenum a,GLuint b,GLsizei c,const GLfloat *d),(a,b,c,d))
-GL_FUNC_VOID(OpenGL,true,glColor4sv,(const GLshort  *a),(a))
 GL_FUNC_VOID(OpenGL,true,glStencilOpSeparate,(GLenum a,GLenum b,GLenum c,GLenum d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glStencilFuncSeparate,(GLenum a,GLenum b,GLint c,GLuint d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,true,glGetTexLevelParameteriv,(GLenum a,GLint b,GLenum c,GLint *d),(a,b,c,d))
@ -164,11 +144,6 @@ GL_FUNC_VOID(GL_ARB_sync,false,glWaitSync,(GLsync a, GLbitfield b, GLuint64 c),(
				@@ -164,11 +144,6 @@ GL_FUNC_VOID(GL_ARB_sync,false,glWaitSync,(GLsync a, GLbitfield b, GLuint64 c),(
 GL_FUNC_VOID(GL_ARB_sync,false,glDeleteSync,(GLsync a),(a))
 GL_FUNC(GL_ARB_sync,false,GLsync,glFenceSync,(GLenum a, GLbitfield b),(a,b))
 #endif
-GL_EXT(GL_EXT_draw_buffers2,-1,-1)
-GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glColorMaskIndexedEXT,(GLuint a,GLboolean b,GLboolean c,GLboolean d,GLboolean e),(a,b,c,d,e))
-GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glEnableIndexedEXT,(GLenum a,GLuint b),(a,b))
-GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glDisableIndexedEXT,(GLenum a,GLuint b),(a,b))
-GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glGetBooleanIndexedvEXT,(GLenum a,GLuint b,GLboolean  *c),(a,b,c))
 GL_EXT(GL_EXT_bindable_uniform,-1,-1)
 GL_FUNC_VOID(GL_EXT_bindable_uniform,false,glUniformBufferEXT,(GLuint a,GLint b,GLuint c),(a,b,c))
 GL_FUNC(GL_EXT_bindable_uniform,false,int,glGetUniformBufferSizeEXT,(GLenum a, GLenum b),(a,b))
@ -212,7 +187,7 @@ GL_FUNC_VOID(OpenGL,false,glDeleteRenderbuffers,(GLsizei a,const GLuint *b),(a,b
				@@ -212,7 +187,7 @@ GL_FUNC_VOID(OpenGL,false,glDeleteRenderbuffers,(GLsizei a,const GLuint *b),(a,b
 GL_FUNC_VOID(OpenGL,false,glFramebufferRenderbuffer,(GLenum a,GLenum b,GLenum c,GLuint d),(a,b,c,d))
 GL_FUNC_VOID(OpenGL,false,glFramebufferTexture2D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e),(a,b,c,d,e))
 GL_FUNC_VOID(OpenGL,false,glFramebufferTexture3D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e,GLint f),(a,b,c,d,e,f))
-GL_FUNC_VOID(OpenGL,false,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
+GL_FUNC_VOID(OpenGL,true,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,false,glGenRenderbuffers,(GLsizei a,GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,false,glDeleteFramebuffers,(GLsizei a,const GLuint *b),(a,b))
 GL_FUNC_VOID(OpenGL,false,glBlitFramebuffer,(GLint a,GLint b,GLint c,GLint d,GLint e,GLint f,GLint g,GLint h,GLbitfield i,GLenum j),(a,b,c,d,e,f,g,h,i,j))
@ -248,9 +223,6 @@ GL_FUNC_VOID(OpenGL,true,glGenQueries,(GLsizei n, GLuint *ids), (n, ids))
				@@ -248,9 +223,6 @@ GL_FUNC_VOID(OpenGL,true,glGenQueries,(GLsizei n, GLuint *ids), (n, ids))
 GL_FUNC_VOID(OpenGL,true,glDeleteQueries,(GLsizei n, const GLuint *ids),(n, ids))
 GL_FUNC_VOID(OpenGL,true,glBeginQuery,(GLenum target, GLuint id), (target, id))
 GL_FUNC_VOID(OpenGL,true,glEndQuery,(GLenum target), (target))
-GL_FUNC_VOID(OpenGL,true,glQueryCounter,(GLuint id, GLenum target), (id, target))
-GL_FUNC_VOID(OpenGL,true,glGetQueryObjectiv,(GLuint id, GLenum pname, GLint *params), (id, pname, params))
-GL_FUNC_VOID(OpenGL,true,glGetQueryObjectui64v,(GLuint id, GLenum pname, GLuint64 *params), (id, pname, params))
 GL_FUNC_VOID(OpenGL,true,glCopyBufferSubData,(GLenum readtarget, GLenum writetarget, GLintptr readoffset, GLintptr writeoffset, GLsizeiptr size),(readtarget, writetarget, readoffset, writeoffset, size))
 #endif // !OSX

@ -264,8 +236,6 @@ GL_FUNC_VOID(OpenGL,true,glBindVertexArray,(GLuint a),(a))
				@@ -264,8 +236,6 @@ GL_FUNC_VOID(OpenGL,true,glBindVertexArray,(GLuint a),(a))
 #endif // !OSX

 GL_EXT(GL_EXT_texture_sRGB_decode,-1,-1)
-GL_FUNC_VOID(OpenGL,true,glPushClientAttrib,(GLbitfield a),(a))
-GL_FUNC_VOID(OpenGL,true,glPopClientAttrib,(void),())
 GL_EXT(GL_NVX_gpu_memory_info,-1,-1)
 GL_EXT(GL_ATI_meminfo,-1,-1)
 GL_EXT(GL_EXT_texture_compression_s3tc,-1,-1)
--- a/public/togles/linuxwin/glmgr.h
+++ b/public/togles/linuxwin/glmgr.h
@ -198,9 +198,11 @@ FORCEINLINE void glGetEnumv( GLenum which, GLenum *dst )
				@@ -198,9 +198,11 @@ FORCEINLINE void glGetEnumv( GLenum which, GLenum *dst )
 // shorthand macros
 #define	EQ(fff) ( (src.fff) == (fff) )

+
 //rasterizer
 struct GLAlphaTestEnable_t		{ GLint		enable;													inline bool operator==(const GLAlphaTestEnable_t& src)		const { return EQ(enable);									} };
 struct GLAlphaTestFunc_t		{ GLenum	func; GLclampf ref;										inline bool operator==(const GLAlphaTestFunc_t& src)		const { return EQ(func) && EQ(ref);							} };
+struct GLAlphaTest_t { GLint enable; GLenum func; GLclampf ref; };
 struct GLCullFaceEnable_t		{ GLint		enable;													inline bool operator==(const GLCullFaceEnable_t& src)		const { return EQ(enable);									} };
 struct GLCullFrontFace_t		{ GLenum	value;													inline bool operator==(const GLCullFrontFace_t& src)		const { return EQ(value);									} };
 struct GLPolygonMode_t			{ GLenum	values[2];												inline bool operator==(const GLPolygonMode_t& src)			const { return EQ(values[0]) && EQ(values[1]);				} };
@ -209,7 +211,7 @@ struct GLScissorEnable_t		{ GLint		enable;													inline bool operator==(co
				@@ -209,7 +211,7 @@ struct GLScissorEnable_t		{ GLint		enable;													inline bool operator==(co
 struct GLScissorBox_t			{ GLint		x,y;		GLsizei width, height;						inline bool operator==(const GLScissorBox_t& src)			const { return EQ(x) && EQ(y) && EQ(width) && EQ(height);	} };
 struct GLAlphaToCoverageEnable_t{ GLint		enable;													inline bool operator==(const GLAlphaToCoverageEnable_t& src) const { return EQ(enable);								} };
 struct GLViewportBox_t			{ GLint		x,y;		GLsizei width, height; uint widthheight;	inline bool operator==(const GLViewportBox_t& src)			const { return EQ(x) && EQ(y) && EQ(width) && EQ(height);	} };
-struct GLViewportDepthRange_t	{ GLdouble	flNear,flFar;											inline bool operator==(const GLViewportDepthRange_t& src)	const { return EQ(flNear) && EQ(flFar);						} };
+struct GLViewportDepthRange_t	{ GLfloat	flNear,flFar;											inline bool operator==(const GLViewportDepthRange_t& src)	const { return EQ(flNear) && EQ(flFar);						} };
 struct GLClipPlaneEnable_t		{ GLint		enable;													inline bool operator==(const GLClipPlaneEnable_t& src)		const { return EQ(enable);									} };
 struct GLClipPlaneEquation_t	{ GLfloat	x,y,z,w;												inline bool operator==(const GLClipPlaneEquation_t& src)	const { return EQ(x) && EQ(y) && EQ(z) && EQ(w);			} };

@ -235,7 +237,7 @@ struct GLStencilWriteMask_t		{ GLint		mask;													inline bool operator==(c
				@@ -235,7 +237,7 @@ struct GLStencilWriteMask_t		{ GLint		mask;													inline bool operator==(c

 //clearing
 struct GLClearColor_t			{  GLfloat	r,g,b,a;												inline bool operator==(const GLClearColor_t& src)			const { return EQ(r) && EQ(g) && EQ(b) && EQ(a);			} };
-struct GLClearDepth_t			{  GLdouble	d;														inline bool operator==(const GLClearDepth_t& src)			const { return EQ(d);										} };
+struct GLClearDepth_t			{  GLfloat	d;														inline bool operator==(const GLClearDepth_t& src)			const { return EQ(d);										} };
 struct GLClearStencil_t			{  GLint	s;														inline bool operator==(const GLClearStencil_t& src)		const { return EQ(s);										} };

 #undef EQ
@ -306,15 +308,20 @@ template<typename T>	void GLContextGetDefaultIndexed( T *dst, int index );
				@@ -306,15 +308,20 @@ template<typename T>	void GLContextGetDefaultIndexed( T *dst, int index );
 //===============================================================================
 // template specializations for each type of state

+
+static GLAlphaTest_t g_alpha_test;
+
 //                                                                      --- GLAlphaTestEnable ---
 FORCEINLINE void GLContextSet( GLAlphaTestEnable_t *src )
 {
-	glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
+//	glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
+	g_alpha_test.enable = src->enable;
 }

 FORCEINLINE void GLContextGet( GLAlphaTestEnable_t *dst )
 {
-	dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
+//	dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
+	dst->enable = g_alpha_test.enable;
 }

 FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
@ -326,12 +333,16 @@ FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
				@@ -326,12 +333,16 @@ FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
 FORCEINLINE void GLContextSet( GLAlphaTestFunc_t *src )
 {
 // gGL->glAlphaFunc( src->func, src->ref );
+	g_alpha_test.func = src->func;
+	g_alpha_test.ref = src->ref;
 }

 FORCEINLINE void GLContextGet( GLAlphaTestFunc_t *dst )
 {
-	glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
-	gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
+//	glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
+//	gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
+	dst->func = g_alpha_test.func;
+	dst->ref = g_alpha_test.ref;
 }

 FORCEINLINE void GLContextGetDefault( GLAlphaTestFunc_t *dst )
@ -502,7 +513,7 @@ FORCEINLINE void GLContextSet( GLViewportDepthRange_t *src )
				@@ -502,7 +513,7 @@ FORCEINLINE void GLContextSet( GLViewportDepthRange_t *src )

 FORCEINLINE void GLContextGet( GLViewportDepthRange_t *dst )
 {
-	gGL->glGetDoublev	( GL_DEPTH_RANGE, &dst->flNear );
+	gGL->glGetFloatv( GL_DEPTH_RANGE, &dst->flNear );
 }

 FORCEINLINE void GLContextGetDefault( GLViewportDepthRange_t *dst )
@ -584,12 +595,26 @@ FORCEINLINE void GLContextGetDefault( GLColorMaskSingle_t *dst )
				@@ -584,12 +595,26 @@ FORCEINLINE void GLContextGetDefault( GLColorMaskSingle_t *dst )
 //                                                                      --- GLColorMaskMultiple ---
 FORCEINLINE void GLContextSetIndexed( GLColorMaskMultiple_t *src, int index )
 {
-	gGL->glColorMaskIndexedEXT ( index, src->r, src->g, src->b, src->a );
+	GLint Rfbo = 0, Dfbo = 0;
+
+	gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
+	gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
+	GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
+	gGL->glBindFramebuffer( target, index );
+	gGL->glColorMask ( src->r, src->g, src->b, src->a );
+	gGL->glBindFramebuffer( target, Dfbo );
 }

 FORCEINLINE void GLContextGetIndexed( GLColorMaskMultiple_t *dst, int index )
 {
-	gGL->glGetBooleanIndexedvEXT ( GL_COLOR_WRITEMASK, index, (GLboolean*)&dst->r );
+	GLint Rfbo = 0, Dfbo = 0;
+	
+	gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
+	gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
+	GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
+	gGL->glBindFramebuffer( target, index );
+	gGL->glGetBooleanv( GL_COLOR_WRITEMASK, (GLboolean*)&dst->r );
+	gGL->glBindFramebuffer( target, Dfbo );
 }

 FORCEINLINE void GLContextGetDefaultIndexed( GLColorMaskMultiple_t *dst, int index )
@ -698,7 +723,7 @@ FORCEINLINE void GLContextSet( GLBlendEnableSRGB_t *src )
				@@ -698,7 +723,7 @@ FORCEINLINE void GLContextSet( GLBlendEnableSRGB_t *src )

 FORCEINLINE void GLContextGet( GLBlendEnableSRGB_t *dst )
 {
-	//dst->enable = glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
+//	dst->enable = gGL->glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
 	dst->enable = true; // wtf ?
 }

@ -864,13 +889,12 @@ FORCEINLINE void GLContextGetDefault( GLClearColor_t *dst )
				@@ -864,13 +889,12 @@ FORCEINLINE void GLContextGetDefault( GLClearColor_t *dst )
 //                                                                      --- GLClearDepth ---
 FORCEINLINE void GLContextSet( GLClearDepth_t *src )
 {
-//	TOFUCK: wut
-//	gGL->glClearDepth ( src->d );
+	gGL->glClearDepthf( src->d );
 }

 FORCEINLINE void GLContextGet( GLClearDepth_t *dst )
 {
-	gGL->glGetDoublev ( GL_DEPTH_CLEAR_VALUE, &dst->d );
+	gGL->glGetFloatv( GL_DEPTH_CLEAR_VALUE, &dst->d );
 }

 FORCEINLINE void GLContextGetDefault( GLClearDepth_t *dst )
@ -2285,7 +2309,7 @@ public:
				@@ -2285,7 +2309,7 @@ public:
 };

 #define	kMaxCrawlFrames	100
-#define	kMaxCrawlText		(kMaxCrawlFrames * 256)
+#define	kMaxCrawlText (kMaxCrawlFrames * 256)
 class CStackCrawlParams
 {
 	public:
--- a/togles/linuxwin/cglmprogram.cpp
+++ b/togles/linuxwin/cglmprogram.cpp
@ -718,7 +718,7 @@ CGLMShaderPair::~CGLMShaderPair( )
				@@ -718,7 +718,7 @@ CGLMShaderPair::~CGLMShaderPair( )
 {
 	if (m_program)
 	{
-		gGL->glDeleteObject( m_program );
+		gGL->glDeleteProgram( m_program );
 		m_program = 0;
 	}
 }
@ -746,7 +746,7 @@ bool CGLMShaderPair::ValidateProgramPair()
				@@ -746,7 +746,7 @@ bool CGLMShaderPair::ValidateProgramPair()

 		// check for success
 		GLint result = GL_TRUE;
-		gGL->glGetObjectParameteriv( m_program, GL_OBJECT_LINK_STATUS_ARB, &result );	// want GL_TRUE
+		gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &result);
 		m_bCheckLinkStatus = false;

 		if (result == GL_TRUE)
@ -762,12 +762,6 @@ bool CGLMShaderPair::ValidateProgramPair()
				@@ -762,12 +762,6 @@ bool CGLMShaderPair::ValidateProgramPair()
 			GLint laux = 0;

 			// do some digging
-			gGL->glGetObjectParameteriv( m_program, GL_OBJECT_INFO_LOG_LENGTH_ARB, &length );
-
-			GLchar *logString = (GLchar *)malloc( length * sizeof(GLchar) );
-			gGL->glGetInfoLog( m_program, length, &laux, logString );
-
-			GLMPRINTF( ("-D- ----- GLSL link failed: \n %s ", logString) );
 #if !GLM_FREE_SHADER_TEXT
 			char *vtemp = strdup( m_vertexProg->m_text );
 			vtemp[m_vertexProg->m_descs[kGLMGLSL].m_textOffset + m_vertexProg->m_descs[kGLMGLSL].m_textLength] = 0;
@ -784,8 +778,6 @@ bool CGLMShaderPair::ValidateProgramPair()
				@@ -784,8 +778,6 @@ bool CGLMShaderPair::ValidateProgramPair()
 			free( ftemp );
 			free( vtemp );
 #endif
-			free( logString );
-
 			GLMPRINTF( ("-D- -----end-----") );
 		}

@ -793,11 +785,15 @@ bool CGLMShaderPair::ValidateProgramPair()
				@@ -793,11 +785,15 @@ bool CGLMShaderPair::ValidateProgramPair()
 		{
 			gGL->glUseProgram( m_program );

+			printf("Sample text\n");
+
 			m_ctx->NewLinkedProgram();

 			m_locVertexParams = gGL->glGetUniformLocation( m_program, "vc" );
 			m_locVertexBoneParams = gGL->glGetUniformLocation( m_program, "vcbones" );
 			m_locVertexScreenParams = gGL->glGetUniformLocation( m_program, "vcscreen" );
+			m_locAlphaRef = gGL->glGetUniformLocation( m_program, "alpha_ref" );
+			
 			m_nScreenWidthHeight = 0xFFFFFFFF;

 			m_locVertexInteger0 = gGL->glGetUniformLocation( m_program, "i0" );
@ -940,13 +936,13 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
				@@ -940,13 +936,13 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
 		// attempt link. but first, detach any previously attached programs
 		if (m_vertexProg)
 		{
-			gGL->glDetachObject(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
+			gGL->glDetachShader(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
 			m_vertexProg = NULL;			
 		}
 		
 		if (m_fragmentProg)
 		{
-			gGL->glDetachObject(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
+			gGL->glDetachShader(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
 			m_fragmentProg = NULL;			
 		}
 		
@ -987,7 +983,7 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
				@@ -987,7 +983,7 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
 		gGL->glLinkProgram( m_program );

 		GLint isLinked = 0;
-		gGL->glGetShaderiv(m_program, GL_LINK_STATUS, &isLinked);
+		gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &isLinked);
 		if(isLinked == GL_FALSE)
 		{
 			GLint maxLength = 0;
@ -997,8 +993,8 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
				@@ -997,8 +993,8 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
 			gGL->glGetProgramInfoLog( m_program, sizeof(log), &maxLength, log );
 			if( maxLength )
 			{
-				printf("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
-				printf("shader %d link log: %s\n", m_program, log);
+				Msg("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
+				Msg("shader %d link log: %s\n", m_program, log);
 			}
 		}
 		
--- a/togles/linuxwin/cglmquery.cpp
+++ b/togles/linuxwin/cglmquery.cpp
@ -239,8 +239,8 @@ bool	CGLMQuery::IsDone( void )
				@@ -239,8 +239,8 @@ bool	CGLMQuery::IsDone( void )
 				{
 					// prepare to pay a big price on drivers prior to 10.6.4+SLGU
 					
-					GLint available = 0;
-					gGL->glGetQueryObjectiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
+					GLuint available = 0;
+					gGL->glGetQueryObjectuiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
 					
 					m_done = (available != 0);					
 				}
--- a/togles/linuxwin/cglmtex.cpp
+++ b/togles/linuxwin/cglmtex.cpp
--- a/togles/linuxwin/decompress.c
+++ b/togles/linuxwin/decompress.c
@ -0,0 +1,341 @@
				@@ -0,0 +1,341 @@
+#include <stdint.h>
+#include <stddef.h>
+
+/*
+DXT1/DXT3/DXT5 texture decompression
+
+The original code is from Benjamin Dobell, see below for details. Compared to
+the original this one adds DXT3 decompression, is valid C89, and is x64 
+compatible as it uses fixed size integers everywhere. It also uses a different
+PackRGBA order.
+
+---
+
+Copyright (c) 2012, Matthäus G. "Anteru" Chajdas (http://anteru.net)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE.
+
+---
+
+Copyright (C) 2009 Benjamin Dobell, Glass Echidna
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE.
+
+---
+*/
+/*
+ * Pack four 8-bit channels into one 32-bit pixel: r in bits 0-7, g in bits
+ * 8-15, b in bits 16-23 and a in bits 24-31.
+ *
+ * Every channel is widened to uint32_t before it is shifted. Without the cast
+ * the operands are promoted to (signed) int, and shifting a value >= 128 left
+ * by 24 overflows int, which is undefined behaviour.
+ */
+static uint32_t PackRGBA (uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+{
+	return (uint32_t)r | ((uint32_t)g << 8) | ((uint32_t)b << 16) | ((uint32_t)a << 24);
+}
+
+/*
+ * Decode the 8-byte colour part of a DXT block into a 4x4 group of pixels.
+ *
+ * block:         two 16-bit RGB565 endpoint colours followed by sixteen 2-bit
+ *                palette indices, all stored little-endian.
+ * output:        address of the top-left pixel of the destination 4x4 group.
+ * outputStride:  distance, in pixels, between two destination rows.
+ * transparent0:  when non-zero, palette index 3 of a block whose first
+ *                endpoint is not greater than the second is written with
+ *                alpha 0 instead of the supplied alpha.
+ * simpleAlpha:   set to 1 when any written pixel has alpha 0.
+ * complexAlpha:  set to 1 when any written pixel has alpha in 1..254.
+ * alphaValues:   sixteen alpha values, row-major, one per pixel.
+ *
+ * The endpoints and the index word are assembled byte by byte: 'block' is a
+ * plain byte pointer, so casting it to uint16_t* / uint32_t* would be a
+ * misaligned, aliasing-violating access and would read the wrong value on a
+ * big-endian host.
+ */
+static void DecompressBlockDXT1Internal (const uint8_t* block,
+	uint32_t* output,
+	uint32_t outputStride,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	const uint8_t* alphaValues)
+{
+	uint32_t temp, code;
+
+	uint16_t color0, color1;
+	uint8_t r0, g0, b0, r1, g1, b1;
+	uint8_t palR [4], palG [4], palB [4];
+	int fourColor;
+
+	int i, j;
+
+	color0 = (uint16_t)(block[0] | (block[1] << 8));
+	color1 = (uint16_t)(block[2] | (block[3] << 8));
+
+	/* expand the 5/6/5-bit endpoint channels to 8 bits with rounding */
+	temp = (color0 >> 11) * 255 + 16;
+	r0 = (uint8_t)((temp/32 + temp)/32);
+	temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
+	g0 = (uint8_t)((temp/64 + temp)/64);
+	temp = (color0 & 0x001F) * 255 + 16;
+	b0 = (uint8_t)((temp/32 + temp)/32);
+
+	temp = (color1 >> 11) * 255 + 16;
+	r1 = (uint8_t)((temp/32 + temp)/32);
+	temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
+	g1 = (uint8_t)((temp/64 + temp)/64);
+	temp = (color1 & 0x001F) * 255 + 16;
+	b1 = (uint8_t)((temp/32 + temp)/32);
+
+	code = (uint32_t)block[4] | ((uint32_t)block[5] << 8) |
+		((uint32_t)block[6] << 16) | ((uint32_t)block[7] << 24);
+
+	/* build the four-entry palette once instead of once per pixel */
+	fourColor = (color0 > color1);
+
+	palR [0] = r0; palG [0] = g0; palB [0] = b0;
+	palR [1] = r1; palG [1] = g1; palB [1] = b1;
+
+	if (fourColor) {
+		/* two endpoints plus two colours at 1/3 and 2/3 between them */
+		palR [2] = (uint8_t)((2*r0+r1)/3);
+		palG [2] = (uint8_t)((2*g0+g1)/3);
+		palB [2] = (uint8_t)((2*b0+b1)/3);
+		palR [3] = (uint8_t)((r0+2*r1)/3);
+		palG [3] = (uint8_t)((g0+2*g1)/3);
+		palB [3] = (uint8_t)((b0+2*b1)/3);
+	} else {
+		/* two endpoints, their midpoint, and black */
+		palR [2] = (uint8_t)((r0+r1)/2);
+		palG [2] = (uint8_t)((g0+g1)/2);
+		palB [2] = (uint8_t)((b0+b1)/2);
+		palR [3] = 0;
+		palG [3] = 0;
+		palB [3] = 0;
+	}
+
+	for (j = 0; j < 4; ++j) {
+		for (i = 0; i < 4; ++i) {
+			uint32_t positionCode;
+			uint8_t alpha;
+
+			alpha = alphaValues [j*4+i];
+			positionCode = (code >>  2*(4*j+i)) & 0x03;
+
+			/* index 3 of a three-colour block is the optional transparent texel */
+			if (!fourColor && positionCode == 3 && transparent0)
+				alpha = 0;
+
+			if(!alpha)
+				*simpleAlpha = 1;
+			else if(alpha<0xff)
+				*complexAlpha = 1;
+
+			output [j*outputStride + i] = PackRGBA(palR [positionCode],
+				palG [positionCode], palB [positionCode], alpha);
+		}
+	}
+}
+
+/*
+void DecompressBlockDXT1(): decodes a single DXT1 block and writes its 4x4 pixels into 'image'.
+
+uint32_t x:						x-coordinate of the first pixel in the block.
+uint32_t y:						y-coordinate of the first pixel in the block.
+uint32_t width: 				width of the texture being decompressed; also used as the row stride of 'image'.
+const uint8_t *blockStorage:	pointer to the block to decompress.
+int transparent0:				forwarded unchanged to DecompressBlockDXT1Internal().
+int *simpleAlpha:				forwarded unchanged to DecompressBlockDXT1Internal().
+int *complexAlpha:				forwarded unchanged to DecompressBlockDXT1Internal().
+uint32_t *image:				pointer to image where the decompressed pixel data should be stored.
+*/ 
+void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image)
+{
+	/* every pixel is handed to the shared decoder with alpha 255 */
+	static const uint8_t opaque [16] = {
+		255, 255, 255, 255,
+		255, 255, 255, 255,
+		255, 255, 255, 255,
+		255, 255, 255, 255
+	};
+	uint32_t* firstPixel = image + x + (y * width);
+
+	DecompressBlockDXT1Internal (blockStorage, firstPixel, width,
+		transparent0, simpleAlpha, complexAlpha, opaque);
+}
+
+/*
+void DecompressBlockDXT5(): Decompresses one block of a DXT5 texture and stores the resulting pixels at the appropriate offset in 'image'.
+
+uint32_t x:						x-coordinate of the first pixel in the block.
+uint32_t y:						y-coordinate of the first pixel in the block.
+uint32_t width: 				width of the texture being decompressed; also used as the row stride of 'image'.
+const uint8_t *blockStorage:	pointer to the 16-byte block to decompress (8 bytes of alpha followed by 8 bytes of colour).
+int transparent0:				not used by this function; present so all three decoders share one signature.
+int *simpleAlpha:				set to 1 when any decoded pixel has alpha 0.
+int *complexAlpha:				set to 1 when any decoded pixel has alpha in 1..254.
+uint32_t *image:				pointer to image where the decompressed pixel data should be stored.
+
+All multi-byte fields are assembled byte by byte as little-endian values.
+'blockStorage' is a plain byte pointer, so reading it through a uint16_t* or
+uint32_t* cast would be a misaligned, aliasing-violating access and would give
+the wrong value on a big-endian host. The bytes are widened to uint32_t before
+being shifted by 24 so the shift cannot overflow a signed int.
+*/ 
+void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image)
+{
+	uint8_t alpha0, alpha1;
+	const uint8_t* bits;
+	uint32_t alphaCode1;
+	uint16_t alphaCode2;
+
+	uint16_t color0, color1;
+	uint8_t r0, g0, b0, r1, g1, b1;
+
+	int i, j;
+
+	uint32_t temp, code;
+
+	(void)transparent0;
+
+	alpha0 = *(blockStorage);
+	alpha1 = *(blockStorage + 1);
+
+	/* 48 bits of 3-bit alpha indices: low 16 bits in alphaCode2, high 32 in alphaCode1 */
+	bits = blockStorage + 2;
+	alphaCode1 = (uint32_t)bits[2] | ((uint32_t)bits[3] << 8) |
+		((uint32_t)bits[4] << 16) | ((uint32_t)bits[5] << 24);
+	alphaCode2 = (uint16_t)(bits[0] | (bits[1] << 8));
+
+	color0 = (uint16_t)(blockStorage[8] | (blockStorage[9] << 8));
+	color1 = (uint16_t)(blockStorage[10] | (blockStorage[11] << 8));
+
+	/* expand the 5/6/5-bit endpoint channels to 8 bits with rounding */
+	temp = (color0 >> 11) * 255 + 16;
+	r0 = (uint8_t)((temp/32 + temp)/32);
+	temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
+	g0 = (uint8_t)((temp/64 + temp)/64);
+	temp = (color0 & 0x001F) * 255 + 16;
+	b0 = (uint8_t)((temp/32 + temp)/32);
+
+	temp = (color1 >> 11) * 255 + 16;
+	r1 = (uint8_t)((temp/32 + temp)/32);
+	temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
+	g1 = (uint8_t)((temp/64 + temp)/64);
+	temp = (color1 & 0x001F) * 255 + 16;
+	b1 = (uint8_t)((temp/32 + temp)/32);
+
+	code = (uint32_t)blockStorage[12] | ((uint32_t)blockStorage[13] << 8) |
+		((uint32_t)blockStorage[14] << 16) | ((uint32_t)blockStorage[15] << 24);
+
+	for (j = 0; j < 4; j++) {
+		for (i = 0; i < 4; i++) {
+			uint8_t finalAlpha;
+			int alphaCode, alphaCodeIndex;
+			uint8_t colorCode;
+			uint32_t finalColor;
+
+			/* bit offset of this pixel's 3-bit alpha index within the 48-bit field */
+			alphaCodeIndex = 3*(4*j+i);
+			if (alphaCodeIndex <= 12) {
+				alphaCode = (alphaCode2 >> alphaCodeIndex) & 0x07;
+			} else if (alphaCodeIndex == 15) {
+				/* this index straddles the boundary between the two words */
+				alphaCode = (alphaCode2 >> 15) | ((alphaCode1 << 1) & 0x06);
+			} else /* alphaCodeIndex >= 18 && alphaCodeIndex <= 45 */ {
+				alphaCode = (alphaCode1 >> (alphaCodeIndex - 16)) & 0x07;
+			}
+
+			if (alphaCode == 0) {
+				finalAlpha = alpha0;
+			} else if (alphaCode == 1) {
+				finalAlpha = alpha1;
+			} else {
+				if (alpha0 > alpha1) {
+					/* six interpolated values between the two endpoints */
+					finalAlpha = (uint8_t)(((8-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/7);
+				} else {
+					/* four interpolated values plus explicit 0 and 255 */
+					if (alphaCode == 6) {
+						finalAlpha = 0;
+					} else if (alphaCode == 7) {
+						finalAlpha = 255;
+					} else {
+						finalAlpha = (uint8_t)(((6-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/5);
+					}
+				}
+			}
+
+			colorCode = (code >> 2*(4*j+i)) & 0x03; 
+			finalColor = 0;
+
+			switch (colorCode) {
+			case 0:
+				finalColor = PackRGBA(r0, g0, b0, finalAlpha);
+				break;
+			case 1:
+				finalColor = PackRGBA(r1, g1, b1, finalAlpha);
+				break;
+			case 2:
+				finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, finalAlpha);
+				break;
+			case 3:
+				finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, finalAlpha);
+				break;
+			}
+
+			if(finalAlpha==0) *simpleAlpha = 1;
+			else if(finalAlpha<0xff) *complexAlpha = 1;
+
+			image [i + x + (width* (y+j))] = finalColor; 
+		}
+	}
+}
+
+/*
+void DecompressBlockDXT3(): Decompresses one block of a DXT3 texture and stores the resulting pixels at the appropriate offset in 'image'.
+
+uint32_t x:						x-coordinate of the first pixel in the block.
+uint32_t y:						y-coordinate of the first pixel in the block.
+uint32_t width:					width of the texture being decompressed; also used as the row stride of 'image'.
+const uint8_t *blockStorage:	pointer to the 16-byte block to decompress (8 bytes of alpha followed by 8 bytes of colour).
+int transparent0:				forwarded unchanged to DecompressBlockDXT1Internal().
+int *simpleAlpha:				forwarded unchanged to DecompressBlockDXT1Internal().
+int *complexAlpha:				forwarded unchanged to DecompressBlockDXT1Internal().
+uint32_t *image:				pointer to image where the decompressed pixel data should be stored.
+
+Each 16-bit alpha row is assembled from its two bytes as a little-endian
+value. 'blockStorage' is a plain byte pointer, so reading it through a
+uint16_t* cast would be a misaligned, aliasing-violating access and would give
+the wrong value on a big-endian host.
+*/ 
+void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image)
+{
+	int i;
+
+	uint8_t alphaValues [16] = { 0 };
+
+	for (i = 0; i < 4; ++i) {
+		/* one row: four 4-bit alphas, lowest nibble first */
+		uint16_t alphaRow = (uint16_t)(blockStorage[0] | (blockStorage[1] << 8));
+
+		/* multiplying by 17 maps the 4-bit range 0..15 onto 0..255 */
+		alphaValues [i*4 + 0] = (uint8_t)(((alphaRow >> 0) & 0xF ) * 17);
+		alphaValues [i*4 + 1] = (uint8_t)(((alphaRow >> 4) & 0xF ) * 17);
+		alphaValues [i*4 + 2] = (uint8_t)(((alphaRow >> 8) & 0xF ) * 17);
+		alphaValues [i*4 + 3] = (uint8_t)(((alphaRow >> 12) & 0xF) * 17);
+
+		blockStorage += 2;
+	}
+
+	/* blockStorage now points past the 8 alpha bytes, at the colour data */
+	DecompressBlockDXT1Internal (blockStorage,
+		image + x + (y * width), width, transparent0, simpleAlpha, complexAlpha, alphaValues);
+}
+
+// DXT1 / DXT5 texture compression
+// Implemented by the single-file stb_dxt library.
+// See https://github.com/nothings/stb
+// for more details and other libraries.
+
+#define STB_DXT_IMPLEMENTATION
+#include "stb_dxt_104.h"
--- a/togles/linuxwin/decompress.h
+++ b/togles/linuxwin/decompress.h
@ -0,0 +1,32 @
				@@ -0,0 +1,32 @@
+#ifndef _GL4ES_DECOMPRESS_H_
+#define _GL4ES_DECOMPRESS_H_
+
+#include <stdint.h> // uint8_t / uint32_t are used by the prototypes below
+
+// Software decoders; each call decompresses one DXT block into 'image'.
+//
+//   x, y          coordinates of the first pixel of the block
+//   width         width of the texture being decompressed
+//   blockStorage  pointer to the compressed block
+//   image         destination image, one uint32_t per pixel
+//
+// NOTE(review): transparent0, simpleAlpha and complexAlpha are undocumented;
+// simpleAlpha/complexAlpha look like output flags set to 1 when a fully /
+// partially transparent pixel is produced - confirm in decompress.c.
+
+void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image);
+
+void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image);
+
+void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
+	const uint8_t* blockStorage,
+	int transparent0, int* simpleAlpha, int *complexAlpha,
+	uint32_t* image);
+
+#endif // _GL4ES_DECOMPRESS_H_
--- a/togles/linuxwin/decompress.o
+++ b/togles/linuxwin/decompress.o
--- a/togles/linuxwin/dx9asmtogl2.cpp
+++ b/togles/linuxwin/dx9asmtogl2.cpp
@ -69,13 +69,13 @@ static char g_szShadow2D[] =
				@@ -69,13 +69,13 @@ static char g_szShadow2D[] =
 	"vec2 p2 = suv.xy+vec2(0.0,invSize);\n"
 	"vec2 p3 = suv.xy+vec2(invSize,0.0);\n"
 	"vec2 p4 = suv.xy+vec2(invSize);\n"
-	"float d = texture2D(u_depthTex,p1).r;\n"
+	"float d = texture(u_depthTex,p1).r;\n"
 	"float r = float(d>suv.z);\n"
-	"d = texture2D(u_depthTex,p2).r;\n"
+	"d = texture(u_depthTex,p2).r;\n"
 	"float r2 = float(d>suv.z);\n"
-	"d = texture2D(u_depthTex,p3).r;\n"
+	"d = texture(u_depthTex,p3).r;\n"
 	"float r3 = float(d>suv.z);\n"
-	"d = texture2D(u_depthTex,p4).r;\n"
+	"d = texture(u_depthTex,p4).r;\n"
 	"float r4 = float(d>suv.z);\n"
 	"p1*=size;\n"
 	"float a = p1.y-floor(p1.y);\n"
@ -955,7 +955,7 @@ void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageInd
				@@ -955,7 +955,7 @@ void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageInd
 //			if ( fSemanticFlags & SEMANTIC_OUTPUT )
 //				V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_BackColor" : "gl_FrontColor" );
 //			else
-			V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_SecondaryColor" : "_gl_Color" );
+			V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
 			break;
 		case D3DDECLUSAGE_FOG:
 			TranslationError();
@ -1220,7 +1220,7 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
				@@ -1220,7 +1220,7 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
 				}
 				else
 				{
-					V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_Color" : "_gl_SecondaryColor" );
+					V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_FrontColor" : "_gl_FrontSecondaryColor" );
 				}
 				strcat_s( pRegisterName, nBufLen, buff );
 			}
@ -1456,7 +1456,6 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
				@@ -1456,7 +1456,6 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
 			m_dwConstIntUsageMask |= 0x00000001 << dwRegNum;		// Keep track of the use of this integer constant
 			break;
 		case D3DSPR_COLOROUT:
-			// TODO(nillerusr): go fck urself
 			if( dwRegNum+1 > m_iFragDataCount )
 				m_iFragDataCount = dwRegNum+1;

@ -2546,7 +2545,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
				@@ -2546,7 +2545,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
 			V_snprintf( szExtra, sizeof( szExtra ), ".%c", GetSwizzleComponent( pSrc0Reg, 3 ) );
 			V_strncat( szLOD, szExtra, sizeof( szLOD ) );

-			PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "texture2DLod", pSrc1Reg, sCoordVar.String(), szLOD );
+			PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "textureLod", pSrc1Reg, sCoordVar.String(), szLOD );
 		}
 		else if ( bIsShadowSampler )
 		{
@ -2564,12 +2563,12 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
				@@ -2564,12 +2563,12 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
 			// We use the vec4 variant of texture2DProj() intentionally here, since it lines up well with Direct3D.

 			CUtlString s4DProjCoords = EnsureNumSwizzleComponents( pSrc0Reg, 4 ); // Ensure vec4 variant
-			PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2DProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
+			PrintToBufWithIndents( *m_pBufALUCode, "%s = textureProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
 		}
-		else				
+		else
 		{
 			CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 );
-			PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
+			PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
 		}
 	}
 	else if ( nSamplerType == SAMPLER_TYPE_3D )
@ -2580,7 +2579,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
				@@ -2580,7 +2579,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
 		}

 		CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
-		PrintToBufWithIndents( *m_pBufALUCode, "%s = texture3D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
+		PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
 	}
 	else if ( nSamplerType == SAMPLER_TYPE_CUBE )
 	{
@ -2590,7 +2589,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
				@@ -2590,7 +2589,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
 		}

 		CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
-		PrintToBufWithIndents( *m_pBufALUCode, "%s = textureCube( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
+		PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
 	}
 	else
 	{
@ -3049,7 +3048,7 @@ void D3DToGL::WriteGLSLInputVariableAssignments()
				@@ -3049,7 +3048,7 @@ void D3DToGL::WriteGLSLInputVariableAssignments()

 		if ( dwUsage == D3DDECLUSAGE_COLOR )
 		{
-			PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_SecondaryColor" : "_gl_Color" );
+			PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
 		}
 		else if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
 		{
@ -3182,6 +3181,10 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
				@@ -3182,6 +3181,10 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
 	m_bPutHexCodesAfterLines = (options & D3DToGL_PutHexCommentsAfterLines) != 0;
 	m_bGeneratingDebugText = (options & D3DToGL_GeneratingDebugText) != 0;
 	m_bGenerateSRGBWriteSuffix = (options & D3DToGL_OptionSRGBWriteSuffix) != 0;
+//	m_bGenerateSRGBWriteSuffix = true;
+
+	if( debugLabel && ( V_strstr( debugLabel ,"vertexlit_and_unlit_generic_bump_ps") ))
+		m_bGenerateSRGBWriteSuffix = true;

 	m_NumIndentTabs = 1; // start code indented one tab
 	m_nLoopDepth = 0;
@ -3675,6 +3678,7 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
				@@ -3675,6 +3678,7 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
 	}

 	// Control bit for sRGB Write suffix
+
 	if ( m_bGenerateSRGBWriteSuffix )
 	{
 		// R500 Hookup
@ -3889,33 +3893,38 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
				@@ -3889,33 +3893,38 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
 	{
 		StrcatToHeaderCode( g_szShadow2D );
 		StrcatToHeaderCode( g_szShadow2DProj );
-		
 	}
 	else if( FindSubcode("shadow2D") )
-		StrcatToHeaderCode( g_szShadow2D );		
-
-	if( FindSubcode("_gl_Color") )
-		StrcatToHeaderCode( "vec4 _gl_Color;\n" );
+		StrcatToHeaderCode( g_szShadow2D );
+	
+	if( FindSubcode("_gl_FrontColor") && !m_bFrontColor )
+		StrcatToHeaderCode( "in vec4 _gl_FrontColor;\n" );

-	if( FindSubcode("_gl_SecondaryColor") )
-		StrcatToHeaderCode( "vec4 _gl_SecondaryColor;\n" );
+	if( FindSubcode("_gl_FrontSecondaryColor") && !m_bFrontSecondaryColor )
+		StrcatToHeaderCode( "in vec4 _gl_FrontSecondaryColor;\n" );
 	
+	if( m_iFragDataCount && bVertexShader )
+		StrcatToHeaderCode( "\nuniform float alpha_ref;\n" );	
+
 	StrcatToHeaderCode( "\nvoid main()\n{\n" );
 	if ( m_bUsedAtomicTempVar )
 	{
 		PrintToBufWithIndents( *m_pBufHeaderCode, "vec4 %s;\n\n", g_pAtomicTempVarName );
 	}
-	
+
 	// sRGB Write suffix
 	if ( m_bGenerateSRGBWriteSuffix )
 	{
-		StrcatToALUCode( "vec3 sRGBFragData;\n" );
-		StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
-		StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.454545f, 0.454545f, 0.454545f );\n" );
-		StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
-		StrcatToALUCode( "gl_FragData[0].xyz = mix( gl_FragData[0].xyz, sRGBFragData, flSRGBWrite );\n" );
+	//	StrcatToALUCode( "vec3 sRGBFragData;\n" );
+	//	StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
+	//	StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.754545f, 0.754545f, 0.754545f );\n" );
+	//	StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
+		StrcatToALUCode( "gl_FragData[0].xyz = pow(gl_FragData[0].xyz, vec3(1.0/2.2));\n" );
 	}
 	
+	if( m_iFragDataCount && bVertexShader )
+		StrcatToALUCode( "if( gl_FragData[0].a < alpha_ref ) { discard; };\n" );
+
 	strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), "}\n" );
 		
 	// Put all of the strings together for final program ( pHeaderCode + pAttribCode + pParamCode + pALUCode )
--- a/togles/linuxwin/dxabstract.cpp
+++ b/togles/linuxwin/dxabstract.cpp
@ -1201,8 +1201,8 @@ static void FillD3DCaps9( const GLMRendererInfoFields &glmRendererInfo, D3DCAPS9
				@@ -1201,8 +1201,8 @@ static void FillD3DCaps9( const GLMRendererInfoFields &glmRendererInfo, D3DCAPS9
 	pCaps->MaxPixelShader30InstructionSlots		=	0;

 #if DX_TO_GL_ABSTRACTION
-	pCaps->FakeSRGBWrite			=	!glmRendererInfo.m_hasGammaWrites;
-	pCaps->CanDoSRGBReadFromRTs		=	!glmRendererInfo.m_cantAttachSRGB;
+	pCaps->FakeSRGBWrite			=	true;//!glmRendererInfo.m_hasGammaWrites;
+	pCaps->CanDoSRGBReadFromRTs		=	true;//!glmRendererInfo.m_cantAttachSRGB;
 	pCaps->MixedSizeTargets			=	glmRendererInfo.m_hasMixedAttachmentSizes;
 #endif
 }
--- a/togles/linuxwin/glentrypoints.cpp
+++ b/togles/linuxwin/glentrypoints.cpp
@ -389,6 +389,14 @@ COpenGLEntryPoints::COpenGLEntryPoints()
				@@ -389,6 +389,14 @@ COpenGLEntryPoints::COpenGLEntryPoints()
 		m_bHave_GL_EXT_framebuffer_blit = true;
 		m_bHave_GL_EXT_framebuffer_multisample = true;
 		m_bHave_GL_ARB_occlusion_query = true;
+		m_bHave_GL_ARB_map_buffer_range = true;
+		m_bHave_GL_ARB_vertex_buffer_object = true;
+		m_bHave_GL_ARB_vertex_array_bgra = true;
+		m_bHave_GL_EXT_vertex_array_bgra = true;
+		m_bHave_GL_ARB_debug_output = true;
+		m_bHave_GL_EXT_direct_state_access = false;
+		m_bHave_GL_EXT_framebuffer_multisample_blit_scaled = true;
+		m_bHave_GL_EXT_texture_sRGB_decode = true;

 		glBindFramebuffer.Force(glBindFramebuffer.Pointer());
 		glBindRenderbuffer.Force(glBindRenderbuffer.Pointer());
@ -456,12 +464,6 @@ COpenGLEntryPoints::COpenGLEntryPoints()
				@@ -456,12 +464,6 @@ COpenGLEntryPoints::COpenGLEntryPoints()
 	printf( "GL_EXT_buffer_storage: %s\n", m_bHave_GL_EXT_buffer_storage ? "AVAILABLE" : "NOT AVAILABLE" );
 	printf( "GL_EXT_texture_sRGB_decode: %s\n", m_bHave_GL_EXT_texture_sRGB_decode ? "AVAILABLE" : "NOT AVAILABLE" );

-	bool bGLCanDecodeS3TCTextures = m_bHave_GL_EXT_texture_compression_s3tc || ( m_bHave_GL_EXT_texture_compression_dxt1 && m_bHave_GL_ANGLE_texture_compression_dxt3 && m_bHave_GL_ANGLE_texture_compression_dxt5 );
-	if ( !bGLCanDecodeS3TCTextures )
-	{
-		Error( "This application requires either the GL_EXT_texture_compression_s3tc, or the GL_EXT_texture_compression_dxt1 + GL_ANGLE_texture_compression_dxt3 + GL_ANGLE_texture_compression_dxt5 OpenGL extensions. Please install S3TC texture support.\n" );
-	}
-
 #ifdef OSX
 	if ( CommandLine()->FindParm( "-glmnosrgbdecode" ) )
 	{
--- a/togles/linuxwin/glmgr.cpp
+++ b/togles/linuxwin/glmgr.cpp
@ -92,11 +92,11 @@ char g_preloadTexVertexProgramText[] = // Гроб гроб кладбище п
				@@ -92,11 +92,11 @@ char g_preloadTexVertexProgramText[] = // Гроб гроб кладбище п
 	"precision mediump float;\n"
 	"out vec4 otex;\n"
 	"void main()  \n"
-	"{  \n"
+	"{\n"
 	"vec4 pos = vec4( 0.1, 0.1, 0.1, 0.1 );\n"
 	"vec4 tex = vec4( 0.0, 0.0, 0.0, 0.0 );\n"
-	"  \n"
-	"gl_Position = pos;  \n"
+	"\n"
+	"gl_Position = pos;\n"
 	"otex = tex;  \n"
 	"}  \n"
 };
@ -116,7 +116,7 @@ char g_preload2DTexFragmentProgramText[] =
				@@ -116,7 +116,7 @@ char g_preload2DTexFragmentProgramText[] =
 	"void main()  \n"
 	"{  \n"
 	"vec4 r0;  \n"
-	"r0 = texture2D( sampler15, otex.xy );  \n"
+	"r0 = texture( sampler15, otex.xy );  \n"
 	"_gl_FragColor = r0;	//discard;  \n"
 	"}  \n"
 };
@ -137,7 +137,7 @@ char g_preload3DTexFragmentProgramText[] =
				@@ -137,7 +137,7 @@ char g_preload3DTexFragmentProgramText[] =
 	"void main()  \n"
 	"{  \n"
 	"vec4 r0;  \n"
-	"r0 = texture3D( sampler15, otex.xyz );  \n"
+	"r0 = texture( sampler15, otex.xyz );  \n"
 	"_gl_FragColor = vec4(0,0,0,0);	//discard;  \n"
 	"}  \n"
 };
@ -157,7 +157,7 @@ char g_preloadCubeTexFragmentProgramText[] =
				@@ -157,7 +157,7 @@ char g_preloadCubeTexFragmentProgramText[] =
 	"void main()  \n"
 	"{  \n"
 	"vec4 r0;  \n"
-	"r0 = textureCube( sampler15, otex.xyz );  \n"
+	"r0 = texture( sampler15, otex.xyz );  \n"
 	"_gl_FragColor = r0;	//discard;  \n"
 	"}  \n"
 };
@ -449,6 +449,20 @@ GLMgr::~GLMgr()
				@@ -449,6 +449,20 @@ GLMgr::~GLMgr()
 {
 }

+// Texture upload helpers implemented outside this file. CleanupTex() calls
+// them in place of gGL->glCompressedTexImage2D / gGL->glTexImage2D.
+// NOTE(review): presumably they cover formats the GLES driver cannot take
+// directly (e.g. DXT) - confirm against their definitions.
+extern void CompressedTexImage2D( GLenum target, GLint level, GLenum internalformat,
+								  GLsizei width, GLsizei height, GLint border,
+								  GLsizei imageSize, const GLvoid *data );
+
+extern void TexImage2D( GLenum target, GLint level,
+						GLint internalformat,
+						GLsizei width, GLsizei height, GLint border,
+						GLenum format, GLenum type,
+						const void *data );
+
 //===============================================================================

 GLMContext *GLMgr::NewContext( IDirect3DDevice9 *pDevice, GLMDisplayParams *params )
@ -953,14 +967,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
				@@ -953,14 +967,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
 	bool srcGamma = srcTex && ((srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);
 	bool dstGamma = dstTex && ((dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);

-	bool doPushPop = (srcGamma != dstGamma) && gl_radar7954721_workaround_mixed.GetInt() && m_caps.m_nv;		// workaround for cross gamma blit problems on NV
-		// ^^ need to re-check this on some post-10.6.3 build on NV to see if it was fixed
-
-	if (doPushPop)
-	{
-		gGL->glPushAttrib( 0 );
-	}
-	
 	//----------------------------------------------------------------- figure out the plan
 	
 	bool blitTwoStep = false;		// think positive
@ -1052,8 +1058,8 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
				@@ -1052,8 +1058,8 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
 		glAttachTex2DtoFBO	( GL_DRAW_FRAMEBUFFER, formatClass, srcTex->m_texName, 0 );

 		// set read and draw buffers appropriately		
-		gGL->glReadBuffer		( glAttachFromClass[formatClass] );
-		gGL->glDrawBuffer		( glAttachFromClass[formatClass] );
+		gGL->glReadBuffer( glAttachFromClass[formatClass] );
+		gGL->glDrawBuffers( 1, &glAttachFromClass[formatClass] );
 		
 		// blit#1 - to resolve to scratch
 		// implicitly means no scaling, thus will be done with NEAREST sampling
@ -1117,10 +1123,12 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
				@@ -1117,10 +1123,12 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
 	if (blitToBack)
 	{
 		// backbuffer is special - FBO0 is left out (either scrubbed already, or not used)
-		
-		BindFBOToCtx		( NULL, GL_DRAW_FRAMEBUFFER );
-		gGL->glDrawBuffer		( GL_BACK );
-		
+
+		BindFBOToCtx( NULL, GL_DRAW_FRAMEBUFFER );
+
+		GLenum bufs = GL_BACK;
+		gGL->glDrawBuffers( 1, &bufs );
+
 		yflip = true;
 	}
 	else
@ -1201,12 +1209,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
				@@ -1201,12 +1209,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
 	//	restore GLM drawing FBO
 	BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
 	
-	if (doPushPop)
-	{
-		gGL->glPopAttrib( );
-	}
-	
-
 	//----------------------------------------------------------------- restore old scissor state
 	if (oldsciss.enable)
 	{
@ -1258,39 +1260,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1258,39 +1260,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
 		GLMPRINTF(( "-D-       dst tex layout is %s", dstTex->m_layout->m_layoutSummary ));
 	}

-	int pushed = 0;
-	uint pushmask = gl_radar7954721_workaround_maskval.GetInt();
-		//GL_COLOR_BUFFER_BIT
-		//| GL_CURRENT_BIT
-		//| GL_ENABLE_BIT
-		//| GL_FOG_BIT
-		//| GL_PIXEL_MODE_BIT
-		//| GL_SCISSOR_BIT
-		//| GL_STENCIL_BUFFER_BIT
-		//| GL_TEXTURE_BIT
-		//GL_VIEWPORT_BIT
-		//;
-	
-	if (gl_radar7954721_workaround_all.GetInt()!=0)
-	{
-		gGL->glPushAttrib( pushmask );
-		pushed++;
-	}
-	else
-	{
-		bool srcGamma = (srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
-		bool dstGamma = (dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
-
-		if (srcGamma != dstGamma)
-		{
-			if (gl_radar7954721_workaround_mixed.GetInt())
-			{
-				gGL->glPushAttrib( pushmask );
-				pushed++;
-			}
-		}
-	}
-
 	if (useBlitFB)
 	{
 		// state we need to save
@ -1354,8 +1323,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1354,8 +1323,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
 		attparams.m_zslice	=	0;
 		m_blitReadFBO->TexAttach( &attparams, attachIndex, GL_READ_FRAMEBUFFER );

-		gGL->glReadBuffer( attachIndexGL );
-		
+		gGL->glReadBuffer( attachIndexGL );	// read-FBO setup: pick the read buffer; glDrawBuffers would act on the bound draw FBO instead

 		//	set the write fb and buffer, and attach write tex
 		BindFBOToCtx( m_blitDrawFBO, GL_DRAW_FRAMEBUFFER );
@ -1366,7 +1334,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1366,7 +1334,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
 		attparams.m_zslice	=	0;
 		m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );

-		gGL->glDrawBuffer( attachIndexGL );
+		gGL->glDrawBuffers( 1, &attachIndexGL );

 		//	do the blit
 		gGL->glBlitFramebuffer(	srcRect->xmin, srcRect->ymin, srcRect->xmax, srcRect->ymax,
@ -1425,8 +1393,8 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1425,8 +1393,8 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
 		attparams.m_zslice	=	0;
 		m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );

-		gGL->glDrawBuffer( attachIndexGL );
-		
+		gGL->glDrawBuffers( 1, &attachIndexGL );
+
 		// attempt to just set states directly the way we want them, then use the latched states to repair them afterward.
 		NullProgram();	// out of program mode
 		
@ -1456,25 +1424,24 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1456,25 +1424,24 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr

 		// immediate mode is fine

+#if 0 // Does it needed?
 		const float topv = 1.0;
 		const float botv = 0.0;

 		const float verts[] = {-1.f, -1.f, 1.f, -1.f, 1.f, 1.f, -1.f, 1.f};
-		const float verts_tex[] = {0.f, botv, 1.f, botv, 1.f, topv, 0.f, topv};
-
-		gGL->glEnableClientState(GL_VERTEX_ARRAY);
-		gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
-
+		const float verts_tex[] = {0.f, botv, 1.f, botv, 1.f, topv, 0.f, topv};		
+		
 		gGL->glVertexPointer(2, GL_FLOAT, 0, verts);
 		gGL->glTexCoordPointer(2, GL_FLOAT, 0, verts_tex);

-		glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+		gGL->glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

 		gGL->glDisableClientState(GL_VERTEX_ARRAY);
 		gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
-
+#endif
+		
 		gGL->glBindTexture( GL_TEXTURE_2D, 0 );
-
+		
 		gGL->glDisable(GL_TEXTURE_2D);

 		BindTexToTMU( m_samplers[0].m_pBoundTex, 0 );
@ -1509,12 +1476,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
				@@ -1509,12 +1476,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
 		BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
 	}
 	
-	while(pushed)
-	{
-		gGL->glPopAttrib();
-		pushed--;
-	}
-
 	RestoreSavedColorMask();
 }

@ -1632,7 +1593,7 @@ void GLMContext::ResolveTex( CGLMTex *tex, bool forceDirty )
				@@ -1632,7 +1593,7 @@ void GLMContext::ResolveTex( CGLMTex *tex, bool forceDirty )
 			gGL->glFramebufferTexture2D( GL_DRAW_FRAMEBUFFER, attachIndexGL, GL_TEXTURE_2D, tex->m_texName, 0 );
 		}

-		gGL->glDrawBuffer( attachIndexGL );
+		gGL->glDrawBuffers( 1, &attachIndexGL );

 		//-----------------------------------------------------------------------------------

@ -2369,10 +2330,6 @@ void GLMContext::Present( CGLMTex *tex )
				@@ -2369,10 +2330,6 @@ void GLMContext::Present( CGLMTex *tex )
 	tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_EXCLAMATION, "VS Uniform Calls: %u, VS Uniforms: %u|VS Uniform Bone Calls: %u, VS Bone Uniforms: %u|PS Uniform Calls: %u, PS Uniforms: %u", m_nTotalVSUniformCalls, m_nTotalVSUniformsSet, m_nTotalVSUniformBoneCalls, m_nTotalVSUniformsBoneSet, m_nTotalPSUniformCalls, m_nTotalPSUniformsSet );
 	m_nTotalVSUniformCalls = 0, m_nTotalVSUniformBoneCalls = 0, m_nTotalVSUniformsSet = 0, m_nTotalVSUniformsBoneSet = 0, m_nTotalPSUniformCalls = 0, m_nTotalPSUniformsSet = 0;
 #endif
-
-#ifndef OSX
-	GLMGPUTimestampManagerTick();
-#endif
 }

 //===============================================================================
@ -2845,7 +2802,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
				@@ -2845,7 +2802,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
 		
 	if ( !pTex )
 	{
-		gGL->glBindTexture( GL_TEXTURE_1D, 0 );
 		gGL->glBindTexture( GL_TEXTURE_2D, 0 );
 		gGL->glBindTexture( GL_TEXTURE_3D, 0 );
 		gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
@ -2853,7 +2809,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
				@@ -2853,7 +2809,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
 	else
 	{
 		const GLenum texGLTarget = pTex->m_texGLTarget;
-		if ( texGLTarget != GL_TEXTURE_1D ) gGL->glBindTexture( GL_TEXTURE_1D, 0 );
 		if ( texGLTarget != GL_TEXTURE_2D ) gGL->glBindTexture( GL_TEXTURE_2D, 0 );
 		if ( texGLTarget != GL_TEXTURE_3D ) gGL->glBindTexture( GL_TEXTURE_3D, 0 );
 		if ( texGLTarget != GL_TEXTURE_CUBE_MAP ) gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
@ -3006,11 +2961,11 @@ void GLMContext::CleanupTex( GLenum texBind, GLMTexLayout* pLayout, GLuint tex )
				@@ -3006,11 +2961,11 @@ void GLMContext::CleanupTex( GLenum texBind, GLMTexLayout* pLayout, GLuint tex )
 			const int dataSize = ( chunks * chunks ) * pLayout->m_format->m_bytesPerSquareChunk;
 			Assert( dataSize <= ( sizeof( uint32) * ARRAYSIZE( g_garbageTextureBits ) ) );

-			gGL->glCompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
+			CompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
 		}
 		else
 		{
-			gGL->glTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
+			TexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
 		}
 	}

@ -4412,8 +4367,7 @@ void GLMContext::DebugHook( GLMDebugHookInfo *info )
				@@ -4412,8 +4367,7 @@ void GLMContext::DebugHook( GLMDebugHookInfo *info )
 			break;
 			
 			case 2:
-				short fakecolor[4] = { 0, 0, 0, 0 };
-				gGL->glColor4sv( fakecolor );	// break to OGLP
+				// The OGLP breakpoint hook (glColor4sv) was removed: that call is not available in OpenGL ES.
 			break;
 		}
 		// re-flush all GLM states so you can fiddle with them in the debugger. then run the batch again and spin..
@ -4766,36 +4720,11 @@ void GLMContext::DrawDebugText( float x, float y, float z, float drawCharWidth,
				@@ -4766,36 +4720,11 @@ void GLMContext::DrawDebugText( float x, float y, float z, float drawCharWidth,
 	
 	gGL->glEnable(GL_TEXTURE_2D);

-	if (0)
-	{
-		gGL->glEnableClientState(GL_VERTEX_ARRAY);
-
-		gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
-		
-		gGL->glVertexPointer( 3, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].x );
-		
-		gGL->glClientActiveTexture(GL_TEXTURE0);
-
-		gGL->glTexCoordPointer( 2, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].u );
-	}
-	else
-	{
-		SetVertexAttributes( &vertSetup );
-	}
+	SetVertexAttributes( &vertSetup );

 	gGL->glDrawArrays( GL_QUADS, 0, stringlen * 4 );

-	// disable all the input streams
-	if (0)
-	{
-		gGL->glDisableClientState(GL_VERTEX_ARRAY);
-
-		gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
-	}
-	else
-	{
-		SetVertexAttributes( NULL );
-	}
+	SetVertexAttributes( NULL );

 	gGL->glDisable(GL_TEXTURE_2D);

@ -5288,7 +5217,7 @@ void GLMTester::StdSetup( void )
				@@ -5288,7 +5217,7 @@ void GLMTester::StdSetup( void )
 	gGL->glScissor( 0,0,  (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
 	CheckGLError("stdsetup scissor");

-	gGL->glOrtho( -1,1, -1,1, -1,1 );
+	//gGL->glOrtho( -1,1, -1,1, -1,1 );
 	CheckGLError("stdsetup ortho");
 	
 	// activate debug font
@ -5331,7 +5260,7 @@ void GLMTester::Clear( void )
				@@ -5331,7 +5260,7 @@ void GLMTester::Clear( void )
 	
 	gGL->glViewport(0, 0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
 	gGL->glScissor( 0,0,  (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
-	gGL->glOrtho( -1,1, -1,1, -1,1 );
+	//gGL->glOrtho( -1,1, -1,1, -1,1 );
 	CheckGLError("clearing viewport");

 	// clear to black
@ -6000,7 +5929,9 @@ void GLMTester::Test3( void )
				@@ -6000,7 +5929,9 @@ void GLMTester::Test3( void )
 void GLMTriggerDebuggerBreak()
 {
 	// we call an obscure GL function which we know has been breakpointed in the OGLP function list
-	static signed short nada[] = { -1,-1,-1,-1 };
-	gGL->glColor4sv( nada );
+
+// Disabled: glColor4sv is not available in OpenGL ES, so the breakpoint trick described above cannot be used.
+//	static signed short nada[] = { -1,-1,-1,-1 };
+//	gGL->glColor4sv( nada );
 }
 #endif
--- a/togles/linuxwin/glmgr_flush.inl
+++ b/togles/linuxwin/glmgr_flush.inl
@ -1,5 +1,7 @@
				@@ -1,5 +1,7 @@
 // BE VERY VERY CAREFUL what you do in these function. They are extremely hot, and calling the wrong GL API's in here will crush perf. (especially on NVidia threaded drivers).

+#include "togles/linuxwin/glmgr.h"
+
 FORCEINLINE uint32 bitmix32(uint32 a)
 {
 	a -= (a<<6);
@ -433,7 +435,6 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
				@@ -433,7 +435,6 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
 		}
 	}

-
 	// see if VS uses i0, b0, b1, b2, b3.
 	// use a glUniform1i to set any one of these if active.  skip all of them if no dirties reported.
 	// my kingdom for the UBO extension!
@ -478,6 +479,15 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
				@@ -478,6 +479,15 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
 		}
 	}

+
+	// NOTE(review): assumes m_locAlphaRef is a glGetUniformLocation result (-1 = absent, 0 is a valid location) - confirm.
+	if( m_pBoundPair->m_locAlphaRef >= 0 )
+	{
+		// With alpha test off, upload 0.0 so the generated "a < alpha_ref" discard is effectively disabled.
+		const float flAlphaRef = m_AlphaTestEnable.GetData().enable ? m_AlphaTestFunc.GetData().ref : 0.0f;
+		gGL->glUniform1f( m_pBoundPair->m_locAlphaRef, flAlphaRef );
+	}
+
 	Assert( ( m_pDevice->m_streams[0].m_vtxBuffer && ( m_pDevice->m_streams[0].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[0] ) ) || ( ( !m_pDevice->m_streams[0].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[0] == m_pDevice->m_pDummy_vtx_buffer ) ) );
 	Assert( ( m_pDevice->m_streams[1].m_vtxBuffer && ( m_pDevice->m_streams[1].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[1] ) ) || ( ( !m_pDevice->m_streams[1].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[1] == m_pDevice->m_pDummy_vtx_buffer ) ) );
 	Assert( ( m_pDevice->m_streams[2].m_vtxBuffer && ( m_pDevice->m_streams[2].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[2] ) ) || ( ( !m_pDevice->m_streams[2].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[2] == m_pDevice->m_pDummy_vtx_buffer ) ) );
--- a/togles/linuxwin/glmgrbasics.cpp
+++ b/togles/linuxwin/glmgrbasics.cpp
@ -3116,624 +3116,13 @@ void	GLMSetIndent( int indent )
				@@ -3116,624 +3116,13 @@ void	GLMSetIndent( int indent )
 char sg_pPIXName[128];


-#ifndef OSX
-ConVar gl_telemetry_gpu_pipeline_flushing( "gl_telemetry_gpu_pipeline_flushing", "0" );
-
-class CGPUTimestampManager
-{
-	CGPUTimestampManager( const CGPUTimestampManager & );
-	CGPUTimestampManager& operator= ( CGPUTimestampManager & );
-		
-public:
-	CGPUTimestampManager() :
-		m_bInitialized( false ),
-		m_nCurFrame( 0 ),
-		m_flGPUToCPUOffsetInS( 0 ),
-		m_flGPUToS( 0 ),
-		m_flRdtscToS( 0 ),
-		m_flSToRdtsc( 0 ),
-		m_nFreeQueryPoolSize( 0 ),
-		m_nOutstandingQueriesHead( 0 ),
-		m_nOutstandingQueriesTail( 0 ),
-		m_nNumOutstandingQueryZones( 0 ),
-		m_nQueryZoneStackSize( 0 ),
-		m_nNumFinishedZones( 0 ),
-		m_nTotalSpanWorkCount( 0 )
-	{
-		memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
-		memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
-		memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
-		memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
-	}
-
-	~CGPUTimestampManager()
-	{
-		Deinit();
-	}
-
-	inline bool IsInitialized() const { return m_bInitialized; }
-	inline uint GetCurFrame() const { return m_nCurFrame; }
-		
-	void Init()
-	{
-		Deinit();
-
-		memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
-		memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
-		memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
-		memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
-
-		InitRdtsc();
-
-		m_nCurFrame = 0;
-				
-		gGL->glGenQueries( cFreeQueryPoolSize, m_FreeQueryPool );
-		m_nFreeQueryPoolSize = cFreeQueryPoolSize;
-
-		m_nOutstandingQueriesHead = 0;
-		m_nOutstandingQueriesTail = 0;
-		m_nNumOutstandingQueryZones = 0;
-
-		m_nQueryZoneStackSize = 0;
-		m_nNumFinishedZones = 0;
-										
-		m_bInitialized = true;
-		
-		m_nTotalSpanWorkCount = 0;
-		
-		Calibrate();
-	}
-
-	void Calibrate()
-	{
-		if ( !m_bInitialized )
-			return;
-
-		PipelineFlush();
-
-		m_flGPUToS = 1.0 / 1000000000.0;
-
-		//0.99997541250006794; 
-		//0.99997530000006662;
-		// Correction factor to prevent excessive drift, only calibrated on my system, we need a better way of computing/recording this.
-		double flGPURatio = 0.99997425000007034000;
-		
-		const uint NT = 1;
-		for ( uint nTrial = 0; nTrial < NT; nTrial++ )
-		{
-			const uint R = 16;
-			double flClockOffsetsInS[R];
-			for ( uint q = 0; q < R; q++)
-			{
-				uint64 nBestTotalCPUTimestamp = (uint64)-1;
-				uint64 nBestCPUTimestamp = 0;
-				GLuint64 nBestGPUTimestamp = 0;
-						
-				for ( uint i = 0; i < 10; i++)
-				{
-					const uint64 nStartCPUTimestamp = Plat_Rdtsc();
-				
-					gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);				
-					PipelineFlush();
-								
-					const uint64 nEndCPUTimestamp = Plat_Rdtsc();
-				
-					GLint nAvailable;
-					do 
-					{ 
-						gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable ); 
-					} while ( !nAvailable );
-
-					GLuint64 nGPUTimestamp;
-					gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTimestamp );
-
-					const uint64 nTotalCPUTimestamp = nEndCPUTimestamp - nStartCPUTimestamp;
-					if ( nTotalCPUTimestamp < nBestTotalCPUTimestamp )
-					{
-						nBestTotalCPUTimestamp = nTotalCPUTimestamp;
-						nBestCPUTimestamp = nStartCPUTimestamp;
-						nBestGPUTimestamp = nGPUTimestamp;
-					}
-				}
-
-				double flCPUTimestampTimeInSeconds = nBestCPUTimestamp * m_flRdtscToS;
-				double flGPUTimestampTimeInSeconds = nBestGPUTimestamp * m_flGPUToS * flGPURatio;
-
-				flClockOffsetsInS[q] = flCPUTimestampTimeInSeconds - flGPUTimestampTimeInSeconds;
-
-				ThreadSleep(100);
-
-				DbgPrintf("%f %f %1.20f\n", flCPUTimestampTimeInSeconds, flGPUTimestampTimeInSeconds, flClockOffsetsInS[q] );
-			}
-						
-			m_flGPUToCPUOffsetInS = 0.0f;
-			for ( uint i = 0; i < R; i++ )
-				m_flGPUToCPUOffsetInS += flClockOffsetsInS[i];
-			m_flGPUToCPUOffsetInS /= R;
-
-			if ( NT > 1 )
-			{
-				DbgPrintf("------- Ratio: %2.20f\n", flGPURatio );
-
-				double flDelta = flClockOffsetsInS[0] - flClockOffsetsInS[R - 1];
-
-				DbgPrintf("------- %1.20f\n", flDelta );
-
-#if 1
-				if ( flDelta < 0.0000005f )
-				{
-					flGPURatio += .000000125f;
-				}
-				else if ( flDelta > 0.0000005f )
-				{
-					flGPURatio -= .000000125f;
-				}
-#else				
-				if ( flDelta < 0.0000005f )
-				{
-					flGPURatio += .0000000125f;
-				}
-				else if ( flDelta > 0.0000005f )
-				{
-					flGPURatio -= .0000000125f;
-				}
-#endif
-			}
-		}
-
-		m_flGPUToS *= flGPURatio;
-
-#if 0
-		// dump drift over time to debugger output
-		double flLatency = 0;
-		for ( ; ; )
-		{
-			// test
-			const uint64 nStartCPUTime = Plat_Rdtsc();
-
-			gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);
-
-			PipelineFlush();
-
-			GLint nAvailable;
-			do 
-			{ 
-				gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable ); 
-			} while ( !nAvailable );
-
-			GLuint64 nGPUTime;
-			gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTime );
-
-			double flStartGPUTime = ( ( nGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS );
-
-			flLatency = flStartGPUTime - nStartCPUTime * m_flRdtscToS;
-			DbgPrintf("%f\n", flLatency );
-		}
-#endif
-	}
-
-	void Deinit()
-	{
-		if ( !m_bInitialized )
-			return;
-
-		if ( m_nFreeQueryPoolSize )
-		{
-			gGL->glDeleteQueries( m_nFreeQueryPoolSize, m_FreeQueryPool );
-		}
-		m_nFreeQueryPoolSize = 0;
-
-		for ( uint i = 0; i < m_nNumOutstandingQueryZones; i++ )
-		{
-			QueryZone_t &query = m_OutstandingQueryZones[ ( m_nOutstandingQueriesHead + i ) % cMaxQueryZones ];
-			if ( query.m_nBeginQuery )
-			{
-				gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
-			}
-			if ( query.m_nEndQuery )
-			{
-				gGL->glDeleteQueries( 1, &query.m_nEndQuery );
-			}
-		}
-		m_nOutstandingQueriesHead = 0;
-		m_nOutstandingQueriesTail = 0;
-		m_nNumOutstandingQueryZones = 0;
-
-		for ( uint i = 0; i < m_nQueryZoneStackSize; i++ )
-		{
-			QueryZone_t &query = m_QueryZoneStack[i];
-			if ( query.m_nBeginQuery )
-			{
-				gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
-			}
-			if ( query.m_nEndQuery )
-			{
-				gGL->glDeleteQueries( 1, &query.m_nEndQuery );
-			}
-		}
-		m_nQueryZoneStackSize = 0;
-						
-		m_flGPUToCPUOffsetInS = 0;
-		m_flGPUToS = 0;
-		m_flRdtscToS = 0;
-		m_flSToRdtsc = 0;
-
-		m_bInitialized = false;
-	}
-
-	// pName is assumed to be a telemetry dynamic string!
-	void BeginZone( const char *pName )
-	{
-		if ( !m_bInitialized ) 
-			return;
-		
-		if ( m_nQueryZoneStackSize >= cMaxQueryZoneStackSize )
-		{
-			Panic( "Increase cMaxQueryZoneStackSize!" );
-		}
-
-		QueryZone_t &zone = m_QueryZoneStack[m_nQueryZoneStackSize];
-		
-		zone.m_pName = pName;
-
-		zone.m_nBeginQuery = AllocQueryHandle();
-		zone.m_nEndQuery = 0;
-		zone.m_nStackLevel = m_nQueryZoneStackSize;
-		
-		zone.m_nTotalGPUWorkCount = g_nTotalDrawsOrClears;
-#if GL_TELEMETRY_GPU_ZONES
-		zone.m_nTotalGPUWorkCount += g_TelemetryGPUStats.GetTotal();
-#endif
-
-		gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nBeginQuery, GL_TIMESTAMP );
-
-		m_nQueryZoneStackSize++;
-	}
-	
-	void EndZone()
-	{
-		if ( !m_bInitialized ) 
-			return;
-				
-		if ( ( !m_nQueryZoneStackSize ) || ( m_nNumOutstandingQueryZones == cMaxQueryZones ) )
-		{
-			Panic( "Query zone error!" );
-		}
-
-		m_nQueryZoneStackSize--;
-
-		uint nCurGPUWorkCount = g_nTotalDrawsOrClears;
-#if GL_TELEMETRY_GPU_ZONES
-		nCurGPUWorkCount += g_TelemetryGPUStats.GetTotal();
-#endif
-
-		uint nTotalDraws = nCurGPUWorkCount - m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount;
-
-		m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery = AllocQueryHandle();
-		gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery, GL_TIMESTAMP );
-		m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount = nTotalDraws;
-
-		m_OutstandingQueryZones[m_nOutstandingQueriesHead] = m_QueryZoneStack[m_nQueryZoneStackSize];
-		m_nOutstandingQueriesHead = ( m_nOutstandingQueriesHead + 1 ) % cMaxQueryZones;
-		m_nNumOutstandingQueryZones++;
-		
-		COMPILE_TIME_ASSERT( ( int )cMaxQueryZones > ( int )cMaxQueryZoneStackSize );
-		if ( m_nNumOutstandingQueryZones >= ( cMaxQueryZones - cMaxQueryZoneStackSize ) )
-		{
-			tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_NOTE | TMMF_SEVERITY_WARNING, "CGPUTimestampManager::EndZone: Too many outstanding query zones - forcing a pipeline flush! This is probably expensive." );
-
-			FlushOutstandingQueries( true );
-		}
-
-		if ( gl_telemetry_gpu_pipeline_flushing.GetBool() )
-		{
-			PipelineFlush();
-		}
-	}
-	
-	void Tick()
-	{
-		m_nCurFrame++;
-
-		if ( !m_bInitialized ) 
-			return;
-
-		if ( m_nQueryZoneStackSize > 0 )
-		{
-			Panic( "Zone stack is not empty!" );
-		}
-
-		FlushOutstandingQueries( false );
-
-		tmMessage( TELEMETRY_LEVEL2, 0, "Total PIX timespan GPU work count: %u", m_nTotalSpanWorkCount );
-		
-		m_nTotalSpanWorkCount = 0;
-	}
-
-	void FlushOutstandingQueries( bool bForce )
-	{
-		tmZone( TELEMETRY_LEVEL2, 0, "FlushOutstandingQueries: %u", m_nNumOutstandingQueryZones );
-
-		if ( bForce )
-		{
-			PipelineFlush();
-		}
-
-		while ( m_nNumOutstandingQueryZones )
-		{
-			QueryZone_t &zone = m_OutstandingQueryZones[m_nOutstandingQueriesTail];
-
-			GLint nEndAvailable = 0;
-			do 
-			{
-				gGL->glGetQueryObjectiv( zone.m_nEndQuery, GL_QUERY_RESULT_AVAILABLE, &nEndAvailable ); 
-
-			} while ( ( bForce ) && ( nEndAvailable == 0 ) );
-
-			if ( !nEndAvailable )
-			{
-				if ( bForce )
-				{
-					Panic( "Query results not available after a full pipeline flush!" );
-				}
-				break;
-			}
-
-			GLuint64 nBeginGPUTime, nEndGPUTime;
-			gGL->glGetQueryObjectui64v( zone.m_nBeginQuery, GL_QUERY_RESULT, &nBeginGPUTime );
-			gGL->glGetQueryObjectui64v( zone.m_nEndQuery, GL_QUERY_RESULT, &nEndGPUTime );
-
-			ReleaseQueryHandle( zone.m_nBeginQuery ); 
-			zone.m_nBeginQuery = 0;
-
-			ReleaseQueryHandle( zone.m_nEndQuery );
-			zone.m_nEndQuery = 0;
-
-			if ( m_nNumFinishedZones >= cMaxQueryZones )
-			{
-				Panic( "Too many finished zones!" );
-			}
-
-			FinishedQueryZone_t &finishedZone = m_FinishedZones[m_nNumFinishedZones];
-			finishedZone.m_pName = zone.m_pName;
-			finishedZone.m_nBeginGPUTime = nBeginGPUTime;
-			finishedZone.m_nEndGPUTime = nEndGPUTime;
-			finishedZone.m_nStackLevel = zone.m_nStackLevel;
-			finishedZone.m_nTotalGPUWorkCount = zone.m_nTotalGPUWorkCount;
-			m_nNumFinishedZones++;
-
-			if ( !zone.m_nStackLevel )
-			{
-				std::sort( m_FinishedZones, m_FinishedZones + m_nNumFinishedZones );
-				FlushFinishedZones();
-				m_nNumFinishedZones = 0;
-			}
-
-			m_nOutstandingQueriesTail = ( m_nOutstandingQueriesTail + 1 ) % cMaxQueryZones;
-			m_nNumOutstandingQueryZones--;
-		}
-	}
-
-private:
-	bool m_bInitialized;
-	uint m_nCurFrame;
-
-	double m_flGPUToCPUOffsetInS;
-	double m_flGPUToS;
-	double m_flRdtscToS;
-	double m_flSToRdtsc;
-
-	enum { cMaxQueryZones = 4096, cFreeQueryPoolSize = cMaxQueryZones * 2 };
-	GLuint m_FreeQueryPool[cFreeQueryPoolSize ];
-	uint m_nFreeQueryPoolSize;
-
-	GLuint AllocQueryHandle() 
-	{
-		if ( !m_nFreeQueryPoolSize )
-		{
-			Panic( "Out of query handles!");
-		}
-		return m_FreeQueryPool[--m_nFreeQueryPoolSize];
-	}
-
-	void ReleaseQueryHandle( GLuint nHandle )
-	{
-		if ( m_nFreeQueryPoolSize >= cFreeQueryPoolSize )
-		{
-			Panic( "Query handle error!" );
-		}
-		m_FreeQueryPool[m_nFreeQueryPoolSize++] = nHandle;
-	}
-
-	struct QueryZone_t
-	{
-		const char *m_pName;
-		GLuint m_nBeginQuery;
-		GLuint m_nEndQuery;
-		uint m_nStackLevel;
-		uint m_nTotalGPUWorkCount;
-	};
-
-	QueryZone_t m_OutstandingQueryZones[cMaxQueryZones];
-	uint m_nOutstandingQueriesHead; // index of first outstanding query (oldest)
-	uint m_nOutstandingQueriesTail;	// index where next query goes (newest)
-	uint m_nNumOutstandingQueryZones;
-
-	enum { cMaxQueryZoneStackSize = 256 };
-	QueryZone_t m_QueryZoneStack[cMaxQueryZoneStackSize];
-	uint m_nQueryZoneStackSize;
-
-	struct FinishedQueryZone_t
-	{
-		const char *m_pName;
-		GLuint64 m_nBeginGPUTime;
-		GLuint64 m_nEndGPUTime;
-		uint m_nStackLevel;
-		uint m_nTotalGPUWorkCount;
-
-		inline bool operator< ( const FinishedQueryZone_t &rhs ) const 
-		{ 
-			if ( m_nBeginGPUTime == rhs.m_nBeginGPUTime)
-				return m_nStackLevel < rhs.m_nStackLevel;
-
-			return m_nBeginGPUTime < rhs.m_nBeginGPUTime; 
-		}
-	};
-
-	FinishedQueryZone_t m_FinishedZones[cMaxQueryZones];
-	uint m_nNumFinishedZones;
-
-	uint m_nTotalSpanWorkCount;
-			
-	void InitRdtsc()
-	{
-		m_flRdtscToS = 0.0f;
-		m_flSToRdtsc = 0.0f;
-
-		for ( uint i = 0; i < 10; i++ )
-		{
-			uint64 t0 = Plat_Rdtsc();
-			double d0 = Plat_FloatTime();
-
-			ThreadSleep( 250 );
-
-			uint64 t1 = Plat_Rdtsc();
-			double d1 = Plat_FloatTime();
-
-			double flRdtscToS = ( d1 - d0 ) / ( t1 - t0 );
-			double flSToRdtsc = ( t1 - t0 ) / ( d1 - d0 );
-			if ( flSToRdtsc > m_flSToRdtsc )
-			{
-				m_flRdtscToS = flRdtscToS;
-				m_flSToRdtsc = flSToRdtsc;
-			}
-		}
-	}
-
-	void PipelineFlush()
-	{
-#ifdef HAVE_GL_ARB_SYNC
-		GLsync nSyncObj = gGL->glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
-		if ( nSyncObj )
-		{
-			gGL->glClientWaitSync( nSyncObj, GL_SYNC_FLUSH_COMMANDS_BIT, 300000000000ULL );
-			gGL->glDeleteSync( nSyncObj );
-		}
-#endif
-	}
-
-	inline void NewTimeSpan( uint64 nStartGPUTime, uint64 nEndGPUTime, const char *pName, uint nTotalDraws )
-	{
-		// apparently we must use level0 for timespans?
-		tmBeginTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nStartGPUTime, "%s [C:%u]", pName ? pName : "", nTotalDraws );
-		tmEndTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nEndGPUTime, "%s [C:%u]", pName ? pName : "", nTotalDraws );
-	}
-
-	void FlushFinishedZones()
-	{
-		for ( uint i = 0; i < m_nNumFinishedZones; i++ )
-		{
-			FinishedQueryZone_t	&zone = m_FinishedZones[i];
-			if ( !zone.m_nTotalGPUWorkCount )
-				continue;
-
-			bool bEmit = false;
-			if ( i == ( m_nNumFinishedZones - 1 ) )
-				bEmit = true;
-			else
-			{
-				FinishedQueryZone_t	&nextZone = m_FinishedZones[i + 1];
-				bEmit = zone.m_nEndGPUTime <= nextZone.m_nBeginGPUTime;
-			}
-
-			if ( bEmit )
-			{
-				uint64 nStartGPUTime = ( ( zone.m_nBeginGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
-				uint64 nEndGPUTime = ( ( zone.m_nEndGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
-
-				NewTimeSpan( nStartGPUTime, nEndGPUTime, zone.m_pName, zone.m_nTotalGPUWorkCount );
-
-				m_nTotalSpanWorkCount += zone.m_nTotalGPUWorkCount;
-			}
-		}
-	}
-
-	void Panic( const char *pMsg )
-	{
-		DXABSTRACT_BREAK_ON_ERROR();
-		Error( "%s", pMsg );
-	}
-
-	static void DbgPrintf( const char *pFmt, ... )
-	{
-		va_list	vargs;
-		va_start( vargs, pFmt );
-		char buf[1024];
-		V_vsnprintf( buf, sizeof( buf ), pFmt, vargs );
-
-#ifdef WIN32
-		OutputDebugStringA( buf );
-#else
-		printf( "%s", buf );
-#endif
-
-		va_end( vargs );
-	}
-};
-
-
-static CGPUTimestampManager g_GPUTimestampManager;
-
-void GLMGPUTimestampManagerInit()
-{
-	g_GPUTimestampManager.Init();
-}
-
-void GLMGPUTimestampManagerDeinit()
-{
-	g_GPUTimestampManager.Deinit();
-}
-
 ConVar gl_telemetry_gpu( "gl_telemetry_gpu", "0" );
 static bool g_bPrevTelemetryGPU;

-void GLMGPUTimestampManagerTick()
-{
-	if ( g_bPrevTelemetryGPU != gl_telemetry_gpu.GetBool() )
-	{
-		if ( !gl_telemetry_gpu.GetBool() )
-			g_GPUTimestampManager.Deinit();
-		else
-		{
-#if !PIX_ENABLE || !GL_TELEMETRY_GPU_ZONES
-			ConMsg( "Must define PIX_ENABLE and GL_TELEMETRY_GPU_ZONES to use this feature" );
-#else
-			g_GPUTimestampManager.Init();
-#endif
-		}
-
-		g_bPrevTelemetryGPU = gl_telemetry_gpu.GetBool();
-	}
-
-	g_GPUTimestampManager.Tick();
-}
-
-#endif // !OSX
-
 static uint g_nPIXEventIndex;

 void GLMBeginPIXEvent( const char *str )
 {
-#ifndef OSX
-	char szName[1024];
-	V_snprintf( szName, sizeof( szName ), "[ID:%u FR:%u] %s", g_nPIXEventIndex, g_GPUTimestampManager.GetCurFrame(), str );
-	const char *p = tmDynamicString( TELEMETRY_LEVEL2, szName ); //p can be null if tm is getting shut down
-	tmEnter( TELEMETRY_LEVEL2, TMZF_NONE, "PIX %s", p ? p : ""  );
-
-	g_nPIXEventIndex++;
-			
-	g_GPUTimestampManager.BeginZone( p );
-#endif // !OSX
 	V_strncpy( sg_pPIXName, str, 128 );

 #if defined( OSX ) && defined( CGLPROFILER_ENABLE )
@ -3748,10 +3137,6 @@ void GLMBeginPIXEvent( const char *str )
				@@ -3748,10 +3137,6 @@ void GLMBeginPIXEvent( const char *str )

 void GLMEndPIXEvent( void )
 {
-#ifndef OSX
-	g_GPUTimestampManager.EndZone();
-#endif
-
 #if defined( OSX ) && defined( CGLPROFILER_ENABLE )
 	CGLSetOption( kCGLGOComment, (GLint)sg_pPIXName );
 #endif
--- a/togles/linuxwin/stb_dxt_104.h
+++ b/togles/linuxwin/stb_dxt_104.h
@ -0,0 +1,624 @@
				@@ -0,0 +1,624 @@
+// stb_dxt.h - v1.04 - DXT1/DXT5 compressor - public domain
+// original by fabian "ryg" giesen - ported to C by stb
+// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
+//
+// USAGE:
+//   call stb_compress_dxt_block() for every block (you must pad)
+//     source should be a 4x4 block of RGBA data in row-major order;
+//     A is ignored if you specify alpha=0; you can turn on dithering
+//     and "high quality" using mode.
+//
+// version history:
+//   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
+//            single color match fix (allow for inexact color interpolation);
+//            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
+//   v1.03  - (stb) endianness support
+//   v1.02  - (stb) fix alpha encoding bug
+//   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
+//   v1.00  - (stb) first release
+
+#ifndef STB_INCLUDE_STB_DXT_H
+#define STB_INCLUDE_STB_DXT_H
+
+// compression mode (bitflags)
+#define STB_DXT_NORMAL    0
+#define STB_DXT_DITHER    1   // use dithering. dubious win. never use for normal maps and the like!
+#define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
+
+void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode);
+#define STB_COMPRESS_DXT_BLOCK
+
+#ifdef STB_DXT_IMPLEMENTATION
+
+// configuration options for DXT encoder. set them in the project/makefile or just define
+// them at the top.
+
+// STB_DXT_USE_ROUNDING_BIAS
+//     use a rounding bias during color interpolation. this is closer to what "ideal"
+//     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
+//     implicitly had this turned on. 
+//
+//     in case you're targeting a specific type of hardware (e.g. console programmers):
+//     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
+//     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
+//     you also see "(a*5 + b*3) / 8" on some old GPU designs.
+// #define STB_DXT_USE_ROUNDING_BIAS
+
+#include <stdlib.h>
+#include <math.h>
+#include <string.h> // memset
+
+static unsigned char stb__Expand5[32];          // indexed by a 5-bit channel value (see stb__From16Bit); filled in outside this section
+static unsigned char stb__Expand6[64];          // indexed by a 6-bit channel value (see stb__From16Bit); filled in outside this section
+static unsigned char stb__OMatch5[256][2];      // per 8-bit value: [0] is used for the max endpoint, [1] for the min endpoint (5-bit channels)
+static unsigned char stb__OMatch6[256][2];      // same as stb__OMatch5, for the 6-bit green channel
+static unsigned char stb__QuantRBTab[256+16];   // R/B quantization table for dithering; read through a +8 offset in stb__DitherBlock
+static unsigned char stb__QuantGTab[256+16];    // G quantization table for dithering; read through a +8 offset in stb__DitherBlock
+
+static int stb__Mul8Bit(int a, int b) // a*b/255 rounded to nearest, computed with shifts instead of a divide (intended for operands in 0..255)
+{
+  int t = a*b + 128; // +128 is the rounding term
+  return (t + (t >> 8)) >> 8; // adding t>>8 before the final shift corrects the /256 into a /255
+}
+
+static void stb__From16Bit(unsigned char *out, unsigned short v) // unpack the 5:6:5 value v into the four bytes out[0..3]
+{
+   int rv = (v & 0xf800) >> 11; // top 5 bits
+   int gv = (v & 0x07e0) >>  5; // middle 6 bits
+   int bv = (v & 0x001f) >>  0; // low 5 bits
+
+   out[0] = stb__Expand5[rv]; // each field is translated through its expand table (presumably widening it to 8 bits - table contents are set elsewhere)
+   out[1] = stb__Expand6[gv];
+   out[2] = stb__Expand5[bv];
+   out[3] = 0; // fourth byte is always cleared
+}
+
+static unsigned short stb__As16Bit(int r, int g, int b) // pack r,g,b into a single 5:6:5 value
+{
+   return (stb__Mul8Bit(r,31) << 11) + (stb__Mul8Bit(g,63) << 5) + stb__Mul8Bit(b,31); // scale each channel by 31/255, 63/255, 31/255, then place r high, g middle, b low
+}
+
+// linear interpolation at 1/3 point between a and b (result is weighted 2:1 toward a), using desired rounding type
+static int stb__Lerp13(int a, int b)
+{
+#ifdef STB_DXT_USE_ROUNDING_BIAS
+   // with rounding bias
+   return a + stb__Mul8Bit(b-a, 0x55);
+#else
+   // without rounding bias
+   // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed.
+   return (2*a + b) / 3;
+#endif
+}
+
+// lerp RGB color: writes the 1/3-point blend of p1 and p2 (see stb__Lerp13) to out[0..2]; out[3] is left untouched
+static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
+{
+   out[0] = stb__Lerp13(p1[0], p2[0]);
+   out[1] = stb__Lerp13(p1[1], p2[1]);
+   out[2] = stb__Lerp13(p1[2], p2[2]);
+}
+
+/****************************************************************************/
+
+// compute table to reproduce constant colors as accurately as possible: for each 8-bit target i, Table[i*2+0] / Table[i*2+1] receive the (mx, mn) indices into expand[] with the lowest error
+static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
+{
+   int i,mn,mx;
+   for (i=0;i<256;i++) {
+      int bestErr = 256; // starting bound; only candidates with a smaller error are recorded
+      for (mn=0;mn<size;mn++) {
+         for (mx=0;mx<size;mx++) { // exhaustive search over every (mn, mx) pair
+            int mine = expand[mn];
+            int maxe = expand[mx];
+            int err = abs(stb__Lerp13(maxe, mine) - i); // distance between the 1/3-point blend and the target
+            
+            // DX10 spec says that interpolation must be within 3% of "correct" result,
+            // add this as error term. (normally we'd expect a random distribution of
+            // +-1.5% error, but nowhere in the spec does it say that the error has to be
+            // unbiased - better safe than sorry).
+            err += abs(maxe - mine) * 3 / 100;
+            
+            if(err < bestErr)
+            { 
+               Table[i*2+0] = mx;
+               Table[i*2+1] = mn;
+               bestErr = err;
+            }
+         }
+      }
+   }
+}
+
+static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1) // build the 4-entry palette (4 bytes per entry) for the 5:6:5 endpoints c0 and c1
+{
+   stb__From16Bit(color+ 0, c0); // entry 0: c0
+   stb__From16Bit(color+ 4, c1); // entry 1: c1
+   stb__Lerp13RGB(color+ 8, color+0, color+4); // entry 2: blend weighted 2:1 toward c0
+   stb__Lerp13RGB(color+12, color+4, color+0); // entry 3: blend weighted 2:1 toward c1
+}
+
+// Block dithering function. Simply dithers a block to 565 RGB. Reads block, writes dest; byte 3 of each dest pixel is not written.
+// (Floyd-Steinberg)
+static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
+{
+  int err[8],*ep1 = err,*ep2 = err+4, *et; // two rows of 4 error terms: ep1 = row being produced, ep2 = previous row
+  int ch,y;
+
+  // process channels separately
+  for (ch=0; ch<3; ++ch) {
+      unsigned char *bp = block+ch, *dp = dest+ch;
+      unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8; // green uses its own table; +8 offset lets small negative indices stay inside the table
+      memset(err, 0, sizeof(err));
+      for(y=0; y<4; ++y) { // one iteration per row of 4 pixels (4 bytes per pixel)
+         dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
+         ep1[0] = bp[ 0] - dp[ 0]; // error = source value minus quantized value
+         dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
+         ep1[1] = bp[ 4] - dp[ 4];
+         dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
+         ep1[2] = bp[ 8] - dp[ 8];
+         dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
+         ep1[3] = bp[12] - dp[12];
+         bp += 16; // advance to the next row
+         dp += 16;
+         et = ep1, ep1 = ep2, ep2 = et; // swap
+      }
+   }
+}
+
+// The color matching function. Returns a 32-bit mask holding one 2-bit palette index per pixel (pixel 0 in the lowest bits); color[] is the 4-entry palette from stb__EvalColors.
+static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
+{
+   unsigned int mask = 0;
+   int dirr = color[0*4+0] - color[1*4+0]; // direction from palette entry 1 to entry 0
+   int dirg = color[0*4+1] - color[1*4+1];
+   int dirb = color[0*4+2] - color[1*4+2];
+   int dots[16];
+   int stops[4];
+   int i;
+   int c0Point, halfPoint, c3Point;
+
+   for(i=0;i<16;i++)
+      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb; // projection of each pixel onto the direction
+
+   for(i=0;i<4;i++)
+      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb; // projection of each palette entry
+
+   // think of the colors as arranged on a line; project point onto that line, then choose
+   // next color out of available ones. we compute the crossover points for "best color in top
+   // half"/"best in bottom half" and then the same inside that subinterval.
+   //
+   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
+   // but it's very close and a lot faster.
+   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
+   
+   c0Point   = (stops[1] + stops[3]) >> 1;
+   halfPoint = (stops[3] + stops[2]) >> 1;
+   c3Point   = (stops[2] + stops[0]) >> 1;
+
+   if(!dither) {
+      // the version without dithering is straightforward
+      for (i=15;i>=0;i--) { // walk backwards so pixel 0 ends up in the lowest two bits
+         int dot = dots[i];
+         mask <<= 2;
+
+         if(dot < halfPoint)
+           mask |= (dot < c0Point) ? 1 : 3;
+         else
+           mask |= (dot < c3Point) ? 2 : 0;
+      }
+  } else {
+      // with floyd-steinberg dithering
+      int err[8],*ep1 = err,*ep2 = err+4;
+      int *dp = dots, y;
+
+      c0Point   <<= 4; // thresholds are scaled by 16 to match the (dot << 4) values compared below
+      halfPoint <<= 4;
+      c3Point   <<= 4;
+      for(i=0;i<8;i++)
+         err[i] = 0;
+
+      for(y=0;y<4;y++)
+      {
+         int dot,lmask,step;
+
+         dot = (dp[0] << 4) + (3*ep2[1] + 5*ep2[0]);
+         if(dot < halfPoint)
+           step = (dot < c0Point) ? 1 : 3;
+         else
+           step = (dot < c3Point) ? 2 : 0;
+         ep1[0] = dp[0] - stops[step]; // error = pixel projection minus the chosen palette stop
+         lmask = step;
+
+         dot = (dp[1] << 4) + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
+         if(dot < halfPoint)
+           step = (dot < c0Point) ? 1 : 3;
+         else
+           step = (dot < c3Point) ? 2 : 0;
+         ep1[1] = dp[1] - stops[step];
+         lmask |= step<<2;
+
+         dot = (dp[2] << 4) + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
+         if(dot < halfPoint)
+           step = (dot < c0Point) ? 1 : 3;
+         else
+           step = (dot < c3Point) ? 2 : 0;
+         ep1[2] = dp[2] - stops[step];
+         lmask |= step<<4;
+
+         dot = (dp[3] << 4) + (7*ep1[2] + 5*ep2[3] + ep2[2]);
+         if(dot < halfPoint)
+           step = (dot < c0Point) ? 1 : 3;
+         else
+           step = (dot < c3Point) ? 2 : 0;
+         ep1[3] = dp[3] - stops[step];
+         lmask |= step<<6;
+
+         dp += 4;
+         mask |= (unsigned int) lmask << (y*8); // shift as unsigned: lmask can be 0xff and the shift 24, which would overflow a signed int
+         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
+      }
+   }
+
+   return mask;
+}
+
+// The color optimization function. (Clever code, part 1) Picks the two block pixels that are extreme along an estimated principal color axis and stores them as 5:6:5 values in *pmax16 / *pmin16.
+static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
+{
+  int mind = 0x7fffffff,maxd = -0x7fffffff;
+  unsigned char *minp, *maxp; // set by the first iteration of the final loop, since any dot product beats the initial mind/maxd
+  double magn;
+  int v_r,v_g,v_b;
+  static const int nIterPower = 4; // number of power-iteration steps
+  float covf[6],vfr,vfg,vfb;
+
+  // determine color distribution
+  int cov[6];
+  int mu[3],min[3],max[3];
+  int ch,i,iter;
+
+  for(ch=0;ch<3;ch++)
+  {
+    const unsigned char *bp = ((const unsigned char *) block) + ch;
+    int muv,minv,maxv;
+
+    muv = minv = maxv = bp[0];
+    for(i=4;i<64;i+=4) // remaining 15 pixels, 4 bytes apart
+    {
+      muv += bp[i];
+      if (bp[i] < minv) minv = bp[i];
+      else if (bp[i] > maxv) maxv = bp[i];
+    }
+
+    mu[ch] = (muv + 8) >> 4; // rounded mean over the 16 pixels
+    min[ch] = minv;
+    max[ch] = maxv;
+  }
+
+  // determine covariance matrix
+  for (i=0;i<6;i++)
+     cov[i] = 0;
+
+  for (i=0;i<16;i++)
+  {
+    int r = block[i*4+0] - mu[0];
+    int g = block[i*4+1] - mu[1];
+    int b = block[i*4+2] - mu[2];
+
+    cov[0] += r*r; // upper triangle of the symmetric matrix: rr, rg, rb, gg, gb, bb
+    cov[1] += r*g;
+    cov[2] += r*b;
+    cov[3] += g*g;
+    cov[4] += g*b;
+    cov[5] += b*b;
+  }
+
+  // convert covariance matrix to float, find principal axis via power iter
+  for(i=0;i<6;i++)
+    covf[i] = cov[i] / 255.0f;
+
+  vfr = (float) (max[0] - min[0]); // start vector: per-channel value range
+  vfg = (float) (max[1] - min[1]);
+  vfb = (float) (max[2] - min[2]);
+
+  for(iter=0;iter<nIterPower;iter++)
+  {
+    float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2]; // one matrix-vector product per step
+    float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
+    float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
+
+    vfr = r;
+    vfg = g;
+    vfb = b;
+  }
+
+  magn = fabs(vfr); // largest absolute component of the axis
+  if (fabs(vfg) > magn) magn = fabs(vfg);
+  if (fabs(vfb) > magn) magn = fabs(vfb);
+
+   if(magn < 4.0f) { // too small, default to luminance
+      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
+      v_g = 587;
+      v_b = 114;
+   } else {
+      magn = 512.0 / magn; // rescale so the largest component has magnitude 512 before truncating to int
+      v_r = (int) (vfr * magn);
+      v_g = (int) (vfg * magn);
+      v_b = (int) (vfb * magn);
+   }
+
+   // Pick colors at extreme points
+   for(i=0;i<16;i++)
+   {
+      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;
+
+      if (dot < mind) {
+         mind = dot;
+         minp = block+i*4;
+      }
+
+      if (dot > maxd) {
+         maxd = dot;
+         maxp = block+i*4;
+      }
+   }
+
+   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
+   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
+}
+
+static int stb__sclamp(float y, int p0, int p1) // truncate y to int, then clamp the result to [p0, p1]
+{
+   int x = (int) y;
+   if (x < p0) return p0;
+   if (x > p1) return p1;
+   return x;
+}
+
+// The refinement function. (Clever code, part 2)
+// Tries to optimize colors to suit block contents better. Updates *pmax16 / *pmin16 in place and returns nonzero if either one changed.
+// (By solving a least squares system via normal equations+Cramer's rule)
+static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
+{
+   static const int w1Tab[4] = { 3,0,2,1 }; // weight applied to the max-endpoint accumulator for palette index 0..3
+   static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
+   // ^some magic to save a lot of multiplies in the accumulating loop...
+   // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
+
+   float frb,fg;
+   unsigned short oldMin, oldMax, min16, max16;
+   int i, akku = 0, xx,xy,yy;
+   int At1_r,At1_g,At1_b;
+   int At2_r,At2_g,At2_b;
+   unsigned int cm = mask;
+
+   oldMin = *pmin16;
+   oldMax = *pmax16;
+
+   if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
+   {
+      // yes, linear system would be singular; solve using optimal
+      // single-color match on average color
+      int r = 8, g = 8, b = 8; // start at 8 so the >>4 below rounds to nearest
+      for (i=0;i<16;++i) {
+         r += block[i*4+0];
+         g += block[i*4+1];
+         b += block[i*4+2];
+      }
+
+      r >>= 4; g >>= 4; b >>= 4;
+
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      At1_r = At1_g = At1_b = 0;
+      At2_r = At2_g = At2_b = 0;
+      for (i=0;i<16;++i,cm>>=2) { // cm yields one 2-bit index per pixel, lowest bits first
+         int step = cm&3;
+         int w1 = w1Tab[step];
+         int r = block[i*4+0];
+         int g = block[i*4+1];
+         int b = block[i*4+2];
+
+         akku    += prods[step];
+         At1_r   += w1*r;
+         At1_g   += w1*g;
+         At1_b   += w1*b;
+         At2_r   += r;
+         At2_g   += g;
+         At2_b   += b;
+      }
+
+      At2_r = 3*At2_r - At1_r; // turns the plain sum into the sum weighted by (3 - w1)
+      At2_g = 3*At2_g - At1_g;
+      At2_b = 3*At2_b - At1_b;
+
+      // extract solutions and decide solvability
+      xx = akku >> 16; // akku packs three sums: bits 16 and up, bits 8..15, bits 0..7
+      yy = (akku >> 8) & 0xff;
+      xy = (akku >> 0) & 0xff;
+
+      frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy); // scale for 5-bit channels, divided by the determinant
+      fg = frb * 63.0f / 31.0f; // same scale for the 6-bit green channel
+
+      // solve.
+      max16 =   stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
+      max16 |=  stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
+      max16 |=  stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
+
+      min16 =   stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
+      min16 |=  stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
+      min16 |=  stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
+   }
+
+   *pmin16 = min16;
+   *pmax16 = max16;
+   return oldMin != min16 || oldMax != max16;
+}
+
+// Color block compression: encodes the RGB of a 4x4 RGBA block into 8 bytes at dest (two 5:6:5 endpoints, then 32 bits of 2-bit indices). mode is a combination of the STB_DXT_* flags.
+static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
+{
+   unsigned int mask;
+   int i;
+   int dither;
+   int refinecount;
+   unsigned short max16, min16;
+   unsigned char dblock[16*4],color[4*4];
+   
+   dither = mode & STB_DXT_DITHER;
+   refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
+
+   // check if block is constant
+   for (i=1;i<16;i++)
+      if (memcmp(block + i*4, block, 4) != 0) // byte-wise compare of pixel i with pixel 0: block is a byte pointer and need not be 4-byte aligned
+         break;
+
+   if(i == 16) { // constant color
+      int r = block[0], g = block[1], b = block[2];
+      mask  = 0xaaaaaaaa; // every pixel uses palette index 2 (binary 10)
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      // first step: compute dithered version for PCA if desired
+      if(dither)
+         stb__DitherBlock(dblock,block);
+
+      // second step: pca+map along principal axis
+      stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
+      if (max16 != min16) {
+         stb__EvalColors(color,max16,min16);
+         mask = stb__MatchColorsBlock(block,color,dither);
+      } else
+         mask = 0; // identical endpoints: every pixel uses index 0
+
+      // third step: refine (multiple times if requested)
+      for (i=0;i<refinecount;i++) {
+         unsigned int lastmask = mask;
+         
+         if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
+            if (max16 != min16) {
+               stb__EvalColors(color,max16,min16);
+               mask = stb__MatchColorsBlock(block,color,dither);
+            } else {
+               mask = 0;
+               break;
+            }
+         }
+         
+         if(mask == lastmask) // indices did not change, so another pass would see the same input
+            break;
+      }
+  }
+
+  // write the color block
+  if(max16 < min16) // keep the larger endpoint first; flipping the low bit of every index (0<->1, 2<->3) compensates for the swap
+  {
+     unsigned short t = min16;
+     min16 = max16;
+     max16 = t;
+     mask ^= 0x55555555;
+  }
+
+  dest[0] = (unsigned char) (max16); // all three fields are stored low byte first
+  dest[1] = (unsigned char) (max16 >> 8);
+  dest[2] = (unsigned char) (min16);
+  dest[3] = (unsigned char) (min16 >> 8);
+  dest[4] = (unsigned char) (mask);
+  dest[5] = (unsigned char) (mask >> 8);
+  dest[6] = (unsigned char) (mask >> 16);
+  dest[7] = (unsigned char) (mask >> 24);
+}
+
+// Alpha block compression (this is easy for a change).
+//
+// Reads the byte at offset 3 of each of the 16 four-byte pixels in `src` and
+// writes 8 bytes to `dest`: the maximum value, the minimum value, then sixteen
+// 3-bit indices packed low-bits-first into the remaining 6 bytes.
+// `mode` is accepted but not used here.
+static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src,int mode)
+{
+   int lo, hi, range, range2, range4, offset;
+   int pending = 0;   // index bits produced but not yet written out
+   int nbits = 0;     // number of valid bits currently held in `pending`
+   int px;
+   unsigned char *out = dest + 2;
+
+   // find min/max alpha
+   lo = hi = src[3];
+   for (px = 1; px < 16; ++px) {
+      int alpha = src[px*4 + 3];
+      if (alpha < lo) lo = alpha;
+      if (alpha > hi) hi = alpha;
+   }
+
+   // the two endpoints go first: maximum, then minimum
+   dest[0] = (unsigned char) hi;
+   dest[1] = (unsigned char) lo;
+
+   // determine bias and emit indices
+   // given the choice of hi/lo, these indices are optimal:
+   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
+   range  = hi - lo;
+   range2 = range * 2;
+   range4 = range * 4;
+   if (range < 8)
+      offset = range - 1;
+   else
+      offset = range/2 + 2;
+   offset -= lo * 7;
+
+   for (px = 0; px < 16; ++px) {
+      int scaled = src[px*4 + 3]*7 + offset;
+      int level = 0;
+      int code;
+
+      // "linear scale" lerp factor between 0 (val=min) and 7 (val=max),
+      // built one binary digit at a time
+      if (scaled >= range4) { level  = 4; scaled -= range4; }
+      if (scaled >= range2) { level += 2; scaled -= range2; }
+      if (scaled >= range)  { level += 1; }
+
+      // turn linear scale into DXT index (0/1 are extremal pts)
+      code = (8 - level) & 7;
+      if (code < 2)
+         code ^= 1;
+
+      // append the 3-bit index and flush every completed byte
+      pending |= code << nbits;
+      nbits += 3;
+      if (nbits >= 8) {
+         *out++ = (unsigned char) pending;
+         pending >>= 8;
+         nbits -= 8;
+      }
+   }
+}
+
+// Fills the lookup tables used by the compressor: stb__Expand5, stb__Expand6,
+// stb__QuantRBTab, stb__QuantGTab, and (through stb__PrepareOptTable)
+// stb__OMatch5 and stb__OMatch6.
+static void stb__InitDXT()
+{
+   int idx;
+
+   // 5-bit -> 8-bit: shift up and copy the top bits into the vacated low bits
+   for (idx = 0; idx < 32; ++idx)
+      stb__Expand5[idx] = (idx << 3) | (idx >> 2);
+
+   // 6-bit -> 8-bit, same scheme
+   for (idx = 0; idx < 64; ++idx)
+      stb__Expand6[idx] = (idx << 2) | (idx >> 4);
+
+   // table slot `idx` stands for the value idx-8, clamped to 0..255
+   for (idx = 0; idx < 256+16; ++idx)
+   {
+      int v = idx - 8;
+      if (v < 0)
+         v = 0;
+      else if (v > 255)
+         v = 255;
+
+      // NOTE(review): stb__Mul8Bit is defined elsewhere; presumably it scales
+      // v from 0..255 down to the 5/6-bit range -- confirm against its definition
+      stb__QuantRBTab[idx] = stb__Expand5[stb__Mul8Bit(v,31)];
+      stb__QuantGTab[idx] = stb__Expand6[stb__Mul8Bit(v,63)];
+   }
+
+   stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
+   stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
+}
+
+// Compresses one block of pixels from `src` into `dest`.
+//   alpha != 0 : an 8-byte alpha block is written first, followed by the
+//                8-byte color block (16 bytes total)
+//   alpha == 0 : only the 8-byte color block is written
+// `mode` is forwarded unchanged to both block compressors.
+// The lookup tables are built on the first call.
+// NOTE(review): the first-call flag is a plain static int with no
+// synchronisation visible here -- confirm callers do not race on first use.
+void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
+{
+   static int needs_init = 1;
+   // the block compressors take non-const pointers
+   unsigned char *pixels = (unsigned char *) src;
+   unsigned char *color_dest = dest;
+
+   if (needs_init) {
+      stb__InitDXT();
+      needs_init = 0;
+   }
+
+   if (alpha) {
+      stb__CompressAlphaBlock(dest, pixels, mode);
+      color_dest = dest + 8;
+   }
+
+   stb__CompressColorBlock(color_dest, pixels, mode);
+}
+#endif // STB_DXT_IMPLEMENTATION
+
+#endif // STB_INCLUDE_STB_DXT_H