From c7308906d1accfd217193cbc2de16e4416a7f2d1 Mon Sep 17 00:00:00 2001
From: hymei <hymei@tencent.com>
Date: Wed, 23 Feb 2022 20:00:04 +0800
Subject: [PATCH] arm64 detect macros

---
 materialsystem/cmatlightmaps.h   |  2 +-
 mathlib/sse.cpp                  | 16 ++++++++------
 public/mathlib/mathlib.h         |  8 ++++---
 public/mathlib/ssemath.h         |  2 +-
 public/mathlib/vector4d.h        |  6 ++---
 public/steam/steamtypes.h        |  4 ++--
 public/tier0/wchartypes.h        |  2 +-
 tier0/cpu.cpp                    |  2 +-
 tier0/cpu_posix.cpp              | 12 +++++++++-
 tier1/processor_detect_linux.cpp | 38 ++++++++++++++++++++++++++------
 tier1/reliabletimer.cpp          |  2 +-
 11 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/materialsystem/cmatlightmaps.h b/materialsystem/cmatlightmaps.h
index 1103fd5c..0a6ee02a 100644
--- a/materialsystem/cmatlightmaps.h
+++ b/materialsystem/cmatlightmaps.h
@@ -26,7 +26,7 @@ class CMaterialDict;
 class IMaterial;
 class IMaterialInternal;
 class FloatBitMap_t;
-typedef int ShaderAPITextureHandle_t;
+typedef intp ShaderAPITextureHandle_t;
 struct MaterialSystem_SortInfo_t;
 typedef unsigned short MaterialHandle_t;
 
diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp
index 83dda7d9..86377a6f 100644
--- a/mathlib/sse.cpp
+++ b/mathlib/sse.cpp
@@ -11,7 +11,7 @@
 #include "tier0/dbg.h"
 #include "mathlib/mathlib.h"
 #include "mathlib/vector.h"
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
 #include "sse2neon.h"
 #endif
 
@@ -180,7 +180,7 @@ float _SSE_RSqrtFast(float x)
 	Assert( s_bMathlibInitialized );
 
 	float rroot;
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
         rroot = _SSE_RSqrtAccurate(x);
 #elif _WIN32
 	_asm
@@ -217,7 +217,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
 	// be much of a performance win, considering you will very likely miss 3 branch predicts in a row.
 	if ( v[0] || v[1] || v[2] )
 	{
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
 		float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
 		r[0] = v[0] * rsqrt;
 		r[1] = v[1] * rsqrt;
@@ -296,7 +296,7 @@ void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
 float _SSE_InvRSquared(const float* v)
 {
 	float	inv_r2 = 1.f;
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
 	return _SSE_RSqrtAccurate( FLT_EPSILON + v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
 #elif _WIN32
 	_asm { // Intel SSE only routine
@@ -391,8 +391,10 @@ typedef __m64 v2si;   // vector of 2 int (mmx)
 
 void _SSE_SinCos(float x, float* s, float* c)
 {
-#ifdef __arm__
-#if defined( POSIX )
+#if defined(__arm__) || defined(__arm64__)
+#if defined( OSX )
+    __sincosf(x, s, c);
+#elif defined( POSIX )
         sincosf(x, s, c);
 #else
 	*s = sin( x );
@@ -605,7 +607,7 @@ void _SSE_SinCos(float x, float* s, float* c)
 
 float _SSE_cos( float x )
 {
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
 	return cos(x);
 #elif _WIN32
 	float temp;
diff --git a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h
index fe103e5e..4a765fcd 100644
--- a/public/mathlib/mathlib.h
+++ b/public/mathlib/mathlib.h
@@ -457,6 +457,8 @@ void inline SinCos( float radians, float *sine, float *cosine )
 #elif defined( PLATFORM_WINDOWS_PC64 )
 	*sine = sin( radians );
 	*cosine = cos( radians );
+#elif defined( OSX )
+    __sincosf(radians, sine, cosine);
 #elif defined( POSIX )
 	sincosf(radians, sine, cosine);
 #endif
@@ -1213,7 +1215,7 @@ FORCEINLINE int RoundFloatToInt(float f)
 	};
 	flResult = __fctiw( f );
 	return pResult[1];
-#elif defined (__arm__)
+#elif defined (__arm__) ||  defined (__arm64__)
         return (int)(f + 0.5f);
 #else
 #error Unknown architecture
@@ -1245,7 +1247,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
 	Assert( pIntResult[1] >= 0 );
 	return pResult[1];
 #else  // !X360
-#ifdef __arm__
+#if defined(__arm__) || defined(__arm64__)
         return (unsigned long)(f + 0.5f);
 #elif defined( PLATFORM_WINDOWS_PC64 )
 	uint nRet = ( uint ) f;
@@ -2168,7 +2170,7 @@ inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL
 // Fast compare
 // maxUlps is the maximum error in terms of Units in the Last Place. This 
 // specifies how big an error we are willing to accept in terms of the value
-// of the least significant digit of the floating point number’s 
+// of the least significant digit of the floating point number’s 
 // representation. maxUlps can also be interpreted in terms of how many 
 // representable floats we are willing to accept between A and B. 
 // This function will allow maxUlps-1 floats between A and B.
diff --git a/public/mathlib/ssemath.h b/public/mathlib/ssemath.h
index 6a73b3f6..4580a4bd 100644
--- a/public/mathlib/ssemath.h
+++ b/public/mathlib/ssemath.h
@@ -8,7 +8,7 @@
 
 #if defined( _X360 )
 #include <xboxmath.h>
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__arm64__)
 #include "sse2neon.h"
 #else
 #include <xmmintrin.h>
diff --git a/public/mathlib/vector4d.h b/public/mathlib/vector4d.h
index 89fcce01..72c63129 100644
--- a/public/mathlib/vector4d.h
+++ b/public/mathlib/vector4d.h
@@ -654,10 +654,10 @@ inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAli
 	vOutB.z += vInB.z * w;
 	vOutB.w += vInB.w * w;
 #else
-    __vector4 temp;
+	__vector4 temp;
 
-    temp = __lvlx( &w, 0 );
-    temp = __vspltw( temp, 0 );
+	temp = __lvlx( &w, 0 );
+	temp = __vspltw( temp, 0 );
 
 	vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
 	vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
diff --git a/public/steam/steamtypes.h b/public/steam/steamtypes.h
index 22ce3e61..f229f238 100644
--- a/public/steam/steamtypes.h
+++ b/public/steam/steamtypes.h
@@ -1,4 +1,4 @@
-//========= Copyright © 1996-2008, Valve LLC, All rights reserved. ============
+//========= Copyright © 1996-2008, Valve LLC, All rights reserved. ============
 //
 // Purpose:
 //
@@ -24,7 +24,7 @@ typedef unsigned char uint8;
 	#define POSIX 1
 #endif
 
-#if defined(__x86_64__) || defined(_WIN64)
+#if defined(__x86_64__) || defined(_WIN64) || defined(__arm64__)
 #define X64BITS
 #endif
 
diff --git a/public/tier0/wchartypes.h b/public/tier0/wchartypes.h
index 814470fd..8d8838d8 100644
--- a/public/tier0/wchartypes.h
+++ b/public/tier0/wchartypes.h
@@ -20,7 +20,7 @@
 // Temporarily turn off Valve defines
 #include "tier0/valve_off.h"
 
-#if !defined(_WCHAR_T_DEFINED) && !defined(GNUC)
+#if !defined(_WCHAR_T_DEFINED)  && !defined( __WCHAR_TYPE__ ) && !defined(GNUC)
 typedef unsigned short wchar_t;
 #define _WCHAR_T_DEFINED
 #endif
diff --git a/tier0/cpu.cpp b/tier0/cpu.cpp
index 90ca43ac..a8a0814c 100644
--- a/tier0/cpu.cpp
+++ b/tier0/cpu.cpp
@@ -22,7 +22,7 @@ const tchar* GetProcessorVendorId();
 
 static bool cpuid(unsigned long function, unsigned long& out_eax, unsigned long& out_ebx, unsigned long& out_ecx, unsigned long& out_edx)
 {
-#if defined (__arm__) || defined( _X360 )
+#if defined (__arm__) || defined (__arm64__) || defined( _X360 )
 	return false;
 #elif defined(GNUC)
 	asm("mov %%ebx, %%esi\n\t"
diff --git a/tier0/cpu_posix.cpp b/tier0/cpu_posix.cpp
index c0deed41..094bc435 100644
--- a/tier0/cpu_posix.cpp
+++ b/tier0/cpu_posix.cpp
@@ -99,6 +99,15 @@ uint64 GetCPUFreqFromPROC()
 
 uint64 CalculateCPUFreq()
 {
+#ifdef __APPLE__
+    // Result is in Hz, like the sysfs path below (its value * 1000); ask sysctl for the max clock.
+    uint64 freq_hz = 0;
+    size_t freq_size = sizeof(freq_hz);
+    // MoeMod : TODO dont know how to get freq on Apple Silicon
+    if ( sysctlbyname("hw.cpufrequency_max", &freq_hz, &freq_size, NULL, 0) != 0 || !freq_hz )
+        freq_hz = 3200000000ULL; // nominal 3.2 GHz in Hz (was 3200000, i.e. only 3.2 MHz)
+    return freq_hz;
+#else
 	// Try to open cpuinfo_max_freq. If the kernel was built with cpu scaling support disabled, this will fail.
 	FILE *fp = fopen( "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r" );
 	if ( fp )
@@ -118,8 +127,9 @@ uint64 CalculateCPUFreq()
 			return retVal * 1000;
 		}
 	}
+#endif
 
-#ifndef __arm__
+#if !defined(__arm__) && !defined(__arm64__)
 	// Compute the period. Loop until we get 3 consecutive periods that
 	// are the same to within a small error. The error is chosen
 	// to be +/- 0.02% on a P-200.
diff --git a/tier1/processor_detect_linux.cpp b/tier1/processor_detect_linux.cpp
index 9e2490bd..8887926e 100644
--- a/tier1/processor_detect_linux.cpp
+++ b/tier1/processor_detect_linux.cpp
@@ -6,24 +6,48 @@
 // $NoKeywords: $
 //=============================================================================//
 
+#include "platform.h"
+
 #if defined __SANITIZE_ADDRESS__
 bool CheckMMXTechnology(void) { return false; }
 bool CheckSSETechnology(void) { return false; }
 bool CheckSSE2Technology(void) { return false; }
 bool Check3DNowTechnology(void) { return false; }
-#elif defined (__arm__)
+#elif defined (__arm__) || defined (__arm64__)
 bool CheckMMXTechnology(void) { return false; }
 bool CheckSSETechnology(void) { return false; }
 bool CheckSSE2Technology(void) { return false; }
 bool Check3DNowTechnology(void) { return false; }
 #else
 
-#define cpuid(in,a,b,c,d)												\
-	asm("pushl %%ebx\n\t" "cpuid\n\t" "movl %%ebx,%%esi\n\t" "pop %%ebx": "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (in));
+static void cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx) // Runs CPUID with EAX = function; the out_* refs receive EAX/EBX/ECX/EDX.
+{
+#if defined(PLATFORM_64BITS) // 64-bit variant: save/restore the full RBX around CPUID
+        asm("mov %%rbx, %%rsi\n\t" // stash the caller's RBX in RSI (presumably because RBX may be reserved by the compiler - confirm)
+                "cpuid\n\t" // leaf number arrives in EAX via the "a" input below
+                "xchg %%rsi, %%rbx" // RBX is restored; CPUID's EBX result now sits in RSI
+                : "=a" (out_eax),
+                  "=S" (out_ebx), // EBX result is read back through ESI, so EBX is never an asm operand
+                  "=c" (out_ecx),
+                  "=d" (out_edx)
+                : "a" (function) // NOTE(review): ECX is not set on input; callers in this file only use leaves 1 and 0x80000000
+        );
+#else // 32-bit variant: same sequence with EBX/ESI
+        asm("mov %%ebx, %%esi\n\t" // stash the caller's EBX in ESI
+                "cpuid\n\t" // leaf number arrives in EAX via the "a" input below
+                "xchg %%esi, %%ebx" // EBX is restored; CPUID's EBX result now sits in ESI
+                : "=a" (out_eax),
+                  "=S" (out_ebx), // EBX result is read back through ESI
+                  "=c" (out_ecx),
+                  "=d" (out_edx)
+                : "a" (function) // only EAX is supplied as input
+        );
+#endif
+}
 
 bool CheckMMXTechnology(void)
 {
-    unsigned long eax,ebx,edx,unused;
+    uint32 eax,ebx,edx,unused;
     cpuid(1,eax,ebx,unused,edx);
 
     return edx & 0x800000;
@@ -31,7 +55,7 @@ bool CheckMMXTechnology(void)
 
 bool CheckSSETechnology(void)
 {
-    unsigned long eax,ebx,edx,unused;
+    uint32 eax,ebx,edx,unused;
     cpuid(1,eax,ebx,unused,edx);
 
     return edx & 0x2000000L;
@@ -39,7 +63,7 @@ bool CheckSSETechnology(void)
 
 bool CheckSSE2Technology(void)
 {
-    unsigned long eax,ebx,edx,unused;
+    uint32 eax,ebx,edx,unused;
     cpuid(1,eax,ebx,unused,edx);
 
     return edx & 0x04000000;
@@ -47,7 +71,7 @@ bool CheckSSE2Technology(void)
 
 bool Check3DNowTechnology(void)
 {
-    unsigned long eax, unused;
+    uint32 eax, unused;
     cpuid(0x80000000,eax,unused,unused,unused);
 
     if ( eax > 0x80000000L )
diff --git a/tier1/reliabletimer.cpp b/tier1/reliabletimer.cpp
index ab46596f..73556e90 100644
--- a/tier1/reliabletimer.cpp
+++ b/tier1/reliabletimer.cpp
@@ -87,7 +87,7 @@ int64 CReliableTimer::GetPerformanceCountNow()
 	uint64 ulNow;
 	SYS_TIMEBASE_GET( ulNow );
 	return ulNow;
-#elif defined( __arm__ ) && defined (POSIX)
+#elif (defined( __arm__ ) || defined( __arm64__ )) && defined (POSIX)
 	struct timespec ts;
 	clock_gettime(CLOCK_REALTIME, &ts);
 	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;