From c7308906d1accfd217193cbc2de16e4416a7f2d1 Mon Sep 17 00:00:00 2001 From: hymei Date: Wed, 23 Feb 2022 20:00:04 +0800 Subject: [PATCH] arm64 detect macros --- materialsystem/cmatlightmaps.h | 2 +- mathlib/sse.cpp | 16 ++++++++------ public/mathlib/mathlib.h | 8 ++++--- public/mathlib/ssemath.h | 2 +- public/mathlib/vector4d.h | 6 ++--- public/steam/steamtypes.h | 4 ++-- public/tier0/wchartypes.h | 2 +- tier0/cpu.cpp | 2 +- tier0/cpu_posix.cpp | 12 +++++++++- tier1/processor_detect_linux.cpp | 38 ++++++++++++++++++++++++++------ tier1/reliabletimer.cpp | 2 +- 11 files changed, 66 insertions(+), 28 deletions(-) diff --git a/materialsystem/cmatlightmaps.h b/materialsystem/cmatlightmaps.h index 1103fd5c..0a6ee02a 100644 --- a/materialsystem/cmatlightmaps.h +++ b/materialsystem/cmatlightmaps.h @@ -26,7 +26,7 @@ class CMaterialDict; class IMaterial; class IMaterialInternal; class FloatBitMap_t; -typedef int ShaderAPITextureHandle_t; +typedef intp ShaderAPITextureHandle_t; struct MaterialSystem_SortInfo_t; typedef unsigned short MaterialHandle_t; diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp index 83dda7d9..86377a6f 100644 --- a/mathlib/sse.cpp +++ b/mathlib/sse.cpp @@ -11,7 +11,7 @@ #include "tier0/dbg.h" #include "mathlib/mathlib.h" #include "mathlib/vector.h" -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) #include "sse2neon.h" #endif @@ -180,7 +180,7 @@ float _SSE_RSqrtFast(float x) Assert( s_bMathlibInitialized ); float rroot; -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) rroot = _SSE_RSqrtAccurate(x); #elif _WIN32 _asm @@ -217,7 +217,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec) // be much of a performance win, considering you will very likely miss 3 branch predicts in a row. 
if ( v[0] || v[1] || v[2] ) { -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] ); r[0] = v[0] * rsqrt; r[1] = v[1] * rsqrt; @@ -296,7 +296,7 @@ void FASTCALL _SSE_VectorNormalizeFast (Vector& vec) float _SSE_InvRSquared(const float* v) { float inv_r2 = 1.f; -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) return _SSE_RSqrtAccurate( FLT_EPSILON + v[0] * v[0] + v[1] * v[1] + v[2] * v[2] ); #elif _WIN32 _asm { // Intel SSE only routine @@ -391,8 +391,10 @@ typedef __m64 v2si; // vector of 2 int (mmx) void _SSE_SinCos(float x, float* s, float* c) { -#ifdef __arm__ -#if defined( POSIX ) +#if defined(__arm__) || defined(__arm64__) +#if defined( OSX ) + __sincosf(x, s, c); +#elif defined( POSIX ) sincosf(x, s, c); #else *s = sin( x ); @@ -605,7 +607,7 @@ void _SSE_SinCos(float x, float* s, float* c) float _SSE_cos( float x ) { -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) return cos(x); #elif _WIN32 float temp; diff --git a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h index fe103e5e..4a765fcd 100644 --- a/public/mathlib/mathlib.h +++ b/public/mathlib/mathlib.h @@ -457,6 +457,8 @@ void inline SinCos( float radians, float *sine, float *cosine ) #elif defined( PLATFORM_WINDOWS_PC64 ) *sine = sin( radians ); *cosine = cos( radians ); +#elif defined( OSX ) + __sincosf(radians, sine, cosine); #elif defined( POSIX ) sincosf(radians, sine, cosine); #endif @@ -1213,7 +1215,7 @@ FORCEINLINE int RoundFloatToInt(float f) }; flResult = __fctiw( f ); return pResult[1]; -#elif defined (__arm__) +#elif defined (__arm__) || defined (__arm64__) return (int)(f + 0.5f); #else #error Unknown architecture @@ -1245,7 +1247,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) Assert( pIntResult[1] >= 0 ); return pResult[1]; #else // !X360 -#ifdef __arm__ +#if defined(__arm__) || defined(__arm64__) return (unsigned long)(f + 0.5f); #elif defined( PLATFORM_WINDOWS_PC64 ) 
uint nRet = ( uint ) f; @@ -2168,7 +2170,7 @@ inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL // Fast compare // maxUlps is the maximum error in terms of Units in the Last Place. This // specifies how big an error we are willing to accept in terms of the value -// of the least significant digit of the floating point number’s +// of the least significant digit of the floating point number’s // representation. maxUlps can also be interpreted in terms of how many // representable floats we are willing to accept between A and B. // This function will allow maxUlps-1 floats between A and B. diff --git a/public/mathlib/ssemath.h b/public/mathlib/ssemath.h index 6a73b3f6..4580a4bd 100644 --- a/public/mathlib/ssemath.h +++ b/public/mathlib/ssemath.h @@ -8,7 +8,7 @@ #if defined( _X360 ) #include -#elif defined(__arm__) +#elif defined(__arm__) || defined(__arm64__) #include "sse2neon.h" #else #include diff --git a/public/mathlib/vector4d.h b/public/mathlib/vector4d.h index 89fcce01..72c63129 100644 --- a/public/mathlib/vector4d.h +++ b/public/mathlib/vector4d.h @@ -654,10 +654,10 @@ inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAli vOutB.z += vInB.z * w; vOutB.w += vInB.w * w; #else - __vector4 temp; + __vector4 temp; - temp = __lvlx( &w, 0 ); - temp = __vspltw( temp, 0 ); + temp = __lvlx( &w, 0 ); + temp = __vspltw( temp, 0 ); vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); diff --git a/public/steam/steamtypes.h b/public/steam/steamtypes.h index 22ce3e61..f229f238 100644 --- a/public/steam/steamtypes.h +++ b/public/steam/steamtypes.h @@ -1,4 +1,4 @@ -//========= Copyright © 1996-2008, Valve LLC, All rights reserved. ============ +//========= Copyright © 1996-2008, Valve LLC, All rights reserved. 
============ // // Purpose: // @@ -24,7 +24,7 @@ typedef unsigned char uint8; #define POSIX 1 #endif -#if defined(__x86_64__) || defined(_WIN64) +#if defined(__x86_64__) || defined(_WIN64) || defined(__arm64__) #define X64BITS #endif diff --git a/public/tier0/wchartypes.h b/public/tier0/wchartypes.h index 814470fd..8d8838d8 100644 --- a/public/tier0/wchartypes.h +++ b/public/tier0/wchartypes.h @@ -20,7 +20,7 @@ // Temporarily turn off Valve defines #include "tier0/valve_off.h" -#if !defined(_WCHAR_T_DEFINED) && !defined(GNUC) +#if !defined(_WCHAR_T_DEFINED) && !defined( __WCHAR_TYPE__ ) && !defined(GNUC) typedef unsigned short wchar_t; #define _WCHAR_T_DEFINED #endif diff --git a/tier0/cpu.cpp b/tier0/cpu.cpp index 90ca43ac..a8a0814c 100644 --- a/tier0/cpu.cpp +++ b/tier0/cpu.cpp @@ -22,7 +22,7 @@ const tchar* GetProcessorVendorId(); static bool cpuid(unsigned long function, unsigned long& out_eax, unsigned long& out_ebx, unsigned long& out_ecx, unsigned long& out_edx) { -#if defined (__arm__) || defined( _X360 ) +#if defined (__arm__) || defined (__arm64__) || defined( _X360 ) return false; #elif defined(GNUC) asm("mov %%ebx, %%esi\n\t" diff --git a/tier0/cpu_posix.cpp b/tier0/cpu_posix.cpp index c0deed41..094bc435 100644 --- a/tier0/cpu_posix.cpp +++ b/tier0/cpu_posix.cpp @@ -99,6 +99,15 @@ uint64 GetCPUFreqFromPROC() uint64 CalculateCPUFreq() { +#ifdef __APPLE__ + uint64 freq_hz = 0; + size_t freq_size = sizeof(freq_hz); + int retval = sysctlbyname("hw.cpufrequency_max", &freq_hz, &freq_size, NULL, 0); + // MoeMod : TODO dont know how to get freq on Apple Silicon + if(!freq_hz) + freq_hz = 3200000; + return freq_hz; +#else // Try to open cpuinfo_max_freq. If the kernel was built with cpu scaling support disabled, this will fail. 
FILE *fp = fopen( "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r" ); if ( fp ) @@ -118,8 +127,9 @@ uint64 CalculateCPUFreq() return retVal * 1000; } } +#endif -#ifndef __arm__ +#if !defined(__arm__) && !defined(__arm64__) // Compute the period. Loop until we get 3 consecutive periods that // are the same to within a small error. The error is chosen // to be +/- 0.02% on a P-200. diff --git a/tier1/processor_detect_linux.cpp b/tier1/processor_detect_linux.cpp index 9e2490bd..8887926e 100644 --- a/tier1/processor_detect_linux.cpp +++ b/tier1/processor_detect_linux.cpp @@ -6,24 +6,48 @@ // $NoKeywords: $ //=============================================================================// +#include "platform.h" + #if defined __SANITIZE_ADDRESS__ bool CheckMMXTechnology(void) { return false; } bool CheckSSETechnology(void) { return false; } bool CheckSSE2Technology(void) { return false; } bool Check3DNowTechnology(void) { return false; } -#elif defined (__arm__) +#elif defined (__arm__) || defined (__arm64__) bool CheckMMXTechnology(void) { return false; } bool CheckSSETechnology(void) { return false; } bool CheckSSE2Technology(void) { return false; } bool Check3DNowTechnology(void) { return false; } #else -#define cpuid(in,a,b,c,d) \ - asm("pushl %%ebx\n\t" "cpuid\n\t" "movl %%ebx,%%esi\n\t" "pop %%ebx": "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (in)); +static void cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx) +{ +#if defined(PLATFORM_64BITS) + asm("mov %%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchg %%rsi, %%rbx" + : "=a" (out_eax), + "=S" (out_ebx), + "=c" (out_ecx), + "=d" (out_edx) + : "a" (function) + ); +#else + asm("mov %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchg %%esi, %%ebx" + : "=a" (out_eax), + "=S" (out_ebx), + "=c" (out_ecx), + "=d" (out_edx) + : "a" (function) + ); +#endif +} bool CheckMMXTechnology(void) { - unsigned long eax,ebx,edx,unused; + uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx 
& 0x800000; @@ -31,7 +55,7 @@ bool CheckMMXTechnology(void) bool CheckSSETechnology(void) { - unsigned long eax,ebx,edx,unused; + uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx & 0x2000000L; @@ -39,7 +63,7 @@ bool CheckSSETechnology(void) bool CheckSSE2Technology(void) { - unsigned long eax,ebx,edx,unused; + uint32 eax,ebx,edx,unused; cpuid(1,eax,ebx,unused,edx); return edx & 0x04000000; @@ -47,7 +71,7 @@ bool CheckSSE2Technology(void) bool Check3DNowTechnology(void) { - unsigned long eax, unused; + uint32 eax, unused; cpuid(0x80000000,eax,unused,unused,unused); if ( eax > 0x80000000L ) diff --git a/tier1/reliabletimer.cpp b/tier1/reliabletimer.cpp index ab46596f..73556e90 100644 --- a/tier1/reliabletimer.cpp +++ b/tier1/reliabletimer.cpp @@ -87,7 +87,7 @@ int64 CReliableTimer::GetPerformanceCountNow() uint64 ulNow; SYS_TIMEBASE_GET( ulNow ); return ulNow; -#elif defined( __arm__ ) && defined (POSIX) +#elif (defined( __arm__ ) || defined( __arm64__ )) && defined (POSIX) struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); return ts.tv_sec * 1000000000ULL + ts.tv_nsec;