From ee04c66aa5792c60a9907ddcfd5e1a6b946fc90c Mon Sep 17 00:00:00 2001 From: Er2 Date: Fri, 19 May 2023 20:15:23 +0300 Subject: [PATCH] 99% done Windows ARM32 port --- .gitignore | 1 + common/sse2neon.h | 60 ++++- engine/cmodel.cpp | 4 +- engine/l_studio.cpp | 2 +- engine/sys_engine.cpp | 2 +- game/client/detailobjectsystem.cpp | 4 +- inputsystem/inputsystem.cpp | 4 + inputsystem/inputsystem.h | 9 + inputsystem/joystick_win32.cpp | 370 +++++++++++++++++++++++++++++ inputsystem/wscript | 8 +- materialsystem/colorspace.h | 2 +- mathlib/3dnow.cpp | 2 +- mathlib/mathlib_base.cpp | 6 +- mathlib/sse.cpp | 20 +- mathlib/sseconst.cpp | 42 ++-- mathlib/ssenoise.cpp | 2 +- public/materialsystem/imesh.h | 23 +- public/mathlib/mathlib.h | 8 +- public/mathlib/simdvectormatrix.h | 3 +- public/mathlib/ssemath.h | 18 +- public/mathlib/vmatrix.h | 2 +- public/shaderapi/ishaderapi.h | 4 + public/tier0/commonmacros.h | 2 +- public/tier0/platform.h | 34 ++- public/tier0/threadtools.h | 4 +- studiorender/r_studiodraw.cpp | 8 +- studiorender/r_studiolight.cpp | 2 +- studiorender/r_studiolight.h | 4 +- studiorender/studiorender.h | 2 +- tier0/PMELib.cpp | 2 +- tier0/cpumonitoring.cpp | 2 +- tier0/threadtools.cpp | 12 +- tier0/wscript | 7 +- tier1/processor_detect.cpp | 2 +- vphysics/trace.cpp | 4 +- vstdlib/coroutine.cpp | 10 +- wscript | 36 +-- 37 files changed, 591 insertions(+), 136 deletions(-) create mode 100644 inputsystem/joystick_win32.cpp diff --git a/.gitignore b/.gitignore index 717bd12d..24b890ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ *.mak *.mak.vpc_crc *.vpc_crc diff --git a/common/sse2neon.h b/common/sse2neon.h index f328fea0..4e54e11e 100644 --- a/common/sse2neon.h +++ b/common/sse2neon.h @@ -89,9 +89,6 @@ #define _sse2neon_likely(x) __builtin_expect(!!(x), 1) #define _sse2neon_unlikely(x) __builtin_expect(!!(x), 0) #elif defined(_MSC_VER) -#if _MSVC_TRADITIONAL -#error Using the traditional MSVC preprocessor is not supported! Use /Zc:preprocessor instead. -#endif #ifndef FORCE_INLINE #define FORCE_INLINE static inline #endif @@ -184,6 +181,10 @@ } while (0) #endif +#ifdef _M_ARM +#define vst1q_lane_s64(a, b, c) +#endif + /* Memory barriers * __atomic_thread_fence does not include a compiler barrier; instead, * the barrier is part of __atomic_load/__atomic_store's "volatile-like" @@ -202,8 +203,12 @@ FORCE_INLINE void _sse2neon_smp_mb(void) #elif defined(__GNUC__) || defined(__clang__) __atomic_thread_fence(__ATOMIC_SEQ_CST); #else /* MSVC */ +#ifdef _M_ARM + __dmb(_ARM_BARRIER_ISH); +#else __dmb(_ARM64_BARRIER_ISH); #endif +#endif } /* Architecture-specific build options */ @@ -268,7 +273,7 @@ FORCE_INLINE void _sse2neon_smp_mb(void) * we have to perform syscall instead. */ #if (!defined(__aarch64__) && !defined(_M_ARM64)) -#include +#include #endif /* "__has_builtin" can be used to query support for built-in functions @@ -574,10 +579,10 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t); /* Backwards compatibility for compilers with lack of specific type support */ // Older gcc does not define vld1q_u8_x4 type -#if defined(__GNUC__) && !defined(__clang__) && \ +#if defined(_M_ARM) || (defined(__GNUC__) && !defined(__clang__) && \ ((__GNUC__ <= 12 && defined(__arm__)) || \ (__GNUC__ == 10 && __GNUC_MINOR__ < 3 && defined(__aarch64__)) || \ - (__GNUC__ <= 9 && defined(__aarch64__))) + (__GNUC__ <= 9 && defined(__aarch64__)))) FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) { uint8x16x4_t ret; @@ -610,6 +615,9 @@ FORCE_INLINE uint8_t _sse2neon_vaddv_u8(uint8x8_t v8) } #endif +#if defined(_M_ARM) +#pragma message("TODO: Windows ARM32: Port many SSE2NEON functions") +#else #if !defined(__aarch64__) && !defined(_M_ARM64) /* emulate vaddvq u8 variant */ FORCE_INLINE uint8_t _sse2neon_vaddvq_u8(uint8x16_t a) @@ -645,6 +653,7 @@ FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a) return vaddvq_u16(a); } #endif +#endif /* Function Naming Conventions * The naming convention of SSE intrinsics is straightforward. A generic SSE @@ -1765,6 +1774,7 @@ FORCE_INLINE void _mm_free(void *addr) } #endif +#ifndef _M_ARM FORCE_INLINE uint64_t _sse2neon_get_fpcr() { uint64_t value; @@ -1808,6 +1818,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode() return r.field.bit24 ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; } +#endif // Macro: Get the rounding mode bits from the MXCSR control and status register. // The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, @@ -1826,6 +1837,8 @@ FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE() #if defined(__aarch64__) || defined(_M_ARM64) r.value = _sse2neon_get_fpcr(); +#elif defined(_M_ARM) + r.value = _MoveFromCoprocessor(10,7, 1,0,0); #else __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ #endif @@ -2247,7 +2260,7 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) FORCE_INLINE void _mm_prefetch(char const *p, int i) { (void) i; -#if defined(_MSC_VER) +#ifdef _M_ARM64 switch (i) { case _MM_HINT_NTA: __prefetch2(p, 1); @@ -2262,6 +2275,8 @@ FORCE_INLINE void _mm_prefetch(char const *p, int i) __prefetch2(p, 4); break; } +#elif defined(_M_ARM) + // TODO #else switch (i) { case _MM_HINT_NTA: @@ -2348,6 +2363,7 @@ FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b) vset_lane_u16((int) vget_lane_u64(t, 0), vdup_n_u16(0), 0)); } +#ifndef _M_ARM // Macro: Set the flush zero bits of the MXCSR control and status register to // the value in unsigned 32-bit integer a. The flush zero may contain any of the // following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF @@ -2379,6 +2395,7 @@ FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag) __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ #endif } +#endif // Set packed single-precision (32-bit) floating-point elements in dst with the // supplied values. @@ -2404,6 +2421,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w) // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) { +#ifndef _M_ARM union { fpcr_bitfield field; #if defined(__aarch64__) || defined(_M_ARM64) @@ -2442,6 +2460,7 @@ FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) #else __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ #endif +#endif } // Copy single-precision (32-bit) floating-point element a to the lower element @@ -3206,6 +3225,7 @@ FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b) return _mm_move_sd(a, _mm_cmpeq_pd(a, b)); } +#ifndef _M_ARM // Compare packed double-precision (64-bit) floating-point elements in a and b // for greater-than-or-equal, and store the results in dst. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd @@ -3247,6 +3267,7 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b) return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif } +#endif // Compare packed signed 16-bit integers in a and b for greater-than, and store // the results in dst. @@ -3275,6 +3296,7 @@ FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b) vcgtq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); } +#ifndef _M_ARM // Compare packed double-precision (64-bit) floating-point elements in a and b // for greater-than, and store the results in dst. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd @@ -3358,6 +3380,7 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b) return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif } +#endif // Compare packed signed 16-bit integers in a and b for less-than, and store the // results in dst. Note: This intrinsic emits the pcmpgtw instruction with the @@ -3389,6 +3412,7 @@ FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b) vcltq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); } +#ifndef _M_ARM // Compare packed double-precision (64-bit) floating-point elements in a and b // for less-than, and store the results in dst. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd @@ -3429,6 +3453,7 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b) return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif } +#endif // Compare packed double-precision (64-bit) floating-point elements in a and b // for not-equal, and store the results in dst. @@ -3456,6 +3481,7 @@ FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b) return _mm_move_sd(a, _mm_cmpneq_pd(a, b)); } +#ifndef _M_ARM // Compare packed double-precision (64-bit) floating-point elements in a and b // for not-greater-than-or-equal, and store the results in dst. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd @@ -3756,6 +3782,7 @@ FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b) return (*(double *) &a0 < *(double *) &b0); #endif } +#endif // Compare the lower double-precision (64-bit) floating-point element in a and b // for equality, and return the boolean result (0 or 1). @@ -4401,6 +4428,7 @@ FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b) vmaxq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); } +#ifndef _M_ARM // Compare packed double-precision (64-bit) floating-point elements in a and b, // and store packed maximum values in dst. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd @@ -4487,6 +4515,7 @@ FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b) return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif } +#endif // Compare the lower double-precision (64-bit) floating-point elements in a and // b, store the minimum value in the lower element of dst, and copy the upper @@ -4793,7 +4822,11 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) FORCE_INLINE void _mm_pause() { #if defined(_MSC_VER) +#ifdef _M_ARM + __isb(_ARM_BARRIER_SY); +#else __isb(_ARM64_BARRIER_SY); +#endif #else __asm__ __volatile__("isb\n"); #endif @@ -7622,6 +7655,7 @@ FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b) } /* SSE4.2 */ +#ifndef _M_ARM const static uint16_t ALIGN_STRUCT(16) _sse2neon_cmpestr_mask16b[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, @@ -8463,9 +8497,11 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v) return crc; } +#endif + /* AES */ -#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64) +#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64) && !defined(_M_ARM) /* clang-format off */ #define SSE2NEON_AES_SBOX(w) \ { \ @@ -8913,6 +8949,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) #undef SSE2NEON_MULTIPLY #endif +#elif defined(_M_ARM) #else /* __ARM_FEATURE_CRYPTO */ // Implements equivalent of 'aesenc' by combining AESE (with an empty key) and // AESMC and then manually applying the real key as an xor operation. This @@ -9034,6 +9071,7 @@ FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm) } } +#ifndef _M_ARM FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode() { union { @@ -9053,6 +9091,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode() return r.field.bit24 ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; } +#endif // Count the number of bits set to 1 in unsigned 32-bit integer a, and // return that count in dst. @@ -9113,6 +9152,7 @@ FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a) #endif } +#ifndef _M_ARM FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) { // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting, @@ -9140,6 +9180,7 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ #endif } +#endif // Return the current 64-bit value of the processor's time-stamp counter. // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc @@ -9161,6 +9202,9 @@ FORCE_INLINE uint64_t _rdtsc(void) #endif return val; +#elif defined(_M_ARM) + uint32_t val = _MoveFromCoprocessor(15,0, 9,13,0); + return ((uint64_t)val) << 6; #else uint32_t pmccntr, pmuseren, pmcntenset; // Read the user mode Performance Monitoring Unit (PMU) diff --git a/engine/cmodel.cpp b/engine/cmodel.cpp index e241aa77..9eb72eb7 100644 --- a/engine/cmodel.cpp +++ b/engine/cmodel.cpp @@ -862,7 +862,7 @@ BOX TRACING // Custom SIMD implementation for box brushes -const fltx4 Four_DistEpsilons={DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON}; +const fltx4 Four_DistEpsilons=FLTX4(DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON); const int32 ALIGN16 g_CubeFaceIndex0[4] ALIGN16_POST = {0,1,2,-1}; const int32 ALIGN16 g_CubeFaceIndex1[4] ALIGN16_POST = {3,4,5,-1}; bool IntersectRayWithBoxBrush( TraceInfo_t *pTraceInfo, const cbrush_t *pBrush, cboxbrush_t *pBox ) @@ -1572,7 +1572,7 @@ void FASTCALL CM_TraceToLeaf( TraceInfo_t * RESTRICT pTraceInfo, int ndxLeaf, fl fltx4 traceStart = LoadUnaligned3SIMD(pTraceInfo->m_start.Base()); fltx4 traceDelta = LoadUnaligned3SIMD(pTraceInfo->m_delta.Base()); fltx4 traceInvDelta = LoadUnaligned3SIMD(pTraceInfo->m_invDelta.Base()); - static const fltx4 vecEpsilon = {DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON}; + static const fltx4 vecEpsilon = FLTX4(DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON); // only used in !IS_POINT version: fltx4 extents; if (!IS_POINT) diff --git a/engine/l_studio.cpp b/engine/l_studio.cpp index af94a4c6..9855e1a0 100644 --- a/engine/l_studio.cpp +++ b/engine/l_studio.cpp @@ -40,7 +40,7 @@ #include "materialsystem/materialsystem_config.h" #include "materialsystem/itexture.h" #include "IHammer.h" -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) #include #endif #include "staticpropmgr.h" diff --git a/engine/sys_engine.cpp b/engine/sys_engine.cpp index c02ec295..f0ef97d3 100644 --- a/engine/sys_engine.cpp +++ b/engine/sys_engine.cpp @@ -104,7 +104,7 @@ extern ConVar host_timer_spin_ms; extern float host_nexttick; extern IVEngineClient *engineClient; -#ifdef WIN32 +#if defined(_WIN32) && !defined(_M_ARM) static void cpu_frequency_monitoring_callback( IConVar *var, const char *pOldValue, float flOldValue ) { // Set the specified interval for CPU frequency monitoring diff --git a/game/client/detailobjectsystem.cpp b/game/client/detailobjectsystem.cpp index ca2a6351..103452e4 100644 --- a/game/client/detailobjectsystem.cpp +++ b/game/client/detailobjectsystem.cpp @@ -2122,8 +2122,8 @@ int CDetailObjectSystem::SortSpritesBackToFront( int nLeaf, const Vector &viewOr #else #define MANTISSA_LSB_OFFSET 0 #endif -static fltx4 Four_MagicNumbers={ MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER }; -static fltx4 Four_255s={ 255.0, 255.0, 255.0, 255.0 }; +static fltx4 Four_MagicNumbers=FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER ); +static fltx4 Four_255s=FLTX4( 255.0, 255.0, 255.0, 255.0 ); static ALIGN16 int32 And255Mask[4] ALIGN16_POST = {0xff,0xff,0xff,0xff}; #define PIXMASK ( * ( reinterpret_cast< fltx4 *>( &And255Mask ) ) ) diff --git a/inputsystem/inputsystem.cpp b/inputsystem/inputsystem.cpp index 44a0c785..f948b5ce 100644 --- a/inputsystem/inputsystem.cpp +++ b/inputsystem/inputsystem.cpp @@ -167,8 +167,10 @@ InitReturnVal_t CInputSystem::Init() joy_xcontroller_found.SetValue( 0 ); +#ifdef USE_SDL if( !m_bConsoleTextMode ) InitializeTouch(); +#endif if ( IsPC() && !m_bConsoleTextMode ) { @@ -975,7 +977,9 @@ void CInputSystem::SetPrimaryUserId( int userId ) //----------------------------------------------------------------------------- void CInputSystem::SetRumble( float fLeftMotor, float fRightMotor, int userId ) { +#ifdef USE_SDL SetXDeviceRumble( fLeftMotor, fRightMotor, userId ); +#endif } diff --git a/inputsystem/inputsystem.h b/inputsystem/inputsystem.h index c369851e..490d6f33 100644 --- a/inputsystem/inputsystem.h +++ b/inputsystem/inputsystem.h @@ -145,10 +145,16 @@ public: struct JoystickInfo_t { +#ifdef USE_SDL void *m_pDevice; // Really an SDL_GameController*, NULL if not present. void *m_pHaptic; // Really an SDL_Haptic* float m_fCurrentRumble; bool m_bRumbleEnabled; +#elif defined(_WIN32) + JOYINFOEX m_JoyInfoEx; +#else +#error +#endif int m_nButtonCount; int m_nAxisFlags; int m_nDeviceId; @@ -271,6 +277,9 @@ public: //Added called and set to true when binding input and set to false once bound void SetNovintPure( bool bPure ); +#ifndef USE_SDL + unsigned int AxisValue( JoystickAxis_t axis, JOYINFOEX& ji ); +#endif #else void SetNovintPure( bool bPure ) {} // to satify the IInput virtual interface #endif diff --git a/inputsystem/joystick_win32.cpp b/inputsystem/joystick_win32.cpp new file mode 100644 index 00000000..9dd1176e --- /dev/null +++ b/inputsystem/joystick_win32.cpp @@ -0,0 +1,370 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: PC Joystick implementation for inputsystem.dll +// +//===========================================================================// + +/* For force feedback testing. */ +#include "inputsystem.h" +#include "tier1/convar.h" +#include "tier0/icommandline.h" + +//----------------------------------------------------------------------------- +// Joystick helpers +//----------------------------------------------------------------------------- +#define JOY_POVFWDRIGHT ( ( JOY_POVFORWARD + JOY_POVRIGHT ) >> 1 ) // 4500 +#define JOY_POVRIGHTBACK ( ( JOY_POVRIGHT + JOY_POVBACKWARD ) >> 1 ) // 13500 +#define JOY_POVFBACKLEFT ( ( JOY_POVBACKWARD + JOY_POVLEFT ) >> 1 ) // 22500 +#define JOY_POVLEFTFWD ( ( JOY_POVLEFT + JOY_POVFORWARD ) >> 1 ) // 31500 + +ConVar joy_wwhack1( "joy_wingmanwarrior_centerhack", "0", FCVAR_ARCHIVE, "Wingman warrior centering hack." ); +ConVar joy_axisbutton_threshold( "joy_axisbutton_threshold", "0.3", FCVAR_ARCHIVE, "Analog axis range before a button press is registered." ); + +//----------------------------------------------------------------------------- +// Initialize all joysticks +//----------------------------------------------------------------------------- +void CInputSystem::InitializeJoysticks( void ) +{ + // assume no joystick + m_nJoystickCount = 0; + + // abort startup if user requests no joystick + if ( CommandLine()->FindParm("-nojoy" ) ) + return; + + // verify joystick driver is present + int nMaxJoysticks = joyGetNumDevs(); + if ( nMaxJoysticks > MAX_JOYSTICKS ) + { + nMaxJoysticks = MAX_JOYSTICKS; + } + else if ( nMaxJoysticks <= 0 ) + { + DevMsg( 1, "joystick not found -- driver not present\n"); + return; + } + + // cycle through the joysticks looking for valid ones + MMRESULT mmr; + for ( int i=0; i < nMaxJoysticks; i++ ) + { + JOYINFOEX ji; + Q_memset( &ji, 0, sizeof( ji ) ); + ji.dwSize = sizeof(ji); + ji.dwFlags = JOY_RETURNCENTERED; + mmr = joyGetPosEx( i, &ji ); + if ( mmr != JOYERR_NOERROR ) + continue; + + // get the capabilities of the selected joystick + // abort startup if command fails + JOYCAPS jc; + Q_memset( &jc, 0, sizeof( jc ) ); + mmr = joyGetDevCaps( i, &jc, sizeof( jc ) ); + if ( mmr != JOYERR_NOERROR ) + continue; + + JoystickInfo_t &info = m_pJoystickInfo[m_nJoystickCount]; + info.m_nDeviceId = i; + info.m_JoyInfoEx = ji; + info.m_nButtonCount = (int)jc.wNumButtons; + info.m_bHasPOVControl = ( jc.wCaps & JOYCAPS_HASPOV ) ? true : false; + info.m_bDiagonalPOVControlEnabled = false; + info.m_nFlags = JOY_RETURNCENTERED | JOY_RETURNBUTTONS | JOY_RETURNX | JOY_RETURNY; + info.m_nAxisFlags = 0; + if ( jc.wNumAxes >= 2 ) + { + info.m_nAxisFlags |= 0x3; + } + if ( info.m_bHasPOVControl ) + { + info.m_nFlags |= JOY_RETURNPOV; + } + if ( jc.wCaps & JOYCAPS_HASZ ) + { + info.m_nFlags |= JOY_RETURNZ; + info.m_nAxisFlags |= 0x4; + } + if ( jc.wCaps & JOYCAPS_HASR ) + { + info.m_nFlags |= JOY_RETURNR; + info.m_nAxisFlags |= 0x8; + } + if ( jc.wCaps & JOYCAPS_HASU ) + { + info.m_nFlags |= JOY_RETURNU; + info.m_nAxisFlags |= 0x10; + } + if ( jc.wCaps & JOYCAPS_HASV ) + { + info.m_nFlags |= JOY_RETURNV; + info.m_nAxisFlags |= 0x20; + } + info.m_nLastPolledButtons = 0; + info.m_nLastPolledAxisButtons = 0; + info.m_nLastPolledPOVState = 0; + memset( info.m_pLastPolledAxes, 0, sizeof(info.m_pLastPolledAxes) ); + ++m_nJoystickCount; + + EnableJoystickInput( i, true ); + } +} + +void CInputSystem::ShutdownJoysticks() +{ +} + +//----------------------------------------------------------------------------- +// Process the event +//----------------------------------------------------------------------------- +void CInputSystem::JoystickButtonEvent( ButtonCode_t button, int sample ) +{ + // package the key + if ( sample ) + { + PostButtonPressedEvent( IE_ButtonPressed, m_nLastSampleTick, button, button ); + } + else + { + PostButtonReleasedEvent( IE_ButtonReleased, m_nLastSampleTick, button, button ); + } +} + + +//----------------------------------------------------------------------------- +// Update the joystick button state +//----------------------------------------------------------------------------- +void CInputSystem::UpdateJoystickButtonState( int nJoystick ) +{ + JoystickInfo_t &info = m_pJoystickInfo[nJoystick]; + JOYINFOEX& ji = info.m_JoyInfoEx; + + // Standard joystick buttons + unsigned int buttons = ji.dwButtons ^ info.m_nLastPolledButtons; + if ( buttons ) + { + for ( int j = 0 ; j < info.m_nButtonCount ; ++j ) + { + int mask = buttons & ( 1 << j ); + if ( !mask ) + continue; + + ButtonCode_t code = (ButtonCode_t)JOYSTICK_BUTTON( nJoystick, j ); + if ( mask & ji.dwButtons ) + { + // down event + JoystickButtonEvent( code, MAX_BUTTONSAMPLE ); + } + else + { + // up event + JoystickButtonEvent( code, 0 ); + } + } + + info.m_nLastPolledButtons = (unsigned int)ji.dwButtons; + } + + // Analog axis buttons + const float minValue = joy_axisbutton_threshold.GetFloat() * MAX_BUTTONSAMPLE; + for ( int j = 0 ; j < MAX_JOYSTICK_AXES; ++j ) + { + if ( ( info.m_nAxisFlags & (1 << j) ) == 0 ) + continue; + + // Positive side of the axis + int mask = ( 1 << (j << 1) ); + ButtonCode_t code = JOYSTICK_AXIS_BUTTON( nJoystick, (j << 1) ); + float value = GetAnalogValue( JOYSTICK_AXIS( nJoystick, j ) ); + + if ( value > minValue && !(info.m_nLastPolledAxisButtons & mask) ) + { + info.m_nLastPolledAxisButtons |= mask; + JoystickButtonEvent( code, MAX_BUTTONSAMPLE ); + } + if ( value <= minValue && (info.m_nLastPolledAxisButtons & mask) ) + { + info.m_nLastPolledAxisButtons &= ~mask; + JoystickButtonEvent( code, 0 ); + } + + // Negative side of the axis + mask <<= 1; + code = (ButtonCode_t)( code + 1 ); + if ( value < -minValue && !(info.m_nLastPolledAxisButtons & mask) ) + { + info.m_nLastPolledAxisButtons |= mask; + JoystickButtonEvent( code, MAX_BUTTONSAMPLE ); + } + if ( value >= -minValue && (info.m_nLastPolledAxisButtons & mask) ) + { + info.m_nLastPolledAxisButtons &= ~mask; + JoystickButtonEvent( code, 0 ); + } + } +} + + +//----------------------------------------------------------------------------- +// Purpose: Get raw joystick sample along axis +//----------------------------------------------------------------------------- +unsigned int CInputSystem::AxisValue( JoystickAxis_t axis, JOYINFOEX& ji ) +{ + switch (axis) + { + case JOY_AXIS_X: + return (unsigned int)ji.dwXpos; + case JOY_AXIS_Y: + return (unsigned int)ji.dwYpos; + case JOY_AXIS_Z: + return (unsigned int)ji.dwZpos; + case JOY_AXIS_R: + return (unsigned int)ji.dwRpos; + case JOY_AXIS_U: + return (unsigned int)ji.dwUpos; + case JOY_AXIS_V: + return (unsigned int)ji.dwVpos; + } + // FIX: need to do some kind of error + return (unsigned int)ji.dwXpos; +} + + +//----------------------------------------------------------------------------- +// Update the joystick POV control +//----------------------------------------------------------------------------- +void CInputSystem::UpdateJoystickPOVControl( int nJoystick ) +{ + JoystickInfo_t &info = m_pJoystickInfo[nJoystick]; + JOYINFOEX& ji = info.m_JoyInfoEx; + + if ( !info.m_bHasPOVControl ) + return; + + // convert POV information into 4 bits of state information + // this avoids any potential problems related to moving from one + // direction to another without going through the center position + unsigned int povstate = 0; + + if ( ji.dwPOV != JOY_POVCENTERED ) + { + if (ji.dwPOV == JOY_POVFORWARD) // 0 + { + povstate |= 0x01; + } + if (ji.dwPOV == JOY_POVRIGHT) // 9000 + { + povstate |= 0x02; + } + if (ji.dwPOV == JOY_POVBACKWARD) // 18000 + { + povstate |= 0x04; + } + if (ji.dwPOV == JOY_POVLEFT) // 27000 + { + povstate |= 0x08; + } + + // Deal with diagonals if user wants them + if ( info.m_bDiagonalPOVControlEnabled ) + { + if (ji.dwPOV == JOY_POVFWDRIGHT) // 4500 + { + povstate |= ( 0x01 | 0x02 ); + } + if (ji.dwPOV == JOY_POVRIGHTBACK) // 13500 + { + povstate |= ( 0x02 | 0x04 ); + } + if (ji.dwPOV == JOY_POVFBACKLEFT) // 22500 + { + povstate |= ( 0x04 | 0x08 ); + } + if (ji.dwPOV == JOY_POVLEFTFWD) // 31500 + { + povstate |= ( 0x08 | 0x01 ); + } + } + } + + // determine which bits have changed and key an auxillary event for each change + unsigned int buttons = povstate ^ info.m_nLastPolledPOVState; + if ( buttons ) + { + for ( int i = 0; i < JOYSTICK_POV_BUTTON_COUNT; ++i ) + { + unsigned int mask = buttons & ( 1 << i ); + if ( !mask ) + continue; + + ButtonCode_t code = (ButtonCode_t)JOYSTICK_POV_BUTTON( nJoystick, i ); + + if ( mask & povstate ) + { + // Keydown on POV buttons + JoystickButtonEvent( code, MAX_BUTTONSAMPLE ); + } + else + { + // KeyUp on POV buttons + JoystickButtonEvent( code, 0 ); + } + } + + // Latch old values + info.m_nLastPolledPOVState = povstate; + } +} + + +//----------------------------------------------------------------------------- +// Purpose: Sample the joystick +//----------------------------------------------------------------------------- +void CInputSystem::PollJoystick( void ) +{ + if ( !m_JoysticksEnabled.IsAnyFlagSet() ) + return; + + InputState_t &state = m_InputState[ m_bIsPolling ]; + for ( int i = 0; i < m_nJoystickCount; ++i ) + { + if ( !m_JoysticksEnabled.IsFlagSet( 1 << i ) ) + continue; + + JoystickInfo_t &info = m_pJoystickInfo[i]; + JOYINFOEX& ji = info.m_JoyInfoEx; + Q_memset( &ji, 0, sizeof( ji ) ); + ji.dwSize = sizeof( ji ); + ji.dwFlags = (DWORD)info.m_nFlags; + + if ( joyGetPosEx( info.m_nDeviceId, &ji ) != JOYERR_NOERROR ) + continue; + + // This hack fixes a bug in the Logitech WingMan Warrior DirectInput Driver + // rather than having 32768 be the zero point, they have the zero point at 32668 + // go figure -- anyway, now we get the full resolution out of the device + if ( joy_wwhack1.GetBool() ) + { + ji.dwUpos += 100; + } + + // Poll joystick axes + for ( int j = 0; j < MAX_JOYSTICK_AXES; ++j ) + { + if ( ( info.m_nAxisFlags & ( 1 << j ) ) == 0 ) + continue; + + AnalogCode_t code = JOYSTICK_AXIS( i, j ); + int nValue = AxisValue( (JoystickAxis_t)j, ji ) - MAX_BUTTONSAMPLE; + state.m_pAnalogDelta[ code ] = nValue - state.m_pAnalogValue[ code ]; + state.m_pAnalogValue[ code ] = nValue; + if ( state.m_pAnalogDelta[ code ] != 0 ) + { + PostEvent( IE_AnalogValueChanged, m_nLastSampleTick, code, state.m_pAnalogValue[ code ], state.m_pAnalogDelta[ code ] ); + } + } + + UpdateJoystickButtonState( i ); + UpdateJoystickPOVControl( i ); + } +} diff --git a/inputsystem/wscript b/inputsystem/wscript index 252452aa..af1a19ca 100755 --- a/inputsystem/wscript +++ b/inputsystem/wscript @@ -18,8 +18,6 @@ def configure(conf): def build(bld): source = [ 'inputsystem.cpp', - 'joystick_sdl.cpp', - 'touch_sdl.cpp', 'key_translation.cpp', 'steamcontroller.cpp', '../public/tier0/memoverride.cpp' @@ -41,6 +39,12 @@ def build(bld): libs = ['tier0','tier1','tier2','vstdlib','SDL2','steam_api'] + if bld.options.SDL: + source += ['joystick_sdl.cpp', 'touch_sdl.cpp'] + elif bld.env.DEST_OS == 'win32': + source += ['joystick_win32.cpp'] + libs += ['WINMM'] + if bld.env.DEST_OS == 'win32': libs += ['USER32'] diff --git a/materialsystem/colorspace.h b/materialsystem/colorspace.h index 4c13e0dd..3b91b243 100644 --- a/materialsystem/colorspace.h +++ b/materialsystem/colorspace.h @@ -287,7 +287,7 @@ namespace ColorSpace { // preload 3.0f onto the returns so that we don't need to multiply the bumpAverage by it // straight away (eg, reschedule this dependent op) - static const fltx4 vThree = { 3.0f, 3.0f, 3.0f, 0.0f }; + static const fltx4 vThree = FLTX4( 3.0f, 3.0f, 3.0f, 0.0f ); fltx4 retValBump1 = MulSIMD( vThree, linearBumpColor1); fltx4 retValBump2 = MulSIMD( vThree, linearBumpColor2); fltx4 retValBump3 = MulSIMD( vThree, linearBumpColor3); diff --git a/mathlib/3dnow.cpp b/mathlib/3dnow.cpp index 88e4ead7..42ce393f 100644 --- a/mathlib/3dnow.cpp +++ b/mathlib/3dnow.cpp @@ -16,7 +16,7 @@ // memdbgon must be the last include file in a .cpp file!!! #include "tier0/memdbgon.h" -#if !defined(COMPILER_MSVC64) && !defined(LINUX) && !defined(COMPILER_CLANG) +#if defined(_M_IX86) && !defined(LINUX) && !defined(COMPILER_CLANG) // Implement for 64-bit Windows if needed. // Clang hits "fatal error: error in backend:" and other errors when trying // to compile the inline assembly below. 3DNow support is highly unlikely to diff --git a/mathlib/mathlib_base.cpp b/mathlib/mathlib_base.cpp index 872ffec5..de46d232 100644 --- a/mathlib/mathlib_base.cpp +++ b/mathlib/mathlib_base.cpp @@ -3258,7 +3258,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright // SSE Generally performs better than 3DNow when present, so this is placed // first to allow SSE to override these settings. -#if !defined( OSX ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(LINUX) && !defined(PLATFORM_BSD) +#ifdef _M_IX86 if ( bAllow3DNow && pi.m_b3DNow ) { s_b3DNowEnabled = true; @@ -3291,7 +3291,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright pfRSqrt = _SSE_RSqrtAccurate; pfRSqrtFast = _SSE_RSqrtFast; #endif -#ifdef PLATFORM_WINDOWS_PC32 +#ifdef _M_IX86 pfFastSinCos = _SSE_SinCos; pfFastCos = _SSE_cos; #endif @@ -3304,7 +3304,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright if ( bAllowSSE2 && pi.m_bSSE2 ) { s_bSSE2Enabled = true; -#ifdef PLATFORM_WINDOWS_PC32 +#ifdef _M_IX86 pfFastSinCos = _SSE2_SinCos; pfFastCos = _SSE2_cos; #endif diff --git a/mathlib/sse.cpp b/mathlib/sse.cpp index 6122b664..000f352c 100644 --- a/mathlib/sse.cpp +++ b/mathlib/sse.cpp @@ -91,7 +91,7 @@ float _SSE_Sqrt(float x) { Assert( s_bMathlibInitialized ); float root = 0.f; -#ifdef _WIN32 +#if defined(_WIN32) && !defined(_M_ARM) _asm { sqrtss xmm0, x @@ -122,7 +122,7 @@ float _SSE_RSqrtAccurate(float x) } #else -#ifdef POSIX +#if POSIX || defined(_M_ARM) const __m128 f3 = _mm_set_ss(3.0f); // 3 as SSE value const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value #endif @@ -131,7 +131,7 @@ const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value float _SSE_RSqrtAccurate(float a) { -#ifdef _WIN32 +#if defined(_WIN32) && !defined(_M_ARM) float x; float half = 0.5f; float three = 3.f; @@ -153,7 +153,7 @@ float _SSE_RSqrtAccurate(float a) } return x; -#elif POSIX +#elif POSIX || defined(_M_ARM) __m128 xx = _mm_load_ss( &a ); __m128 xr = _mm_rsqrt_ss( xx ); __m128 xt; @@ -764,7 +764,7 @@ float _SSE_cos( float x ) //----------------------------------------------------------------------------- // SSE2 implementations of optimized routines: //----------------------------------------------------------------------------- -#ifdef PLATFORM_WINDOWS_PC32 +#if defined(PLATFORM_WINDOWS_PC32) && !defined(_M_ARM) void _SSE2_SinCos(float x, float* s, float* c) // any x { #ifdef _WIN32 @@ -850,9 +850,7 @@ void _SSE2_SinCos(float x, float* s, float* c) // any x #error "Not Implemented" #endif } -#endif // PLATFORM_WINDOWS_PC32 -#ifdef PLATFORM_WINDOWS_PC32 float _SSE2_cos(float x) { #ifdef _WIN32 @@ -970,9 +968,7 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1) #error "Not Implemented" #endif } -#endif -#if 0 void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 ) { Assert( s_bMathlibInitialized ); @@ -1026,9 +1022,7 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 ) #error "Not Implemented" #endif } -#endif -#ifdef _WIN32 void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest ) { // FIXME: This don't work!! It will overwrite memory in the write to dest @@ -1057,7 +1051,6 @@ void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const floa #endif } } -#endif #ifdef _WIN32 #ifdef PFN_VECTORMA @@ -1101,7 +1094,6 @@ float (__cdecl *pfVectorMA)(Vector& v) = _VectorMA; // NJS: (Nov 1 2002) -NOT- faster. may time a couple cycles faster in a single function like // this, but when inlined, and instruction scheduled, the C version is faster. // Verified this via VTune -/* vec_t DotProduct (const vec_t *a, const vec_t *c) { vec_t temp; @@ -1124,6 +1116,6 @@ vec_t DotProduct (const vec_t *a, const vec_t *c) ret } } -*/ +#endif #endif // COMPILER_MSVC64 diff --git a/mathlib/sseconst.cpp b/mathlib/sseconst.cpp index d68588fd..79a4f2bb 100644 --- a/mathlib/sseconst.cpp +++ b/mathlib/sseconst.cpp @@ -7,35 +7,35 @@ #include "mathlib/ssemath.h" #include "mathlib/ssequaternion.h" -const fltx4 Four_PointFives={0.5,0.5,0.5,0.5}; +const fltx4 Four_PointFives=FLTX4(0.5,0.5,0.5,0.5); #ifndef _X360 -const fltx4 Four_Zeros={0.0,0.0,0.0,0.0}; -const fltx4 Four_Ones={1.0,1.0,1.0,1.0}; +const fltx4 Four_Zeros=FLTX4(0.0,0.0,0.0,0.0); +const fltx4 Four_Ones=FLTX4(1.0,1.0,1.0,1.0); #endif -const fltx4 Four_Twos={2.0,2.0,2.0,2.0}; -const fltx4 Four_Threes={3.0,3.0,3.0,3.0}; -const fltx4 Four_Fours={4.0,4.0,4.0,4.0}; -const fltx4 Four_Origin={0,0,0,1}; -const fltx4 Four_NegativeOnes={-1,-1,-1,-1}; +const fltx4 Four_Twos=FLTX4(2.0,2.0,2.0,2.0); +const fltx4 Four_Threes=FLTX4(3.0,3.0,3.0,3.0); +const fltx4 Four_Fours=FLTX4(4.0,4.0,4.0,4.0); +const fltx4 Four_Origin=FLTX4(0,0,0,1); +const fltx4 Four_NegativeOnes=FLTX4(-1,-1,-1,-1); -const fltx4 Four_2ToThe21s={ (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) }; -const fltx4 Four_2ToThe22s={ (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) }; -const fltx4 Four_2ToThe23s={ (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) }; -const fltx4 Four_2ToThe24s={ (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) }; +const fltx4 Four_2ToThe21s=FLTX4( (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) ); +const fltx4 Four_2ToThe22s=FLTX4( (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) ); +const fltx4 Four_2ToThe23s=FLTX4( (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) ); +const fltx4 Four_2ToThe24s=FLTX4( (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) ); -const fltx4 Four_Point225s={ .225, .225, .225, .225 }; -const fltx4 Four_Epsilons={FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON}; +const fltx4 Four_Point225s=FLTX4( .225, .225, .225, .225 ); +const fltx4 Four_Epsilons=FLTX4(FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON); -const fltx4 Four_FLT_MAX={FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; -const fltx4 Four_Negative_FLT_MAX={-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX}; -const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. }; +const fltx4 Four_FLT_MAX=FLTX4(FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX); +const fltx4 Four_Negative_FLT_MAX=FLTX4(-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX); +const fltx4 g_SIMD_0123 = FLTX4( 0., 1., 2., 3. ); const fltx4 g_QuatMultRowSign[4] = { - { 1.0f, 1.0f, -1.0f, 1.0f }, - { -1.0f, 1.0f, 1.0f, 1.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f } + FLTX4( 1.0f, 1.0f, -1.0f, 1.0f ), + FLTX4( -1.0f, 1.0f, 1.0f, 1.0f ), + FLTX4( 1.0f, -1.0f, 1.0f, 1.0f ), + FLTX4( -1.0f, -1.0f, -1.0f, 1.0f ) }; const uint32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = {0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; diff --git a/mathlib/ssenoise.cpp b/mathlib/ssenoise.cpp index 6ead1c8d..379303ae 100644 --- a/mathlib/ssenoise.cpp +++ b/mathlib/ssenoise.cpp @@ -20,7 +20,7 @@ #define MAGIC_NUMBER (1<<15) // gives 8 bits of fraction -static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER }; +static fltx4 Four_MagicNumbers = FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER ); static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff}; diff --git a/public/materialsystem/imesh.h b/public/materialsystem/imesh.h index 6a952e00..e8f469a7 100644 --- a/public/materialsystem/imesh.h +++ b/public/materialsystem/imesh.h @@ -1220,7 +1220,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex ) Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed Assert( m_nCurrentVertex < m_nMaxVertexCount ); -#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM) const void *pRead = &vertex; void *pCurrPos = m_pCurrPosition; __asm @@ -1236,7 +1236,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex ) movntps [edi + 16], xmm1 movntps [edi + 32], xmm2 } -#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64) +#elif defined(GNUC) || defined(_WIN32) const char *pRead = (char *)&vertex; char *pCurrPos = (char *)m_pCurrPosition; __m128 m1 = _mm_load_ps( (float *)pRead ); @@ -1267,7 +1267,7 @@ inline void CVertexBuilder::Fast4VerticesSSE( Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed Assert( m_nCurrentVertex < m_nMaxVertexCount-3 ); -#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM) void *pCurrPos = m_pCurrPosition; __asm { @@ -1309,7 +1309,7 @@ inline void CVertexBuilder::Fast4VerticesSSE( movntps [edi + 80+96], xmm5 } -#elif defined(__arm__) || defined(PLATFORM_WINDOWS_PC64) +#elif defined(__arm__) || defined(_WIN32) const void *pReadA = &vtx_a; const void *pReadB = &vtx_b; const void *pReadC = &vtx_c; @@ -1430,7 +1430,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex ) Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed Assert( m_nCurrentVertex < m_nMaxVertexCount ); -#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM) const void *pRead = &vertex; void *pCurrPos = m_pCurrPosition; __asm @@ -1448,21 +1448,10 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex ) movntps [edi + 32], xmm2 movntps [edi + 48], xmm3 } -#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64) +#elif defined(GNUC) || defined(_WIN32) const void *pRead = &vertex; void *pCurrPos = m_pCurrPosition; -/* __asm__ __volatile__ ( - "movaps (%0), %%xmm0\n" - "movaps 16(%0), %%xmm1\n" - "movaps 32(%0), %%xmm2\n" - "movaps 48(%0), %%xmm3\n" - "movntps %%xmm0, (%1)\n" - "movntps %%xmm1, 16(%1)\n" - "movntps %%xmm2, 32(%1)\n" - "movntps %%xmm3, 48(%1)\n" - :: "r" (pRead), "r" (pCurrPos) : "memory"); */ - __m128 m1 = _mm_load_ps( (float *)pRead ); __m128 m2 = _mm_load_ps( (float *)((intp)pRead + 16) ); __m128 m3 = _mm_load_ps( (float *)((intp)pRead + 32) ); diff --git a/public/mathlib/mathlib.h b/public/mathlib/mathlib.h index 1e8e0266..43f5ecda 100644 --- a/public/mathlib/mathlib.h +++ b/public/mathlib/mathlib.h @@ -405,6 +405,9 @@ void inline SinCos( float radians, float *sine, float *cosine ) { #if defined( _X360 ) XMScalarSinCos( sine, cosine, radians ); +#elif defined( PLATFORM_WINDOWS_PC64 ) || defined(_M_ARM) + *sine = sin( radians ); + *cosine = cos( radians ); #elif defined( PLATFORM_WINDOWS_PC32 ) _asm { @@ -417,11 +420,8 @@ void inline SinCos( float radians, float *sine, float *cosine ) fstp DWORD PTR [edx] fstp DWORD PTR [eax] } -#elif defined( PLATFORM_WINDOWS_PC64 ) - *sine = sin( radians ); - *cosine = cos( radians ); #elif defined( OSX ) - __sincosf(radians, sine, cosine); + __sincosf(radians, sine, cosine); #elif defined( POSIX ) sincosf(radians, sine, cosine); #endif diff --git a/public/mathlib/simdvectormatrix.h b/public/mathlib/simdvectormatrix.h index f638152b..88cfef02 100644 --- a/public/mathlib/simdvectormatrix.h +++ b/public/mathlib/simdvectormatrix.h @@ -135,7 +135,8 @@ public: Assert( m_pData ); static FourVectors value{Four_Zeros, Four_Zeros, Four_Zeros}; - memutils::set( m_pData, value, m_nHeight*m_nPaddedWidth ); + for (size_t n = m_nHeight * m_nPaddedWidth; n; n--) + *(m_pData+n) = value; } void RaiseToPower( float power ); diff --git a/public/mathlib/ssemath.h b/public/mathlib/ssemath.h index 8b9def86..d54f2487 100644 --- a/public/mathlib/ssemath.h +++ b/public/mathlib/ssemath.h @@ -63,6 +63,12 @@ typedef __m128 fltx4; typedef __m128 i32x4; typedef __m128 u32x4; +#ifdef _M_ARM +#define FLTX4(w, x, y, z) {(w) + (unsigned long long(x) << 32), (y) + (unsigned long long(z) << 32)} +#else +#define FLTX4(w, x, y, z) {w, x, y, z} +#endif + #endif // The FLTX4 type is a fltx4 used as a parameter to a function. @@ -1828,7 +1834,7 @@ FORCEINLINE fltx4 ReplicateX4( float flValue ) FORCEINLINE float SubFloat( const fltx4 & a, int idx ) { // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) -#ifndef POSIX +#if defined(_WIN32) && !defined(_M_ARM) return a.m128_f32[ idx ]; #else return (reinterpret_cast(&a))[idx]; @@ -1837,7 +1843,7 @@ FORCEINLINE float SubFloat( const fltx4 & a, int idx ) FORCEINLINE float & SubFloat( fltx4 & a, int idx ) { -#ifndef POSIX +#if defined(_WIN32) && !defined(_M_ARM) return a.m128_f32[ idx ]; #else return (reinterpret_cast(&a))[idx]; @@ -1851,8 +1857,8 @@ FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) { -#ifndef POSIX - return a.m128_u32[idx]; +#if defined(_WIN32) && !defined(_M_ARM) + return a.m128_u32[ idx ]; #else return (reinterpret_cast(&a))[idx]; #endif @@ -1860,8 +1866,8 @@ FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) { -#ifndef POSIX - return a.m128_u32[idx]; +#if defined(_WIN32) && !defined(_M_ARM) + return a.m128_u32[ idx ]; #else return (reinterpret_cast(&a))[idx]; #endif diff --git a/public/mathlib/vmatrix.h b/public/mathlib/vmatrix.h index b435a421..408267be 100644 --- a/public/mathlib/vmatrix.h +++ b/public/mathlib/vmatrix.h @@ -43,7 +43,7 @@ struct cplane_t; #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h #endif -class alignas(16) VMatrix +class VMatrix { public: diff --git a/public/shaderapi/ishaderapi.h b/public/shaderapi/ishaderapi.h index dc05fe2e..2044fe75 100644 --- a/public/shaderapi/ishaderapi.h +++ b/public/shaderapi/ishaderapi.h @@ -175,7 +175,9 @@ public: virtual void TexMagFilter( ShaderTexFilterMode_t texFilterMode ) = 0; virtual void TexWrap( ShaderTexCoordComponent_t coord, ShaderTexWrapMode_t wrapMode ) = 0; +#ifndef SHADERAPIDX10 virtual void CopyRenderTargetToTexture( ShaderAPITextureHandle_t textureHandle ) = 0; +#endif // Binds a particular material to render with virtual void Bind( IMaterial* pMaterial ) = 0; @@ -612,6 +614,7 @@ public: //extended clear buffers function with alpha independent from color virtual void ClearBuffersObeyStencilEx( bool bClearColor, bool bClearAlpha, bool bClearDepth ) = 0; +#ifndef SHADERAPIDX10 // Allows copying a render target to another texture by specifying them both. virtual void CopyRenderTargetToScratchTexture( ShaderAPITextureHandle_t srcRt, ShaderAPITextureHandle_t dstTex, Rect_t *pSrcRect = NULL, Rect_t *pDstRect = NULL ) = 0; @@ -627,6 +630,7 @@ public: virtual void CopyTextureToTexture( ShaderAPITextureHandle_t srcTex, ShaderAPITextureHandle_t dstTex ) = 0; +#endif }; diff --git a/public/tier0/commonmacros.h b/public/tier0/commonmacros.h index 34aec40a..f57a6f4e 100644 --- a/public/tier0/commonmacros.h +++ b/public/tier0/commonmacros.h @@ -68,7 +68,7 @@ inline bool IsPowerOfTwo( T value ) // From crtdefs.h #if !defined(UNALIGNED) -#if defined(_M_IA64) || defined(_M_AMD64) +#if defined(_M_AMD64) || defined(_M_ARM) #define UNALIGNED __unaligned #else #define UNALIGNED diff --git a/public/tier0/platform.h b/public/tier0/platform.h index a8ad1b02..37cf6058 100644 --- a/public/tier0/platform.h +++ b/public/tier0/platform.h @@ -852,7 +852,9 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT) //----------------------------------------------------------------------------- //#define CHECK_FLOAT_EXCEPTIONS 1 -#if !defined( _X360 ) +#if defined (__arm__) || defined (__aarch64__) + inline void SetupFPUControlWord() {} +#elif !defined( _X360 ) #if defined( _MSC_VER ) #if defined( PLATFORM_WINDOWS_PC64 ) @@ -898,8 +900,6 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT) #endif #endif -#elif defined (__arm__) || defined (__aarch64__) - inline void SetupFPUControlWord() {} #else inline void SetupFPUControlWord() { @@ -1025,7 +1025,7 @@ inline T QWordSwapC( T dw ) return output; } -#elif defined( _MSC_VER ) && !defined( PLATFORM_WINDOWS_PC64 ) +#elif defined( _MSC_VER ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(_M_ARM) #define WordSwap WordSwapAsm #define DWordSwap DWordSwapAsm @@ -1229,8 +1229,18 @@ PLATFORM_INTERFACE time_t Plat_timegm( struct tm *timeptr ); PLATFORM_INTERFACE struct tm * Plat_localtime( const time_t *timep, struct tm *result ); #if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 ) +#ifdef _M_X64 extern "C" unsigned __int64 __rdtsc(); #pragma intrinsic(__rdtsc) +#else +#include +#define MSVC_ARM_SYSREG(op0, op1, crn, crm, op2) \ + ( ((op0 & 1) << 14) | \ + ((op1 & 7) << 11) | \ + ((crn & 15) << 7) | \ + ((crm & 15) << 3) | \ + ((op2 & 7) << 0) ) +#endif #endif inline uint64 Plat_Rdtsc() @@ -1241,15 +1251,15 @@ inline uint64 Plat_Rdtsc() return t.tv_sec * 1000000000ULL + t.tv_nsec; #elif defined( _X360 ) return ( uint64 )__mftb32(); -#elif defined( _WIN64 ) - return ( uint64 )__rdtsc(); -#elif defined( _WIN32 ) - #if defined( _MSC_VER ) && ( _MSC_VER >= 1400 ) +#elif defined( _M_IX86 ) + _asm rdtsc +#elif defined( _M_ARM ) + uint32 val = _MoveFromCoprocessor(15,0, 9,13,0); + return ((uint64)val) << 6; +#elif defined( _M_ARM64 ) || defined( _M_ARM64EC ) + return _ReadStatusReg(MSVC_ARM_SYSREG(3,3, 9,12,5)); +#elif defined( COMPILER_MSVC ) return ( uint64 )__rdtsc(); - #else - __asm rdtsc; - __asm ret; - #endif #elif defined( __i386__ ) uint64 val; __asm__ __volatile__ ( "rdtsc" : "=A" (val) ); diff --git a/public/tier0/threadtools.h b/public/tier0/threadtools.h index b7216193..13c6827f 100644 --- a/public/tier0/threadtools.h +++ b/public/tier0/threadtools.h @@ -241,6 +241,8 @@ inline void ThreadPause() _mm_pause(); #elif defined( COMPILER_MSVC32 ) __asm pause; +#elif defined(_M_ARM) + __yield(); #elif defined( COMPILER_MSVCX360 ) YieldProcessor(); __asm { or r0,r0,r0 } @@ -445,7 +447,7 @@ PLATFORM_INTERFACE bool ThreadInterlockedAssignIf64( volatile int64 *pDest, int6 PLATFORM_INTERFACE int64 ThreadInterlockedExchange64( int64 volatile *, int64 value ) NOINLINE; -#ifdef COMPILER_MSVC32 +#if COMPILER_MSVC32 || _M_ARM PLATFORM_INTERFACE int64 ThreadInterlockedIncrement64( int64 volatile * ) NOINLINE; PLATFORM_INTERFACE int64 ThreadInterlockedDecrement64( int64 volatile * ) NOINLINE; PLATFORM_INTERFACE int64 ThreadInterlockedExchangeAdd64( int64 volatile *, int64 value ) NOINLINE; diff --git a/studiorender/r_studiodraw.cpp b/studiorender/r_studiodraw.cpp index c2aee1fb..444dc832 100644 --- a/studiorender/r_studiodraw.cpp +++ b/studiorender/r_studiodraw.cpp @@ -657,7 +657,7 @@ static matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3 static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result ) { // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization -#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) +#if defined( _WIN32 ) && !defined( _X360 ) && defined(_M_IX86) switch( boneweights.numbones ) { default: @@ -866,11 +866,9 @@ static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matr return &result; #endif } -#elif POSIX || PLATFORM_WINDOWS_PC64 +#elif POSIX || _WIN32 // #warning "ComputeSkinMatrixSSE C implementation only" return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); -#elif defined( _X360 ) - return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); #else #error #endif @@ -909,7 +907,7 @@ inline void CStudioRender::R_ComputeLightAtPoint3( const Vector &pos, const Vect // define SPECIAL_SSE_MESH_PROCESSOR to enable code which contains a special optimized SSE lighting loop, significantly // improving software vertex processing performace. -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) #define SPECIAL_SSE_MESH_PROCESSOR #endif diff --git a/studiorender/r_studiolight.cpp b/studiorender/r_studiolight.cpp index 2650b453..19847897 100644 --- a/studiorender/r_studiolight.cpp +++ b/studiorender/r_studiolight.cpp @@ -66,7 +66,7 @@ void R_LightAmbient_4D( const Vector& normal, Vector4D* pLightBoxColor, Vector & VectorMA( lv, normal[2]*normal[2], normal[2] > 0.f ? pLightBoxColor[4].AsVector3D() : pLightBoxColor[5].AsVector3D(), lv ); } -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv ) { // VPROF( "R_LightAmbient" ); diff --git a/studiorender/r_studiolight.h b/studiorender/r_studiolight.h index 9a984b9b..1ac1eeba 100644 --- a/studiorender/r_studiolight.h +++ b/studiorender/r_studiolight.h @@ -13,7 +13,7 @@ #include "tier0/platform.h" -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) #include #endif @@ -40,7 +40,7 @@ float FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const Vector& // Copies lighting state into a buffer, returns number of lights copied int CopyLocalLightingState( int nMaxLights, LightDesc_t *pDest, int nLightCount, const LightDesc_t *pSrc ); -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) // SSE optimized versions void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv ); __m128 FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const FourVectors& delta ); diff --git a/studiorender/studiorender.h b/studiorender/studiorender.h index f2c81032..fa83ba64 100644 --- a/studiorender/studiorender.h +++ b/studiorender/studiorender.h @@ -22,7 +22,7 @@ #include "flexrenderdata.h" #include "mathlib/compressed_vector.h" #include "r_studiolight.h" -#if defined( _WIN32 ) && !defined( _X360 ) +#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM) #include #endif #include "tier0/dbg.h" diff --git a/tier0/PMELib.cpp b/tier0/PMELib.cpp index 9e150866..e37acd2d 100644 --- a/tier0/PMELib.cpp +++ b/tier0/PMELib.cpp @@ -6,7 +6,7 @@ // //===========================================================================// -#ifdef _WIN32 +#if defined(_WIN32) && !defined(_M_ARM) #include #pragma warning( disable : 4530 ) // warning: exception handler -GX option diff --git a/tier0/cpumonitoring.cpp b/tier0/cpumonitoring.cpp index 0c9bb479..1b0e1963 100644 --- a/tier0/cpumonitoring.cpp +++ b/tier0/cpumonitoring.cpp @@ -23,7 +23,7 @@ #include "pch_tier0.h" #include "tier0/cpumonitoring.h" -#ifdef PLATFORM_WINDOWS_PC32 +#ifdef _M_IX86 #include "tier0/threadtools.h" #define NOMINMAX #undef min diff --git a/tier0/threadtools.cpp b/tier0/threadtools.cpp index d5194927..b856916c 100644 --- a/tier0/threadtools.cpp +++ b/tier0/threadtools.cpp @@ -1740,7 +1740,7 @@ bool ThreadInterlockedAssignIf( int32 volatile *pDest, int32 value, int32 comper { Assert( (size_t)pDest % 4 == 0 ); -#if !(defined(_WIN64) || defined (_X360)) +#if !(defined(_WIN64) || defined (_X360) || defined(_M_ARM)) __asm { mov eax,comperand @@ -1773,7 +1773,7 @@ void *ThreadInterlockedCompareExchangePointer( void * volatile *pDest, void *val bool ThreadInterlockedAssignPointerIf( void * volatile *pDest, void *value, void *comperand ) { Assert( (size_t)pDest % 4 == 0 ); -#if !(defined(_WIN64) || defined (_X360)) +#if !(defined(_WIN64) || defined (_X360) || defined(_M_ARM)) __asm { mov eax,comperand @@ -1807,13 +1807,19 @@ int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, in lock CMPXCHG8B [esi]; } } +#elif defined(_M_ARM) +int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, int64 comperand ) +{ + Assert( (size_t)pDest % 8 == 0 ); + return InterlockedCompareExchange64( pDest, value, comperand ); +} #endif bool ThreadInterlockedAssignIf64(volatile int64 *pDest, int64 value, int64 comperand ) { Assert( (size_t)pDest % 8 == 0 ); -#if defined(_X360) || defined(_WIN64) +#if defined(_X360) || defined(_WIN64) || defined(_M_ARM) return ( ThreadInterlockedCompareExchange64( pDest, value, comperand ) == comperand ); #else __asm diff --git a/tier0/wscript b/tier0/wscript index b02c60bf..57a49064 100755 --- a/tier0/wscript +++ b/tier0/wscript @@ -22,8 +22,8 @@ def build(bld): 'assert_dialog.cpp', 'commandline.cpp', 'cpu.cpp', - 'cpumonitoring.cpp', 'cpu_usage.cpp', + 'cpumonitoring.cpp', 'dbg.cpp', 'dynfunction.cpp', 'fasttimer.cpp', @@ -54,10 +54,13 @@ def build(bld): 'assert_dialog.rc', #'etwprof.cpp', [$WINDOWS] 'platform.cpp', - 'pme.cpp', 'vcrmode.cpp', 'win32consoleio.cpp' ] + if bld.env.DEST_CPU == 'arm': + source += ['pme_posix.cpp'] + else: + source += ['pme.cpp'] if bld.env.DEST_CPU == 'amd64': source += [ 'InterlockedCompareExchange128.masm' diff --git a/tier1/processor_detect.cpp b/tier1/processor_detect.cpp index 54542d02..91bb6d51 100644 --- a/tier1/processor_detect.cpp +++ b/tier1/processor_detect.cpp @@ -6,7 +6,7 @@ // $NoKeywords: $ //=============================================================================// -#if defined( _X360 ) || defined( WIN64 ) +#if defined( _X360 ) || defined( WIN64 ) || defined(_M_ARM) bool CheckMMXTechnology(void) { return false; } bool CheckSSETechnology(void) { return false; } diff --git a/vphysics/trace.cpp b/vphysics/trace.cpp index 18474522..fe13a8dd 100644 --- a/vphysics/trace.cpp +++ b/vphysics/trace.cpp @@ -453,7 +453,7 @@ private: #ifdef WIN32 static const #endif -fltx4 g_IVPToHLDir = { 1.0f, -1.0f, 1.0f, 1.0f }; +fltx4 g_IVPToHLDir = FLTX4( 1.0f, -1.0f, 1.0f, 1.0f ); //static const fltx4 g_IVPToHLPosition = { IVP2HL(1.0f), -IVP2HL(1.0f), IVP2HL(1.0f), IVP2HL(1.0f) }; @@ -680,7 +680,7 @@ bool CTraceIVP::BuildLeafmapCache( const leafmap_t * RESTRICT pLeafmap ) #endif } -static const fltx4 g_IndexBase = {0,1,2,3}; +static const fltx4 g_IndexBase =FLTX4(0,1,2,3); int CTraceIVP::SupportMapCached( const Vector &dir, Vector *pOut ) const { VPROF("SupportMapCached"); diff --git a/vstdlib/coroutine.cpp b/vstdlib/coroutine.cpp index dc148a8c..eff744e7 100644 --- a/vstdlib/coroutine.cpp +++ b/vstdlib/coroutine.cpp @@ -218,7 +218,11 @@ extern "C" byte *GetStackPtr64(); #define GetStackPtr( pStackPtr) byte *pStackPtr = GetStackPtr64(); #else #ifdef WIN32 -#define GetStackPtr( pStackPtr ) byte *pStackPtr; __asm mov pStackPtr, esp +# ifdef _M_ARM +# define GetStackPtr( pStackPtr ) byte x; byte *pStackPtr = &x +# else +# define GetStackPtr( pStackPtr ) byte *pStackPtr; __asm mov pStackPtr, esp +# endif #elif defined(GNUC) // Apple's version of gcc/g++ doesn't return the expected value using the intrinsic, so // do it the old fashioned way - this will also use asm on linux (since we don't compile @@ -649,7 +653,7 @@ bool Internal_Coroutine_Continue( HCoroutine hCoroutine, const char *pchDebugMsg bool bInCoroutineAlready = GCoroutineMgr().IsAnyCoroutineActive(); #ifdef _WIN32 -#ifndef _WIN64 +#if !defined( _WIN64 ) && !defined( _M_ARM ) // make sure nobody has a try/catch block and then yielded // because we hate that and we will crash uint32 topofexceptionchain; @@ -897,7 +901,7 @@ void Coroutine_YieldToMain() CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_YieldToMain() %s#%x -> %s#%x\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine ) ); #ifdef _WIN32 -#ifndef _WIN64 +#if !defined( _WIN64 ) && !defined( _M_ARM ) // make sure nobody has a try/catch block and then yielded // because we hate that and we will crash uint32 topofexceptionchain; diff --git a/wscript b/wscript index 4690d56d..fafdccec 100644 --- a/wscript +++ b/wscript @@ -1,5 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 +# vim: noexpandtab # nillerusr from __future__ import print_function @@ -222,6 +223,8 @@ def define_platform(conf): '_ALLOW_MSC_VER_MISMATCH', 'NO_X360_XDK' ]) + if conf.env.DEST_CPU == 'arm': + conf.env.append_unique('DEFINES', ['__arm__=1']) elif conf.env.DEST_OS == 'darwin': conf.env.append_unique('DEFINES', [ 'OSX=1', '_OSX=1', @@ -384,14 +387,16 @@ def check_deps(conf): conf.check(lib='opus', uselib_store='OPUS') if conf.env.DEST_OS == 'win32': - conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB') - # conf.check(lib='nvtc', uselib_store='NVTC') - # conf.check(lib='ati_compress_mt_vc10', uselib_store='ATI_COMPRESS_MT_VC10') - conf.check(lib='SDL2', uselib_store='SDL2') - conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG') - conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG') - conf.check(lib='d3dx9', uselib_store='D3DX9') - conf.check(lib='d3d9', uselib_store='D3D9') + if conf.env.DEST_CPU == 'arm': + conf.check(lib='d3d9', uselib_store='D3D9') + conf.check(lib='d3dcompiler', uselib_store='D3DCOMPILER') + else: + conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB') + conf.check(lib='SDL2', uselib_store='SDL2') + conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG') + conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG') + conf.check(lib='d3dx9', uselib_store='D3DX9') + conf.check(lib='d3d9', uselib_store='D3D9') conf.check(lib='dsound', uselib_store='DSOUND') conf.check(lib='dxguid', uselib_store='DXGUID') if conf.options.OPUS: @@ -405,10 +410,12 @@ def configure(conf): # Force XP compability, all build targets should add # subsystem=bld.env.MSVC_SUBSYSTEM # TODO: wrapper around bld.stlib, bld.shlib and so on? - conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01' - conf.env.MSVC_TARGETS = ['x86'] # explicitly request x86 target for MSVC - if conf.options.ALLOW64: - conf.env.MSVC_TARGETS = ['x64'] + conf.env.MSVC_TARGETS = ['x86' if not conf.options.ALLOW64 else 'x64'] + if conf.env.MSVC_TARGETS[0] == 'x86': + conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01' + else: + conf.env.MSVC_SUBSYSTEM = 'WINDOWS' + if sys.platform == 'win32': conf.load('msvc_pdb_ext msdev msvs') conf.load('subproject xcompile compiler_c compiler_cxx gitversion clang_compilation_database strip_on_install_v2 waf_unit_test enforce_pic') @@ -504,7 +511,6 @@ def configure(conf): else: cflags += [ '/I'+os.path.abspath('.')+'/thirdparty/SDL', - '/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX', '/GF', '/Gy', '/fp:fast', @@ -514,6 +520,8 @@ def configure(conf): '/TP', '/EHsc' ] + if conf.env.DEST_CPU != 'arm': + cflags += ['/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX'] if conf.options.BUILD_TYPE == 'debug': linkflags += [ @@ -593,7 +601,7 @@ def configure(conf): def build(bld): os.environ["CCACHE_DIR"] = os.path.abspath('.ccache/'+bld.env.COMPILER_CC+'/'+bld.env.DEST_OS+'/'+bld.env.DEST_CPU) - if bld.env.DEST_OS in ['win32', 'android']: + if bld.env.SDL and bld.env.DEST_OS in ['win32', 'android']: sdl_name = 'SDL2.dll' if bld.env.DEST_OS == 'win32' else 'libSDL2.so' sdl_path = os.path.join('lib', bld.env.DEST_OS, bld.env.DEST_CPU, sdl_name) bld.install_files(bld.env.LIBDIR, [sdl_path])