Browse Source

client: studio: fix NEON vreinterpret types, fix memcpy

fix-cwd-path
Alibek Omarov 1 year ago
parent
commit
b4bd06603e
  1. 8
      cl_dll/studio_util.cpp

8
cl_dll/studio_util.cpp

@@ -216,7 +216,7 @@ void ConcatTransforms( float in1[3][4], float in2[3][4], float out[3][4] )
 out_reg.val[2] = vfmaq_laneq_f32(out_reg.val[2], in2_reg.val[1], in1_reg.val[2], 1);
 out_reg.val[2] = vfmaq_laneq_f32(out_reg.val[2], in2_reg.val[2], in1_reg.val[2], 2);
-memcpy(&out, &out_reg, sizeof(out));
+memcpy(out, &out_reg, sizeof(float) * 3 * 4);
 #else
 out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
 in1[0][2] * in2[2][0];
@@ -256,8 +256,8 @@ AngleQuaternion
 #if XASH_SIMD_NEON
 static const float32x4_t AngleQuaternion_sign2 =
 vzipq_f32(
-vreinterpret_f32_u32(vdupq_n_u32(0x80000000)),
-vreinterpret_f32_u32(vdupq_n_u32(0x00000000))
+vreinterpretq_f32_u32(vdupq_n_u32(0x80000000)),
+vreinterpretq_f32_u32(vdupq_n_u32(0x00000000))
 ).val[0]; // { 0x80000000, 0x00000000, 0x80000000, 0x00000000 };
 #endif
 void AngleQuaternion( float *angles, vec4_t quaternion )
@@ -276,7 +276,7 @@ void AngleQuaternion( float *angles, vec4_t quaternion )
 float32x4_t sy_cr_cy_sr = vextq_f32(sr_sy_cr_cy_sp_0_cp_1.val[0], sr_sy_cr_cy_sp_0_cp_1.val[0], 1);
 float32x4_t cr_cy_sr_sy = vextq_f32(sr_sy_cr_cy_sp_0_cp_1.val[0], sr_sy_cr_cy_sp_0_cp_1.val[0], 2);
 float32x4_t cy_sr_sy_cr = vextq_f32(sr_sy_cr_cy_sp_0_cp_1.val[0], sr_sy_cr_cy_sp_0_cp_1.val[0], 3);
-float32x4_t sp_sp_sp_sp_signed = veorq_u32(vreinterpret_u32_f32(sp_sp_sp_sp), AngleQuaternion_sign2);
+float32x4_t sp_sp_sp_sp_signed = veorq_u32(vreinterpretq_u32_f32(sp_sp_sp_sp), AngleQuaternion_sign2);
 float32x4_t left = vmulq_f32(vmulq_f32(sr_sy_cr_cy, cp_cp_cp_cp), cy_sr_sy_cr);

Loading…
Cancel
Save