You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
665 lines
15 KiB
665 lines
15 KiB
//========= Copyright Valve Corporation, All rights reserved. ============// |
|
// |
|
// Purpose: |
|
// |
|
// $NoKeywords: $ |
|
// |
|
//===========================================================================// |
|
|
|
#ifdef _WIN32 |
|
#include <windows.h> |
|
|
|
#pragma warning( disable : 4530 ) // warning: exception handler -GX option |
|
|
|
#include "tier0/valve_off.h" |
|
#include "tier0/pmelib.h" |
|
#if _MSC_VER >=1300 |
|
#else |
|
#include "winioctl.h" |
|
#endif |
|
#include "tier0/valve_on.h" |
|
|
|
#include "tier0/ioctlcodes.h" |
|
|
|
// NOTE: This has to be the last file included! |
|
#include "tier0/memdbgon.h" |
|
|
|
|
|
PME* PME::_singleton = 0; |
|
|
|
// Single interface. |
|
PME* PME::Instance() |
|
{ |
|
if (_singleton == 0) |
|
{ |
|
_singleton = new PME; |
|
} |
|
return _singleton; |
|
} |
|
|
|
//--------------------------------------------------------------------------- |
|
// Open the device driver and detect the processor |
|
//--------------------------------------------------------------------------- |
|
HRESULT PME::Init( void ) |
|
{ |
|
OSVERSIONINFO OS; |
|
|
|
if ( bDriverOpen ) |
|
return E_DRIVER_ALREADY_OPEN; |
|
|
|
switch( vendor ) |
|
{ |
|
case INTEL: |
|
case AMD: |
|
break; |
|
default: |
|
bDriverOpen = FALSE; // not an Intel or Athlon processor so return false |
|
return E_UNKNOWN_CPU_VENDOR; |
|
} |
|
|
|
//----------------------------------------------------------------------- |
|
// Get the operating system version |
|
//----------------------------------------------------------------------- |
|
OS.dwOSVersionInfoSize = sizeof( OSVERSIONINFO ); |
|
GetVersionEx( &OS ); |
|
|
|
if ( OS.dwPlatformId == VER_PLATFORM_WIN32_NT ) |
|
{ |
|
hFile = CreateFile( // WINDOWS NT |
|
"\\\\.\\GDPERF", |
|
GENERIC_READ, |
|
0, |
|
NULL, |
|
OPEN_EXISTING, |
|
FILE_ATTRIBUTE_NORMAL, |
|
NULL); |
|
} |
|
else |
|
{ |
|
hFile = CreateFile( // WINDOWS 95 |
|
"\\\\.\\GDPERF.VXD", |
|
GENERIC_READ, |
|
0, |
|
NULL, |
|
OPEN_EXISTING, |
|
FILE_ATTRIBUTE_NORMAL, |
|
NULL); |
|
} |
|
|
|
if (hFile == INVALID_HANDLE_VALUE ) |
|
return E_CANT_OPEN_DRIVER; |
|
|
|
|
|
bDriverOpen = TRUE; |
|
|
|
|
|
//------------------------------------------------------------------- |
|
// We have successfully opened the device driver, get the family |
|
// of the processor. |
|
//------------------------------------------------------------------- |
|
|
|
|
|
|
|
//------------------------------------------------------------------- |
|
// We need to write to counter 0 on the pro family to enable both |
|
// of the performance counters. We write to both so they start in a |
|
// known state. For the pentium this is not necessary. |
|
//------------------------------------------------------------------- |
|
if (vendor == INTEL && version.Family == PENTIUMPRO_FAMILY) |
|
{ |
|
SelectP5P6PerformanceEvent(P6_CLOCK, 0, TRUE, TRUE); |
|
SelectP5P6PerformanceEvent(P6_CLOCK, 1, TRUE, TRUE); |
|
} |
|
|
|
return S_OK; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
//--------------------------------------------------------------------------- |
|
// Close the device driver |
|
//--------------------------------------------------------------------------- |
|
HRESULT PME::Close(void) |
|
{ |
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
bDriverOpen = false; |
|
|
|
if (hFile) // if we have no driver handle, return FALSE |
|
{ |
|
BOOL result = CloseHandle(hFile); |
|
|
|
hFile = NULL; |
|
return result ? S_OK : HRESULT_FROM_WIN32( GetLastError() ); |
|
} |
|
else |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
|
|
} |
|
|
|
//--------------------------------------------------------------------------- |
|
// Select the event to monitor with counter 0 |
|
// |
|
HRESULT PME::SelectP5P6PerformanceEvent(uint32 dw_event, uint32 dw_counter, |
|
bool b_user, bool b_kernel) |
|
{ |
|
HRESULT hr = S_OK; |
|
|
|
if (dw_counter>1) // is the counter valid |
|
return E_BAD_COUNTER; |
|
|
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
if ( ((dw_event>>28)&0xF) != (uint32)version.Family) |
|
{ |
|
return E_ILLEGAL_OPERATION; // this operation is not for this processor |
|
} |
|
|
|
if ( (((dw_event & 0x300)>>8) & (dw_counter+1)) == 0 ) |
|
{ |
|
return E_ILLEGAL_OPERATION; // this operation is not for this counter |
|
} |
|
|
|
switch(version.Family) |
|
{ |
|
case PENTIUM_FAMILY: |
|
{ |
|
uint64 i64_cesr; |
|
int i_kernel_bit,i_user_bit; |
|
BYTE u1_event = (BYTE)((dw_event & (0x3F0000))>>16); |
|
|
|
if (dw_counter==0) // the kernel and user mode bits depend on |
|
{ // counter being used. |
|
i_kernel_bit = 6; |
|
i_user_bit = 7; |
|
} |
|
else |
|
{ |
|
i_kernel_bit = 22; |
|
i_user_bit = 23; |
|
} |
|
|
|
ReadMSR(0x11, &i64_cesr); // get current P5 event select (cesr) |
|
|
|
// top 32bits of cesr are not valid so ignore them |
|
i64_cesr &= ((dw_counter == 0)?0xffff0000:0x0000ffff); |
|
WriteMSR(0x11,i64_cesr); // stop the counter |
|
WriteMSR((dw_counter==0)?0x12:0x13,0ui64); // clear the p.counter |
|
|
|
// set the user and kernel mode bits |
|
i64_cesr |= ( b_user?(1<<7):0 ) | ( b_kernel?(1<<6):0 ); |
|
|
|
// is this the special P5 value that signals count clocks?? |
|
if (u1_event == 0x3f) |
|
{ |
|
WriteMSR(0x11, i64_cesr|0x100); // Count clocks |
|
} |
|
else |
|
{ |
|
WriteMSR(0x11, i64_cesr|u1_event); // Count events |
|
} |
|
|
|
} |
|
break; |
|
|
|
case PENTIUMPRO_FAMILY: |
|
|
|
{ |
|
BYTE u1_event = (BYTE)((dw_event & (0xFF0000))>>16); |
|
BYTE u1_mask = (BYTE)((dw_event & 0xFF)); |
|
|
|
// Event select 0 and 1 are identical. |
|
hr = WriteMSR((dw_counter==0)?0x186:0x187, |
|
|
|
|
|
uint64((u1_event | (b_user?(1<<16):0) | (b_kernel?(1<<17):0) | (1<<22) | (1<<18) | (u1_mask<<8)) ) |
|
); |
|
} |
|
break; |
|
|
|
case PENTIUM4_FAMILY: |
|
// use the p4 path |
|
break; |
|
|
|
default: |
|
return E_UNKNOWN_CPU; |
|
} |
|
|
|
return hr; |
|
} |
|
|
|
//--------------------------------------------------------------------------- |
|
// Read model specific register |
|
//--------------------------------------------------------------------------- |
|
HRESULT PME::ReadMSR(uint32 dw_reg, int64 * pi64_value) |
|
{ |
|
DWORD dw_ret_len; |
|
|
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
BOOL result = DeviceIoControl |
|
( |
|
hFile, // Handle to device |
|
(DWORD) IOCTL_READ_MSR, // IO Control code for Read |
|
&dw_reg, // Input Buffer to driver. |
|
sizeof(uint32), // Length of input buffer. |
|
pi64_value, // Output Buffer from driver. |
|
sizeof(int64), // Length of output buffer in bytes. |
|
&dw_ret_len, // Bytes placed in output buffer. |
|
NULL // NULL means wait till op. completes |
|
); |
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() ); |
|
if (hr == S_OK && dw_ret_len != sizeof(int64)) |
|
hr = E_BAD_DATA; |
|
|
|
return hr; |
|
} |
|
|
|
HRESULT PME::ReadMSR(uint32 dw_reg, uint64 * pi64_value) |
|
{ |
|
DWORD dw_ret_len; |
|
|
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
BOOL result = DeviceIoControl |
|
( |
|
hFile, // Handle to device |
|
(DWORD) IOCTL_READ_MSR, // IO Control code for Read |
|
&dw_reg, // Input Buffer to driver. |
|
sizeof(uint32), // Length of input buffer. |
|
pi64_value, // Output Buffer from driver. |
|
sizeof(uint64), // Length of output buffer in bytes. |
|
&dw_ret_len, // Bytes placed in output buffer. |
|
NULL // NULL means wait till op. completes |
|
); |
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() ); |
|
if (hr == S_OK && dw_ret_len != sizeof(uint64)) |
|
hr = E_BAD_DATA; |
|
|
|
return hr; |
|
} |
|
|
|
//--------------------------------------------------------------------------- |
|
// Write model specific register |
|
//--------------------------------------------------------------------------- |
|
HRESULT PME::WriteMSR(uint32 dw_reg, const int64 & i64_value) |
|
{ |
|
DWORD dw_buffer[3]; |
|
DWORD dw_ret_len; |
|
|
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
dw_buffer[0] = dw_reg; // setup the 12 byte input |
|
*((int64*)(&dw_buffer[1]))= i64_value; |
|
|
|
BOOL result = DeviceIoControl |
|
( |
|
hFile, // Handle to device |
|
(DWORD) IOCTL_WRITE_MSR, // IO Control code for Read |
|
dw_buffer, // Input Buffer to driver. |
|
12, // Length of Input buffer |
|
NULL, // Buffer from driver, None for WRMSR |
|
0, // Length of output buffer in bytes. |
|
&dw_ret_len, // Bytes placed in DataBuffer. |
|
NULL // NULL means wait till op. completes. |
|
); |
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() ); |
|
if (hr == S_OK && dw_ret_len != 0) |
|
hr = E_BAD_DATA; |
|
|
|
return hr; |
|
} |
|
|
|
|
|
|
|
HRESULT PME::WriteMSR(uint32 dw_reg, const uint64 & i64_value) |
|
{ |
|
DWORD dw_buffer[3]; |
|
DWORD dw_ret_len; |
|
|
|
if (bDriverOpen == false) // driver is not going |
|
return E_DRIVER_NOT_OPEN; |
|
|
|
dw_buffer[0] = dw_reg; // setup the 12 byte input |
|
*((uint64*)(&dw_buffer[1]))= i64_value; |
|
|
|
BOOL result = DeviceIoControl |
|
( |
|
hFile, // Handle to device |
|
(DWORD) IOCTL_WRITE_MSR, // IO Control code for Read |
|
dw_buffer, // Input Buffer to driver. |
|
12, // Length of Input buffer |
|
NULL, // Buffer from driver, None for WRMSR |
|
0, // Length of output buffer in bytes. |
|
&dw_ret_len, // Bytes placed in DataBuffer. |
|
NULL // NULL means wait till op. completes. |
|
); |
|
|
|
//E_POINTER |
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() ); |
|
if (hr == S_OK && dw_ret_len != 0) |
|
hr = E_BAD_DATA; |
|
|
|
return hr; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma hdrstop |
|
|
|
|
|
|
|
|
|
//--------------------------------------------------------------------------- |
|
// Return the frequency of the processor in Hz. |
|
// |
|
|
|
double PME::GetCPUClockSpeedFast(void) |
|
{ |
|
int64 i64_perf_start, i64_perf_freq, i64_perf_end; |
|
int64 i64_clock_start,i64_clock_end; |
|
double d_loop_period, d_clock_freq; |
|
|
|
//----------------------------------------------------------------------- |
|
// Query the performance of the Windows high resolution timer. |
|
//----------------------------------------------------------------------- |
|
QueryPerformanceFrequency((LARGE_INTEGER*)&i64_perf_freq); |
|
|
|
//----------------------------------------------------------------------- |
|
// Query the current value of the Windows high resolution timer. |
|
//----------------------------------------------------------------------- |
|
QueryPerformanceCounter((LARGE_INTEGER*)&i64_perf_start); |
|
i64_perf_end = 0; |
|
|
|
//----------------------------------------------------------------------- |
|
// Time of loop of 250000 windows cycles with RDTSC |
|
//----------------------------------------------------------------------- |
|
RDTSC(i64_clock_start); |
|
while(i64_perf_end<i64_perf_start+250000) |
|
{ |
|
QueryPerformanceCounter((LARGE_INTEGER*)&i64_perf_end); |
|
} |
|
RDTSC(i64_clock_end); |
|
|
|
//----------------------------------------------------------------------- |
|
// Caclulate the frequency of the RDTSC timer and therefore calculate |
|
// the frequency of the processor. |
|
//----------------------------------------------------------------------- |
|
i64_clock_end -= i64_clock_start; |
|
|
|
d_loop_period = ((double)(i64_perf_freq)) / 250000.0; |
|
d_clock_freq = ((double)(i64_clock_end & 0xffffffff))*d_loop_period; |
|
|
|
return (float)d_clock_freq; |
|
} |
|
|
|
|
|
|
|
// takes 1 second |
|
double PME::GetCPUClockSpeedSlow(void) |
|
{ |
|
|
|
if (m_CPUClockSpeed != 0) |
|
return m_CPUClockSpeed; |
|
|
|
unsigned long start_ms, stop_ms; |
|
unsigned long start_tsc,stop_tsc; |
|
|
|
// boosting priority helps with noise. its optional and i dont think |
|
// it helps all that much |
|
|
|
PME * pme = PME::Instance(); |
|
|
|
pme->SetProcessPriority(ProcessPriorityHigh); |
|
|
|
// wait for millisecond boundary |
|
start_ms = GetTickCount() + 5; |
|
while (start_ms <= GetTickCount()); |
|
|
|
// read timestamp (you could use QueryPerformanceCounter in hires mode if you want) |
|
#ifdef COMPILER_MSVC64 |
|
RDTSC(start_tsc); |
|
#else |
|
__asm |
|
{ |
|
rdtsc |
|
mov dword ptr [start_tsc+0],eax |
|
mov dword ptr [start_tsc+4],edx |
|
} |
|
#endif |
|
|
|
// wait for end |
|
stop_ms = start_ms + 1000; // longer wait gives better resolution |
|
while (stop_ms > GetTickCount()); |
|
|
|
// read timestamp (you could use QueryPerformanceCounter in hires mode if you want) |
|
#ifdef COMPILER_MSVC64 |
|
RDTSC(stop_tsc); |
|
#else |
|
__asm |
|
{ |
|
rdtsc |
|
mov dword ptr [stop_tsc+0],eax |
|
mov dword ptr [stop_tsc+4],edx |
|
} |
|
#endif |
|
|
|
|
|
// normalize priority |
|
pme->SetProcessPriority(ProcessPriorityNormal); |
|
|
|
// return clock speed |
|
// optionally here you could round to known clocks, like speeds that are multimples |
|
// of 100, 133, 166, etc. |
|
m_CPUClockSpeed = ((stop_tsc - start_tsc) * 1000.0) / (double)(stop_ms - start_ms); |
|
return m_CPUClockSpeed; |
|
|
|
} |
|
|
|
|
|
|
|
const unsigned short cccr_escr_map[NCOUNTERS][8] = |
|
{ |
|
{ |
|
0x3B2, |
|
0x3B4, |
|
0x3AA, |
|
0x3B6, |
|
0x3AC, |
|
0x3C8, |
|
0x3A2, |
|
0x3A0, |
|
}, |
|
{ |
|
0x3B2, |
|
0x3B4, |
|
0x3AA, |
|
0x3B6, |
|
0x3AC, |
|
0x3C8, |
|
0x3A2, |
|
0x3A0, |
|
}, |
|
{ |
|
0x3B3, |
|
0x3B5, |
|
0x3AB, |
|
0x3B7, |
|
0x3AD, |
|
0x3C9, |
|
0x3A3, |
|
0x3A1, |
|
}, |
|
{ |
|
0x3B3, |
|
0x3B5, |
|
0x3AB, |
|
0x3B7, |
|
0x3AD, |
|
0x3C9, |
|
0x3A3, |
|
0x3A1, |
|
}, |
|
{ |
|
|
|
0x3C0, |
|
0x3C4, |
|
0x3C2, |
|
}, |
|
{ |
|
0x3C0, |
|
0x3C4, |
|
0x3C2, |
|
}, |
|
{ |
|
0x3C1, |
|
0x3C5, |
|
0x3C3, |
|
}, |
|
{ |
|
0x3C1, |
|
0x3C5, |
|
0x3C3, |
|
}, |
|
{ |
|
0x3A6, |
|
0x3A4, |
|
0x3AE, |
|
0x3B0, |
|
0, |
|
0x3A8, |
|
}, |
|
{ |
|
0x3A6, |
|
0x3A4, |
|
0x3AE, |
|
0x3B0, |
|
0, |
|
0x3A8, |
|
}, |
|
{ |
|
|
|
0x3A7, |
|
0x3A5, |
|
0x3AF, |
|
0x3B1, |
|
0, |
|
0x3A9, |
|
}, |
|
{ |
|
|
|
0x3A7, |
|
0x3A5, |
|
0x3AF, |
|
0x3B1, |
|
0, |
|
0x3A9, |
|
}, |
|
{ |
|
|
|
0x3BA, |
|
0x3CA, |
|
0x3BC, |
|
0x3BE, |
|
0x3B8, |
|
0x3CC, |
|
0x3E0, |
|
}, |
|
{ |
|
|
|
0x3BA, |
|
0x3CA, |
|
0x3BC, |
|
0x3BE, |
|
0x3B8, |
|
0x3CC, |
|
0x3E0, |
|
}, |
|
{ |
|
|
|
0x3BB, |
|
0x3CB, |
|
0x3BD, |
|
0, |
|
0x3B9, |
|
0x3CD, |
|
0x3E1, |
|
}, |
|
{ |
|
|
|
|
|
0x3BB, |
|
0x3CB, |
|
0x3BD, |
|
0, |
|
0x3B9, |
|
0x3CD, |
|
0x3E1, |
|
}, |
|
{ |
|
0x3BA, |
|
0x3CA, |
|
0x3BC, |
|
0x3BE, |
|
0x3B8, |
|
0x3CC, |
|
0x3E0, |
|
}, |
|
{ |
|
|
|
0x3BB, |
|
0x3CB, |
|
0x3BD, |
|
0, |
|
0x3B9, |
|
0x3CD, |
|
0x3E1, |
|
}, |
|
}; |
|
|
|
#ifdef DBGFLAG_VALIDATE |
|
//----------------------------------------------------------------------------- |
|
// Purpose: Ensure that all of our internal structures are consistent, and |
|
// account for all memory that we've allocated. |
|
// Input: validator - Our global validator object |
|
// pchName - Our name (typically a member var in our container) |
|
//----------------------------------------------------------------------------- |
|
void PME::Validate( CValidator &validator, tchar *pchName ) |
|
{ |
|
validator.Push( _T("PME"), this, pchName ); |
|
|
|
validator.ClaimMemory( this ); |
|
|
|
validator.ClaimMemory( cache ); |
|
|
|
validator.ClaimMemory( ( void * ) vendor_name.c_str( ) ); |
|
validator.ClaimMemory( ( void * ) brand.c_str( ) ); |
|
|
|
validator.Pop( ); |
|
} |
|
#endif // DBGFLAG_VALIDATE |
|
|
|
#pragma warning( default : 4530 ) // warning: exception handler -GX option |
|
#endif |
|
|
|
|