Add nvml for GPU monitoring (squashed)

Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix the Makefile to support standard C++ files.
  This prevents nvcc from being used on files without device code.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
This commit is contained in:
Tanguy Pruvot 2014-11-13 14:11:43 +01:00
parent 1118d6c3db
commit 49f3c454c2
13 changed files with 752 additions and 66 deletions

View File

@ -1,8 +1,9 @@
# allow to use Host cuda functions in C/C++
DEF_INCLUDES = @CUDA_INCLUDES@
JANSSON_INCLUDES=
if WANT_JANSSON
JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson
else
JANSSON_INCLUDES=
endif
EXTRA_DIST = autogen.sh README.txt LICENSE.txt \
@ -17,7 +18,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
api.c hashlog.cpp stats.cpp cuda.cu \
api.cpp hashlog.cpp stats.cpp cuda.cpp \
heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -49,19 +50,25 @@ ccminer_SOURCES = elist.h miner.h compat.h \
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
x11/s3.cu
if HAVE_NVML
ccminer_SOURCES += nvml.cpp
nvml_defs = -DUSE_WRAPNVML
nvml_libs = -ldl
endif
if HAVE_WINDOWS
ccminer_SOURCES += compat/winansi.c
endif
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ $(nvml_libs)
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@
nvcc_FLAGS = $(nvcc_ARCH) @CUDA_INCLUDES@ -I. @CUDA_CFLAGS@
nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
# we're now targeting all major compute architectures within one binary.

View File

@ -1,5 +1,5 @@
ccMiner release 1.4.8-tpruvot (12 Nov 2014) - "API Stats"
ccMiner release 1.4.9-tpruvot (Nov 2014) - "GPU Monitoring"
---------------------------------------------------------------
***************************************************************
@ -155,9 +155,13 @@ features.
>>> RELEASE HISTORY <<<
Nov. 13th 2014 v1.4.9
Add nvml unit to monitor nvidia cards (api)
API: small changes, bump v1.1
Nov. 12th 2014 v1.4.8
Add a basic API and sample php json wrapper
Add statsavg (def 20) and api-bind parameters
Add statsavg (def 20) and api-bind parameters
Fix displayed hashrate for multi gpus systems
Nov. 11th 2014 v1.4.7

View File

@ -8,7 +8,7 @@
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#define APIVERSION "1.0"
#define APIVERSION "1.1"
#ifdef _MSC_VER
# define _WINSOCK_DEPRECATED_NO_WARNINGS
@ -35,6 +35,10 @@
#include "compat.h"
#include "miner.h"
#ifdef USE_WRAPNVML
#include "nvml.h"
#endif
#ifndef _MSC_VER
# include <errno.h>
# include <sys/socket.h>
@ -105,25 +109,26 @@ extern uint32_t rejected_count;
#define gpu_threads opt_n_threads
extern void get_currentalgo(char* buf, int sz);
/***************************************************************/
static void gpustatus(int thr_id)
{
char buf[MYBUFSIZ];
float gt;
int gf, gp;
int gp, gf;
if (thr_id >= 0 && thr_id < gpu_threads) {
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
#ifdef HAVE_HWMONITORING
cgpu->thr_id = thr_id;
#ifdef USE_WRAPNVML
// todo
if (gpu->has_monitoring) {
gt = gpu_temp(gpu);
gf = gpu_fanspeed(gpu);
gp = gpu_fanpercent(gpu);
if (1 || cgpu->has_monitoring) {
gf = gpu_fanpercent(cgpu);
gt = gpu_temp(cgpu);
gp = gpu_power(cgpu);
// gpu_clock(cgpu);
}
else
#endif
@ -148,7 +153,7 @@ static void gpustatus(int thr_id)
cgpu->khashes = stats_get_speed(thr_id) / 1000.0;
sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;FANP=%d;KHS=%.2f;"
sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;POWER=%d;KHS=%.2f;"
"HWF=%d;I=%d|",
thr_id, gt, gf, gp, cgpu->khashes,
cgpu->hw_errors, cgpu->intensity);
@ -162,14 +167,14 @@ static void gpustatus(int thr_id)
static char *getsummary(char *params)
{
char algo[64] = "";
time_t uptime = (time(NULL) - startup);
double accps = (60.0 * accepted_count) / (uptime ? (uint32_t) uptime : 1.0);
double uptime = difftime(time(NULL), startup);
double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
get_currentalgo(algo, sizeof(algo));
*buffer = '\0';
sprintf(buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%d|",
"ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%.1f|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, (double)global_hashrate / 1000.0,
accepted_count, rejected_count,
@ -186,7 +191,7 @@ static char *getstats(char *params)
}
struct CMDS {
char *name;
const char *name;
char *(*func)(char *);
} cmds[] = {
{ "summary", getsummary },
@ -195,15 +200,18 @@ struct CMDS {
#define CMDMAX 2
static void send_result(SOCKETTYPE c, char *result)
static int send_result(SOCKETTYPE c, char *result)
{
int n;
if (result == NULL)
result = "";
if (!result) {
n = send(c, "", 1, 0);
} else {
// ignore failure - it's closed immediately anyway
n = send(c, result, strlen(result) + 1, 0);
}
// ignore failure - it's closed immediately anyway
n = send(c, result, strlen(result) + 1, 0);
return n;
}
/*
@ -400,7 +408,8 @@ static void api()
if ((time(NULL) - bindstart) > 61)
break;
else {
applog(LOG_ERR, "API bind to port %d failed - trying again in 15sec", port);
if (!opt_quiet || opt_debug)
applog(LOG_WARNING, "API bind to port %d failed - trying again in 15sec", port);
sleep(15);
}
}
@ -409,7 +418,7 @@ static void api()
}
if (bound == 0) {
applog(LOG_ERR, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
free(apisock);
return;
}

View File

@ -87,7 +87,7 @@
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
</ClCompile>
@ -114,7 +114,7 @@
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<StructMemberAlignment>8Bytes</StructMemberAlignment>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
@ -150,7 +150,7 @@
<CompileAsManaged>false</CompileAsManaged>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<ExceptionHandling>SyncCThrow</ExceptionHandling>
@ -193,7 +193,7 @@
<CompileAsManaged>false</CompileAsManaged>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
@ -240,9 +240,8 @@
<ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hashlog.cpp" />
<ClCompile Include="stats.cpp" />
<ClCompile Include="api.c">
<AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<ClCompile Include="nvml.cpp" />
<ClCompile Include="api.cpp" />
<ClCompile Include="hefty1.c" />
<ClCompile Include="myriadgroestl.cpp" />
<ClCompile Include="scrypt.c">
@ -321,7 +320,7 @@
<ClInclude Include="uint256.h" />
</ItemGroup>
<ItemGroup>
<CudaCompile Include="cuda.cu" />
<CudaCompile Include="cuda.cpp" />
<CudaCompile Include="bitslice_transformations_quad.cu">
<ExcludedFromBuild>true</ExcludedFromBuild>
</CudaCompile>

View File

@ -192,7 +192,10 @@
<ClCompile Include="stats.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="api.c">
<ClCompile Include="api.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="nvml.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
@ -313,7 +316,7 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
<CudaCompile Include="cuda.cu">
<CudaCompile Include="cuda.cpp">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="cuda_fugue256.cu">
@ -482,4 +485,4 @@
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
</ItemGroup>
</Project>
</Project>

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.4.8])
AC_INIT([ccminer], [1.4.9])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
@ -142,19 +142,32 @@ dnl Setup CUDA paths
AC_ARG_WITH([cuda],
[ --with-cuda=PATH prefix where cuda is installed [default=/usr/local/cuda]])
AC_ARG_WITH([nvml],
[ --with-nvml=PATH prefix where libnvml is installed [default=/usr/lib]])
AM_CONDITIONAL([HAVE_NVML], [test -n "$with_nvml"])
if test -n "$with_cuda"
then
CUDA_CFLAGS="-I$with_cuda/include $CUDA_CFLAGS"
CUDA_LIBS="-lcudart"
CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX"
NVCC="$with_cuda/bin/nvcc"
CUDA_INCLUDES="-I$with_cuda/include"
CUDA_LIBS="-lcudart"
CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX"
NVCC="$with_cuda/bin/nvcc"
else
CUDA_CFLAGS="-I/usr/local/cuda/include $CUDA_CFLAGS"
CUDA_LIBS="-lcudart -static-libstdc++"
CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
NVCC="nvcc"
CUDA_INCLUDES="-I/usr/local/cuda/include"
CUDA_LIBS="-lcudart -static-libstdc++"
CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
NVCC="nvcc"
fi
if test -n "$with_nvml" ; then
NVML_LIBPATH=$with_nvml
CUDA_LDFLAGS="$CUDA_LDFLAGS -ldl"
fi
AC_SUBST(NVML_LIBPATH)
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_LDFLAGS)
AC_SUBST(NVCC)

View File

@ -7,5 +7,5 @@
extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda
CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so

View File

@ -56,7 +56,7 @@ BOOL WINAPI ConsoleHandler(DWORD);
#define HEAVYCOIN_BLKHDR_SZ 84
#define MNR_BLKHDR_SZ 80
// from heavy.cu
// from cuda.cu
#ifdef __cplusplus
extern "C"
{
@ -69,6 +69,9 @@ int cuda_finddevice(char *name);
}
#endif
#ifdef USE_WRAPNVML
#include "nvml.h"
#endif
#ifdef __linux /* Linux specific policy and affinity management */
#include <sched.h>
@ -244,6 +247,10 @@ uint32_t opt_work_size = 0; /* default */
char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */
int opt_api_listen = 4068; /* 0 to disable */
#ifdef USE_WRAPNVML
wrap_nvml_handle *nvmlh = NULL;
#endif
#ifdef HAVE_GETOPT_LONG
#include <getopt.h>
#else
@ -421,7 +428,10 @@ void proper_exit(int reason)
#ifdef WIN32
timeEndPeriod(1); // else never executed
#endif
#ifdef USE_WRAPNVML
if (nvmlh)
wrap_nvml_destroy(nvmlh);
#endif
exit(reason);
}
@ -2129,6 +2139,16 @@ int main(int argc, char *argv[])
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
}
#ifdef USE_WRAPNVML
nvmlh = wrap_nvml_create();
if (nvmlh) {
// todo: link threads info gpu
applog(LOG_INFO, "NVML GPU monitoring enabled.");
} else {
applog(LOG_INFO, "NVML GPU monitoring is not available.");
}
#endif
if (opt_api_listen) {
/* api thread */
api_thr_id = opt_n_threads + 3;

View File

@ -156,7 +156,7 @@
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 1.4.8"
#define PACKAGE_STRING "ccminer 1.4.9"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.4.8"
#define PACKAGE_VERSION "1.4.9"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
#define USE_XOP 1
/* Version number of package */
#define VERSION "1.4.8"
#define VERSION "1.4.9"
/* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */

View File

@ -9,16 +9,22 @@
#endif
// include thrust
#ifndef __cplusplus
#include <thrust/version.h>
#include <thrust/remove.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#else
#include <ctype.h>
#endif
#include "miner.h"
#include "cuda_helper.h"
#include "cuda_runtime.h"
extern char *device_name[8];
extern int device_map[8];
extern int device_sm[8];
// CUDA Devices on the System
extern "C" int cuda_num_devices()

18
miner.h
View File

@ -356,21 +356,20 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
void *api_thread(void *userdata);
struct cgpu_info {
int thr_id;
int accepted;
int rejected;
int hw_errors;
double khashes;
int intensity;
#ifdef HAVE_HWMONITORING
#ifdef USE_WRAPNVML
bool has_monitoring;
int gpu_engine;
int min_engine;
int gpu_fan;
int min_fan;
int gpu_memclock;
int gpu_memdiff;
int gpu_powertune;
float gpu_vddc;
float gpu_temp;
unsigned int gpu_fan;
unsigned int gpu_power;
unsigned int gpu_clock;
unsigned int gpu_memclock;
double gpu_vddc;
#endif
};
@ -456,6 +455,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x,
struct timeval *y);
extern bool fulltest(const uint32_t *hash, const uint32_t *target);
extern void diff_to_target(uint32_t *target, double diff);
extern void get_currentalgo(char* buf, int sz);
struct stratum_job {
char *job_id;

479
nvml.cpp Normal file
View File

@ -0,0 +1,479 @@
/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
* Tanguy Pruvot - tpruvot@github
*
*/
#ifdef USE_WRAPNVML
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#ifndef _MSC_VER
#include <libgen.h>
#endif
#include "miner.h"
#include "cuda_runtime.h"
#include "nvml.h"
/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
/* FreeLibrary returns nonzero on success */
return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
/*
 * Thin wrapper over dlopen(): load `filename` with all symbols resolved
 * immediately (RTLD_NOW). Returns the library handle or NULL on failure.
 */
static void *wrap_dlopen(const char *filename) {
	void *handle = dlopen(filename, RTLD_NOW);
	return handle;
}
/*
 * Thin wrapper over dlsym(): resolve `sym` in the library handle `h`.
 * Returns the symbol address, or NULL when the symbol is not found.
 */
static void *wrap_dlsym(void *h, const char *sym) {
	void *address = dlsym(h, sym);
	return address;
}
/*
 * Thin wrapper over dlclose(): release the library handle `h`.
 * Returns dlclose()'s result unchanged (0 on success).
 */
static int wrap_dlclose(void *h) {
	int status = dlclose(h);
	return status;
}
#endif
#if defined(__cplusplus)
extern "C" {
#endif
wrap_nvml_handle * wrap_nvml_create()
{
int i=0;
wrap_nvml_handle *nvmlh = NULL;
/*
* We use hard-coded library installation locations for the time being...
* No idea where or if libnvidia-ml.so is installed on MacOS X, a
* deep scouring of the filesystem on one of the Mac CUDA build boxes
* I used turned up nothing, so for now it's not going to work on OSX.
*/
#if defined(_WIN64)
/* 64-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(_WIN32) || defined(_MSC_VER)
/* 32-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
/* 32-bit linux assumed */
#define libnvidia_ml "/usr/lib32/libnvidia-ml.so"
#elif defined(__linux)
/* 64-bit linux assumed */
#define libnvidia_ml "/usr/lib/libnvidia-ml.so"
#else
#error "Unrecognized platform: need NVML DLL path for this platform..."
#endif
#if WIN32
char tmp[512];
ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
#else
char tmp[512] = libnvidia_ml;
#endif
void *nvml_dll = wrap_dlopen(tmp);
if (nvml_dll == NULL) {
#ifdef WIN32
char lib[] = "nvml.dll";
#else
char lib[64] = { '\0' };
snprintf(lib, sizeof(lib), "%s", basename(tmp));
/* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */
#endif
nvml_dll = wrap_dlopen(lib);
if (opt_debug)
applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll);
}
if (nvml_dll == NULL) {
if (opt_debug)
applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp);
return NULL;
}
nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
nvmlh->nvml_dll = nvml_dll;
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
if (!nvmlh->nvmlInit)
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t))
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL ||
nvmlh->nvmlDeviceGetCount == NULL ||
nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
nvmlh->nvmlDeviceGetPciInfo == NULL ||
nvmlh->nvmlDeviceGetName == NULL ||
nvmlh->nvmlDeviceGetTemperature == NULL ||
nvmlh->nvmlDeviceGetFanSpeed == NULL ||
nvmlh->nvmlDeviceGetPowerUsage == NULL)
{
if (opt_debug)
applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
nvmlh->nvmlInit();
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
if (opt_debug)
applog(LOG_DEBUG, "Failed to query CUDA device count!");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
/* Obtain GPU device handles we're going to need repeatedly... */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
}
/* Query PCI info for each NVML device, and build table for mapping of */
/* CUDA device IDs to NVML device IDs and vice versa */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
wrap_nvmlPciInfo_t pciinfo;
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
}
/* build mapping of NVML device IDs to CUDA IDs */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvml_cuda_device_id[i] = -1;
}
for (i=0; i<nvmlh->cuda_gpucount; i++) {
cudaDeviceProp props;
nvmlh->cuda_nvml_device_id[i] = -1;
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
int j;
for (j=0; j<nvmlh->nvml_gpucount; j++) {
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
if (opt_debug)
applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j);
nvmlh->nvml_cuda_device_id[j] = i;
nvmlh->cuda_nvml_device_id[i] = j;
}
}
}
}
return nvmlh;
}
/*
 * Report how many GPUs NVML enumerated when the handle was created.
 * Writes the count to *gpucount and always returns 0.
 */
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	const int nvml_devices = nvmlh->nvml_gpucount;

	*gpucount = nvml_devices;
	return 0;
}
/*
 * Report how many GPUs CUDA enumerated when the handle was created.
 * Writes the count to *gpucount and always returns 0.
 */
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	const int cuda_devices = nvmlh->cuda_gpucount;

	*gpucount = cuda_devices;
	return 0;
}
/*
 * Copy the NVML model name of the GPU with CUDA device id `cudaindex`
 * into `namebuf` (capacity `bufsize`).
 * Returns 0 on success, -1 if the CUDA id is out of range, has no NVML
 * counterpart, or the NVML query fails.
 */
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
		return -1;

	return 0;
}
/*
 * Query the temperature (degrees Celsius) of the GPU with CUDA device id
 * `cudaindex` and store it in *tempC.
 * Returns 0 on success, -1 if the CUDA id is out of range, has no NVML
 * counterpart, or the NVML query fails.
 */
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
	wrap_nvmlReturn_t rc;

	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}
/*
 * Query the fan speed (as reported by nvmlDeviceGetFanSpeed) of the GPU
 * with CUDA device id `cudaindex` and store it in *fanpcnt.
 * Returns 0 on success, -1 if the CUDA id is out of range, has no NVML
 * counterpart, or the NVML query fails.
 */
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
	wrap_nvmlReturn_t rc;

	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}
/*
 * Query the clock of kind `type` (a wrap_nvmlClockType_t value) of the GPU
 * with CUDA device id `cudaindex` and store it in *freq.
 * Returns 0 on success, -1 if the entry point is unavailable, the CUDA id
 * is out of range or unmapped, or the NVML query fails.
 *
 * Not Supported on 750Ti 340.23
 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq)
{
	/* nvmlDeviceGetClockInfo is not among the entry points validated at
	 * creation time, so it may be NULL. */
	if (nvmlh->nvmlDeviceGetClockInfo == NULL)
		return -1;

	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is optional too: fall back to the raw code. */
			if (nvmlh->nvmlErrorString)
				applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
/*
 * Query the power draw of the GPU with CUDA device id `cudaindex` and
 * store it in *milliwatts.
 * Returns 0 on success, -1 if the CUDA id is out of range or unmapped, or
 * the NVML query fails.
 *
 * Not Supported on 750Ti 340.23
 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is not validated at creation time and may
			 * be NULL: fall back to the raw code. */
			if (nvmlh->nvmlErrorString)
				applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
/*
 * Query the performance state of the GPU with CUDA device id `cudaindex`
 * through the handle's nvmlDeviceGetPerformanceState pointer and store it
 * in *pstate.
 * Returns 0 on success, -1 if the entry point is unavailable, the CUDA id
 * is out of range or unmapped, or the NVML query fails.
 *
 * Not Supported on 750Ti 340.23
 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
{
	/* This entry point is not among those validated at creation time,
	 * so guard against a NULL pointer. */
	if (nvmlh->nvmlDeviceGetPerformanceState == NULL)
		return -1;

	/* cuda_nvml_device_id only has cuda_gpucount entries: validate the
	 * CUDA id before using it as an index. */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is optional too: fall back to the raw code. */
			if (nvmlh->nvmlErrorString)
				applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
/*
 * Shut NVML down, unload the library and release the handle together with
 * the per-device tables allocated by wrap_nvml_create().
 * A NULL handle is accepted and ignored. Always returns 0.
 */
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
{
	if (nvmlh == NULL)
		return 0;

	nvmlh->nvmlShutdown();

	wrap_dlclose(nvmlh->nvml_dll);

	/* These tables were previously leaked; free(NULL) is a no-op, so
	 * tables that were never allocated are fine. */
	free(nvmlh->devs);
	free(nvmlh->nvml_pci_domain_id);
	free(nvmlh->nvml_pci_bus_id);
	free(nvmlh->nvml_pci_device_id);
	free(nvmlh->nvml_cuda_device_id);
	free(nvmlh->cuda_nvml_device_id);

	free(nvmlh);
	return 0;
}
/* api functions */
extern wrap_nvml_handle *nvmlh;
extern int device_map[8];
/*
 * Fan speed of the GPU driven by miner thread gpu->thr_id, as reported by
 * wrap_nvml_get_fanpcnt(). Returns 0 when NVML is not loaded or the query
 * leaves the value untouched.
 */
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
{
	unsigned int percent = 0;

	if (!nvmlh)
		return percent;

	wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &percent);
	return percent;
}
/*
 * Temperature of the GPU driven by miner thread gpu->thr_id, as reported
 * by wrap_nvml_get_tempC(), converted to double. Returns 0.0 when NVML is
 * not loaded or the query leaves the value untouched.
 */
double gpu_temp(struct cgpu_info *gpu)
{
	unsigned int celsius = 0;

	if (!nvmlh)
		return 0.0;

	wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &celsius);
	return (double) celsius;
}
/*
 * SM clock (NVML_CLOCK_SM) of the GPU driven by miner thread gpu->thr_id,
 * as reported by wrap_nvml_get_clock(). Returns 0 when NVML is not loaded
 * or the query leaves the value untouched.
 */
unsigned int gpu_clock(struct cgpu_info *gpu)
{
	unsigned int sm_freq = 0;

	if (!nvmlh)
		return sm_freq;

	wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &sm_freq);
	return sm_freq;
}
/*
 * Power draw of the GPU driven by miner thread gpu->thr_id, as reported by
 * wrap_nvml_get_power_usage(). Returns 0 when NVML is not loaded or the
 * query leaves the value untouched.
 */
unsigned int gpu_power(struct cgpu_info *gpu)
{
	unsigned int milliwatts = 0;

	if (!nvmlh)
		return milliwatts;

	wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &milliwatts);
	return milliwatts;
}
/*
 * Performance state of the GPU driven by miner thread gpu->thr_id, as
 * reported by wrap_nvml_get_pstate(). Returns 0 when NVML is not loaded or
 * the query leaves the value untouched. The result is not cached in
 * cgpu_info.
 */
int gpu_pstate(struct cgpu_info *gpu)
{
	int state = 0;

	if (!nvmlh)
		return state;

	wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &state);
	return state;
}
#if defined(__cplusplus)
}
#endif
#endif /* USE_WRAPNVML */
/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
nvmlDeviceGetAccountingBufferSize
nvmlDeviceGetAccountingMode
nvmlDeviceGetAccountingPids
nvmlDeviceGetAccountingStats
nvmlDeviceGetAPIRestriction
nvmlDeviceGetApplicationsClock
nvmlDeviceGetAutoBoostedClocksEnabled
nvmlDeviceGetBAR1MemoryInfo
nvmlDeviceGetBoardId
nvmlDeviceGetBrand
nvmlDeviceGetBridgeChipInfo
* nvmlDeviceGetClockInfo
nvmlDeviceGetComputeMode
nvmlDeviceGetComputeRunningProcesses
nvmlDeviceGetCount
nvmlDeviceGetCount_v2
nvmlDeviceGetCpuAffinity
nvmlDeviceGetCurrentClocksThrottleReasons
nvmlDeviceGetCurrPcieLinkGeneration
nvmlDeviceGetCurrPcieLinkWidth
nvmlDeviceGetDecoderUtilization
nvmlDeviceGetDefaultApplicationsClock
nvmlDeviceGetDetailedEccErrors
nvmlDeviceGetDisplayActive
nvmlDeviceGetDisplayMode
nvmlDeviceGetDriverModel
nvmlDeviceGetEccMode
nvmlDeviceGetEncoderUtilization
nvmlDeviceGetEnforcedPowerLimit
* nvmlDeviceGetFanSpeed
nvmlDeviceGetGpuOperationMode
nvmlDeviceGetHandleByIndex
nvmlDeviceGetHandleByIndex_v2
nvmlDeviceGetHandleByPciBusId
nvmlDeviceGetHandleByPciBusId_v2
nvmlDeviceGetHandleBySerial
nvmlDeviceGetHandleByUUID
nvmlDeviceGetIndex
nvmlDeviceGetInforomConfigurationChecksum
nvmlDeviceGetInforomImageVersion
nvmlDeviceGetInforomVersion
nvmlDeviceGetMaxClockInfo
nvmlDeviceGetMaxPcieLinkGeneration
nvmlDeviceGetMaxPcieLinkWidth
nvmlDeviceGetMemoryErrorCounter
nvmlDeviceGetMemoryInfo
nvmlDeviceGetMinorNumber
nvmlDeviceGetMultiGpuBoard
nvmlDeviceGetName
nvmlDeviceGetPciInfo
nvmlDeviceGetPciInfo_v2
* nvmlDeviceGetPerformanceState
nvmlDeviceGetPersistenceMode
nvmlDeviceGetPowerManagementDefaultLimit
nvmlDeviceGetPowerManagementLimit
nvmlDeviceGetPowerManagementLimitConstraints
nvmlDeviceGetPowerManagementMode
nvmlDeviceGetPowerState (deprecated)
* nvmlDeviceGetPowerUsage
nvmlDeviceGetRetiredPages
nvmlDeviceGetRetiredPagesPendingStatus
nvmlDeviceGetSamples
nvmlDeviceGetSerial
nvmlDeviceGetSupportedClocksThrottleReasons
nvmlDeviceGetSupportedEventTypes
nvmlDeviceGetSupportedGraphicsClocks
nvmlDeviceGetSupportedMemoryClocks
nvmlDeviceGetTemperature
nvmlDeviceGetTemperatureThreshold
nvmlDeviceGetTotalEccErrors
nvmlDeviceGetUtilizationRates
nvmlDeviceGetUUID
nvmlDeviceGetVbiosVersion
nvmlDeviceGetViolationStatus
*/

146
nvml.h Normal file
View File

@ -0,0 +1,146 @@
/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
*
*/
#if defined(__cplusplus)
extern "C" {
#endif
/*
* Ugly hacks to avoid dependencies on the real nvml.h until it starts
* getting included with the CUDA toolkit or a GDK that's got a known
* install location, etc.
*/
/*
 * Status code returned by the NVML entry points. Only the success value is
 * enumerated here; the wrappers treat any other value as failure.
 */
typedef enum wrap_nvmlReturn_enum {
WRAPNVML_SUCCESS = 0
} wrap_nvmlReturn_t;
typedef void * wrap_nvmlDevice_t;
/*
 * Our own version of the PCI info struct, filled in by the
 * nvmlDeviceGetPciInfo entry point. The wrapper only reads domain, bus and
 * device, which it compares with the CUDA device properties.
 * NOTE(review): presumably the field order and sizes must mirror NVML's own
 * nvmlPciInfo_t exactly -- confirm against the real nvml.h before changing.
 */
typedef struct {
char bus_id_str[16]; /* string form of bus info */
unsigned int domain;
unsigned int bus;
unsigned int device;
unsigned int pci_device_id; /* combined device and vendor id */
unsigned int pci_subsystem_id;
unsigned int res0; /* NVML internal use only */
unsigned int res1;
unsigned int res2;
unsigned int res3;
} wrap_nvmlPciInfo_t;
/*
 * Clock selector passed to the nvmlDeviceGetClockInfo entry point
 * (wrap_nvml_get_clock() casts its `type` argument to this enum).
 */
typedef enum nvmlClockType_t {
NVML_CLOCK_GRAPHICS = 0,
NVML_CLOCK_SM = 1,
NVML_CLOCK_MEM = 2
} wrap_nvmlClockType_t;
/*
 * Handle to hold the function pointers for the entry points we need,
 * and the shared library itself.
 *
 * Built by wrap_nvml_create() and released by wrap_nvml_destroy().
 */
typedef struct {
/* library handle returned by the platform's dlopen()/LoadLibrary wrapper */
void *nvml_dll;
/* device counts as reported by NVML and by the CUDA runtime */
int nvml_gpucount;
int cuda_gpucount;
/* per-NVML-device PCI location (nvml_gpucount entries each) */
unsigned int *nvml_pci_domain_id;
unsigned int *nvml_pci_bus_id;
unsigned int *nvml_pci_device_id;
/* index translation tables; an entry of -1 means "no matching device" */
int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */
int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */
/* NVML device handles, one per NVML device */
wrap_nvmlDevice_t *devs;
/* entry points resolved from the shared library at runtime */
wrap_nvmlReturn_t (*nvmlInit)(void);
wrap_nvmlReturn_t (*nvmlDeviceGetCount)(int *);
wrap_nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, wrap_nvmlDevice_t *);
wrap_nvmlReturn_t (*nvmlDeviceGetClockInfo)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetPciInfo)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *);
wrap_nvmlReturn_t (*nvmlDeviceGetName)(wrap_nvmlDevice_t, char *, int);
wrap_nvmlReturn_t (*nvmlDeviceGetTemperature)(wrap_nvmlDevice_t, int, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetFanSpeed)(wrap_nvmlDevice_t, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetPerformanceState)(wrap_nvmlDevice_t, int *); /* enum */
wrap_nvmlReturn_t (*nvmlDeviceGetPowerUsage)(wrap_nvmlDevice_t, unsigned int *);
char* (*nvmlErrorString)(wrap_nvmlReturn_t);
wrap_nvmlReturn_t (*nvmlShutdown)(void);
} wrap_nvml_handle;
/*
 * Load the NVML library at runtime and build the CUDA <-> NVML device
 * mapping. Returns NULL when NVML monitoring is not available.
 */
wrap_nvml_handle * wrap_nvml_create(void);

/*
 * Shut NVML down, unload the library and free the handle.
 */
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh);

/*
 * Query the number of GPUs seen by NVML
 */
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);

/*
 * Query the number of GPUs seen by CUDA
 */
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);

/*
 * All queries below take the CUDA device ID (not the NVML index) and
 * return 0 on success, -1 on failure.
 */

/*
 * Query the name of the GPU model from the CUDA device ID
 */
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                           int cudaindex,
                           char *namebuf,
                           int bufsize);

/*
 * Query the current GPU temperature (Celsius), from the CUDA device ID
 */
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                        int cudaindex, unsigned int *tempC);

/*
 * Query the current GPU fan speed (percent) from the CUDA device ID
 */
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                          int cudaindex, unsigned int *fanpcnt);

/*
 * Query the current GPU clock of the given type (a wrap_nvmlClockType_t
 * value) from the CUDA device ID
 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh,
                        int cudaindex, int clktype, unsigned int *freq);

/*
 * Query the current GPU power usage in milliwatts from the CUDA device ID
 *
 * This feature is only available on recent GPU generations and may be
 * limited in some cases only to Tesla series GPUs.
 * If the query is run on an unsupported GPU, this routine will return -1.
 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int cudaindex,
                              unsigned int *milliwatts);

/*
 * Query the current GPU performance state from the CUDA device ID
 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh,
                         int cudaindex, int *pstate);
/*
 * api functions
 *
 * Convenience accessors used by the API code: each one takes the per-thread
 * cgpu_info, maps gpu->thr_id to a CUDA device through device_map[] and
 * queries the global NVML handle. They return 0 (0.0 for gpu_temp) when
 * NVML is not loaded.
 */
#include "miner.h"
/* fan speed as reported by wrap_nvml_get_fanpcnt() */
unsigned int gpu_fanpercent(struct cgpu_info *gpu);
/* temperature as reported by wrap_nvml_get_tempC() */
double gpu_temp(struct cgpu_info *gpu);
/* SM clock (NVML_CLOCK_SM) as reported by wrap_nvml_get_clock() */
unsigned int gpu_clock(struct cgpu_info *gpu);
/* power draw as reported by wrap_nvml_get_power_usage() */
unsigned int gpu_power(struct cgpu_info *gpu);
/* performance state as reported by wrap_nvml_get_pstate() */
int gpu_pstate(struct cgpu_info *gpu);
#if defined(__cplusplus)
}
#endif