Browse Source

Add nvml for GPU monitoring (squashed)

Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
master
Tanguy Pruvot 10 years ago
parent
commit
49f3c454c2
  1. 19
      Makefile.am
  2. 6
      README.txt
  3. 49
      api.cpp
  4. 15
      ccminer.vcxproj
  5. 7
      ccminer.vcxproj.filters
  6. 19
      configure.ac
  7. 2
      configure.sh
  8. 24
      cpu-miner.c
  9. 6
      cpuminer-config.h
  10. 8
      cuda.cpp
  11. 18
      miner.h
  12. 479
      nvml.cpp
  13. 146
      nvml.h

19
Makefile.am

@ -1,8 +1,9 @@
# allow to use Host cuda functions in C/C++
DEF_INCLUDES = @CUDA_INCLUDES@
JANSSON_INCLUDES=
if WANT_JANSSON if WANT_JANSSON
JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson
else
JANSSON_INCLUDES=
endif endif
EXTRA_DIST = autogen.sh README.txt LICENSE.txt \ EXTRA_DIST = autogen.sh README.txt LICENSE.txt \
@ -17,7 +18,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c crc32.c hefty1.c scrypt.c \ cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
api.c hashlog.cpp stats.cpp cuda.cu \ api.cpp hashlog.cpp stats.cpp cuda.cpp \
heavy/heavy.cu \ heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -49,19 +50,25 @@ ccminer_SOURCES = elist.h miner.h compat.h \
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \ x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
x11/s3.cu x11/s3.cu
if HAVE_NVML
ccminer_SOURCES += nvml.cpp
nvml_defs = -DUSE_WRAPNVML
nvml_libs = -ldl
endif
if HAVE_WINDOWS if HAVE_WINDOWS
ccminer_SOURCES += compat/winansi.c ccminer_SOURCES += compat/winansi.c
endif endif
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ $(nvml_libs)
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\" nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\" #nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\" #nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@ nvcc_FLAGS = $(nvcc_ARCH) @CUDA_INCLUDES@ -I. @CUDA_CFLAGS@
nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v" nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
# we're now targeting all major compute architectures within one binary. # we're now targeting all major compute architectures within one binary.

6
README.txt

@ -1,5 +1,5 @@
ccMiner release 1.4.8-tpruvot (12 Nov 2014) - "API Stats" ccMiner release 1.4.9-tpruvot (Nov 2014) - "GPU Monitoring"
--------------------------------------------------------------- ---------------------------------------------------------------
*************************************************************** ***************************************************************
@ -155,6 +155,10 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
Nov. 13th 2014 v1.4.9
Add nvml unit to monitor nvidia cards (api)
API: small changes, bump v1.1
Nov. 12th 2014 v1.4.8 Nov. 12th 2014 v1.4.8
Add a basic API and sample php json wrapper Add a basic API and sample php json wrapper
Add statsavg (def 20) and api-bind parameters Add statsavg (def 20) and api-bind parameters

49
api.c → api.cpp

@ -8,7 +8,7 @@
* Software Foundation; either version 2 of the License, or (at your option) * Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details. * any later version. See COPYING for more details.
*/ */
#define APIVERSION "1.0" #define APIVERSION "1.1"
#ifdef _MSC_VER #ifdef _MSC_VER
# define _WINSOCK_DEPRECATED_NO_WARNINGS # define _WINSOCK_DEPRECATED_NO_WARNINGS
@ -35,6 +35,10 @@
#include "compat.h" #include "compat.h"
#include "miner.h" #include "miner.h"
#ifdef USE_WRAPNVML
#include "nvml.h"
#endif
#ifndef _MSC_VER #ifndef _MSC_VER
# include <errno.h> # include <errno.h>
# include <sys/socket.h> # include <sys/socket.h>
@ -105,25 +109,26 @@ extern uint32_t rejected_count;
#define gpu_threads opt_n_threads #define gpu_threads opt_n_threads
extern void get_currentalgo(char* buf, int sz);
/***************************************************************/ /***************************************************************/
static void gpustatus(int thr_id) static void gpustatus(int thr_id)
{ {
char buf[MYBUFSIZ]; char buf[MYBUFSIZ];
float gt; float gt;
int gf, gp; int gp, gf;
if (thr_id >= 0 && thr_id < gpu_threads) { if (thr_id >= 0 && thr_id < gpu_threads) {
struct cgpu_info *cgpu = &thr_info[thr_id].gpu; struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
#ifdef HAVE_HWMONITORING cgpu->thr_id = thr_id;
#ifdef USE_WRAPNVML
// todo // todo
if (gpu->has_monitoring) { if (1 || cgpu->has_monitoring) {
gt = gpu_temp(gpu); gf = gpu_fanpercent(cgpu);
gf = gpu_fanspeed(gpu); gt = gpu_temp(cgpu);
gp = gpu_fanpercent(gpu); gp = gpu_power(cgpu);
// gpu_clock(cgpu);
} }
else else
#endif #endif
@ -148,7 +153,7 @@ static void gpustatus(int thr_id)
cgpu->khashes = stats_get_speed(thr_id) / 1000.0; cgpu->khashes = stats_get_speed(thr_id) / 1000.0;
sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;FANP=%d;KHS=%.2f;" sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;POWER=%d;KHS=%.2f;"
"HWF=%d;I=%d|", "HWF=%d;I=%d|",
thr_id, gt, gf, gp, cgpu->khashes, thr_id, gt, gf, gp, cgpu->khashes,
cgpu->hw_errors, cgpu->intensity); cgpu->hw_errors, cgpu->intensity);
@ -162,14 +167,14 @@ static void gpustatus(int thr_id)
static char *getsummary(char *params) static char *getsummary(char *params)
{ {
char algo[64] = ""; char algo[64] = "";
time_t uptime = (time(NULL) - startup); double uptime = difftime(time(NULL), startup);
double accps = (60.0 * accepted_count) / (uptime ? (uint32_t) uptime : 1.0); double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
get_currentalgo(algo, sizeof(algo)); get_currentalgo(algo, sizeof(algo));
*buffer = '\0'; *buffer = '\0';
sprintf(buffer, "NAME=%s;VER=%s;API=%s;" sprintf(buffer, "NAME=%s;VER=%s;API=%s;"
"ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%d|", "ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%.1f|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION, PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, (double)global_hashrate / 1000.0, algo, (double)global_hashrate / 1000.0,
accepted_count, rejected_count, accepted_count, rejected_count,
@ -186,7 +191,7 @@ static char *getstats(char *params)
} }
struct CMDS { struct CMDS {
char *name; const char *name;
char *(*func)(char *); char *(*func)(char *);
} cmds[] = { } cmds[] = {
{ "summary", getsummary }, { "summary", getsummary },
@ -195,17 +200,20 @@ struct CMDS {
#define CMDMAX 2 #define CMDMAX 2
static void send_result(SOCKETTYPE c, char *result) static int send_result(SOCKETTYPE c, char *result)
{ {
int n; int n;
if (result == NULL) if (!result) {
result = ""; n = send(c, "", 1, 0);
} else {
// ignore failure - it's closed immediately anyway // ignore failure - it's closed immediately anyway
n = send(c, result, strlen(result) + 1, 0); n = send(c, result, strlen(result) + 1, 0);
} }
return n;
}
/* /*
* N.B. IP4 addresses are by Definition 32bit big endian on all platforms * N.B. IP4 addresses are by Definition 32bit big endian on all platforms
*/ */
@ -400,7 +408,8 @@ static void api()
if ((time(NULL) - bindstart) > 61) if ((time(NULL) - bindstart) > 61)
break; break;
else { else {
applog(LOG_ERR, "API bind to port %d failed - trying again in 15sec", port); if (!opt_quiet || opt_debug)
applog(LOG_WARNING, "API bind to port %d failed - trying again in 15sec", port);
sleep(15); sleep(15);
} }
} }
@ -409,7 +418,7 @@ static void api()
} }
if (bound == 0) { if (bound == 0) {
applog(LOG_ERR, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE); applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
free(apisock); free(apisock);
return; return;
} }

15
ccminer.vcxproj

@ -87,7 +87,7 @@
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration> <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
</ClCompile> </ClCompile>
@ -114,7 +114,7 @@
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<StructMemberAlignment>8Bytes</StructMemberAlignment> <StructMemberAlignment>8Bytes</StructMemberAlignment>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration> <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
@ -150,7 +150,7 @@
<CompileAsManaged>false</CompileAsManaged> <CompileAsManaged>false</CompileAsManaged>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration> <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<ExceptionHandling>SyncCThrow</ExceptionHandling> <ExceptionHandling>SyncCThrow</ExceptionHandling>
@ -193,7 +193,7 @@
<CompileAsManaged>false</CompileAsManaged> <CompileAsManaged>false</CompileAsManaged>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<Link> <Link>
@ -240,9 +240,8 @@
<ClCompile Include="groestlcoin.cpp" /> <ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hashlog.cpp" /> <ClCompile Include="hashlog.cpp" />
<ClCompile Include="stats.cpp" /> <ClCompile Include="stats.cpp" />
<ClCompile Include="api.c"> <ClCompile Include="nvml.cpp" />
<AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions> <ClCompile Include="api.cpp" />
</ClCompile>
<ClCompile Include="hefty1.c" /> <ClCompile Include="hefty1.c" />
<ClCompile Include="myriadgroestl.cpp" /> <ClCompile Include="myriadgroestl.cpp" />
<ClCompile Include="scrypt.c"> <ClCompile Include="scrypt.c">
@ -321,7 +320,7 @@
<ClInclude Include="uint256.h" /> <ClInclude Include="uint256.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda.cu" /> <CudaCompile Include="cuda.cpp" />
<CudaCompile Include="bitslice_transformations_quad.cu"> <CudaCompile Include="bitslice_transformations_quad.cu">
<ExcludedFromBuild>true</ExcludedFromBuild> <ExcludedFromBuild>true</ExcludedFromBuild>
</CudaCompile> </CudaCompile>

7
ccminer.vcxproj.filters

@ -192,7 +192,10 @@
<ClCompile Include="stats.cpp"> <ClCompile Include="stats.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="api.c"> <ClCompile Include="api.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="nvml.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
</ItemGroup> </ItemGroup>
@ -313,7 +316,7 @@
</ClInclude> </ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda.cu"> <CudaCompile Include="cuda.cpp">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="cuda_fugue256.cu"> <CudaCompile Include="cuda_fugue256.cu">

19
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.4.8]) AC_INIT([ccminer], [1.4.9])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM
@ -142,19 +142,32 @@ dnl Setup CUDA paths
AC_ARG_WITH([cuda], AC_ARG_WITH([cuda],
[ --with-cuda=PATH prefix where cuda is installed [default=/usr/local/cuda]]) [ --with-cuda=PATH prefix where cuda is installed [default=/usr/local/cuda]])
AC_ARG_WITH([nvml],
[ --with-nvml=PATH prefix where libnvml is installed [default=/usr/lib]])
AM_CONDITIONAL([HAVE_NVML], [test -n "$with_nvml"])
if test -n "$with_cuda" if test -n "$with_cuda"
then then
CUDA_CFLAGS="-I$with_cuda/include $CUDA_CFLAGS" CUDA_INCLUDES="-I$with_cuda/include"
CUDA_LIBS="-lcudart" CUDA_LIBS="-lcudart"
CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX" CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX"
NVCC="$with_cuda/bin/nvcc" NVCC="$with_cuda/bin/nvcc"
else else
CUDA_CFLAGS="-I/usr/local/cuda/include $CUDA_CFLAGS" CUDA_INCLUDES="-I/usr/local/cuda/include"
CUDA_LIBS="-lcudart -static-libstdc++" CUDA_LIBS="-lcudart -static-libstdc++"
CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX" CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
NVCC="nvcc" NVCC="nvcc"
fi fi
if test -n "$with_nvml" ; then
NVML_LIBPATH=$with_nvml
CUDA_LDFLAGS="$CUDA_LDFLAGS -ldl"
fi
AC_SUBST(NVML_LIBPATH)
AC_SUBST(CUDA_CFLAGS) AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS) AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_LDFLAGS) AC_SUBST(CUDA_LDFLAGS)
AC_SUBST(NVCC) AC_SUBST(NVCC)

2
configure.sh

@ -7,5 +7,5 @@
extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16" extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so

24
cpu-miner.c

@ -56,7 +56,7 @@ BOOL WINAPI ConsoleHandler(DWORD);
#define HEAVYCOIN_BLKHDR_SZ 84 #define HEAVYCOIN_BLKHDR_SZ 84
#define MNR_BLKHDR_SZ 80 #define MNR_BLKHDR_SZ 80
// from heavy.cu // from cuda.cu
#ifdef __cplusplus #ifdef __cplusplus
extern "C" extern "C"
{ {
@ -69,6 +69,9 @@ int cuda_finddevice(char *name);
} }
#endif #endif
#ifdef USE_WRAPNVML
#include "nvml.h"
#endif
#ifdef __linux /* Linux specific policy and affinity management */ #ifdef __linux /* Linux specific policy and affinity management */
#include <sched.h> #include <sched.h>
@ -244,6 +247,10 @@ uint32_t opt_work_size = 0; /* default */
char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */ char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */
int opt_api_listen = 4068; /* 0 to disable */ int opt_api_listen = 4068; /* 0 to disable */
#ifdef USE_WRAPNVML
wrap_nvml_handle *nvmlh = NULL;
#endif
#ifdef HAVE_GETOPT_LONG #ifdef HAVE_GETOPT_LONG
#include <getopt.h> #include <getopt.h>
#else #else
@ -421,7 +428,10 @@ void proper_exit(int reason)
#ifdef WIN32 #ifdef WIN32
timeEndPeriod(1); // else never executed timeEndPeriod(1); // else never executed
#endif #endif
#ifdef USE_WRAPNVML
if (nvmlh)
wrap_nvml_destroy(nvmlh);
#endif
exit(reason); exit(reason);
} }
@ -2129,6 +2139,16 @@ int main(int argc, char *argv[])
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
} }
#ifdef USE_WRAPNVML
nvmlh = wrap_nvml_create();
if (nvmlh) {
// todo: link threads info gpu
applog(LOG_INFO, "NVML GPU monitoring enabled.");
} else {
applog(LOG_INFO, "NVML GPU monitoring is not available.");
}
#endif
if (opt_api_listen) { if (opt_api_listen) {
/* api thread */ /* api thread */
api_thr_id = opt_n_threads + 3; api_thr_id = opt_n_threads + 3;

6
cpuminer-config.h

@ -156,7 +156,7 @@
#define PACKAGE_NAME "ccminer" #define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 1.4.8" #define PACKAGE_STRING "ccminer 1.4.9"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer" #define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "1.4.8" #define PACKAGE_VERSION "1.4.9"
/* If using the C implementation of alloca, define if you know the /* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
#define USE_XOP 1 #define USE_XOP 1
/* Version number of package */ /* Version number of package */
#define VERSION "1.4.8" #define VERSION "1.4.9"
/* Define curl_free() as free() if our version of curl lacks curl_free. */ /* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */ /* #undef curl_free */

8
cuda.cu → cuda.cpp

@ -9,16 +9,22 @@
#endif #endif
// include thrust // include thrust
#ifndef __cplusplus
#include <thrust/version.h> #include <thrust/version.h>
#include <thrust/remove.h> #include <thrust/remove.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h> #include <thrust/iterator/constant_iterator.h>
#else
#include <ctype.h>
#endif
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_runtime.h"
extern char *device_name[8]; extern char *device_name[8];
extern int device_map[8];
extern int device_sm[8];
// CUDA Devices on the System // CUDA Devices on the System
extern "C" int cuda_num_devices() extern "C" int cuda_num_devices()

18
miner.h

@ -356,21 +356,20 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
void *api_thread(void *userdata); void *api_thread(void *userdata);
struct cgpu_info { struct cgpu_info {
int thr_id;
int accepted; int accepted;
int rejected; int rejected;
int hw_errors; int hw_errors;
double khashes; double khashes;
int intensity; int intensity;
#ifdef HAVE_HWMONITORING #ifdef USE_WRAPNVML
bool has_monitoring; bool has_monitoring;
int gpu_engine; float gpu_temp;
int min_engine; unsigned int gpu_fan;
int gpu_fan; unsigned int gpu_power;
int min_fan; unsigned int gpu_clock;
int gpu_memclock; unsigned int gpu_memclock;
int gpu_memdiff; double gpu_vddc;
int gpu_powertune;
float gpu_vddc;
#endif #endif
}; };
@ -456,6 +455,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x,
struct timeval *y); struct timeval *y);
extern bool fulltest(const uint32_t *hash, const uint32_t *target); extern bool fulltest(const uint32_t *hash, const uint32_t *target);
extern void diff_to_target(uint32_t *target, double diff); extern void diff_to_target(uint32_t *target, double diff);
extern void get_currentalgo(char* buf, int sz);
struct stratum_job { struct stratum_job {
char *job_id; char *job_id;

479
nvml.cpp

@ -0,0 +1,479 @@
/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
* Tanguy Pruvot - tpruvot@github
*
*/
#ifdef USE_WRAPNVML
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#ifndef _MSC_VER
#include <libgen.h>
#endif
#include "miner.h"
#include "cuda_runtime.h"
#include "nvml.h"
/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
/* FreeLibrary returns nonzero on success */
return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
static void *wrap_dlopen(const char *filename) {
return dlopen(filename, RTLD_NOW);
}
static void *wrap_dlsym(void *h, const char *sym) {
return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
return dlclose(h);
}
#endif
#if defined(__cplusplus)
extern "C" {
#endif
wrap_nvml_handle * wrap_nvml_create()
{
int i=0;
wrap_nvml_handle *nvmlh = NULL;
/*
* We use hard-coded library installation locations for the time being...
* No idea where or if libnvidia-ml.so is installed on MacOS X, a
* deep scouring of the filesystem on one of the Mac CUDA build boxes
* I used turned up nothing, so for now it's not going to work on OSX.
*/
#if defined(_WIN64)
/* 64-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(_WIN32) || defined(_MSC_VER)
/* 32-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
/* 32-bit linux assumed */
#define libnvidia_ml "/usr/lib32/libnvidia-ml.so"
#elif defined(__linux)
/* 64-bit linux assumed */
#define libnvidia_ml "/usr/lib/libnvidia-ml.so"
#else
#error "Unrecognized platform: need NVML DLL path for this platform..."
#endif
#if WIN32
char tmp[512];
ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
#else
char tmp[512] = libnvidia_ml;
#endif
void *nvml_dll = wrap_dlopen(tmp);
if (nvml_dll == NULL) {
#ifdef WIN32
char lib[] = "nvml.dll";
#else
char lib[64] = { '\0' };
snprintf(lib, sizeof(lib), "%s", basename(tmp));
/* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */
#endif
nvml_dll = wrap_dlopen(lib);
if (opt_debug)
applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll);
}
if (nvml_dll == NULL) {
if (opt_debug)
applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp);
return NULL;
}
nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
nvmlh->nvml_dll = nvml_dll;
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
if (!nvmlh->nvmlInit)
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t))
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL ||
nvmlh->nvmlDeviceGetCount == NULL ||
nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
nvmlh->nvmlDeviceGetPciInfo == NULL ||
nvmlh->nvmlDeviceGetName == NULL ||
nvmlh->nvmlDeviceGetTemperature == NULL ||
nvmlh->nvmlDeviceGetFanSpeed == NULL ||
nvmlh->nvmlDeviceGetPowerUsage == NULL)
{
if (opt_debug)
applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
nvmlh->nvmlInit();
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
if (opt_debug)
applog(LOG_DEBUG, "Failed to query CUDA device count!");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
/* Obtain GPU device handles we're going to need repeatedly... */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
}
/* Query PCI info for each NVML device, and build table for mapping of */
/* CUDA device IDs to NVML device IDs and vice versa */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
wrap_nvmlPciInfo_t pciinfo;
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
}
/* build mapping of NVML device IDs to CUDA IDs */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvml_cuda_device_id[i] = -1;
}
for (i=0; i<nvmlh->cuda_gpucount; i++) {
cudaDeviceProp props;
nvmlh->cuda_nvml_device_id[i] = -1;
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
int j;
for (j=0; j<nvmlh->nvml_gpucount; j++) {
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
if (opt_debug)
applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j);
nvmlh->nvml_cuda_device_id[j] = i;
nvmlh->cuda_nvml_device_id[i] = j;
}
}
}
}
return nvmlh;
}
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->nvml_gpucount;
return 0;
}
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->cuda_gpucount;
return 0;
}
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
return -1;
return 0;
}
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}
return 0;
}
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}
return 0;
}
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
{
nvmlh->nvmlShutdown();
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return 0;
}
/* api functions */
extern wrap_nvml_handle *nvmlh;
extern int device_map[8];
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
{
unsigned int pct = 0;
if (nvmlh) {
wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &pct);
}
return pct;
}
double gpu_temp(struct cgpu_info *gpu)
{
double tc = 0.0;
if (nvmlh) {
unsigned int tmp = 0;
wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &tmp);
tc = (double) tmp;
}
return tc;
}
unsigned int gpu_clock(struct cgpu_info *gpu)
{
unsigned int freq = 0;
if (nvmlh) {
wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq);
}
return freq;
}
unsigned int gpu_power(struct cgpu_info *gpu)
{
unsigned int mw = 0;
if (nvmlh) {
wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &mw);
}
return mw;
}
int gpu_pstate(struct cgpu_info *gpu)
{
int pstate = 0;
if (nvmlh) {
wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &pstate);
//gpu->gpu_pstate = pstate;
}
return pstate;
}
#if defined(__cplusplus)
}
#endif
#endif /* USE_WRAPNVML */
/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
nvmlDeviceGetAccountingBufferSize
nvmlDeviceGetAccountingMode
nvmlDeviceGetAccountingPids
nvmlDeviceGetAccountingStats
nvmlDeviceGetAPIRestriction
nvmlDeviceGetApplicationsClock
nvmlDeviceGetAutoBoostedClocksEnabled
nvmlDeviceGetBAR1MemoryInfo
nvmlDeviceGetBoardId
nvmlDeviceGetBrand
nvmlDeviceGetBridgeChipInfo
* nvmlDeviceGetClockInfo
nvmlDeviceGetComputeMode
nvmlDeviceGetComputeRunningProcesses
nvmlDeviceGetCount
nvmlDeviceGetCount_v2
nvmlDeviceGetCpuAffinity
nvmlDeviceGetCurrentClocksThrottleReasons
nvmlDeviceGetCurrPcieLinkGeneration
nvmlDeviceGetCurrPcieLinkWidth
nvmlDeviceGetDecoderUtilization
nvmlDeviceGetDefaultApplicationsClock
nvmlDeviceGetDetailedEccErrors
nvmlDeviceGetDisplayActive
nvmlDeviceGetDisplayMode
nvmlDeviceGetDriverModel
nvmlDeviceGetEccMode
nvmlDeviceGetEncoderUtilization
nvmlDeviceGetEnforcedPowerLimit
* nvmlDeviceGetFanSpeed
nvmlDeviceGetGpuOperationMode
nvmlDeviceGetHandleByIndex
nvmlDeviceGetHandleByIndex_v2
nvmlDeviceGetHandleByPciBusId
nvmlDeviceGetHandleByPciBusId_v2
nvmlDeviceGetHandleBySerial
nvmlDeviceGetHandleByUUID
nvmlDeviceGetIndex
nvmlDeviceGetInforomConfigurationChecksum
nvmlDeviceGetInforomImageVersion
nvmlDeviceGetInforomVersion
nvmlDeviceGetMaxClockInfo
nvmlDeviceGetMaxPcieLinkGeneration
nvmlDeviceGetMaxPcieLinkWidth
nvmlDeviceGetMemoryErrorCounter
nvmlDeviceGetMemoryInfo
nvmlDeviceGetMinorNumber
nvmlDeviceGetMultiGpuBoard
nvmlDeviceGetName
nvmlDeviceGetPciInfo
nvmlDeviceGetPciInfo_v2
* nvmlDeviceGetPerformanceState
nvmlDeviceGetPersistenceMode
nvmlDeviceGetPowerManagementDefaultLimit
nvmlDeviceGetPowerManagementLimit
nvmlDeviceGetPowerManagementLimitConstraints
nvmlDeviceGetPowerManagementMode
nvmlDeviceGetPowerState (deprecated)
* nvmlDeviceGetPowerUsage
nvmlDeviceGetRetiredPages
nvmlDeviceGetRetiredPagesPendingStatus
nvmlDeviceGetSamples
nvmlDeviceGetSerial
nvmlDeviceGetSupportedClocksThrottleReasons
nvmlDeviceGetSupportedEventTypes
nvmlDeviceGetSupportedGraphicsClocks
nvmlDeviceGetSupportedMemoryClocks
nvmlDeviceGetTemperature
nvmlDeviceGetTemperatureThreshold
nvmlDeviceGetTotalEccErrors
nvmlDeviceGetUtilizationRates
nvmlDeviceGetUUID
nvmlDeviceGetVbiosVersion
nvmlDeviceGetViolationStatus
*/

146
nvml.h

@ -0,0 +1,146 @@
/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
*
*/
#if defined(__cplusplus)
extern "C" {
#endif
/*
* Ugly hacks to avoid dependencies on the real nvml.h until it starts
* getting included with the CUDA toolkit or a GDK that's got a known
* install location, etc.
*/
typedef enum wrap_nvmlReturn_enum {
WRAPNVML_SUCCESS = 0
} wrap_nvmlReturn_t;
typedef void * wrap_nvmlDevice_t;
/* our own version of the PCI info struct */
typedef struct {
char bus_id_str[16]; /* string form of bus info */
unsigned int domain;
unsigned int bus;
unsigned int device;
unsigned int pci_device_id; /* combined device and vendor id */
unsigned int pci_subsystem_id;
unsigned int res0; /* NVML internal use only */
unsigned int res1;
unsigned int res2;
unsigned int res3;
} wrap_nvmlPciInfo_t;
typedef enum nvmlClockType_t {
NVML_CLOCK_GRAPHICS = 0,
NVML_CLOCK_SM = 1,
NVML_CLOCK_MEM = 2
} wrap_nvmlClockType_t;
/*
* Handle to hold the function pointers for the entry points we need,
* and the shared library itself.
*/
typedef struct {
void *nvml_dll;
int nvml_gpucount;
int cuda_gpucount;
unsigned int *nvml_pci_domain_id;
unsigned int *nvml_pci_bus_id;
unsigned int *nvml_pci_device_id;
int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */
int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */
wrap_nvmlDevice_t *devs;
wrap_nvmlReturn_t (*nvmlInit)(void);
wrap_nvmlReturn_t (*nvmlDeviceGetCount)(int *);
wrap_nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, wrap_nvmlDevice_t *);
wrap_nvmlReturn_t (*nvmlDeviceGetClockInfo)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetPciInfo)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *);
wrap_nvmlReturn_t (*nvmlDeviceGetName)(wrap_nvmlDevice_t, char *, int);
wrap_nvmlReturn_t (*nvmlDeviceGetTemperature)(wrap_nvmlDevice_t, int, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetFanSpeed)(wrap_nvmlDevice_t, unsigned int *);
wrap_nvmlReturn_t (*nvmlDeviceGetPerformanceState)(wrap_nvmlDevice_t, int *); /* enum */
wrap_nvmlReturn_t (*nvmlDeviceGetPowerUsage)(wrap_nvmlDevice_t, unsigned int *);
char* (*nvmlErrorString)(wrap_nvmlReturn_t);
wrap_nvmlReturn_t (*nvmlShutdown)(void);
} wrap_nvml_handle;
wrap_nvml_handle * wrap_nvml_create();
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh);
/*
* Query the number of GPUs seen by NVML
*/
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);
/*
* Query the number of GPUs seen by CUDA
*/
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);
/*
* query the name of the GPU model from the CUDA device ID
*
*/
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
int gpuindex,
char *namebuf,
int bufsize);
/*
* Query the current GPU temperature (Celsius), from the CUDA device ID
*/
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
int gpuindex, unsigned int *tempC);
/*
* Query the current GPU fan speed (percent) from the CUDA device ID
*/
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
int gpuindex, unsigned int *fanpcnt);
/*
* Query the current GPU speed from the CUDA device ID
*/
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh,
int gpuindex, int clktype, unsigned int *freq);
/*
* Query the current GPU power usage in millwatts from the CUDA device ID
*
* This feature is only available on recent GPU generations and may be
* limited in some cases only to Tesla series GPUs.
* If the query is run on an unsupported GPU, this routine will return -1.
*/
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
int gpuindex,
unsigned int *milliwatts);
/* api functions */
#include "miner.h"
unsigned int gpu_fanpercent(struct cgpu_info *gpu);
double gpu_temp(struct cgpu_info *gpu);
unsigned int gpu_clock(struct cgpu_info *gpu);
unsigned int gpu_power(struct cgpu_info *gpu);
int gpu_pstate(struct cgpu_info *gpu);
#if defined(__cplusplus)
}
#endif
Loading…
Cancel
Save