From 49f3c454c2aa4aa776221e53043154319bfe2cba Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 13 Nov 2014 14:11:43 +0100 Subject: [PATCH] Add nvml for GPU monitoring (squashed) Based on mwhite73 implementation Linked to the api system Also fix Makefile to support standard c++ files This prevent nvcc use without device code Signed-off-by: Tanguy Pruvot --- Makefile.am | 21 +- README.txt | 8 +- api.c => api.cpp | 51 +++-- ccminer.vcxproj | 15 +- ccminer.vcxproj.filters | 9 +- configure.ac | 31 ++- configure.sh | 2 +- cpu-miner.c | 24 +- cpuminer-config.h | 6 +- cuda.cu => cuda.cpp | 8 +- miner.h | 18 +- nvml.cpp | 479 ++++++++++++++++++++++++++++++++++++++++ nvml.h | 146 ++++++++++++ 13 files changed, 752 insertions(+), 66 deletions(-) rename api.c => api.cpp (91%) rename cuda.cu => cuda.cpp (96%) create mode 100644 nvml.cpp create mode 100644 nvml.h diff --git a/Makefile.am b/Makefile.am index 26897ef..2f8c418 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,8 +1,9 @@ +# allow to use Host cuda functions in C/C++ +DEF_INCLUDES = @CUDA_INCLUDES@ +JANSSON_INCLUDES= if WANT_JANSSON JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson -else -JANSSON_INCLUDES= endif EXTRA_DIST = autogen.sh README.txt LICENSE.txt \ @@ -17,7 +18,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/sys/time.h compat/getopt/getopt.h \ cpu-miner.c util.c crc32.c hefty1.c scrypt.c \ - api.c hashlog.cpp stats.cpp cuda.cu \ + api.cpp hashlog.cpp stats.cpp cuda.cpp \ heavy/heavy.cu \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \ @@ -49,19 +50,25 @@ ccminer_SOURCES = elist.h miner.h compat.h \ x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \ x11/s3.cu +if HAVE_NVML +ccminer_SOURCES += nvml.cpp +nvml_defs = -DUSE_WRAPNVML +nvml_libs = -ldl +endif + if HAVE_WINDOWS ccminer_SOURCES += compat/winansi.c endif -ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ -ccminer_LDADD = @LIBCURL@ 
@JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ -ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME +ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ +ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ $(nvml_libs) +ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\" #nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\" #nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\" -nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@ +nvcc_FLAGS = $(nvcc_ARCH) @CUDA_INCLUDES@ -I. @CUDA_CFLAGS@ nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v" # we're now targeting all major compute architectures within one binary. diff --git a/README.txt b/README.txt index 416fb5c..62a7e3c 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccMiner release 1.4.8-tpruvot (12 Nov 2014) - "API Stats" +ccMiner release 1.4.9-tpruvot (Nov 2014) - "GPU Monitoring" --------------------------------------------------------------- *************************************************************** @@ -155,9 +155,13 @@ features. >>> RELEASE HISTORY <<< + Nov. 13th 2014 v1.4.9 + Add nvml unit to monitor nvidia cards (api) + API: small changes, bump v1.1 + Nov. 12th 2014 v1.4.8 Add a basic API and sample php json wrapper - Add statsavg (def 20) and api-bind parameters + Add statsavg (def 20) and api-bind parameters Fix displayed hashrate for multi gpus systems Nov. 
11th 2014 v1.4.7 diff --git a/api.c b/api.cpp similarity index 91% rename from api.c rename to api.cpp index 8d65616..fa716ba 100644 --- a/api.c +++ b/api.cpp @@ -8,7 +8,7 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. See COPYING for more details. */ -#define APIVERSION "1.0" +#define APIVERSION "1.1" #ifdef _MSC_VER # define _WINSOCK_DEPRECATED_NO_WARNINGS @@ -35,6 +35,10 @@ #include "compat.h" #include "miner.h" +#ifdef USE_WRAPNVML +#include "nvml.h" +#endif + #ifndef _MSC_VER # include # include @@ -105,25 +109,26 @@ extern uint32_t rejected_count; #define gpu_threads opt_n_threads -extern void get_currentalgo(char* buf, int sz); - /***************************************************************/ static void gpustatus(int thr_id) { char buf[MYBUFSIZ]; float gt; - int gf, gp; + int gp, gf; if (thr_id >= 0 && thr_id < gpu_threads) { struct cgpu_info *cgpu = &thr_info[thr_id].gpu; -#ifdef HAVE_HWMONITORING + cgpu->thr_id = thr_id; + +#ifdef USE_WRAPNVML // todo - if (gpu->has_monitoring) { - gt = gpu_temp(gpu); - gf = gpu_fanspeed(gpu); - gp = gpu_fanpercent(gpu); + if (1 || cgpu->has_monitoring) { + gf = gpu_fanpercent(cgpu); + gt = gpu_temp(cgpu); + gp = gpu_power(cgpu); + // gpu_clock(cgpu); } else #endif @@ -148,7 +153,7 @@ static void gpustatus(int thr_id) cgpu->khashes = stats_get_speed(thr_id) / 1000.0; - sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;FANP=%d;KHS=%.2f;" + sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;POWER=%d;KHS=%.2f;" "HWF=%d;I=%d|", thr_id, gt, gf, gp, cgpu->khashes, cgpu->hw_errors, cgpu->intensity); @@ -162,14 +167,14 @@ static void gpustatus(int thr_id) static char *getsummary(char *params) { char algo[64] = ""; - time_t uptime = (time(NULL) - startup); - double accps = (60.0 * accepted_count) / (uptime ? (uint32_t) uptime : 1.0); + double uptime = difftime(time(NULL), startup); + double accps = (60.0 * accepted_count) / (uptime ? 
uptime : 1.0); get_currentalgo(algo, sizeof(algo)); *buffer = '\0'; sprintf(buffer, "NAME=%s;VER=%s;API=%s;" - "ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%d|", + "ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%.1f|", PACKAGE_NAME, PACKAGE_VERSION, APIVERSION, algo, (double)global_hashrate / 1000.0, accepted_count, rejected_count, @@ -186,7 +191,7 @@ static char *getstats(char *params) } struct CMDS { - char *name; + const char *name; char *(*func)(char *); } cmds[] = { { "summary", getsummary }, @@ -195,15 +200,18 @@ struct CMDS { #define CMDMAX 2 -static void send_result(SOCKETTYPE c, char *result) +static int send_result(SOCKETTYPE c, char *result) { int n; - if (result == NULL) - result = ""; + if (!result) { + n = send(c, "", 1, 0); + } else { + // ignore failure - it's closed immediately anyway + n = send(c, result, strlen(result) + 1, 0); + } - // ignore failure - it's closed immediately anyway - n = send(c, result, strlen(result) + 1, 0); + return n; } /* @@ -400,7 +408,8 @@ static void api() if ((time(NULL) - bindstart) > 61) break; else { - applog(LOG_ERR, "API bind to port %d failed - trying again in 15sec", port); + if (!opt_quiet || opt_debug) + applog(LOG_WARNING, "API bind to port %d failed - trying again in 15sec", port); sleep(15); } } @@ -409,7 +418,7 @@ static void api() } if (bound == 0) { - applog(LOG_ERR, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE); + applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE); free(apisock); return; } diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 9d5603b..f14e5bc 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -87,7 +87,7 @@ Disabled MultiThreadedDebugDLL true - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + 
WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) .;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) true @@ -114,7 +114,7 @@ Disabled MultiThreadedDebugDLL true - WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) .;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) 8Bytes true @@ -150,7 +150,7 @@ false true true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) .;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) true SyncCThrow @@ -193,7 +193,7 @@ false true true - WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) + WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) 
.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) @@ -240,9 +240,8 @@ - - /Tp %(AdditionalOptions) - + + @@ -321,7 +320,7 @@ - + true diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 6d1ff7c..51cb1de 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -192,7 +192,10 @@ Source Files - + + Source Files + + Source Files @@ -313,7 +316,7 @@ - + Source Files\CUDA @@ -482,4 +485,4 @@ Source Files\CUDA\x11 - \ No newline at end of file + diff --git a/configure.ac b/configure.ac index 674b2b6..e6418ed 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [1.4.8]) +AC_INIT([ccminer], [1.4.9]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM @@ -142,19 +142,32 @@ dnl Setup CUDA paths AC_ARG_WITH([cuda], [ --with-cuda=PATH prefix where cuda is installed [default=/usr/local/cuda]]) +AC_ARG_WITH([nvml], + [ --with-nvml=PATH prefix where libnvml is installed [default=/usr/lib]]) + +AM_CONDITIONAL([HAVE_NVML], [test -n "$with_nvml"]) + if test -n "$with_cuda" then - CUDA_CFLAGS="-I$with_cuda/include $CUDA_CFLAGS" - CUDA_LIBS="-lcudart" - CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX" - NVCC="$with_cuda/bin/nvcc" + CUDA_INCLUDES="-I$with_cuda/include" + CUDA_LIBS="-lcudart" + CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX" + NVCC="$with_cuda/bin/nvcc" else - CUDA_CFLAGS="-I/usr/local/cuda/include $CUDA_CFLAGS" - CUDA_LIBS="-lcudart -static-libstdc++" - CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX" - NVCC="nvcc" + CUDA_INCLUDES="-I/usr/local/cuda/include" + CUDA_LIBS="-lcudart -static-libstdc++" + CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX" + NVCC="nvcc" fi + +if test -n "$with_nvml" ; then + NVML_LIBPATH=$with_nvml + CUDA_LDFLAGS="$CUDA_LDFLAGS -ldl" +fi +AC_SUBST(NVML_LIBPATH) + AC_SUBST(CUDA_CFLAGS) +AC_SUBST(CUDA_INCLUDES) AC_SUBST(CUDA_LIBS) AC_SUBST(CUDA_LDFLAGS) 
AC_SUBST(NVCC) diff --git a/configure.sh b/configure.sh index 7e277f7..1084ba7 100755 --- a/configure.sh +++ b/configure.sh @@ -7,5 +7,5 @@ extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16" -CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda +CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so diff --git a/cpu-miner.c b/cpu-miner.c index 9cae3e3..f2ff544 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -56,7 +56,7 @@ BOOL WINAPI ConsoleHandler(DWORD); #define HEAVYCOIN_BLKHDR_SZ 84 #define MNR_BLKHDR_SZ 80 -// from heavy.cu +// from cuda.cu #ifdef __cplusplus extern "C" { @@ -69,6 +69,9 @@ int cuda_finddevice(char *name); } #endif +#ifdef USE_WRAPNVML +#include "nvml.h" +#endif #ifdef __linux /* Linux specific policy and affinity management */ #include @@ -244,6 +247,10 @@ uint32_t opt_work_size = 0; /* default */ char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */ int opt_api_listen = 4068; /* 0 to disable */ +#ifdef USE_WRAPNVML +wrap_nvml_handle *nvmlh = NULL; +#endif + #ifdef HAVE_GETOPT_LONG #include #else @@ -421,7 +428,10 @@ void proper_exit(int reason) #ifdef WIN32 timeEndPeriod(1); // else never executed #endif - +#ifdef USE_WRAPNVML + if (nvmlh) + wrap_nvml_destroy(nvmlh); +#endif exit(reason); } @@ -2129,6 +2139,16 @@ int main(int argc, char *argv[]) tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); } +#ifdef USE_WRAPNVML + nvmlh = wrap_nvml_create(); + if (nvmlh) { + // todo: link threads info gpu + applog(LOG_INFO, "NVML GPU monitoring enabled."); + } else { + applog(LOG_INFO, "NVML GPU monitoring is not available."); + } +#endif + if (opt_api_listen) { /* api thread */ api_thr_id = opt_n_threads + 3; diff --git a/cpuminer-config.h b/cpuminer-config.h index 1767071..469a029 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -156,7 +156,7 @@ #define PACKAGE_NAME 
"ccminer" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ccminer 1.4.8" +#define PACKAGE_STRING "ccminer 1.4.9" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "ccminer" @@ -165,7 +165,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.4.8" +#define PACKAGE_VERSION "1.4.9" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -188,7 +188,7 @@ #define USE_XOP 1 /* Version number of package */ -#define VERSION "1.4.8" +#define VERSION "1.4.9" /* Define curl_free() as free() if our version of curl lacks curl_free. */ /* #undef curl_free */ diff --git a/cuda.cu b/cuda.cpp similarity index 96% rename from cuda.cu rename to cuda.cpp index eb76a7e..6d03f66 100644 --- a/cuda.cu +++ b/cuda.cpp @@ -9,16 +9,22 @@ #endif // include thrust +#ifndef __cplusplus #include #include #include #include +#else +#include +#endif #include "miner.h" -#include "cuda_helper.h" +#include "cuda_runtime.h" extern char *device_name[8]; +extern int device_map[8]; +extern int device_sm[8]; // CUDA Devices on the System extern "C" int cuda_num_devices() diff --git a/miner.h b/miner.h index 1de9bf6..4140df5 100644 --- a/miner.h +++ b/miner.h @@ -356,21 +356,20 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata, void *api_thread(void *userdata); struct cgpu_info { + int thr_id; int accepted; int rejected; int hw_errors; double khashes; int intensity; -#ifdef HAVE_HWMONITORING +#ifdef USE_WRAPNVML bool has_monitoring; - int gpu_engine; - int min_engine; - int gpu_fan; - int min_fan; - int gpu_memclock; - int gpu_memdiff; - int gpu_powertune; - float gpu_vddc; + float gpu_temp; + unsigned int gpu_fan; + unsigned int gpu_power; + unsigned int gpu_clock; + unsigned int gpu_memclock; + double gpu_vddc; #endif }; @@ -456,6 +455,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x, 
struct timeval *y); extern bool fulltest(const uint32_t *hash, const uint32_t *target); extern void diff_to_target(uint32_t *target, double diff); +extern void get_currentalgo(char* buf, int sz); struct stratum_job { char *job_id; diff --git a/nvml.cpp b/nvml.cpp new file mode 100644 index 0000000..6700567 --- /dev/null +++ b/nvml.cpp @@ -0,0 +1,479 @@ +/* + * A trivial little dlopen()-based wrapper library for the + * NVIDIA NVML library, to allow runtime discovery of NVML on an + * arbitrary system. This is all very hackish and simple-minded, but + * it serves my immediate needs in the short term until NVIDIA provides + * a static NVML wrapper library themselves, hopefully in + * CUDA 6.5 or maybe sometime shortly after. + * + * This trivial code is made available under the "new" 3-clause BSD license, + * and/or any of the GPL licenses you prefer. + * Feel free to use the code and modify as you see fit. + * + * John E. Stone - john.stone@gmail.com + * Tanguy Pruvot - tpruvot@github + * + */ + +#ifdef USE_WRAPNVML + +#include +#include +#include +#ifndef _MSC_VER +#include +#endif + +#include "miner.h" +#include "cuda_runtime.h" +#include "nvml.h" + +/* + * Wrappers to emulate dlopen() on other systems like Windows + */ +#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) + #include + static void *wrap_dlopen(const char *filename) { + return (void *)LoadLibrary(filename); + } + static void *wrap_dlsym(void *h, const char *sym) { + return (void *)GetProcAddress((HINSTANCE)h, sym); + } + static int wrap_dlclose(void *h) { + /* FreeLibrary returns nonzero on success */ + return (!FreeLibrary((HINSTANCE)h)); + } +#else + /* assume we can use dlopen itself... 
*/ + #include <dlfcn.h> + static void *wrap_dlopen(const char *filename) { + return dlopen(filename, RTLD_NOW); + } + static void *wrap_dlsym(void *h, const char *sym) { + return dlsym(h, sym); + } + static int wrap_dlclose(void *h) { + return dlclose(h); + } +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +wrap_nvml_handle * wrap_nvml_create() +{ + int i=0; + wrap_nvml_handle *nvmlh = NULL; + + /* + * We use hard-coded library installation locations for the time being... + * No idea where or if libnvidia-ml.so is installed on MacOS X, a + * deep scouring of the filesystem on one of the Mac CUDA build boxes + * I used turned up nothing, so for now it's not going to work on OSX. + */ +#if defined(_WIN64) + /* 64-bit Windows */ +#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll" +#elif defined(_WIN32) || defined(_MSC_VER) + /* 32-bit Windows */ +#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll" +#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__)) + /* 32-bit linux assumed */ +#define libnvidia_ml "/usr/lib32/libnvidia-ml.so" +#elif defined(__linux) + /* 64-bit linux assumed */ +#define libnvidia_ml "/usr/lib/libnvidia-ml.so" +#else +#error "Unrecognized platform: need NVML DLL path for this platform..." 
+#endif + +#if WIN32 + char tmp[512]; + ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp)); +#else + char tmp[512] = libnvidia_ml; +#endif + + void *nvml_dll = wrap_dlopen(tmp); + if (nvml_dll == NULL) { +#ifdef WIN32 + char lib[] = "nvml.dll"; +#else + char lib[64] = { '\0' }; + snprintf(lib, sizeof(lib), "%s", basename(tmp)); + /* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */ +#endif + nvml_dll = wrap_dlopen(lib); + if (opt_debug) + applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll); + } + if (nvml_dll == NULL) { + if (opt_debug) + applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp); + return NULL; + } + + nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle)); + if (nvmlh == NULL) { wrap_dlclose(nvml_dll); return NULL; } /* allocation failed: unload the library and report NVML as unavailable */ + nvmlh->nvml_dll = nvml_dll; + + nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); + if (!nvmlh->nvmlInit) + nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); + nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); + nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); + nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); + nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); + nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); + nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); + nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) + wrap_dlsym(nvmlh->nvml_dll, 
"nvmlDeviceGetFanSpeed"); + nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* resolve the matching symbol; not part of the required-pointer check below */ + nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); + nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); + nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)()) + wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); + + if (nvmlh->nvmlInit == NULL || + nvmlh->nvmlShutdown == NULL || + nvmlh->nvmlDeviceGetCount == NULL || + nvmlh->nvmlDeviceGetHandleByIndex == NULL || + nvmlh->nvmlDeviceGetPciInfo == NULL || + nvmlh->nvmlDeviceGetName == NULL || + nvmlh->nvmlDeviceGetTemperature == NULL || + nvmlh->nvmlDeviceGetFanSpeed == NULL || + nvmlh->nvmlDeviceGetPowerUsage == NULL) + { + if (opt_debug) + applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers"); + wrap_dlclose(nvmlh->nvml_dll); + free(nvmlh); + return NULL; + } + + nvmlh->nvmlInit(); + nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); + + /* Query CUDA device count, in case it doesn't agree with NVML, since */ + /* CUDA will only report GPUs with compute capability greater than 1.0 */ + if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { + if (opt_debug) + applog(LOG_DEBUG, "Failed to query CUDA device count!"); + wrap_dlclose(nvmlh->nvml_dll); + free(nvmlh); + return NULL; + } + + nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t)); + nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); + nvmlh->cuda_nvml_device_id = 
(int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); + + /* Obtain GPU device handles we're going to need repeatedly... */ + for (i=0; i < nvmlh->nvml_gpucount; i++) { + nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); + } + + /* Query PCI info for each NVML device, and build table for mapping of */ + /* CUDA device IDs to NVML device IDs and vice versa */ + for (i=0; i < nvmlh->nvml_gpucount; i++) { + wrap_nvmlPciInfo_t pciinfo; + nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); + nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; + nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; + nvmlh->nvml_pci_device_id[i] = pciinfo.device; + } + + /* build mapping of NVML device IDs to CUDA IDs */ + for (i=0; i < nvmlh->nvml_gpucount; i++) { + nvmlh->nvml_cuda_device_id[i] = -1; + } + for (i=0; i < nvmlh->cuda_gpucount; i++) { + cudaDeviceProp props; + nvmlh->cuda_nvml_device_id[i] = -1; + + if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { + int j; + for (j=0; j < nvmlh->nvml_gpucount; j++) { + if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && + (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && + (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { + if (opt_debug) + applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j); + nvmlh->nvml_cuda_device_id[j] = i; + nvmlh->cuda_nvml_device_id[i] = j; + } + } + } + } + + return nvmlh; +} + +int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) +{ + *gpucount = nvmlh->nvml_gpucount; + return 0; +} + +int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) +{ + *gpucount = nvmlh->cuda_gpucount; + return 0; +} + +int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) +{ + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS) + return -1; + + return 0; +} + + +int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, 
int cudaindex, unsigned int *tempC) +{ + wrap_nvmlReturn_t rc; + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC); + if (rc != WRAPNVML_SUCCESS) { + return -1; + } + + return 0; +} + + +int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) +{ + wrap_nvmlReturn_t rc; + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt); + if (rc != WRAPNVML_SUCCESS) { + return -1; + } + + return 0; +} + +/* Not Supported on 750Ti 340.23 */ +int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq) +{ + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq); + if (res != WRAPNVML_SUCCESS) { + if (opt_debug) + applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res)); + return -1; + } + + return 0; +} + +/* Not Supported on 750Ti 340.23 */ +int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) +{ + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); + if (res != WRAPNVML_SUCCESS) { + if (opt_debug) + applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); + return -1; + } + + return 0; +} + +/* Not Supported on 750Ti 340.23 */ +int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate) +{ + int gpuindex = (cudaindex >= 0 && cudaindex < nvmlh->cuda_gpucount) ? nvmlh->cuda_nvml_device_id[cudaindex] : -1; /* reject CUDA ids outside the mapping table */ + if (gpuindex 
< 0 || gpuindex >= nvmlh->nvml_gpucount) + return -1; + + wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); + if (res != WRAPNVML_SUCCESS) { + if (opt_debug) + applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); + return -1; + } + + return 0; +} + +int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) +{ + nvmlh->nvmlShutdown(); + + wrap_dlclose(nvmlh->nvml_dll); + free(nvmlh); + return 0; +} + +/* api functions */ + +extern wrap_nvml_handle *nvmlh; +extern int device_map[8]; + +unsigned int gpu_fanpercent(struct cgpu_info *gpu) +{ + unsigned int pct = 0; + if (nvmlh) { + wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &pct); + } + return pct; +} + +double gpu_temp(struct cgpu_info *gpu) +{ + double tc = 0.0; + if (nvmlh) { + unsigned int tmp = 0; + wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &tmp); + tc = (double) tmp; + } + return tc; +} + +unsigned int gpu_clock(struct cgpu_info *gpu) +{ + unsigned int freq = 0; + if (nvmlh) { + wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq); + } + return freq; +} + +unsigned int gpu_power(struct cgpu_info *gpu) +{ + unsigned int mw = 0; + if (nvmlh) { + wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &mw); + } + return mw; +} + +int gpu_pstate(struct cgpu_info *gpu) +{ + int pstate = 0; + if (nvmlh) { + wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &pstate); + //gpu->gpu_pstate = pstate; + } + return pstate; +} + +#if defined(__cplusplus) +} +#endif + +#endif /* USE_WRAPNVML */ + +/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq + + nvmlDeviceGetAccountingBufferSize + nvmlDeviceGetAccountingMode + nvmlDeviceGetAccountingPids + nvmlDeviceGetAccountingStats + nvmlDeviceGetAPIRestriction + nvmlDeviceGetApplicationsClock + nvmlDeviceGetAutoBoostedClocksEnabled + nvmlDeviceGetBAR1MemoryInfo + nvmlDeviceGetBoardId + nvmlDeviceGetBrand + nvmlDeviceGetBridgeChipInfo 
+* nvmlDeviceGetClockInfo + nvmlDeviceGetComputeMode + nvmlDeviceGetComputeRunningProcesses + nvmlDeviceGetCount + nvmlDeviceGetCount_v2 + nvmlDeviceGetCpuAffinity + nvmlDeviceGetCurrentClocksThrottleReasons + nvmlDeviceGetCurrPcieLinkGeneration + nvmlDeviceGetCurrPcieLinkWidth + nvmlDeviceGetDecoderUtilization + nvmlDeviceGetDefaultApplicationsClock + nvmlDeviceGetDetailedEccErrors + nvmlDeviceGetDisplayActive + nvmlDeviceGetDisplayMode + nvmlDeviceGetDriverModel + nvmlDeviceGetEccMode + nvmlDeviceGetEncoderUtilization + nvmlDeviceGetEnforcedPowerLimit +* nvmlDeviceGetFanSpeed + nvmlDeviceGetGpuOperationMode + nvmlDeviceGetHandleByIndex + nvmlDeviceGetHandleByIndex_v2 + nvmlDeviceGetHandleByPciBusId + nvmlDeviceGetHandleByPciBusId_v2 + nvmlDeviceGetHandleBySerial + nvmlDeviceGetHandleByUUID + nvmlDeviceGetIndex + nvmlDeviceGetInforomConfigurationChecksum + nvmlDeviceGetInforomImageVersion + nvmlDeviceGetInforomVersion + nvmlDeviceGetMaxClockInfo + nvmlDeviceGetMaxPcieLinkGeneration + nvmlDeviceGetMaxPcieLinkWidth + nvmlDeviceGetMemoryErrorCounter + nvmlDeviceGetMemoryInfo + nvmlDeviceGetMinorNumber + nvmlDeviceGetMultiGpuBoard + nvmlDeviceGetName + nvmlDeviceGetPciInfo + nvmlDeviceGetPciInfo_v2 +* nvmlDeviceGetPerformanceState + nvmlDeviceGetPersistenceMode + nvmlDeviceGetPowerManagementDefaultLimit + nvmlDeviceGetPowerManagementLimit + nvmlDeviceGetPowerManagementLimitConstraints + nvmlDeviceGetPowerManagementMode + nvmlDeviceGetPowerState (deprecated) +* nvmlDeviceGetPowerUsage + nvmlDeviceGetRetiredPages + nvmlDeviceGetRetiredPagesPendingStatus + nvmlDeviceGetSamples + nvmlDeviceGetSerial + nvmlDeviceGetSupportedClocksThrottleReasons + nvmlDeviceGetSupportedEventTypes + nvmlDeviceGetSupportedGraphicsClocks + nvmlDeviceGetSupportedMemoryClocks + nvmlDeviceGetTemperature + nvmlDeviceGetTemperatureThreshold + nvmlDeviceGetTotalEccErrors + nvmlDeviceGetUtilizationRates + nvmlDeviceGetUUID + nvmlDeviceGetVbiosVersion + nvmlDeviceGetViolationStatus + +*/ \ No newline 
at end of file diff --git a/nvml.h b/nvml.h new file mode 100644 index 0000000..7f200df --- /dev/null +++ b/nvml.h @@ -0,0 +1,146 @@ +/* + * A trivial little dlopen()-based wrapper library for the + * NVIDIA NVML library, to allow runtime discovery of NVML on an + * arbitrary system. This is all very hackish and simple-minded, but + * it serves my immediate needs in the short term until NVIDIA provides + * a static NVML wrapper library themselves, hopefully in + * CUDA 6.5 or maybe sometime shortly after. + * + * This trivial code is made available under the "new" 3-clause BSD license, + * and/or any of the GPL licenses you prefer. + * Feel free to use the code and modify as you see fit. + * + * John E. Stone - john.stone@gmail.com + * + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Ugly hacks to avoid dependencies on the real nvml.h until it starts + * getting included with the CUDA toolkit or a GDK that's got a known + * install location, etc. + */ +typedef enum wrap_nvmlReturn_enum { + WRAPNVML_SUCCESS = 0 +} wrap_nvmlReturn_t; + +typedef void * wrap_nvmlDevice_t; + +/* our own version of the PCI info struct */ +typedef struct { + char bus_id_str[16]; /* string form of bus info */ + unsigned int domain; + unsigned int bus; + unsigned int device; + unsigned int pci_device_id; /* combined device and vendor id */ + unsigned int pci_subsystem_id; + unsigned int res0; /* NVML internal use only */ + unsigned int res1; + unsigned int res2; + unsigned int res3; +} wrap_nvmlPciInfo_t; + +typedef enum nvmlClockType_t { +NVML_CLOCK_GRAPHICS = 0, +NVML_CLOCK_SM = 1, +NVML_CLOCK_MEM = 2 +} wrap_nvmlClockType_t; + +/* + * Handle to hold the function pointers for the entry points we need, + * and the shared library itself. 
+ */ +typedef struct { + void *nvml_dll; + int nvml_gpucount; + int cuda_gpucount; + unsigned int *nvml_pci_domain_id; + unsigned int *nvml_pci_bus_id; + unsigned int *nvml_pci_device_id; + int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */ + int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */ + wrap_nvmlDevice_t *devs; + wrap_nvmlReturn_t (*nvmlInit)(void); + wrap_nvmlReturn_t (*nvmlDeviceGetCount)(int *); + wrap_nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, wrap_nvmlDevice_t *); + wrap_nvmlReturn_t (*nvmlDeviceGetClockInfo)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *); + wrap_nvmlReturn_t (*nvmlDeviceGetPciInfo)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *); + wrap_nvmlReturn_t (*nvmlDeviceGetName)(wrap_nvmlDevice_t, char *, int); + wrap_nvmlReturn_t (*nvmlDeviceGetTemperature)(wrap_nvmlDevice_t, int, unsigned int *); + wrap_nvmlReturn_t (*nvmlDeviceGetFanSpeed)(wrap_nvmlDevice_t, unsigned int *); + wrap_nvmlReturn_t (*nvmlDeviceGetPerformanceState)(wrap_nvmlDevice_t, int *); /* enum */ + wrap_nvmlReturn_t (*nvmlDeviceGetPowerUsage)(wrap_nvmlDevice_t, unsigned int *); + char* (*nvmlErrorString)(wrap_nvmlReturn_t); + wrap_nvmlReturn_t (*nvmlShutdown)(void); +} wrap_nvml_handle; + + +wrap_nvml_handle * wrap_nvml_create(); +int wrap_nvml_destroy(wrap_nvml_handle *nvmlh); + +/* + * Query the number of GPUs seen by NVML + */ +int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount); + +/* + * Query the number of GPUs seen by CUDA + */ +int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount); + + +/* + * query the name of the GPU model from the CUDA device ID + * + */ +int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, + int gpuindex, + char *namebuf, + int bufsize); + +/* + * Query the current GPU temperature (Celsius), from the CUDA device ID + */ +int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, + int gpuindex, unsigned int *tempC); + +/* + * Query the current GPU fan speed (percent) from the CUDA device ID + */ +int 
wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, + int gpuindex, unsigned int *fanpcnt); + +/* + * Query the current GPU clock speed from the CUDA device ID + */ +int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, + int gpuindex, int clktype, unsigned int *freq); + +/* + * Query the current GPU power usage in milliwatts from the CUDA device ID + * + * This feature is only available on recent GPU generations and may be + * limited in some cases only to Tesla series GPUs. + * If the query is run on an unsupported GPU, this routine will return -1. + */ +int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, + int gpuindex, + unsigned int *milliwatts); + +/* api functions */ + +#include "miner.h" + +unsigned int gpu_fanpercent(struct cgpu_info *gpu); +double gpu_temp(struct cgpu_info *gpu); +unsigned int gpu_clock(struct cgpu_info *gpu); +unsigned int gpu_power(struct cgpu_info *gpu); +int gpu_pstate(struct cgpu_info *gpu); + +#if defined(__cplusplus) +} +#endif +