Add nvml for GPU monitoring (squashed)

Based on the implementation by mwhite73 <marvin.white@gmail.com>. Linked to the API system. Also fix the Makefile to support standard C++ files; this prevents nvcc from being used on files without device code. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
10 years ago · 49f3c454c2
13 changed files with 752 additions and 66 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -1,8 +1,9 @@
 # allow host CUDA functions to be used from C/C++ sources
 DEF_INCLUDES = @CUDA_INCLUDES@
 JANSSON_INCLUDES=
 if WANT_JANSSON
 JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson
 else
 JANSSON_INCLUDES=
 endif
 EXTRA_DIST		= autogen.sh README.txt LICENSE.txt \
@ -17,7 +18,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/inttypes.h compat/stdbool.h compat/unistd.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
-			  api.c hashlog.cpp stats.cpp cuda.cu \
+			  api.cpp hashlog.cpp stats.cpp cuda.cpp \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -49,19 +50,25 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
 			  x11/s3.cu
 if HAVE_NVML
 ccminer_SOURCES += nvml.cpp
 nvml_defs = -DUSE_WRAPNVML
 nvml_libs = -ldl
 endif
 if HAVE_WINDOWS
 ccminer_SOURCES += compat/winansi.c
 endif
 ccminer_LDFLAGS  = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
-ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
+ccminer_LDADD    = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ $(nvml_libs)
-ccminer_CPPFLAGS	= @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
+ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
 nvcc_ARCH  = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
 #nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
 #nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
-nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@
+nvcc_FLAGS = $(nvcc_ARCH) @CUDA_INCLUDES@ -I. @CUDA_CFLAGS@
 nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
 # we're now targeting all major compute architectures within one binary.
--- a/README.txt
+++ b/README.txt
@ -1,5 +1,5 @@
-ccMiner release 1.4.8-tpruvot (12 Nov 2014) - "API Stats"
+ccMiner release 1.4.9-tpruvot (Nov 2014) - "GPU Monitoring"
 ---------------------------------------------------------------
 ***************************************************************
@ -155,6 +155,10 @@ features.
 >>> RELEASE HISTORY <<<
  Nov. 13th 2014  v1.4.9
                  Add nvml unit to monitor nvidia cards (api)
                  API: small changes, bump v1.1
  Nov. 12th 2014  v1.4.8
                  Add a basic API and sample php json wrapper
                  Add statsavg (def 20) and api-bind parameters
--- a/api.cpp
+++ b/api.cpp
@ -8,7 +8,7 @@
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.  See COPYING for more details.
 */
-#define APIVERSION "1.0"
+#define APIVERSION "1.1"
 #ifdef _MSC_VER
 # define  _WINSOCK_DEPRECATED_NO_WARNINGS
@ -35,6 +35,10 @@
 #include "compat.h"
 #include "miner.h"
 #ifdef USE_WRAPNVML
 #include "nvml.h"
 #endif
 #ifndef _MSC_VER
 # include <errno.h>
 # include <sys/socket.h>
@ -105,25 +109,26 @@ extern uint32_t rejected_count;
 #define gpu_threads opt_n_threads
 extern void get_currentalgo(char* buf, int sz);
 /***************************************************************/
 static void gpustatus(int thr_id)
 {
 	char buf[MYBUFSIZ];
 	float gt;
-	int gf, gp;
+	int gp, gf;
 	if (thr_id >= 0 && thr_id < gpu_threads) {
 		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
-#ifdef HAVE_HWMONITORING
+		cgpu->thr_id = thr_id;
 #ifdef USE_WRAPNVML
 		// todo
-		if (gpu->has_monitoring) {
+		if (1 || cgpu->has_monitoring) {
-			gt = gpu_temp(gpu);
+			gf = gpu_fanpercent(cgpu);
-			gf = gpu_fanspeed(gpu);
+			gt = gpu_temp(cgpu);
-			gp = gpu_fanpercent(gpu);
+			gp = gpu_power(cgpu);
 			// gpu_clock(cgpu);
 		}
 		else
 #endif
@ -148,7 +153,7 @@ static void gpustatus(int thr_id)
 		cgpu->khashes = stats_get_speed(thr_id) / 1000.0;
-		sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;FANP=%d;KHS=%.2f;"
+		sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;POWER=%d;KHS=%.2f;"
 			"HWF=%d;I=%d|",
 			thr_id, gt, gf, gp, cgpu->khashes,
 			cgpu->hw_errors, cgpu->intensity);
@ -162,14 +167,14 @@ static void gpustatus(int thr_id)
 static char *getsummary(char *params)
 {
 	char algo[64] = "";
-	time_t uptime = (time(NULL) - startup);
+	double uptime = difftime(time(NULL), startup);
-	double accps = (60.0 * accepted_count) / (uptime ? (uint32_t) uptime : 1.0);
+	double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
 	get_currentalgo(algo, sizeof(algo));
 	*buffer = '\0';
 	sprintf(buffer, "NAME=%s;VER=%s;API=%s;"
-		"ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%d|",
+		"ALGO=%s;KHS=%.2f;ACC=%d;REJ=%d;ACCMN=%.3f;UPTIME=%.1f|",
 		PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
 		algo, (double)global_hashrate / 1000.0,
 		accepted_count, rejected_count,
@ -186,7 +191,7 @@ static char *getstats(char *params)
 }
 struct CMDS {
-	char *name;
+	const char *name;
 	char *(*func)(char *);
 } cmds[] = {
 	{ "summary", getsummary },
@ -195,17 +200,20 @@ struct CMDS {
 #define CMDMAX 2
-static void send_result(SOCKETTYPE c, char *result)
+static int send_result(SOCKETTYPE c, char *result)
 {
 	int n;
-	if (result == NULL)
+	if (!result) {
-		result = "";
+		n = send(c, "", 1, 0);
-
+	} else {
 		// ignore failure - it's closed immediately anyway
 		n = send(c, result, strlen(result) + 1, 0);
 	}
 	return n;
 }
 /*
 * N.B. IPv4 addresses are by definition 32-bit big-endian on all platforms
 */
@ -400,7 +408,8 @@ static void api()
 			if ((time(NULL) - bindstart) > 61)
 				break;
 			else {
-				applog(LOG_ERR, "API bind to port %d failed - trying again in 15sec", port);
+				if (!opt_quiet || opt_debug)
 					applog(LOG_WARNING, "API bind to port %d failed - trying again in 15sec", port);
 				sleep(15);
 			}
 		}
@ -409,7 +418,7 @@ static void api()
 	}
 	if (bound == 0) {
-		applog(LOG_ERR, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
+		applog(LOG_WARNING, "API bind to port %d failed (%s)%s", port, binderror, UNAVAILABLE);
 		free(apisock);
 		return;
 	}
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -87,7 +87,7 @@
      <Optimization>Disabled</Optimization>
      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
    </ClCompile>
@ -114,7 +114,7 @@
      <Optimization>Disabled</Optimization>
      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
      <StructMemberAlignment>8Bytes</StructMemberAlignment>
      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
@ -150,7 +150,7 @@
      <CompileAsManaged>false</CompileAsManaged>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
      <EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
      <ExceptionHandling>SyncCThrow</ExceptionHandling>
@ -193,7 +193,7 @@
      <CompileAsManaged>false</CompileAsManaged>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;USE_WRAPNVML;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\curl-for-windows\curl\include;compat\jansson;compat\getopt;compat\pthreads;compat\curl-for-windows\openssl\openssl\include;compat\curl-for-windows\zlib;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
@ -240,9 +240,8 @@
    <ClCompile Include="groestlcoin.cpp" />
    <ClCompile Include="hashlog.cpp" />
    <ClCompile Include="stats.cpp" />
-    <ClCompile Include="api.c">
+    <ClCompile Include="nvml.cpp" />
-      <AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions>
+    <ClCompile Include="api.cpp" />
    </ClCompile>
    <ClCompile Include="hefty1.c" />
    <ClCompile Include="myriadgroestl.cpp" />
    <ClCompile Include="scrypt.c">
@ -321,7 +320,7 @@
    <ClInclude Include="uint256.h" />
  </ItemGroup>
  <ItemGroup>
-    <CudaCompile Include="cuda.cu" />
+    <CudaCompile Include="cuda.cpp" />
    <CudaCompile Include="bitslice_transformations_quad.cu">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </CudaCompile>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -192,7 +192,10 @@
    <ClCompile Include="stats.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="api.c">
+    <ClCompile Include="api.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="nvml.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
@ -313,7 +316,7 @@
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
-    <CudaCompile Include="cuda.cu">
+    <CudaCompile Include="cuda.cpp">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="cuda_fugue256.cu">
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([ccminer], [1.4.8])
+AC_INIT([ccminer], [1.4.9])
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
@ -142,19 +142,32 @@ dnl Setup CUDA paths
 AC_ARG_WITH([cuda],
   [  --with-cuda=PATH    prefix where cuda is installed [default=/usr/local/cuda]])
 AC_ARG_WITH([nvml],
   [  --with-nvml=PATH    prefix where libnvml is installed [default=/usr/lib]])
 AM_CONDITIONAL([HAVE_NVML], [test -n "$with_nvml"])
 if test -n "$with_cuda"
 then
-   CUDA_CFLAGS="-I$with_cuda/include $CUDA_CFLAGS"
+  CUDA_INCLUDES="-I$with_cuda/include"
  CUDA_LIBS="-lcudart"
  CUDA_LDFLAGS="-L$with_cuda/lib$SUFFIX"
  NVCC="$with_cuda/bin/nvcc"
 else
-   CUDA_CFLAGS="-I/usr/local/cuda/include $CUDA_CFLAGS"
+  CUDA_INCLUDES="-I/usr/local/cuda/include"
  CUDA_LIBS="-lcudart -static-libstdc++"
  CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
  NVCC="nvcc"
 fi
 if test -n "$with_nvml" ; then
  NVML_LIBPATH=$with_nvml
  CUDA_LDFLAGS="$CUDA_LDFLAGS -ldl"
 fi
 AC_SUBST(NVML_LIBPATH)
 AC_SUBST(CUDA_CFLAGS)
 AC_SUBST(CUDA_INCLUDES)
 AC_SUBST(CUDA_LIBS)
 AC_SUBST(CUDA_LDFLAGS)
 AC_SUBST(NVCC)
--- a/configure.sh
+++ b/configure.sh
@ -7,5 +7,5 @@
 extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
-CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda
+CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -56,7 +56,7 @@ BOOL WINAPI ConsoleHandler(DWORD);
 #define HEAVYCOIN_BLKHDR_SZ		84
 #define MNR_BLKHDR_SZ 80
-// from heavy.cu
+// from cuda.cpp
 #ifdef __cplusplus
 extern "C"
 {
@ -69,6 +69,9 @@ int cuda_finddevice(char *name);
 }
 #endif
 #ifdef USE_WRAPNVML
 #include "nvml.h"
 #endif
 #ifdef __linux /* Linux specific policy and affinity management */
 #include <sched.h>
@ -244,6 +247,10 @@ uint32_t opt_work_size = 0; /* default */
 char *opt_api_allow = "127.0.0.1"; /* 0.0.0.0 for all ips */
 int opt_api_listen = 4068; /* 0 to disable */
 #ifdef USE_WRAPNVML
 wrap_nvml_handle *nvmlh = NULL;
 #endif
 #ifdef HAVE_GETOPT_LONG
 #include <getopt.h>
 #else
@ -421,7 +428,10 @@ void proper_exit(int reason)
 #ifdef WIN32
 	timeEndPeriod(1); // else never executed
 #endif
-
+#ifdef USE_WRAPNVML
 	if (nvmlh)
 		wrap_nvml_destroy(nvmlh);
 #endif
 	exit(reason);
 }
@ -2129,6 +2139,16 @@ int main(int argc, char *argv[])
 			tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}
 #ifdef USE_WRAPNVML
 	nvmlh = wrap_nvml_create();
 	if (nvmlh) {
 		// todo: link threads info gpu
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
 	} else {
 		applog(LOG_INFO, "NVML GPU monitoring is not available.");
 	}
 #endif
 	if (opt_api_listen) {
 		/* api thread */
 		api_thr_id = opt_n_threads + 3;
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -156,7 +156,7 @@
 #define PACKAGE_NAME "ccminer"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 1.4.8"
+#define PACKAGE_STRING "ccminer 1.4.9"
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
 #define PACKAGE_URL ""
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.4.8"
+#define PACKAGE_VERSION "1.4.9"
 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
 #define USE_XOP 1
 /* Version number of package */
-#define VERSION "1.4.8"
+#define VERSION "1.4.9"
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
--- a/cuda.cpp
+++ b/cuda.cpp
@ -9,16 +9,22 @@
 #endif
 // include thrust
 #ifndef __cplusplus
 #include <thrust/version.h>
 #include <thrust/remove.h>
 #include <thrust/device_vector.h>
 #include <thrust/iterator/constant_iterator.h>
 #else
 #include <ctype.h>
 #endif
 #include "miner.h"
-#include "cuda_helper.h"
+#include "cuda_runtime.h"
 extern char *device_name[8];
 extern int device_map[8];
 extern int device_sm[8];
 // CUDA Devices on the System
 extern "C" int cuda_num_devices()
--- a/miner.h
+++ b/miner.h
@ -356,21 +356,20 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
 void *api_thread(void *userdata);
 struct cgpu_info {
 	int thr_id;
 	int accepted;
 	int rejected;
 	int hw_errors;
 	double khashes;
 	int intensity;
-#ifdef HAVE_HWMONITORING
+#ifdef USE_WRAPNVML
 	bool has_monitoring;
-	int gpu_engine;
+	float gpu_temp;
-	int min_engine;
+	unsigned int gpu_fan;
-	int gpu_fan;
+	unsigned int gpu_power;
-	int min_fan;
+	unsigned int gpu_clock;
-	int gpu_memclock;
+	unsigned int gpu_memclock;
-	int gpu_memdiff;
+	double gpu_vddc;
 	int gpu_powertune;
 	float gpu_vddc;
 #endif
 };
@ -456,6 +455,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x,
 	struct timeval *y);
 extern bool fulltest(const uint32_t *hash, const uint32_t *target);
 extern void diff_to_target(uint32_t *target, double diff);
 extern void get_currentalgo(char* buf, int sz);
 struct stratum_job {
 	char *job_id;
--- a/nvml.cpp
+++ b/nvml.cpp
@ -0,0 +1,479 @@
 /*
 * A trivial little dlopen()-based wrapper library for the
 * NVIDIA NVML library, to allow runtime discovery of NVML on an
 * arbitrary system.  This is all very hackish and simple-minded, but
 * it serves my immediate needs in the short term until NVIDIA provides
 * a static NVML wrapper library themselves, hopefully in
 * CUDA 6.5 or maybe sometime shortly after.
 *
 * This trivial code is made available under the "new" 3-clause BSD license,
 * and/or any of the GPL licenses you prefer.
 * Feel free to use the code and modify as you see fit.
 *
 * John E. Stone - john.stone@gmail.com
 * Tanguy Pruvot - tpruvot@github
 *
 */
 #ifdef USE_WRAPNVML
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 #ifndef _MSC_VER
 #include <libgen.h>
 #endif
 #include "miner.h"
 #include "cuda_runtime.h"
 #include "nvml.h"
 /*
 * Wrappers to emulate dlopen() on other systems like Windows
 */
 #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
 	#include <windows.h>
 	/*
 	 * Load a shared library by path and return its module handle as an
 	 * opaque pointer (NULL on failure). The ANSI entry point is named
 	 * explicitly because the argument is a narrow string: the generic
 	 * LoadLibrary macro maps to LoadLibraryW in UNICODE builds and would
 	 * not accept a const char *.
 	 */
 	static void *wrap_dlopen(const char *filename) {
 		return (void *)LoadLibraryA(filename);
 	}
 	/* Resolve an exported symbol from a loaded library; NULL if absent. */
 	static void *wrap_dlsym(void *h, const char *sym) {
 		return (void *)GetProcAddress((HINSTANCE)h, sym);
 	}
 	/* Unload a library; returns 0 on success, like dlclose(). */
 	static int wrap_dlclose(void *h) {
 		/* FreeLibrary returns nonzero on success */
 		return (!FreeLibrary((HINSTANCE)h));
 	}
 #else
 	/* assume we can use dlopen itself... */
 	#include <dlfcn.h>
 	/* Load a shared library, resolving all symbols immediately (RTLD_NOW). */
 	static void *wrap_dlopen(const char *filename) {
 		return dlopen(filename, RTLD_NOW);
 	}
 	/* Resolve a symbol from a loaded library; NULL if absent. */
 	static void *wrap_dlsym(void *h, const char *sym) {
 		return dlsym(h, sym);
 	}
 	/* Unload a library; returns 0 on success. */
 	static int wrap_dlclose(void *h) {
 		return dlclose(h);
 	}
 #endif
 #if defined(__cplusplus)
 extern "C" {
 #endif
 wrap_nvml_handle * wrap_nvml_create()
 {
 	int i=0;
 	wrap_nvml_handle *nvmlh = NULL;
 	/*
 	 * We use hard-coded library installation locations for the time being...
 	 * No idea where or if libnvidia-ml.so is installed on MacOS X, a
 	 * deep scouring of the filesystem on one of the Mac CUDA build boxes
 	 * I used turned up nothing, so for now it's not going to work on OSX.
 	 */
 #if defined(_WIN64)
 	/* 64-bit Windows */
 #define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
 #elif defined(_WIN32) || defined(_MSC_VER)
 	/* 32-bit Windows */
 #define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
 #elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
 	/* 32-bit linux assumed */
 #define  libnvidia_ml "/usr/lib32/libnvidia-ml.so"
 #elif defined(__linux)
 	/* 64-bit linux assumed */
 #define  libnvidia_ml "/usr/lib/libnvidia-ml.so"
 #else
 #error "Unrecognized platform: need NVML DLL path for this platform..."
 #endif
 #if WIN32
 	char tmp[512];
 	ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
 #else
 	char tmp[512] = libnvidia_ml;
 #endif
 	void *nvml_dll = wrap_dlopen(tmp);
 	if (nvml_dll == NULL) {
 #ifdef WIN32
 		char lib[] = "nvml.dll";
 #else
 		char lib[64] = { '\0' };
 		snprintf(lib, sizeof(lib), "%s", basename(tmp));
 		/* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */
 #endif
 		nvml_dll = wrap_dlopen(lib);
 		if (opt_debug)
 			applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll);
 	}
 	if (nvml_dll == NULL) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp);
 		return NULL;
 	}
 	nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
 	nvmlh->nvml_dll = nvml_dll;
 	nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
 	if (!nvmlh->nvmlInit)
 		nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
 			wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
 	nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
 	nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
 	nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
 	nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
 	nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
 	nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
 	nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
 	nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
 	nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
 	nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
 	nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
 	if (nvmlh->nvmlInit == NULL ||
 			nvmlh->nvmlShutdown == NULL ||
 			nvmlh->nvmlDeviceGetCount == NULL ||
 			nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
 			nvmlh->nvmlDeviceGetPciInfo == NULL ||
 			nvmlh->nvmlDeviceGetName == NULL ||
 			nvmlh->nvmlDeviceGetTemperature == NULL ||
 			nvmlh->nvmlDeviceGetFanSpeed == NULL ||
 			nvmlh->nvmlDeviceGetPowerUsage == NULL)
 	{
 		if (opt_debug)
 			applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers");
 		wrap_dlclose(nvmlh->nvml_dll);
 		free(nvmlh);
 		return NULL;
 	}
 	nvmlh->nvmlInit();
 	nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
 	/* Query CUDA device count, in case it doesn't agree with NVML, since  */
 	/* CUDA will only report GPUs with compute capability greater than 1.0 */
 	if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "Failed to query CUDA device count!");
 		wrap_dlclose(nvmlh->nvml_dll);
 		free(nvmlh);
 		return NULL;
 	}
 	nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
 	nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
 	nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
 	nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
 	nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
 	nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
 	/* Obtain GPU device handles we're going to need repeatedly... */
 	for (i=0; i<nvmlh->nvml_gpucount; i++) {
 		nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
 	}
 	/* Query PCI info for each NVML device, and build table for mapping of */
 	/* CUDA device IDs to NVML device IDs and vice versa                   */
 	for (i=0; i<nvmlh->nvml_gpucount; i++) {
 		wrap_nvmlPciInfo_t pciinfo;
 		nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
 		nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
 		nvmlh->nvml_pci_bus_id[i]    = pciinfo.bus;
 		nvmlh->nvml_pci_device_id[i] = pciinfo.device;
 	}
 	/* build mapping of NVML device IDs to CUDA IDs */
 	for (i=0; i<nvmlh->nvml_gpucount; i++) {
 		nvmlh->nvml_cuda_device_id[i] = -1;
 	}
 	for (i=0; i<nvmlh->cuda_gpucount; i++) {
 		cudaDeviceProp props;
 		nvmlh->cuda_nvml_device_id[i] = -1;
 		if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
 			int j;
 			for (j=0; j<nvmlh->nvml_gpucount; j++) {
 				if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
 				    (nvmlh->nvml_pci_bus_id[j]    == (uint32_t) props.pciBusID) &&
 				    (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
 					if (opt_debug)
 						applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j);
 					nvmlh->nvml_cuda_device_id[j] = i;
 					nvmlh->cuda_nvml_device_id[i] = j;
 				}
 			}
 		}
 	}
 	return nvmlh;
 }
 /*
  * Copy the number of GPUs NVML reported at handle creation into *gpucount.
  * Always returns 0.
  */
 int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
 {
 	const int nvml_total = nvmlh->nvml_gpucount;
 	*gpucount = nvml_total;
 	return 0;
 }
 /*
  * Copy the number of GPUs CUDA reported at handle creation into *gpucount.
  * Always returns 0.
  */
 int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
 {
 	const int cuda_total = nvmlh->cuda_gpucount;
 	*gpucount = cuda_total;
 	return 0;
 }
 /*
  * Fetch the NVML product name of the GPU with the given CUDA index.
  *
  * nvmlh     handle from wrap_nvml_create()
  * cudaindex CUDA device index, 0 <= cudaindex < cuda_gpucount
  * namebuf   caller buffer receiving the name
  * bufsize   size of namebuf, passed through to nvmlDeviceGetName
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, or the NVML call fails.
  */
 int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
 {
 	int gpuindex;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
 		return -1;
 	return 0;
 }
 /*
  * Read the GPU temperature sensor of the device with the given CUDA index.
  *
  * nvmlh     handle from wrap_nvml_create()
  * cudaindex CUDA device index, 0 <= cudaindex < cuda_gpucount
  * tempC     receives the value reported by nvmlDeviceGetTemperature
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, or the NVML call fails (*tempC is then left untouched
  * by this wrapper).
  */
 int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
 {
 	wrap_nvmlReturn_t rc;
 	int gpuindex;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
 	if (rc != WRAPNVML_SUCCESS) {
 		return -1;
 	}
 	return 0;
 }
 /*
  * Read the fan speed of the device with the given CUDA index.
  *
  * nvmlh     handle from wrap_nvml_create()
  * cudaindex CUDA device index, 0 <= cudaindex < cuda_gpucount
  * fanpcnt   receives the value reported by nvmlDeviceGetFanSpeed
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, or the NVML call fails.
  */
 int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
 {
 	wrap_nvmlReturn_t rc;
 	int gpuindex;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
 	if (rc != WRAPNVML_SUCCESS) {
 		return -1;
 	}
 	return 0;
 }
 /* Not Supported on 750Ti 340.23 */
 /*
  * Query a clock frequency of the device with the given CUDA index.
  *
  * nvmlh     handle from wrap_nvml_create()
  * cudaindex CUDA device index, 0 <= cudaindex < cuda_gpucount
  * type      one of the wrap_nvmlClockType_t values (NVML_CLOCK_*)
  * freq      receives the value reported by nvmlDeviceGetClockInfo
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, the library does not export nvmlDeviceGetClockInfo, or
  * the NVML call fails.
  */
 int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq)
 {
 	int gpuindex;
 	/* This entry point is optional at create time, so it may be NULL */
 	if (nvmlh->nvmlDeviceGetClockInfo == NULL)
 		return -1;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
 	if (res != WRAPNVML_SUCCESS) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s",
 				nvmlh->nvmlErrorString ? nvmlh->nvmlErrorString(res) : "unknown error");
 		return -1;
 	}
 	return 0;
 }
 /* Not Supported on 750Ti 340.23 */
 /*
  * Query the power draw of the device with the given CUDA index.
  *
  * nvmlh      handle from wrap_nvml_create()
  * cudaindex  CUDA device index, 0 <= cudaindex < cuda_gpucount
  * milliwatts receives the value reported by nvmlDeviceGetPowerUsage
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, or the NVML call fails.
  */
 int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
 {
 	int gpuindex;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
 	if (res != WRAPNVML_SUCCESS) {
 		/* nvmlErrorString is optional at create time, so it may be NULL */
 		if (opt_debug)
 			applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s",
 				nvmlh->nvmlErrorString ? nvmlh->nvmlErrorString(res) : "unknown error");
 		return -1;
 	}
 	return 0;
 }
 /* Not Supported on 750Ti 340.23 */
 /*
  * Query the performance state of the device with the given CUDA index.
  *
  * nvmlh     handle from wrap_nvml_create()
  * cudaindex CUDA device index, 0 <= cudaindex < cuda_gpucount
  * pstate    receives the value reported by nvmlDeviceGetPerformanceState
  *
  * Returns 0 on success, -1 if the index is out of range, the device has no
  * NVML counterpart, the library does not export
  * nvmlDeviceGetPerformanceState, or the NVML call fails.
  */
 int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
 {
 	int gpuindex;
 	/* This entry point is optional at create time, so it may be NULL */
 	if (nvmlh->nvmlDeviceGetPerformanceState == NULL)
 		return -1;
 	/* cuda_nvml_device_id only holds cuda_gpucount entries */
 	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
 		return -1;
 	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
 	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
 		return -1;
 	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
 	if (res != WRAPNVML_SUCCESS) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s",
 				nvmlh->nvmlErrorString ? nvmlh->nvmlErrorString(res) : "unknown error");
 		return -1;
 	}
 	return 0;
 }
 /*
  * Shut NVML down and release everything owned by the handle: the device
  * and PCI lookup tables allocated by wrap_nvml_create(), the loaded
  * library and the handle itself. The handle must not be used afterwards.
  * Passing NULL is a no-op. Always returns 0.
  */
 int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
 {
 	if (nvmlh == NULL)
 		return 0;
 	nvmlh->nvmlShutdown();
 	/* These tables were previously leaked; free(NULL) is harmless */
 	free(nvmlh->devs);
 	free(nvmlh->nvml_pci_domain_id);
 	free(nvmlh->nvml_pci_bus_id);
 	free(nvmlh->nvml_pci_device_id);
 	free(nvmlh->nvml_cuda_device_id);
 	free(nvmlh->cuda_nvml_device_id);
 	wrap_dlclose(nvmlh->nvml_dll);
 	free(nvmlh);
 	return 0;
 }
 /* api functions */
 extern wrap_nvml_handle *nvmlh;
 extern int device_map[8];
 /*
  * API helper: fan speed for the GPU driven by gpu->thr_id, looked up
  * through device_map. Returns 0 when NVML is unavailable or the query
  * fails.
  */
 unsigned int gpu_fanpercent(struct cgpu_info *gpu)
 {
 	unsigned int fan = 0;
 	if (!nvmlh)
 		return fan;
 	wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &fan);
 	return fan;
 }
 /*
  * API helper: temperature for the GPU driven by gpu->thr_id, looked up
  * through device_map. Returns 0.0 when NVML is unavailable or the query
  * fails.
  */
 double gpu_temp(struct cgpu_info *gpu)
 {
 	unsigned int celsius = 0;
 	if (nvmlh)
 		wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &celsius);
 	return (double) celsius;
 }
 /*
  * API helper: SM clock (NVML_CLOCK_SM) for the GPU driven by gpu->thr_id,
  * looked up through device_map. Returns 0 when NVML is unavailable or the
  * query fails.
  */
 unsigned int gpu_clock(struct cgpu_info *gpu)
 {
 	unsigned int sm_clock = 0;
 	if (!nvmlh)
 		return sm_clock;
 	wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &sm_clock);
 	return sm_clock;
 }
 /*
  * API helper: power usage for the GPU driven by gpu->thr_id, looked up
  * through device_map. Returns 0 when NVML is unavailable or the query
  * fails.
  */
 unsigned int gpu_power(struct cgpu_info *gpu)
 {
 	unsigned int milliwatts = 0;
 	if (!nvmlh)
 		return milliwatts;
 	wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &milliwatts);
 	return milliwatts;
 }
 /*
  * API helper: performance state for the GPU driven by gpu->thr_id, looked
  * up through device_map. Returns 0 when NVML is unavailable or the query
  * fails. The value is not cached in the cgpu_info structure.
  */
 int gpu_pstate(struct cgpu_info *gpu)
 {
 	int state = 0;
 	if (!nvmlh)
 		return state;
 	wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &state);
 	return state;
 }
 #if defined(__cplusplus)
 }
 #endif
 #endif /* USE_WRAPNVML */
 /* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
 	nvmlDeviceGetAccountingBufferSize
 	nvmlDeviceGetAccountingMode
 	nvmlDeviceGetAccountingPids
 	nvmlDeviceGetAccountingStats
 	nvmlDeviceGetAPIRestriction
 	nvmlDeviceGetApplicationsClock
 	nvmlDeviceGetAutoBoostedClocksEnabled
 	nvmlDeviceGetBAR1MemoryInfo
 	nvmlDeviceGetBoardId
 	nvmlDeviceGetBrand
 	nvmlDeviceGetBridgeChipInfo
 *	nvmlDeviceGetClockInfo
 	nvmlDeviceGetComputeMode
 	nvmlDeviceGetComputeRunningProcesses
 	nvmlDeviceGetCount
 	nvmlDeviceGetCount_v2
 	nvmlDeviceGetCpuAffinity
 	nvmlDeviceGetCurrentClocksThrottleReasons
 	nvmlDeviceGetCurrPcieLinkGeneration
 	nvmlDeviceGetCurrPcieLinkWidth
 	nvmlDeviceGetDecoderUtilization
 	nvmlDeviceGetDefaultApplicationsClock
 	nvmlDeviceGetDetailedEccErrors
 	nvmlDeviceGetDisplayActive
 	nvmlDeviceGetDisplayMode
 	nvmlDeviceGetDriverModel
 	nvmlDeviceGetEccMode
 	nvmlDeviceGetEncoderUtilization
 	nvmlDeviceGetEnforcedPowerLimit
 *	nvmlDeviceGetFanSpeed
 	nvmlDeviceGetGpuOperationMode
 	nvmlDeviceGetHandleByIndex
 	nvmlDeviceGetHandleByIndex_v2
 	nvmlDeviceGetHandleByPciBusId
 	nvmlDeviceGetHandleByPciBusId_v2
 	nvmlDeviceGetHandleBySerial
 	nvmlDeviceGetHandleByUUID
 	nvmlDeviceGetIndex
 	nvmlDeviceGetInforomConfigurationChecksum
 	nvmlDeviceGetInforomImageVersion
 	nvmlDeviceGetInforomVersion
 	nvmlDeviceGetMaxClockInfo
 	nvmlDeviceGetMaxPcieLinkGeneration
 	nvmlDeviceGetMaxPcieLinkWidth
 	nvmlDeviceGetMemoryErrorCounter
 	nvmlDeviceGetMemoryInfo
 	nvmlDeviceGetMinorNumber
 	nvmlDeviceGetMultiGpuBoard
 	nvmlDeviceGetName
 	nvmlDeviceGetPciInfo
 	nvmlDeviceGetPciInfo_v2
 *	nvmlDeviceGetPerformanceState
 	nvmlDeviceGetPersistenceMode
 	nvmlDeviceGetPowerManagementDefaultLimit
 	nvmlDeviceGetPowerManagementLimit
 	nvmlDeviceGetPowerManagementLimitConstraints
 	nvmlDeviceGetPowerManagementMode
 	nvmlDeviceGetPowerState (deprecated)
 *	nvmlDeviceGetPowerUsage
 	nvmlDeviceGetRetiredPages
 	nvmlDeviceGetRetiredPagesPendingStatus
 	nvmlDeviceGetSamples
 	nvmlDeviceGetSerial
 	nvmlDeviceGetSupportedClocksThrottleReasons
 	nvmlDeviceGetSupportedEventTypes
 	nvmlDeviceGetSupportedGraphicsClocks
 	nvmlDeviceGetSupportedMemoryClocks
 	nvmlDeviceGetTemperature
 	nvmlDeviceGetTemperatureThreshold
 	nvmlDeviceGetTotalEccErrors
 	nvmlDeviceGetUtilizationRates
 	nvmlDeviceGetUUID
 	nvmlDeviceGetVbiosVersion
 	nvmlDeviceGetViolationStatus
 */
--- a/nvml.h
+++ b/nvml.h
@ -0,0 +1,146 @@
 /*
 * A trivial little dlopen()-based wrapper library for the
 * NVIDIA NVML library, to allow runtime discovery of NVML on an
 * arbitrary system.  This is all very hackish and simple-minded, but
 * it serves my immediate needs in the short term until NVIDIA provides
 * a static NVML wrapper library themselves, hopefully in
 * CUDA 6.5 or maybe sometime shortly after.
 *
 * This trivial code is made available under the "new" 3-clause BSD license,
 * and/or any of the GPL licenses you prefer.
 * Feel free to use the code and modify as you see fit.
 *
 * John E. Stone - john.stone@gmail.com
 *
 */
 #if defined(__cplusplus)
 extern "C" {
 #endif
 /*
 * Ugly hacks to avoid dependencies on the real nvml.h until it starts
 * getting included with the CUDA toolkit or a GDK that's got a known
 * install location, etc.
 */
 /*
 * Stand-in for the NVML status-code type used as the return type of every
 * wrapped entry point below. Only the success value (0) is declared here.
 * NOTE(review): presumably WRAPNVML_SUCCESS corresponds to NVML_SUCCESS in
 * the real nvml.h -- confirm against the NVML headers.
 */
 typedef enum wrap_nvmlReturn_enum {
 	WRAPNVML_SUCCESS = 0
 } wrap_nvmlReturn_t;
 /* Device handle type used by the wrapper; declared as an untyped pointer. */
 typedef void * wrap_nvmlDevice_t;
 /*
 * our own version of the PCI info struct
 *
 * NOTE(review): a pointer to this struct is handed to the library through
 * the nvmlDeviceGetPciInfo function pointer of wrap_nvml_handle, so the
 * member order and sizes presumably have to match NVML's own PCI info
 * layout -- confirm against the real nvml.h before changing anything.
 */
 typedef struct {
 	char bus_id_str[16];             /* string form of bus info */
 	unsigned int domain;
 	unsigned int bus;
 	unsigned int device;
 	unsigned int pci_device_id;      /* combined device and vendor id */
 	unsigned int pci_subsystem_id;
 	unsigned int res0;               /* NVML internal use only */
 	unsigned int res1;
 	unsigned int res2;
 	unsigned int res3;
 } wrap_nvmlPciInfo_t;
 /*
 * Clock selectors accepted by the nvmlDeviceGetClockInfo function pointer
 * of wrap_nvml_handle: graphics, SM and memory.
 * NOTE(review): the enum tag and constants are not wrap_-prefixed, so they
 * would presumably collide with the real nvml.h if both headers were ever
 * included in the same translation unit -- confirm before mixing them.
 */
 typedef enum nvmlClockType_t {
 NVML_CLOCK_GRAPHICS = 0,
 NVML_CLOCK_SM = 1,
 NVML_CLOCK_MEM = 2
 } wrap_nvmlClockType_t;
 /*
 * Handle to hold the function pointers for the entry points we need,
 * and the shared library itself.
 *
 * The first group of members is bookkeeping (library handle, GPU counts,
 * PCI id arrays and the NVML<->CUDA device index maps); the second group
 * holds one function pointer per NVML entry point used by the wrapper.
 * NOTE(review): who allocates, fills and frees the pointer members is not
 * visible in this header -- presumably wrap_nvml_create() and
 * wrap_nvml_destroy(); confirm in the implementation.
 */
 typedef struct {
 	void *nvml_dll;                    /* the shared library itself */
 	int nvml_gpucount;                 /* presumably GPUs seen by NVML; see wrap_nvml_get_gpucount() */
 	int cuda_gpucount;                 /* presumably GPUs seen by CUDA; see wrap_cuda_get_gpucount() */
 	/* PCI ids; presumably one array entry per NVML device -- TODO confirm */
 	unsigned int *nvml_pci_domain_id;
 	unsigned int *nvml_pci_bus_id;
 	unsigned int *nvml_pci_device_id;
 	int *nvml_cuda_device_id;          /* map NVML dev to CUDA dev */
 	int *cuda_nvml_device_id;          /* map CUDA dev to NVML dev */
 	wrap_nvmlDevice_t *devs;           /* presumably one device handle per NVML device -- TODO confirm */
 	/* NVML entry points; each member is named after the symbol it stands for */
 	wrap_nvmlReturn_t (*nvmlInit)(void);
 	wrap_nvmlReturn_t (*nvmlDeviceGetCount)(int *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, wrap_nvmlDevice_t *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetClockInfo)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetPciInfo)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetName)(wrap_nvmlDevice_t, char *, int);
 	wrap_nvmlReturn_t (*nvmlDeviceGetTemperature)(wrap_nvmlDevice_t, int, unsigned int *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetFanSpeed)(wrap_nvmlDevice_t, unsigned int *);
 	wrap_nvmlReturn_t (*nvmlDeviceGetPerformanceState)(wrap_nvmlDevice_t, int *); /* enum */
 	wrap_nvmlReturn_t (*nvmlDeviceGetPowerUsage)(wrap_nvmlDevice_t, unsigned int *);
 	char* (*nvmlErrorString)(wrap_nvmlReturn_t);
 	wrap_nvmlReturn_t (*nvmlShutdown)(void);
 } wrap_nvml_handle;
 /*
 * Create a wrapper handle and return a pointer to it.
 *
 * Takes no arguments. The parameter list is spelled (void) so that C
 * translation units get a real prototype; an empty "()" in C leaves the
 * parameters unspecified and disables argument checking. For C++ the two
 * spellings are equivalent, so existing callers are unaffected.
 *
 * NOTE(review): failure behaviour (presumably a NULL return when the NVML
 * library cannot be loaded) and ownership of the returned handle
 * (presumably released with wrap_nvml_destroy()) are defined by the
 * implementation, not by this header -- confirm there.
 */
 wrap_nvml_handle * wrap_nvml_create(void);
 /*
 * Dispose of a wrapper handle.
 * NOTE(review): presumably the counterpart of wrap_nvml_create(); the
 * meaning of the int result and whether nvmlh may be NULL are not stated
 * in this header -- confirm in the implementation.
 */
 int wrap_nvml_destroy(wrap_nvml_handle *nvmlh);
 /*
 * Query the number of GPUs seen by NVML
 *
 * nvmlh:    wrapper handle
 * gpucount: presumably receives the count -- TODO confirm
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);
 /*
 * Query the number of GPUs seen by CUDA
 *
 * nvmlh:    wrapper handle
 * gpucount: presumably receives the count -- TODO confirm
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount);
 /*
 * Query the name of the GPU model from the CUDA device ID
 *
 * nvmlh:    wrapper handle
 * gpuindex: the CUDA device ID referred to above
 * namebuf:  presumably the caller's buffer that receives the name
 * bufsize:  presumably the size of namebuf in bytes -- TODO confirm
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                            int gpuindex,
                            char *namebuf,
                            int bufsize);
 /*
 * Query the current GPU temperature (Celsius), from the CUDA device ID
 *
 * nvmlh:    wrapper handle
 * gpuindex: the CUDA device ID referred to above
 * tempC:    presumably receives the temperature -- TODO confirm
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                         int gpuindex, unsigned int *tempC);
 /*
 * Query the current GPU fan speed (percent) from the CUDA device ID
 *
 * nvmlh:    wrapper handle
 * gpuindex: the CUDA device ID referred to above
 * fanpcnt:  presumably receives the fan speed -- TODO confirm
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                           int gpuindex, unsigned int *fanpcnt);
 /*
 * Query the current GPU clock frequency from the CUDA device ID
 *
 * nvmlh:    wrapper handle
 * gpuindex: the CUDA device ID referred to above
 * clktype:  which clock to read; declared as a plain int, presumably one
 *           of the wrap_nvmlClockType_t values -- TODO confirm
 * freq:     presumably receives the frequency; the unit is not stated
 *           here -- verify against the NVML documentation
 * NOTE(review): the meaning of the int result is not stated in this header.
 */
 int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh,
                           int gpuindex, int clktype, unsigned int *freq);
 /*
 * Query the current GPU power usage in milliwatts from the CUDA device ID
 *
 * This feature is only available on recent GPU generations and may be
 * limited in some cases only to Tesla series GPUs.
 * If the query is run on an unsupported GPU, this routine will return -1.
 *
 * nvmlh:      wrapper handle
 * gpuindex:   the CUDA device ID referred to above
 * milliwatts: presumably receives the power reading -- TODO confirm
 */
 int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                               int gpuindex,
                               unsigned int *milliwatts);
 /*
 * api functions
 *
 * Per-GPU accessors (fan percent, temperature, clock, power, pstate) that
 * take a struct cgpu_info pointer instead of a wrapper handle.
 * NOTE(review): struct cgpu_info is presumably declared in miner.h, which
 * is why it is included here; the include sits inside the extern "C"
 * block opened near the top of this header. Units and error values of the
 * results are not stated in this header -- confirm in the implementation.
 */
 #include "miner.h"
 unsigned int gpu_fanpercent(struct cgpu_info *gpu);
 double gpu_temp(struct cgpu_info *gpu);
 unsigned int gpu_clock(struct cgpu_info *gpu);
 unsigned int gpu_power(struct cgpu_info *gpu);
 int gpu_pstate(struct cgpu_info *gpu);
 #if defined(__cplusplus)
 }
 #endif
@ -7,5 +7,5 @@
 extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
-CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda
+CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so