Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans within the last 5 minutes. Also move the common CUDA functions into a new file (cuda.cu). Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
2014-11-11 15:54:35 +01:00 · 2014-11-11 15:54:35 +01:00 · ec709af62f
commit ec709af62f
parent 8259ed7f29
9 changed files with 297 additions and 145 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -17,7 +17,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/inttypes.h compat/stdbool.h compat/unistd.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
-			  hashlog.cpp \
+			  hashlog.cpp stats.cpp cuda.cu \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
--- a/README.txt
+++ b/README.txt
@ -155,6 +155,7 @@ features.
 >>> RELEASE HISTORY <<<

  Nov. 11th 2014  v1.4.7
+                  Average hashrate (based on the 50 last scans)
                  Rewrite blake algo
                  Add the -i (gpu threads/intensity parameter)
                  Add some X11 optimisations based on sp_ commits
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -239,6 +239,7 @@
    <ClCompile Include="fuguecoin.cpp" />
    <ClCompile Include="groestlcoin.cpp" />
    <ClCompile Include="hashlog.cpp" />
+    <ClCompile Include="stats.cpp" />
    <ClCompile Include="hefty1.c" />
    <ClCompile Include="myriadgroestl.cpp" />
    <ClCompile Include="scrypt.c">
@ -317,6 +318,7 @@
    <ClInclude Include="uint256.h" />
  </ItemGroup>
  <ItemGroup>
+    <CudaCompile Include="cuda.cu" />
    <CudaCompile Include="bitslice_transformations_quad.cu">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </CudaCompile>
@ -599,4 +601,4 @@
  <Target Name="AfterClean">
    <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
  </Target>
-</Project>
+</Project>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -189,6 +189,9 @@
    <ClCompile Include="hashlog.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="stats.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="compat.h">
@ -307,6 +310,9 @@
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
+    <CudaCompile Include="cuda.cu">
+      <Filter>Source Files\CUDA</Filter>
+    </CudaCompile>
    <CudaCompile Include="cuda_fugue256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
@ -473,4 +479,4 @@
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
  </ItemGroup>
-</Project>
+</Project>
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -391,11 +391,20 @@ static struct work _ALIGN(64) g_work;
 static time_t g_work_time;
 static pthread_mutex_t g_work_lock;

-
+/**
+ * Exit app
+ *
+ * Single exit point of the miner: calls cuda_devicereset(), then
+ * hashlog_purge_all() and stats_purge_all(), and finally terminates
+ * the process with exit(). On WIN32 builds timeEndPeriod(1) is also
+ * called here, right before exiting.
+ *
+ * @param reason status code handed over to exit()
+ */
 void proper_exit(int reason)
 {
 	cuda_devicereset();
+
 	hashlog_purge_all();
+	stats_purge_all();
+
+#ifdef WIN32
+	timeEndPeriod(1); // else never executed
+#endif
+
 	exit(reason);
 }

@ -487,12 +496,14 @@ static void calc_diff(struct work *work, int known)
 static int share_result(int result, const char *reason)
 {
 	char s[345];
-	double hashrate;
+	double hashrate = 0.;

-	hashrate = 0.;
 	pthread_mutex_lock(&stats_lock);
-	for (int i = 0; i < opt_n_threads; i++)
-		hashrate += thr_hashrates[i];
+	hashrate = stats_get_speed(-1);
+	if (hashrate == 0.) {
+		for (int i = 0; i < opt_n_threads; i++)
+			hashrate += thr_hashrates[i];
+	}
 	result ? accepted_count++ : rejected_count++;
 	pthread_mutex_unlock(&stats_lock);

@ -1297,6 +1308,7 @@ continue_scan:
 				thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
 				if (rc > 1)
 					thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
+				stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id]);
 			}
 			pthread_mutex_unlock(&stats_lock);
 		}
@ -1307,15 +1319,18 @@ continue_scan:
 				device_map[thr_id], device_name[device_map[thr_id]], s);
 		}
 		if (thr_id == opt_n_threads - 1) {
-			double hashrate = 0.;
-			for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
-				hashrate += thr_hashrates[i];
-
-			global_hashrate = llround(hashrate);
+			double hashrate = stats_get_speed(-1);
+			if (hashrate == 0.) {
+				for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
+					hashrate += thr_hashrates[i];
+			}
 			if (opt_benchmark) {
 				sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.);
 				applog(LOG_NOTICE, "Total: %s kH/s", s);
 			}
+
+			// X-Mining-Hashrate
+			global_hashrate = llround(hashrate);
 		}

 		if (rc) {
@ -1520,6 +1535,7 @@ static void *stratum_thread(void *userdata)
 						stratum.bloc_height);
 				restart_threads();
 				hashlog_purge_old();
+				stats_purge_old();
 			} else if (opt_debug && !opt_quiet) {
 					applog(LOG_BLUE, "%s asks job %d for block %d", short_url,
 						strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
@ -2096,5 +2112,7 @@ int main(int argc, char *argv[])

 	applog(LOG_INFO, "workio thread dead, exiting.");

+	proper_exit(0);
+
 	return 0;
 }
--- a/cuda.cu
+++ b/cuda.cu
@ -0,0 +1,141 @@
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+
+#include <map>
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+// include thrust
+#include <thrust/version.h>
+#include <thrust/remove.h>
+#include <thrust/device_vector.h>
+#include <thrust/iterator/constant_iterator.h>
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern char *device_name[8];
+extern int device_map[8];
+
+/**
+ * Number of CUDA devices on the system.
+ *
+ * Also checks that the installed driver supports at least the CUDA 5.5 API.
+ * The process is terminated with exit(1) when the driver version or the
+ * device count cannot be queried, or when the driver is too old.
+ *
+ * @return device count reported by cudaGetDeviceCount()
+ */
+extern "C" int cuda_num_devices()
+{
+	int version;
+	cudaError_t err = cudaDriverGetVersion(&version);
+	if (err != cudaSuccess)
+	{
+		applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
+		exit(1);
+	}
+
+	// the version is encoded as 1000 * major + 10 * minor (5050 for CUDA 5.5),
+	// so the minor number is the tens digit, as decoded in the deviceQuery sample
+	int maj = version / 1000, min = (version % 100) / 10;
+	if (maj < 5 || (maj == 5 && min < 5))
+	{
+		applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
+		exit(1);
+	}
+
+	int GPU_N;
+	err = cudaGetDeviceCount(&GPU_N);
+	if (err != cudaSuccess)
+	{
+		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
+		exit(1);
+	}
+	return GPU_N;
+}
+
+/**
+ * Fill device_name[] with a heap copy (strdup) of the name of each CUDA
+ * device, using device_map[] to translate the index into a device id.
+ *
+ * The process is terminated with exit(1) when the device count cannot be
+ * queried. A device whose properties cannot be read gets the placeholder
+ * name "Unknown" instead of a copy of an unfilled buffer.
+ */
+extern "C" void cuda_devicenames()
+{
+	cudaError_t err;
+	int GPU_N;
+	err = cudaGetDeviceCount(&GPU_N);
+	if (err != cudaSuccess)
+	{
+		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
+		exit(1);
+	}
+
+	// device_name[] and device_map[] have a fixed capacity: never index past it
+	const int max_devices = (int) (sizeof(device_name) / sizeof(device_name[0]));
+	if (GPU_N > max_devices)
+		GPU_N = max_devices;
+
+	for (int i=0; i < GPU_N; i++)
+	{
+		cudaDeviceProp props;
+		err = cudaGetDeviceProperties(&props, device_map[i]);
+		if (err != cudaSuccess)
+		{
+			// props.name cannot be trusted after a failed query
+			applog(LOG_ERR, "Unable to query properties of CUDA device %d (error %d)", device_map[i], (int) err);
+			device_name[i] = strdup("Unknown");
+			continue;
+		}
+
+		device_name[i] = strdup(props.name);
+	}
+}
+
+/**
+ * C-linkage wrapper around cudaDeviceReset(): the CUDA runtime
+ * can't be called directly in cpu-miner.c
+ */
+extern "C" void cuda_devicereset()
+{
+	(void) cudaDeviceReset();
+}
+
+/**
+ * Case-insensitive substring search which skips spaces in both strings.
+ *
+ * When a '#' is reached in the needle (after the part before it matched),
+ * the search ends there: the match counter is incremented and the result
+ * tells whether it now equals the single digit following the '#'.
+ *
+ * @param haystack string to search in
+ * @param needle   pattern to look for
+ * @param match    running count of '#' hits, kept by the caller between calls
+ * @return true when the needle is found, or when the requested '#' occurrence is reached
+ */
+static bool substringsearch(const char *haystack, const char *needle, int &match)
+{
+	const int hlen = (int) strlen(haystack);
+	const int nlen = (int) strlen(needle);
+	for (int i=0; i < hlen; ++i)
+	{
+		if (haystack[i] == ' ') continue;
+		int j=0, x = 0;
+		while(j < nlen)
+		{
+			if (haystack[i+x] == ' ') {++x; continue;}
+			if (needle[j] == ' ') {++j; continue;}
+			if (needle[j] == '#') return ++match == needle[j+1]-'0';
+			// tolower() is only defined for unsigned char values (or EOF):
+			// a plain char holding a non-ASCII byte may be negative
+			if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
+			++j; ++x;
+		}
+		if (j == nlen) return true;
+	}
+	return false;
+}
+
+// Find a CUDA device by name (returns the device index, or -1 when none matches)
+extern "C" int cuda_finddevice(char *name)
+{
+	const int device_count = cuda_num_devices();
+	int match = 0;
+	int dev = 0;
+	while (dev < device_count)
+	{
+		cudaDeviceProp props;
+		// devices whose properties cannot be read are skipped
+		const bool queried = (cudaGetDeviceProperties(&props, dev) == cudaSuccess);
+		if (queried && substringsearch(props.name, name, match))
+			return dev;
+		++dev;
+	}
+	return -1;
+}
+
+// Stream synchronisation routine taken from cudaminer, which sleeps on the CPU
+// for most of the expected wait before blocking on the stream.
+// One running average of the wait time is kept per situation and thread index.
+typedef struct { double value[8]; } tsumarray;
+cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
+{
+	// negative situation: plain blocking synchronisation, no timing bookkeeping
+	if (situation < 0)
+		return cudaStreamSynchronize(stream);
+
+	static std::map<int, tsumarray> tsum;
+
+	// a situation seen for the first time uses equal weights (faster initial convergence)
+	const bool first_seen = (tsum.find(situation) == tsum.end());
+	const double a = first_seen ? 0.5 : 0.95;
+	const double b = first_seen ? 0.5 : 0.05;
+
+	// running average for this situation/thread (the entry is created on first use)
+	double &average = tsum[situation].value[thr_id];
+	const double tsleep = 0.95 * average;
+	double tsync = 0.0;
+	cudaError_t result = cudaSuccess;
+
+	if (cudaStreamQuery(stream) == cudaErrorNotReady)
+	{
+		// sleep for 95% of the average, then measure how long the remaining wait takes
+		usleep((useconds_t)(1e6*tsleep));
+		struct timeval tv_start, tv_end;
+		gettimeofday(&tv_start, NULL);
+		result = cudaStreamSynchronize(stream);
+		gettimeofday(&tv_end, NULL);
+		tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
+	}
+
+	if (tsync >= 0)
+		average = a * average + b * (tsleep+tsync);
+
+	return result;
+}
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@ -139,147 +139,23 @@ struct check_nonce_for_remove
    uint32_t  m_startNonce;
 };

-// Zahl der CUDA Devices im System bestimmen
-extern "C" int cuda_num_devices()
-{
-    int version;
-    cudaError_t err = cudaDriverGetVersion(&version);
-    if (err != cudaSuccess)
-    {
-        applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
-        exit(1);
-    }
-
-    int maj = version / 1000, min = version % 100; // same as in deviceQuery sample
-    if (maj < 5 || (maj == 5 && min < 5))
-    {
-        applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
-        exit(1);
-    }
-
-    int GPU_N;
-    err = cudaGetDeviceCount(&GPU_N);
-    if (err != cudaSuccess)
-    {
-        applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
-        exit(1);
-    }
-    return GPU_N;
-}
-
-// Gerätenamen holen
-extern char *device_name[8];
-extern int device_map[8];
-
-extern "C" void cuda_devicenames()
-{
-    cudaError_t err;
-    int GPU_N;
-    err = cudaGetDeviceCount(&GPU_N);
-    if (err != cudaSuccess)
-    {
-        applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
-        exit(1);
-    }
-
-    for (int i=0; i < GPU_N; i++)
-    {
-        cudaDeviceProp props;
-        cudaGetDeviceProperties(&props, device_map[i]);
-
-        device_name[i] = strdup(props.name);
-    }
-}
-
-// Can't be called directly in cpu-miner
-extern "C" void cuda_devicereset()
-{
-    cudaDeviceReset();
-}
-
-static bool substringsearch(const char *haystack, const char *needle, int &match)
-{
-    int hlen = (int) strlen(haystack);
-    int nlen = (int) strlen(needle);
-    for (int i=0; i < hlen; ++i)
-    {
-        if (haystack[i] == ' ') continue;
-        int j=0, x = 0;
-        while(j < nlen)
-        {
-            if (haystack[i+x] == ' ') {++x; continue;}
-            if (needle[j] == ' ') {++j; continue;}
-            if (needle[j] == '#') return ++match == needle[j+1]-'0';
-            if (tolower(haystack[i+x]) != tolower(needle[j])) break;
-            ++j; ++x;
-        }
-        if (j == nlen) return true;
-    }
-    return false;
-}
-
-// CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1)
-extern "C" int cuda_finddevice(char *name)
-{
-    int num = cuda_num_devices();
-    int match = 0;
-    for (int i=0; i < num; ++i)
-    {
-        cudaDeviceProp props;
-        if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
-            if (substringsearch(props.name, name, match)) return i;
-    }
-    return -1;
-}
-
-// Zeitsynchronisations-Routine von cudaminer mit CPU sleep
-typedef struct { double value[8]; } tsumarray;
-cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
-{
-    cudaError_t result = cudaSuccess;
-    if (situation >= 0)
-    {   
-        static std::map<int, tsumarray> tsum;
-
-        double a = 0.95, b = 0.05;
-        if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence
-
-        double tsync = 0.0;
-        double tsleep = 0.95 * tsum[situation].value[thr_id];
-        if (cudaStreamQuery(stream) == cudaErrorNotReady)
-        {
-            usleep((useconds_t)(1e6*tsleep));
-            struct timeval tv_start, tv_end;
-            gettimeofday(&tv_start, NULL);
-            result = cudaStreamSynchronize(stream);
-            gettimeofday(&tv_end, NULL);
-            tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
-        }
-        if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync);
-    }
-    else
-        result = cudaStreamSynchronize(stream);
-    return result;
-}
-
 int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
- const uint32_t *ptarget, uint32_t max_nonce,
- unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done, uint32_t maxvote, int blocklen);

 extern "C"
 int scanhash_heavy(int thr_id, uint32_t *pdata,
- const uint32_t *ptarget, uint32_t max_nonce,
- unsigned long *hashes_done, uint32_t maxvote, int blocklen)
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done, uint32_t maxvote, int blocklen)
 {
- return scanhash_heavy_cpp(thr_id, pdata,
-  ptarget, max_nonce, hashes_done, maxvote, blocklen);
+    return scanhash_heavy_cpp(thr_id, pdata,
+    ptarget, max_nonce, hashes_done, maxvote, blocklen);
 }

-extern bool opt_benchmark;

 int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
- const uint32_t *ptarget, uint32_t max_nonce,
- unsigned long *hashes_done, uint32_t maxvote, int blocklen)
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done, uint32_t maxvote, int blocklen)
 {
    const uint32_t first_nonce = pdata[19]; /* to check */
    // CUDA will process thousands of threads.
--- a/miner.h
+++ b/miner.h
@ -470,6 +470,11 @@ void hashlog_purge_job(char* jobid);
 void hashlog_purge_all(void);
 void hashlog_dump_job(char* jobid);

+void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate);
+double stats_get_speed(int thr_id);
+void stats_purge_old(void);
+void stats_purge_all(void);
+
 struct thread_q;

 extern struct thread_q *tq_new(void);
--- a/stats.cpp
+++ b/stats.cpp
@ -0,0 +1,103 @@
+/**
+ * Stats place holder
+ *
+ * Note: this source is C++ (requires std::map)
+ *
+ * tpruvot@github 2014
+ */
+#include <stdlib.h>
+#include <memory.h>
+#include <map>
+
+#include "miner.h"
+
+/**
+ * One recorded scan, as stored by stats_remember_speed()
+ */
+struct stats_data {
+	uint32_t tm_stat;   // time(NULL) at the moment the record was stored
+	uint32_t hashcount; // hash count reported for the scan
+	double hashrate;    // hashrate reported for the scan
+	uint8_t thr_id;     // thread value which is also placed in the top byte of the map key
+};
+
+// recorded scans, ordered by key (the key is built in stats_remember_speed)
+static std::map<uint64_t, stats_data> tlastscans;
+// running counter used for the low part of the map key
+static uint64_t uid = 0;
+
+// maximum age of a record, in seconds (parenthesized so that the macro
+// expands as a single value inside any expression)
+#define STATS_PURGE_TIMEOUT (5*60)
+
+/**
+ * Store speed per thread (todo: compute here)
+ *
+ * The record is stored under a key made of the thread id (low 8 bits of
+ * thr_id) in the top byte, plus a running counter. Samples with less than
+ * 1000 hashes or a null hashrate are ignored, but still consume a counter
+ * value.
+ *
+ * NOTE(review): no locking is done here; the visible caller holds
+ * stats_lock - confirm that every caller does.
+ *
+ * @param thr_id    thread index
+ * @param hashcount hash count of the scan
+ * @param hashrate  hashrate of the scan
+ */
+extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate)
+{
+	// bitwise mask: a logical && would collapse every thread id to 0 or 1
+	const uint64_t thr = (uint64_t) (thr_id & 0xff);
+	const uint64_t key = (thr << 56) + (uid++ % UINT_MAX);
+	stats_data data;
+
+	if (hashcount < 1000 || !hashrate)
+		return;
+
+	memset(&data, 0, sizeof(data));
+	data.thr_id = (uint8_t) thr;
+	data.tm_stat = (uint32_t) time(NULL);
+	data.hashcount = hashcount;
+	data.hashrate = hashrate;
+	tlastscans[key] = data;
+}
+
+/**
+ * Get the computed average speed: the mean hashrate of up to 50 stored
+ * records, walking the map from its highest key downwards.
+ *
+ * Records whose counter part is 3 or less are skipped ("ignore firsts"),
+ * as well as records with a hash count of 1000 or less.
+ *
+ * NOTE(review): with thr_id == -1 the result is the mean of individual
+ * per-thread samples, not the sum over all threads - confirm that this is
+ * what the callers expect.
+ *
+ * @param thr_id int (-1 for all threads)
+ * @return the average hashrate, or 0 when no record qualifies
+ */
+extern "C" double stats_get_speed(int thr_id)
+{
+	const uint64_t thr = (uint64_t) (thr_id & 0xff);
+	double speed = 0.;
+	int records = 0;
+
+	// reverse iterators: end() is never dereferenced and the first element is visited too
+	std::map<uint64_t, stats_data>::reverse_iterator i = tlastscans.rbegin();
+	for (; i != tlastscans.rend() && records < 50; ++i) {
+		if ((i->first & UINT_MAX) <= 3) /* ignore firsts */
+			continue;
+		// compare the whole top byte: a masked test would match every record for thread 0
+		if (thr_id != -1 && (i->first >> 56) != thr)
+			continue;
+		if (i->second.hashcount > 1000) {
+			speed += i->second.hashrate;
+			records++;
+		}
+	}
+	if (!records)
+		return 0.;
+	return speed / (1.0 * records);
+}
+
+/**
+ * Remove old entries to reduce memory usage
+ *
+ * Every record stored more than STATS_PURGE_TIMEOUT seconds ago is erased.
+ * With opt_debug set, the number of purged records is logged.
+ */
+extern "C" void stats_purge_old(void)
+{
+	int deleted = 0;
+	uint32_t now = (uint32_t) time(NULL);
+	uint32_t sz = (uint32_t) tlastscans.size();
+	std::map<uint64_t, stats_data>::iterator i = tlastscans.begin();
+	while (i != tlastscans.end()) {
+		if ((now - i->second.tm_stat) > STATS_PURGE_TIMEOUT) {
+			deleted++;
+			// post-increment: the iterator moves on before its element is erased
+			tlastscans.erase(i++);
+		}
+		else ++i;
+	}
+	if (opt_debug && deleted) {
+		// "stats" prefix: these records are not hashlog entries
+		applog(LOG_DEBUG, "stats: %d/%u purged", deleted, (unsigned) sz);
+	}
+}
+
+/**
+ * Drop every stored record
+ */
+extern "C" void stats_purge_all(void)
+{
+	tlastscans.erase(tlastscans.begin(), tlastscans.end());
+}
+