Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans within the last 5 minutes. Also move the common CUDA functions to a new file (cuda.cu). Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
10 years ago · ec709af62f
9 changed files with 297 additions and 145 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -17,7 +17,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/inttypes.h compat/stdbool.h compat/unistd.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
-			  hashlog.cpp \
+			  hashlog.cpp stats.cpp cuda.cu \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
--- a/README.txt
+++ b/README.txt
@ -155,6 +155,7 @@ features.
 >>> RELEASE HISTORY <<<
  Nov. 11th 2014  v1.4.7
                  Average hashrate (based on the 50 last scans)
                  Rewrite blake algo
                  Add the -i (gpu threads/intensity parameter)
                  Add some X11 optimisations based on sp_ commits
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -239,6 +239,7 @@
    <ClCompile Include="fuguecoin.cpp" />
    <ClCompile Include="groestlcoin.cpp" />
    <ClCompile Include="hashlog.cpp" />
    <ClCompile Include="stats.cpp" />
    <ClCompile Include="hefty1.c" />
    <ClCompile Include="myriadgroestl.cpp" />
    <ClCompile Include="scrypt.c">
@ -317,6 +318,7 @@
    <ClInclude Include="uint256.h" />
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda.cu" />
    <CudaCompile Include="bitslice_transformations_quad.cu">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </CudaCompile>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -189,6 +189,9 @@
    <ClCompile Include="hashlog.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="stats.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="compat.h">
@ -307,6 +310,9 @@
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="cuda_fugue256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -391,11 +391,20 @@ static struct work _ALIGN(64) g_work;
 static time_t g_work_time;
 static pthread_mutex_t g_work_lock;
-
+/**
 * Exit app
 */
 void proper_exit(int reason)
 {
 	// reset the CUDA device through the C-linkage wrapper from the CUDA sources
 	cuda_devicereset();
 	// release what the hashlog and stats modules still hold before leaving
 	hashlog_purge_all();
 	stats_purge_all();
 #ifdef WIN32
 	timeEndPeriod(1); // else never executed
 #endif
 	// terminate the process, "reason" becomes the exit status
 	exit(reason);
 }
@ -487,12 +496,14 @@ static void calc_diff(struct work *work, int known)
 static int share_result(int result, const char *reason)
 {
 	char s[345];
-	double hashrate;
+	double hashrate = 0.;
 	hashrate = 0.;
 	pthread_mutex_lock(&stats_lock);
 	hashrate = stats_get_speed(-1);
 	if (hashrate == 0.) {
 		for (int i = 0; i < opt_n_threads; i++)
 			hashrate += thr_hashrates[i];
 	}
 	result ? accepted_count++ : rejected_count++;
 	pthread_mutex_unlock(&stats_lock);
@ -1297,6 +1308,7 @@ continue_scan:
 				thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
 				if (rc > 1)
 					thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
 				stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id]);
 			}
 			pthread_mutex_unlock(&stats_lock);
 		}
@ -1307,15 +1319,18 @@ continue_scan:
 				device_map[thr_id], device_name[device_map[thr_id]], s);
 		}
 		if (thr_id == opt_n_threads - 1) {
-			double hashrate = 0.;
+			double hashrate = stats_get_speed(-1);
 			if (hashrate == 0.) {
 				for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
 					hashrate += thr_hashrates[i];
-
+			}
 			global_hashrate = llround(hashrate);
 			if (opt_benchmark) {
 				sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.);
 				applog(LOG_NOTICE, "Total: %s kH/s", s);
 			}
 			// X-Mining-Hashrate
 			global_hashrate = llround(hashrate);
 		}
 		if (rc) {
@ -1520,6 +1535,7 @@ static void *stratum_thread(void *userdata)
 						stratum.bloc_height);
 				restart_threads();
 				hashlog_purge_old();
 				stats_purge_old();
 			} else if (opt_debug && !opt_quiet) {
 					applog(LOG_BLUE, "%s asks job %d for block %d", short_url,
 						strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
@ -2096,5 +2112,7 @@ int main(int argc, char *argv[])
 	applog(LOG_INFO, "workio thread dead, exiting.");
 	proper_exit(0);
 	return 0;
 }
--- a/cuda.cu
+++ b/cuda.cu
@ -0,0 +1,141 @@
 #include <stdio.h>
 #include <memory.h>
 #include <string.h>
 #include <map>
 #ifndef _WIN32
 #include <unistd.h>
 #endif
 // include thrust
 #include <thrust/version.h>
 #include <thrust/remove.h>
 #include <thrust/device_vector.h>
 #include <thrust/iterator/constant_iterator.h>
 #include "miner.h"
 #include "cuda_helper.h"
 extern char *device_name[8];
 extern int device_map[8];
 // Number of CUDA devices on the system.
 // Exits the process (status 1) when the driver version or the device count
 // cannot be queried, or when the driver is older than CUDA 5.5.
 extern "C" int cuda_num_devices()
 {
 	int version;
 	cudaError_t err = cudaDriverGetVersion(&version);
 	if (err != cudaSuccess)
 	{
 		applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
 		exit(1);
 	}
 	// version is encoded as 1000 * major + 10 * minor, decode it like the deviceQuery sample
 	int drv_major = version / 1000, drv_minor = (version % 100) / 10;
 	if (drv_major < 5 || (drv_major == 5 && drv_minor < 5))
 	{
 		applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
 		exit(1);
 	}
 	int GPU_N;
 	err = cudaGetDeviceCount(&GPU_N);
 	if (err != cudaSuccess)
 	{
 		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 		exit(1);
 	}
 	return GPU_N;
 }
 // Store a heap copy (strdup) of each GPU name in device_name[i]; the CUDA
 // device queried for slot i is device_map[i].
 // Exits the process (status 1) when the device count cannot be queried.
 extern "C" void cuda_devicenames()
 {
 	cudaError_t err;
 	int GPU_N;
 	err = cudaGetDeviceCount(&GPU_N);
 	if (err != cudaSuccess)
 	{
 		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 		exit(1);
 	}
 	// device_name[] and device_map[] have a fixed number of slots, never index past them
 	const int max_slots = (int) (sizeof(device_name) / sizeof(device_name[0]));
 	if (GPU_N > max_slots)
 		GPU_N = max_slots;
 	for (int i=0; i < GPU_N; i++)
 	{
 		cudaDeviceProp props;
 		err = cudaGetDeviceProperties(&props, device_map[i]);
 		if (err != cudaSuccess)
 		{
 			// props.name is not filled on failure: keep the slot untouched instead of copying garbage
 			applog(LOG_ERR, "Unable to query properties of CUDA device %d (error %d)", device_map[i], (int) err);
 			continue;
 		}
 		device_name[i] = strdup(props.name);
 	}
 }
 // Can't be called directly in cpu-miner.c
 // C-linkage wrapper around cudaDeviceReset(); the returned cudaError_t is discarded.
 extern "C" void cuda_devicereset()
 {
 	cudaDeviceReset();
 }
 // Case-insensitive substring search that skips spaces in both strings.
 // A '#' in needle ends the comparison: match is incremented and the result is
 // true only when the new count equals the digit following '#', so "name#2"
 // selects the second haystack that matches "name" across successive calls.
 // Returns true when needle (or its part before '#', with the right count) is found.
 static bool substringsearch(const char *haystack, const char *needle, int &match)
 {
 	int hlen = (int) strlen(haystack);
 	int nlen = (int) strlen(needle);
 	for (int i=0; i < hlen; ++i)
 	{
 		if (haystack[i] == ' ') continue;
 		int j=0, x = 0;
 		while(j < nlen)
 		{
 			if (haystack[i+x] == ' ') {++x; continue;}
 			if (needle[j] == ' ') {++j; continue;}
 			if (needle[j] == '#') return ++match == needle[j+1]-'0';
 			// tolower() is only defined for unsigned char values (or EOF): cast plain chars first.
 			// The haystack terminator never equals a needle character, so the scan stops at the NUL.
 			if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
 			++j; ++x;
 		}
 		if (j == nlen) return true;
 	}
 	return false;
 }
 // Find a CUDA device by name (returns the device index, or -1 when none matches)
 extern "C" int cuda_finddevice(char *name)
 {
 	const int device_count = cuda_num_devices();
 	// one counter for the whole scan, so a "#N" suffix counts matches across devices
 	int hits = 0;
 	int dev = 0;
 	while (dev < device_count)
 	{
 		cudaDeviceProp info;
 		// devices whose properties cannot be read are skipped
 		const bool readable = (cudaGetDeviceProperties(&info, dev) == cudaSuccess);
 		if (readable && substringsearch(info.name, name, hits))
 			return dev;
 		++dev;
 	}
 	return -1;
 }
 // Time synchronization routine from cudaminer, using a CPU sleep
 // situation < 0 : plain cudaStreamSynchronize()
 // situation >= 0: sleep for 95% of the averaged wait time recorded for
 //                 (situation, thr_id), then synchronize and update that average
 typedef struct { double value[8]; } tsumarray;
 cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 {
 	if (situation < 0)
 		return cudaStreamSynchronize(stream);
 	static std::map<int, tsumarray> tsum;
 	// the lookup must happen before operator[] creates the (zeroed) entry
 	const bool first_seen = (tsum.find(situation) == tsum.end());
 	// equal weights on the first sample give a faster initial convergence
 	const double keep = first_seen ? 0.5 : 0.95;
 	const double blend = first_seen ? 0.5 : 0.05;
 	double &avg = tsum[situation].value[thr_id];
 	cudaError_t result = cudaSuccess;
 	double tsync = 0.0;
 	const double tsleep = 0.95 * avg;
 	if (cudaStreamQuery(stream) == cudaErrorNotReady)
 	{
 		usleep((useconds_t)(1e6*tsleep));
 		struct timeval t0, t1;
 		gettimeofday(&t0, NULL);
 		result = cudaStreamSynchronize(stream);
 		gettimeofday(&t1, NULL);
 		tsync = 1e-6 * (t1.tv_usec-t0.tv_usec) + (t1.tv_sec-t0.tv_sec);
 	}
 	if (tsync >= 0)
 		avg = keep * avg + blend * (tsleep+tsync);
 	return result;
 }
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@ -139,129 +139,6 @@ struct check_nonce_for_remove
    uint32_t  m_startNonce;
 };
 // Determine the number of CUDA devices on the system.
 // Exits the process (status 1) when the driver version or the device count
 // cannot be queried, or when the driver is older than CUDA 5.5.
 extern "C" int cuda_num_devices()
 {
    int version;
    cudaError_t err = cudaDriverGetVersion(&version);
    if (err != cudaSuccess)
    {
        applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
        exit(1);
    }
    // version is encoded as 1000 * major + 10 * minor, decode it like the deviceQuery sample
    int drv_major = version / 1000, drv_minor = (version % 100) / 10;
    if (drv_major < 5 || (drv_major == 5 && drv_minor < 5))
    {
        applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
        exit(1);
    }
    int GPU_N;
    err = cudaGetDeviceCount(&GPU_N);
    if (err != cudaSuccess)
    {
        applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
        exit(1);
    }
    return GPU_N;
 }
 // Fetch the device names: device_name[i] receives a heap copy (strdup) of the
 // name of CUDA device device_map[i].
 // Exits the process (status 1) when the device count cannot be queried.
 extern char *device_name[8];
 extern int device_map[8];
 extern "C" void cuda_devicenames()
 {
    cudaError_t err;
    int GPU_N;
    err = cudaGetDeviceCount(&GPU_N);
    if (err != cudaSuccess)
    {
        applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
        exit(1);
    }
    // device_name[] and device_map[] have a fixed number of slots, never index past them
    const int max_slots = (int) (sizeof(device_name) / sizeof(device_name[0]));
    if (GPU_N > max_slots)
        GPU_N = max_slots;
    for (int i=0; i < GPU_N; i++)
    {
        cudaDeviceProp props;
        err = cudaGetDeviceProperties(&props, device_map[i]);
        if (err != cudaSuccess)
        {
            // props.name is not filled on failure: keep the slot untouched instead of copying garbage
            applog(LOG_ERR, "Unable to query properties of CUDA device %d (error %d)", device_map[i], (int) err);
            continue;
        }
        device_name[i] = strdup(props.name);
    }
 }
 // Can't be called directly in cpu-miner
 // C-linkage wrapper around cudaDeviceReset(); the returned cudaError_t is discarded.
 extern "C" void cuda_devicereset()
 {
    cudaDeviceReset();
 }
 // Case-insensitive substring search that skips spaces in both strings.
 // A '#' in needle ends the comparison: match is incremented and the result is
 // true only when the new count equals the digit following '#', so "name#2"
 // selects the second haystack that matches "name" across successive calls.
 // Returns true when needle (or its part before '#', with the right count) is found.
 static bool substringsearch(const char *haystack, const char *needle, int &match)
 {
    int hlen = (int) strlen(haystack);
    int nlen = (int) strlen(needle);
    for (int i=0; i < hlen; ++i)
    {
        if (haystack[i] == ' ') continue;
        int j=0, x = 0;
        while(j < nlen)
        {
            if (haystack[i+x] == ' ') {++x; continue;}
            if (needle[j] == ' ') {++j; continue;}
            if (needle[j] == '#') return ++match == needle[j+1]-'0';
            // tolower() is only defined for unsigned char values (or EOF): cast plain chars first.
            // The haystack terminator never equals a needle character, so the scan stops at the NUL.
            if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
            ++j; ++x;
        }
        if (j == nlen) return true;
    }
    return false;
 }
 // Find a CUDA device by name (returns the device index, or -1 when none matches)
 extern "C" int cuda_finddevice(char *name)
 {
    const int device_count = cuda_num_devices();
    // one counter for the whole scan, so a "#N" suffix counts matches across devices
    int hits = 0;
    int dev = 0;
    while (dev < device_count)
    {
        cudaDeviceProp info;
        // devices whose properties cannot be read are skipped
        const bool readable = (cudaGetDeviceProperties(&info, dev) == cudaSuccess);
        if (readable && substringsearch(info.name, name, hits))
            return dev;
        ++dev;
    }
    return -1;
 }
 // Time synchronization routine from cudaminer, using a CPU sleep
 // situation < 0 : plain cudaStreamSynchronize()
 // situation >= 0: sleep for 95% of the averaged wait time recorded for
 //                 (situation, thr_id), then synchronize and update that average
 typedef struct { double value[8]; } tsumarray;
 cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 {
    if (situation < 0)
        return cudaStreamSynchronize(stream);
    static std::map<int, tsumarray> tsum;
    // the lookup must happen before operator[] creates the (zeroed) entry
    const bool first_seen = (tsum.find(situation) == tsum.end());
    // equal weights on the first sample give a faster initial convergence
    const double keep = first_seen ? 0.5 : 0.95;
    const double blend = first_seen ? 0.5 : 0.05;
    double &avg = tsum[situation].value[thr_id];
    cudaError_t result = cudaSuccess;
    double tsync = 0.0;
    const double tsleep = 0.95 * avg;
    if (cudaStreamQuery(stream) == cudaErrorNotReady)
    {
        usleep((useconds_t)(1e6*tsleep));
        struct timeval t0, t1;
        gettimeofday(&t0, NULL);
        result = cudaStreamSynchronize(stream);
        gettimeofday(&t1, NULL);
        tsync = 1e-6 * (t1.tv_usec-t0.tv_usec) + (t1.tv_sec-t0.tv_sec);
    }
    if (tsync >= 0)
        avg = keep * avg + blend * (tsleep+tsync);
    return result;
 }
 int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done, uint32_t maxvote, int blocklen);
@ -275,7 +152,6 @@ int scanhash_heavy(int thr_id, uint32_t *pdata,
    ptarget, max_nonce, hashes_done, maxvote, blocklen);
 }
 extern bool opt_benchmark;
 int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
--- a/miner.h
+++ b/miner.h
@ -470,6 +470,11 @@ void hashlog_purge_job(char* jobid);
 void hashlog_purge_all(void);
 void hashlog_dump_job(char* jobid);
 void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate);
 double stats_get_speed(int thr_id);
 void stats_purge_old(void);
 void stats_purge_all(void);
 struct thread_q;
 extern struct thread_q *tq_new(void);
--- a/stats.cpp
+++ b/stats.cpp
@ -0,0 +1,103 @@
 /**
 * Stats place holder
 *
 * Note: this source is C++ (requires std::map)
 *
 * tpruvot@github 2014
 */
 #include <stdlib.h>
 #include <memory.h>
 #include <map>
 #include "miner.h"
 // One recorded scan, as stored in the tlastscans map
 struct stats_data {
 	uint32_t tm_stat;   // time(NULL) when the record was stored, truncated to 32 bits
 	uint32_t hashcount; // number of hashes reported for the scan
 	double hashrate;    // hashrate reported for the scan
 	uint8_t thr_id;     // thread id part of the map key
 };
 // Recorded scans; the key is (thread id << 56) + record counter
 static std::map<uint64_t, stats_data> tlastscans;
 // Record counter used for the low part of the keys, incremented on each stats_remember_speed() call
 static uint64_t uid = 0;
 #define STATS_PURGE_TIMEOUT (5*60) /* seconds; parenthesized so the macro is safe inside any expression */
 /**
 * Store speed per thread (todo: compute here)
 *
 * Scans with less than 1000 hashes or a null hashrate are not stored.
 * NOTE(review): the shared map is not locked here, callers must serialize access.
 *
 * @param thr_id int thread index (only the low 8 bits are kept)
 * @param hashcount uint32_t number of hashes done during the scan
 * @param hashrate double speed measured for the scan
 */
 extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate)
 {
 	// bitwise mask (the former logical && collapsed every thread id to 0 or 1)
 	uint64_t thr = (0xff & thr_id);
 	// thread id in the top byte, record counter below; the counter also advances for rejected scans
 	uint64_t key = (thr << 56) + (uid++ % UINT_MAX);
 	stats_data data;
 	if (hashcount < 1000 || !hashrate)
 		return;
 	memset(&data, 0, sizeof(data));
 	data.thr_id = (uint8_t) thr;
 	data.tm_stat = (uint32_t) time(NULL);
 	data.hashcount = hashcount;
 	data.hashrate = hashrate;
 	tlastscans[key] = data;
 }
 /**
 * Get the computed average speed
 *
 * Averages the hashrate of up to 50 records, walking the map from the highest
 * key down. Records whose counter is 3 or less (first scans) and records with
 * 1000 hashes or less are skipped.
 * NOTE(review): keys sort by thread id first, so with thr_id == -1 the walk
 * starts with the highest thread ids, not strictly the most recent scans.
 *
 * @param thr_id int (-1 for all threads)
 * @return double average hashrate, or 0. when no record qualifies
 */
 extern "C" double stats_get_speed(int thr_id)
 {
 	// bitwise mask (the former logical && collapsed every thread id to 0 or 1)
 	uint64_t thr = (0xff & thr_id);
 	uint64_t keypfx = (thr << 56);
 	// compare the whole thread byte, else a zero prefix (thread 0) matches every key
 	uint64_t keymsk = (0xffULL << 56);
 	double speed = 0.;
 	int records = 0;
 	// reverse iterator: end() is never dereferenced and the first element is visited too
 	std::map<uint64_t, stats_data>::reverse_iterator i = tlastscans.rbegin();
 	while (i != tlastscans.rend() && records < 50) {
 		if ((i->first & UINT_MAX) > 3) /* ignore firsts */
 		if (thr_id == -1 || (keymsk & i->first) == keypfx) {
 			if (i->second.hashcount > 1000) {
 				speed += i->second.hashrate;
 				records++;
 			}
 		}
 		++i;
 	}
 	if (!records)
 		return 0.;
 	return speed / (1.0 * records);
 }
 /**
 * Remove old entries to reduce memory usage
 *
 * Erases every record older than STATS_PURGE_TIMEOUT seconds and, in debug
 * mode, logs how many records were removed.
 * NOTE(review): the shared map is not locked here, callers must serialize access.
 */
 extern "C" void stats_purge_old(void)
 {
 	int deleted = 0;
 	uint32_t now = (uint32_t) time(NULL);
 	uint32_t sz = (uint32_t) tlastscans.size();
 	std::map<uint64_t, stats_data>::iterator i = tlastscans.begin();
 	while (i != tlastscans.end()) {
 		if ((now - i->second.tm_stat) > STATS_PURGE_TIMEOUT) {
 			deleted++;
 			// post-increment keeps a valid iterator while the current node is erased
 			tlastscans.erase(i++);
 		}
 		else ++i;
 	}
 	if (opt_debug && deleted) {
 		// this is the stats cache, not the hashlog; sz is unsigned
 		applog(LOG_DEBUG, "stats: %d/%u purged", deleted, sz);
 	}
 }
 /**
 * Reset the cache
 *
 * Removes every recorded scan from the tlastscans map.
 */
 extern "C" void stats_purge_all(void)
 {
 	tlastscans.clear();
 }