From ec709af62f3efbe43616e3abffe7ff2b26b66de4 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 11 Nov 2014 15:54:35 +0100 Subject: [PATCH] Store and display average hashrate (benchmark + on share) Displayed data is the average of the last 50 scans in the 5 last minutes Also move cuda common functions in a new file (cuda.cu) Signed-off-by: Tanguy Pruvot --- Makefile.am | 2 +- README.txt | 1 + ccminer.vcxproj | 4 +- ccminer.vcxproj.filters | 8 ++- cpu-miner.c | 38 ++++++++--- cuda.cu | 141 ++++++++++++++++++++++++++++++++++++++++ heavy/heavy.cu | 140 +++------------------------------------ miner.h | 5 ++ stats.cpp | 103 +++++++++++++++++++++++++++++ 9 files changed, 297 insertions(+), 145 deletions(-) create mode 100644 cuda.cu create mode 100644 stats.cpp diff --git a/Makefile.am b/Makefile.am index 1bc4663..3a3caa2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -17,7 +17,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/sys/time.h compat/getopt/getopt.h \ cpu-miner.c util.c crc32.c hefty1.c scrypt.c \ - hashlog.cpp \ + hashlog.cpp stats.cpp cuda.cu \ heavy/heavy.cu \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \ diff --git a/README.txt b/README.txt index bea7d7c..caac95d 100644 --- a/README.txt +++ b/README.txt @@ -155,6 +155,7 @@ features. >>> RELEASE HISTORY <<< Nov. 
11th 2014 v1.4.7 + Average hashrate (based on the 50 last scans) Rewrite blake algo Add the -i (gpu threads/intensity parameter) Add some X11 optimisations based on sp_ commits diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 8eb0499..554b88f 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -239,6 +239,7 @@ + @@ -317,6 +318,7 @@ + true @@ -599,4 +601,4 @@ - \ No newline at end of file + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 334ca64..95543a5 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -189,6 +189,9 @@ Source Files + + Source Files + @@ -307,6 +310,9 @@ + + Source Files\CUDA + Source Files\CUDA @@ -473,4 +479,4 @@ Source Files\CUDA\x11 - \ No newline at end of file + diff --git a/cpu-miner.c b/cpu-miner.c index 8d3f632..e4c6d1a 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -391,11 +391,20 @@ static struct work _ALIGN(64) g_work; static time_t g_work_time; static pthread_mutex_t g_work_lock; - +/** + * Exit app: resets the CUDA device and purges the hashlog and stats caches first + */ void proper_exit(int reason) { cuda_devicereset(); + hashlog_purge_all(); + stats_purge_all(); + +#ifdef WIN32 + timeEndPeriod(1); // else never executed +#endif + exit(reason); } @@ -487,12 +496,14 @@ static void calc_diff(struct work *work, int known) static int share_result(int result, const char *reason) { char s[345]; - double hashrate; + double hashrate = 0.; - hashrate = 0.; pthread_mutex_lock(&stats_lock); - for (int i = 0; i < opt_n_threads; i++) - hashrate += thr_hashrates[i]; + hashrate = stats_get_speed(-1); + if (hashrate == 0.) { + for (int i = 0; i < opt_n_threads; i++) + hashrate += thr_hashrates[i]; + } result ? 
accepted_count++ : rejected_count++; pthread_mutex_unlock(&stats_lock); @@ -1297,6 +1308,7 @@ continue_scan: thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); if (rc > 1) thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec); + stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id]); } pthread_mutex_unlock(&stats_lock); } @@ -1307,15 +1319,18 @@ continue_scan: device_map[thr_id], device_name[device_map[thr_id]], s); } if (thr_id == opt_n_threads - 1) { - double hashrate = 0.; - for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++) - hashrate += thr_hashrates[i]; - - global_hashrate = llround(hashrate); + double hashrate = stats_get_speed(-1); + if (hashrate == 0.) { + for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++) + hashrate += thr_hashrates[i]; + } if (opt_benchmark) { sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.); applog(LOG_NOTICE, "Total: %s kH/s", s); } + + // X-Mining-Hashrate + global_hashrate = llround(hashrate); } if (rc) { @@ -1520,6 +1535,7 @@ static void *stratum_thread(void *userdata) stratum.bloc_height); restart_threads(); hashlog_purge_old(); + stats_purge_old(); } else if (opt_debug && !opt_quiet) { applog(LOG_BLUE, "%s asks job %d for block %d", short_url, strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height); @@ -2096,5 +2112,7 @@ int main(int argc, char *argv[]) applog(LOG_INFO, "workio thread dead, exiting."); + proper_exit(0); + return 0; } diff --git a/cuda.cu b/cuda.cu new file mode 100644 index 0000000..3758335 --- /dev/null +++ b/cuda.cu @@ -0,0 +1,141 @@ +#include +#include +#include + +#include + +#ifndef _WIN32 +#include +#endif + +// include thrust +#include +#include +#include +#include + +#include "miner.h" + +#include "cuda_helper.h" + +extern char *device_name[8]; +extern int device_map[8]; + +// Number of CUDA devices on the system (exits on driver or device-query errors) +extern "C" int cuda_num_devices() +{ + int version; + cudaError_t err = cudaDriverGetVersion(&version); + 
if (err != cudaSuccess) + { + applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?"); + exit(1); + } + + int maj = version / 1000, min = version % 100; // same as in deviceQuery sample + if (maj < 5 || (maj == 5 && min < 5)) + { + applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5); + exit(1); + } + + int GPU_N; + err = cudaGetDeviceCount(&GPU_N); + if (err != cudaSuccess) + { + applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?"); + exit(1); + } + return GPU_N; +} + +extern "C" void cuda_devicenames() +{ + cudaError_t err; + int GPU_N; + err = cudaGetDeviceCount(&GPU_N); + if (err != cudaSuccess) + { + applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?"); + exit(1); + } + + for (int i=0; i < GPU_N; i++) + { + cudaDeviceProp props; + cudaGetDeviceProperties(&props, device_map[i]); + + device_name[i] = strdup(props.name); + } +} + +// Can't be called directly in cpu-miner.c +extern "C" void cuda_devicereset() +{ + cudaDeviceReset(); +} + +static bool substringsearch(const char *haystack, const char *needle, int &match) +{ + int hlen = (int) strlen(haystack); + int nlen = (int) strlen(needle); + for (int i=0; i < hlen; ++i) + { + if (haystack[i] == ' ') continue; + int j=0, x = 0; + while(j < nlen) + { + if (haystack[i+x] == ' ') {++x; continue;} + if (needle[j] == ' ') {++j; continue;} + if (needle[j] == '#') return ++match == needle[j+1]-'0'; + if (tolower(haystack[i+x]) != tolower(needle[j])) break; + ++j; ++x; + } + if (j == nlen) return true; + } + return false; +} + +// Find a CUDA device by name (returns the device index, or -1) +extern "C" int cuda_finddevice(char *name) +{ + int num = cuda_num_devices(); + int match = 0; + for (int i=0; i < num; ++i) + { + cudaDeviceProp props; + if (cudaGetDeviceProperties(&props, i) == cudaSuccess) + if (substringsearch(props.name, name, match)) return i; + } + return -1; +} + 
+// Time synchronization routine from cudaminer, with CPU sleep +typedef struct { double value[8]; } tsumarray; +cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) +{ + cudaError_t result = cudaSuccess; + if (situation >= 0) + { + static std::map tsum; + + double a = 0.95, b = 0.05; + if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence + + double tsync = 0.0; + double tsleep = 0.95 * tsum[situation].value[thr_id]; + if (cudaStreamQuery(stream) == cudaErrorNotReady) + { + usleep((useconds_t)(1e6*tsleep)); + struct timeval tv_start, tv_end; + gettimeofday(&tv_start, NULL); + result = cudaStreamSynchronize(stream); + gettimeofday(&tv_end, NULL); + tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec); + } + if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync); + } + else + result = cudaStreamSynchronize(stream); + return result; +} diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 3ae6f51..f733ef2 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -139,147 +139,23 @@ struct check_nonce_for_remove uint32_t m_startNonce; }; -// Zahl der CUDA Devices im System bestimmen -extern "C" int cuda_num_devices() -{ - int version; - cudaError_t err = cudaDriverGetVersion(&version); - if (err != cudaSuccess) - { - applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?"); - exit(1); - } - - int maj = version / 1000, min = version % 100; // same as in deviceQuery sample - if (maj < 5 || (maj == 5 && min < 5)) - { - applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5); - exit(1); - } - - int GPU_N; - err = cudaGetDeviceCount(&GPU_N); - if (err != cudaSuccess) - { - applog(LOG_ERR, "Unable to query number of CUDA devices! 
Is an nVidia driver installed?"); - exit(1); - } - return GPU_N; -} - -// Gerätenamen holen -extern char *device_name[8]; -extern int device_map[8]; - -extern "C" void cuda_devicenames() -{ - cudaError_t err; - int GPU_N; - err = cudaGetDeviceCount(&GPU_N); - if (err != cudaSuccess) - { - applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?"); - exit(1); - } - - for (int i=0; i < GPU_N; i++) - { - cudaDeviceProp props; - cudaGetDeviceProperties(&props, device_map[i]); - - device_name[i] = strdup(props.name); - } -} - -// Can't be called directly in cpu-miner -extern "C" void cuda_devicereset() -{ - cudaDeviceReset(); -} - -static bool substringsearch(const char *haystack, const char *needle, int &match) -{ - int hlen = (int) strlen(haystack); - int nlen = (int) strlen(needle); - for (int i=0; i < hlen; ++i) - { - if (haystack[i] == ' ') continue; - int j=0, x = 0; - while(j < nlen) - { - if (haystack[i+x] == ' ') {++x; continue;} - if (needle[j] == ' ') {++j; continue;} - if (needle[j] == '#') return ++match == needle[j+1]-'0'; - if (tolower(haystack[i+x]) != tolower(needle[j])) break; - ++j; ++x; - } - if (j == nlen) return true; - } - return false; -} - -// CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1) -extern "C" int cuda_finddevice(char *name) -{ - int num = cuda_num_devices(); - int match = 0; - for (int i=0; i < num; ++i) - { - cudaDeviceProp props; - if (cudaGetDeviceProperties(&props, i) == cudaSuccess) - if (substringsearch(props.name, name, match)) return i; - } - return -1; -} - -// Zeitsynchronisations-Routine von cudaminer mit CPU sleep -typedef struct { double value[8]; } tsumarray; -cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) -{ - cudaError_t result = cudaSuccess; - if (situation >= 0) - { - static std::map tsum; - - double a = 0.95, b = 0.05; - if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence - - double tsync = 0.0; - 
double tsleep = 0.95 * tsum[situation].value[thr_id]; - if (cudaStreamQuery(stream) == cudaErrorNotReady) - { - usleep((useconds_t)(1e6*tsleep)); - struct timeval tv_start, tv_end; - gettimeofday(&tv_start, NULL); - result = cudaStreamSynchronize(stream); - gettimeofday(&tv_end, NULL); - tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec); - } - if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync); - } - else - result = cudaStreamSynchronize(stream); - return result; -} - int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, - const uint32_t *ptarget, uint32_t max_nonce, - unsigned long *hashes_done, uint32_t maxvote, int blocklen); + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote, int blocklen); extern "C" int scanhash_heavy(int thr_id, uint32_t *pdata, - const uint32_t *ptarget, uint32_t max_nonce, - unsigned long *hashes_done, uint32_t maxvote, int blocklen) + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote, int blocklen) { - return scanhash_heavy_cpp(thr_id, pdata, - ptarget, max_nonce, hashes_done, maxvote, blocklen); + return scanhash_heavy_cpp(thr_id, pdata, + ptarget, max_nonce, hashes_done, maxvote, blocklen); } -extern bool opt_benchmark; int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, - const uint32_t *ptarget, uint32_t max_nonce, - unsigned long *hashes_done, uint32_t maxvote, int blocklen) + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done, uint32_t maxvote, int blocklen) { const uint32_t first_nonce = pdata[19]; /* to check */ // CUDA will process thousands of threads. 
diff --git a/miner.h b/miner.h
index 13faecd..3671044 100644
--- a/miner.h
+++ b/miner.h
@@ -470,6 +470,11 @@ void hashlog_purge_job(char* jobid);
 void hashlog_purge_all(void);
 void hashlog_dump_job(char* jobid);
 
+void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate);
+double stats_get_speed(int thr_id);
+void stats_purge_old(void);
+void stats_purge_all(void);
+
 struct thread_q;
 
 extern struct thread_q *tq_new(void);
diff --git a/stats.cpp b/stats.cpp
new file mode 100644
index 0000000..c67e647
--- /dev/null
+++ b/stats.cpp
@@ -0,0 +1,112 @@
+/**
+ * Stats place holder
+ *
+ * Note: this source is C++ (requires std::map)
+ *
+ * tpruvot@github 2014
+ */
+#include <string.h>
+#include <time.h>
+#include <map>
+
+#include "miner.h"
+
+/* One stored scan result */
+struct stats_data {
+	uint32_t tm_stat;   /* time(NULL) when the record was stored */
+	uint32_t hashcount;
+	double hashrate;
+	uint8_t thr_id;
+};
+
+/*
+ * Recorded scans, keyed by (sequence number << 8) | thread id so that map
+ * order is chronological across all threads.
+ * NOTE(review): nothing here takes a lock; presumably callers hold stats_lock,
+ * confirm for every call site.
+ */
+static std::map<uint64_t, stats_data> tlastscans;
+static uint64_t uid = 0;
+
+#define STATS_PURGE_TIMEOUT (5*60)
+
+/**
+ * Store speed per thread (todo: compute here)
+ *
+ * Scans below 1000 hashes or with a zero hashrate are not stored; the
+ * sequence number still advances for them.
+ */
+extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate)
+{
+	const uint8_t thr = (uint8_t) (0xff & thr_id);
+	const uint64_t key = (uid++ << 8) | thr;
+	stats_data data;
+
+	if (hashcount < 1000 || !hashrate)
+		return;
+
+	memset(&data, 0, sizeof(data));
+	data.thr_id = thr;
+	data.tm_stat = (uint32_t) time(NULL);
+	data.hashcount = hashcount;
+	data.hashrate = hashrate;
+	tlastscans[key] = data;
+}
+
+/**
+ * Get the computed average speed
+ * @param thr_id int (-1 for all threads)
+ * @return mean hashrate of up to 50 of the newest matching records, 0. when there are none
+ */
+extern "C" double stats_get_speed(int thr_id)
+{
+	const uint8_t thr = (uint8_t) (0xff & thr_id);
+	double speed = 0.;
+	int records = 0;
+
+	/* newest first; reverse iterators never dereference end() */
+	std::map<uint64_t, stats_data>::reverse_iterator i = tlastscans.rbegin();
+	for (; i != tlastscans.rend() && records < 50; ++i) {
+		if ((i->first >> 8) <= 3) /* ignore firsts */
+			continue;
+		if (thr_id != -1 && i->second.thr_id != thr)
+			continue;
+		if (i->second.hashcount > 1000) {
+			speed += i->second.hashrate;
+			records++;
+		}
+	}
+	if (!records)
+		return 0.;
+	return speed / (1.0 * records);
+}
+
+/**
+ * Remove old entries to reduce memory usage
+ */
+extern "C" void stats_purge_old(void)
+{
+	int deleted = 0;
+	uint32_t now = (uint32_t) time(NULL);
+	uint32_t sz = (uint32_t) tlastscans.size();
+	std::map<uint64_t, stats_data>::iterator i = tlastscans.begin();
+	while (i != tlastscans.end()) {
+		if ((now - i->second.tm_stat) > STATS_PURGE_TIMEOUT) {
+			deleted++;
+			tlastscans.erase(i++);
+		}
+		else ++i;
+	}
+	if (opt_debug && deleted) {
+		applog(LOG_DEBUG, "stats: %d/%u purged", deleted, sz);
+	}
+}
+
+/**
+ * Reset the cache
+ */
+extern "C" void stats_purge_all(void)
+{
+	tlastscans.clear();
+}
+