Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans from the last 5 minutes

Also move the common CUDA functions into a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
This commit is contained in:
Tanguy Pruvot 2014-11-11 15:54:35 +01:00
parent 8259ed7f29
commit ec709af62f
9 changed files with 297 additions and 145 deletions

View File

@ -17,7 +17,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c crc32.c hefty1.c scrypt.c \ cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
hashlog.cpp \ hashlog.cpp stats.cpp cuda.cu \
heavy/heavy.cu \ heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \

View File

@ -155,6 +155,7 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
Nov. 11th 2014 v1.4.7 Nov. 11th 2014 v1.4.7
Average hashrate (based on the last 50 scans)
Rewrite blake algo Rewrite blake algo
Add the -i (gpu threads/intensity parameter) Add the -i (gpu threads/intensity parameter)
Add some X11 optimisations based on sp_ commits Add some X11 optimisations based on sp_ commits

View File

@ -239,6 +239,7 @@
<ClCompile Include="fuguecoin.cpp" /> <ClCompile Include="fuguecoin.cpp" />
<ClCompile Include="groestlcoin.cpp" /> <ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hashlog.cpp" /> <ClCompile Include="hashlog.cpp" />
<ClCompile Include="stats.cpp" />
<ClCompile Include="hefty1.c" /> <ClCompile Include="hefty1.c" />
<ClCompile Include="myriadgroestl.cpp" /> <ClCompile Include="myriadgroestl.cpp" />
<ClCompile Include="scrypt.c"> <ClCompile Include="scrypt.c">
@ -317,6 +318,7 @@
<ClInclude Include="uint256.h" /> <ClInclude Include="uint256.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda.cu" />
<CudaCompile Include="bitslice_transformations_quad.cu"> <CudaCompile Include="bitslice_transformations_quad.cu">
<ExcludedFromBuild>true</ExcludedFromBuild> <ExcludedFromBuild>true</ExcludedFromBuild>
</CudaCompile> </CudaCompile>
@ -599,4 +601,4 @@
<Target Name="AfterClean"> <Target Name="AfterClean">
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" /> <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
</Target> </Target>
</Project> </Project>

View File

@ -189,6 +189,9 @@
<ClCompile Include="hashlog.cpp"> <ClCompile Include="hashlog.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="stats.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="compat.h"> <ClInclude Include="compat.h">
@ -307,6 +310,9 @@
</ClInclude> </ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="cuda_fugue256.cu"> <CudaCompile Include="cuda_fugue256.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
@ -473,4 +479,4 @@
<Filter>Source Files\CUDA\x11</Filter> <Filter>Source Files\CUDA\x11</Filter>
</CudaCompile> </CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -391,11 +391,20 @@ static struct work _ALIGN(64) g_work;
static time_t g_work_time; static time_t g_work_time;
static pthread_mutex_t g_work_lock; static pthread_mutex_t g_work_lock;
/**
* Exit app
*/
void proper_exit(int reason) void proper_exit(int reason)
{ {
cuda_devicereset(); cuda_devicereset();
hashlog_purge_all(); hashlog_purge_all();
stats_purge_all();
#ifdef WIN32
timeEndPeriod(1); // else never executed
#endif
exit(reason); exit(reason);
} }
@ -487,12 +496,14 @@ static void calc_diff(struct work *work, int known)
static int share_result(int result, const char *reason) static int share_result(int result, const char *reason)
{ {
char s[345]; char s[345];
double hashrate; double hashrate = 0.;
hashrate = 0.;
pthread_mutex_lock(&stats_lock); pthread_mutex_lock(&stats_lock);
for (int i = 0; i < opt_n_threads; i++) hashrate = stats_get_speed(-1);
hashrate += thr_hashrates[i]; if (hashrate == 0.) {
for (int i = 0; i < opt_n_threads; i++)
hashrate += thr_hashrates[i];
}
result ? accepted_count++ : rejected_count++; result ? accepted_count++ : rejected_count++;
pthread_mutex_unlock(&stats_lock); pthread_mutex_unlock(&stats_lock);
@ -1297,6 +1308,7 @@ continue_scan:
thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
if (rc > 1) if (rc > 1)
thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec); thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id]);
} }
pthread_mutex_unlock(&stats_lock); pthread_mutex_unlock(&stats_lock);
} }
@ -1307,15 +1319,18 @@ continue_scan:
device_map[thr_id], device_name[device_map[thr_id]], s); device_map[thr_id], device_name[device_map[thr_id]], s);
} }
if (thr_id == opt_n_threads - 1) { if (thr_id == opt_n_threads - 1) {
double hashrate = 0.; double hashrate = stats_get_speed(-1);
for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++) if (hashrate == 0.) {
hashrate += thr_hashrates[i]; for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
hashrate += thr_hashrates[i];
global_hashrate = llround(hashrate); }
if (opt_benchmark) { if (opt_benchmark) {
sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.); sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.);
applog(LOG_NOTICE, "Total: %s kH/s", s); applog(LOG_NOTICE, "Total: %s kH/s", s);
} }
// X-Mining-Hashrate
global_hashrate = llround(hashrate);
} }
if (rc) { if (rc) {
@ -1520,6 +1535,7 @@ static void *stratum_thread(void *userdata)
stratum.bloc_height); stratum.bloc_height);
restart_threads(); restart_threads();
hashlog_purge_old(); hashlog_purge_old();
stats_purge_old();
} else if (opt_debug && !opt_quiet) { } else if (opt_debug && !opt_quiet) {
applog(LOG_BLUE, "%s asks job %d for block %d", short_url, applog(LOG_BLUE, "%s asks job %d for block %d", short_url,
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height); strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
@ -2096,5 +2112,7 @@ int main(int argc, char *argv[])
applog(LOG_INFO, "workio thread dead, exiting."); applog(LOG_INFO, "workio thread dead, exiting.");
proper_exit(0);
return 0; return 0;
} }

141
cuda.cu Normal file
View File

@ -0,0 +1,141 @@
#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <map>
#ifndef _WIN32
#include <unistd.h>
#endif
// include thrust
#include <thrust/version.h>
#include <thrust/remove.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include "miner.h"
#include "cuda_helper.h"
extern char *device_name[8];
extern int device_map[8];
/**
 * Count the CUDA devices present on the system.
 *
 * Terminates the process with exit(1) when the driver version cannot be
 * queried, when the driver is older than CUDA 5.5, or when the device count
 * cannot be retrieved.
 *
 * @return the device count reported by cudaGetDeviceCount()
 */
extern "C" int cuda_num_devices()
{
	int version;
	cudaError_t err = cudaDriverGetVersion(&version);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
		exit(1);
	}

	// The version is encoded as 1000 * major + 10 * minor (5.5 => 5050), so the
	// minor number is (version % 100) / 10, as done in the deviceQuery sample.
	const int drv_major = version / 1000;
	const int drv_minor = (version % 100) / 10;
	if (drv_major < 5 || (drv_major == 5 && drv_minor < 5))
	{
		applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
		exit(1);
	}

	int GPU_N;
	err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}
	return GPU_N;
}
/**
 * Fill device_name[] with a heap copy (strdup) of the name of each device
 * listed in device_map[].
 *
 * Terminates the process with exit(1) when the device count cannot be
 * queried. A device whose properties cannot be read is reported and its
 * device_name[] slot is left untouched.
 */
extern "C" void cuda_devicenames()
{
	int GPU_N;
	cudaError_t err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}

	// device_name[] / device_map[] are fixed-size arrays: never index past them,
	// even when the system reports more devices than they can hold.
	const int max_devices = (int) (sizeof(device_name) / sizeof(device_name[0]));
	if (GPU_N > max_devices)
		GPU_N = max_devices;

	for (int i = 0; i < GPU_N; i++)
	{
		cudaDeviceProp props;
		// props is only valid on success; do not strdup() an uninitialised name
		if (cudaGetDeviceProperties(&props, device_map[i]) != cudaSuccess)
		{
			applog(LOG_ERR, "Unable to query properties of CUDA device %d", device_map[i]);
			continue;
		}
		device_name[i] = strdup(props.name);
	}
}
// C-linkage wrapper around cudaDeviceReset(), which can't be called directly from cpu-miner.c
extern "C" void cuda_devicereset()
{
	(void) cudaDeviceReset(); // the result is not inspected
}
/**
 * Case-insensitive substring search that ignores spaces in both strings.
 *
 * A '#' in the needle acts as an occurrence selector: once the text before
 * the '#' has matched at some position, match is incremented and the function
 * returns immediately, true only when the new count equals the single digit
 * that follows the '#'.
 *
 * @param haystack NUL-terminated text to search in
 * @param needle   NUL-terminated pattern, optionally ending in "#<digit>"
 * @param match    running occurrence counter, updated on every '#' hit
 * @return true when the needle (or the selected occurrence) was found
 */
static bool substringsearch(const char *haystack, const char *needle, int &match)
{
	const int hlen = (int) strlen(haystack);
	const int nlen = (int) strlen(needle);

	for (int i = 0; i < hlen; ++i)
	{
		if (haystack[i] == ' ') continue;

		int j = 0, x = 0;
		while (j < nlen)
		{
			if (haystack[i+x] == ' ') { ++x; continue; }
			if (needle[j] == ' ') { ++j; continue; }
			if (needle[j] == '#') return ++match == needle[j+1] - '0';
			// tolower() is only defined for unsigned char values (and EOF):
			// cast so that bytes >= 0x80 are not passed as negative ints
			if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
			++j; ++x;
		}
		if (j == nlen) return true;
	}
	return false;
}
// Find a CUDA device by name (returns the device index, or -1 when nothing matches)
extern "C" int cuda_finddevice(char *name)
{
	const int device_count = cuda_num_devices();
	int match = 0; // occurrence counter carried across all devices by substringsearch()

	for (int dev = 0; dev < device_count; ++dev)
	{
		cudaDeviceProp props;
		// devices whose properties can't be read are skipped
		if (cudaGetDeviceProperties(&props, dev) != cudaSuccess)
			continue;
		if (substringsearch(props.name, name, match))
			return dev;
	}
	return -1;
}
// Time synchronization routine from cudaminer, using a CPU sleep
// One smoothed wait-time estimate (in seconds) per thread index, 8 slots.
typedef struct { double value[8]; } tsumarray;
/**
 * Wait for a stream to finish, sleeping on the CPU for most of the expected
 * wait time before calling cudaStreamSynchronize().
 *
 * @param stream    stream to wait for
 * @param situation key selecting which wait-time estimate to use; a negative
 *                  value skips the sleep logic and synchronizes directly
 * @param thr_id    index into the 8-slot estimate array of that situation
 * @return result of cudaStreamSynchronize(), or cudaSuccess when it was not called
 *
 * NOTE(review): the estimates live in a function-local static std::map that is
 * read and modified here without any visible locking, and thr_id is used as an
 * index into value[8] without a bounds check - confirm that callers serialize
 * access and only pass 0..7.
 */
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{
cudaError_t result = cudaSuccess;
if (situation >= 0)
{
static std::map<int, tsumarray> tsum;
// weights of the running average: a for the old estimate, b for the new sample
double a = 0.95, b = 0.05;
if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence
double tsync = 0.0;
// sleep for 95% of the current estimate (operator[] creates a zeroed entry on first use)
double tsleep = 0.95 * tsum[situation].value[thr_id];
if (cudaStreamQuery(stream) == cudaErrorNotReady)
{
usleep((useconds_t)(1e6*tsleep));
// measure how long the blocking synchronize still takes after the sleep
struct timeval tv_start, tv_end;
gettimeofday(&tv_start, NULL);
result = cudaStreamSynchronize(stream);
gettimeofday(&tv_end, NULL);
tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
}
// fold the observed total wait (sleep + synchronize) into the estimate;
// a negative tsync is ignored (presumably a wall-clock jump - TODO confirm)
if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync);
}
else
result = cudaStreamSynchronize(stream);
return result;
}

View File

@ -139,147 +139,23 @@ struct check_nonce_for_remove
uint32_t m_startNonce; uint32_t m_startNonce;
}; };
// Determine the number of CUDA devices in the system.
// Exits the process when the driver version or the device count cannot be
// queried, or when the driver version check below fails.
extern "C" int cuda_num_devices()
{
int version;
cudaError_t err = cudaDriverGetVersion(&version);
if (err != cudaSuccess)
{
applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
exit(1);
}
// NOTE(review): min holds version % 100 without dividing by 10 - confirm this
// really matches the deviceQuery sample for the driver's version encoding.
int maj = version / 1000, min = version % 100; // same as in deviceQuery sample
if (maj < 5 || (maj == 5 && min < 5))
{
applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
exit(1);
}
int GPU_N;
err = cudaGetDeviceCount(&GPU_N);
if (err != cudaSuccess)
{
applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
exit(1);
}
return GPU_N;
}
// Fetch the device names: stores a strdup() copy of the name of each device
// listed in device_map[] into device_name[].
extern char *device_name[8];
extern int device_map[8];
extern "C" void cuda_devicenames()
{
cudaError_t err;
int GPU_N;
err = cudaGetDeviceCount(&GPU_N);
if (err != cudaSuccess)
{
applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
exit(1);
}
// NOTE(review): GPU_N is not clamped to the 8 entries of device_name/device_map,
// and the result of cudaGetDeviceProperties() is not checked.
for (int i=0; i < GPU_N; i++)
{
cudaDeviceProp props;
cudaGetDeviceProperties(&props, device_map[i]);
device_name[i] = strdup(props.name);
}
}
// C-linkage wrapper around cudaDeviceReset(), which can't be called directly in cpu-miner.
// The result of cudaDeviceReset() is not inspected.
extern "C" void cuda_devicereset()
{
cudaDeviceReset();
}
// Case-insensitive substring search that ignores spaces in both strings.
// A '#' in the needle ends the search at the first position where the text
// before it matched: match is incremented and the function returns whether
// the new count equals the digit following the '#'.
static bool substringsearch(const char *haystack, const char *needle, int &match)
{
int hlen = (int) strlen(haystack);
int nlen = (int) strlen(needle);
for (int i=0; i < hlen; ++i)
{
if (haystack[i] == ' ') continue;
// j walks the needle, x walks the haystack starting at position i
int j=0, x = 0;
while(j < nlen)
{
if (haystack[i+x] == ' ') {++x; continue;}
if (needle[j] == ' ') {++j; continue;}
if (needle[j] == '#') return ++match == needle[j+1]-'0';
if (tolower(haystack[i+x]) != tolower(needle[j])) break;
++j; ++x;
}
// the whole needle was consumed: found
if (j == nlen) return true;
}
return false;
}
// Find a CUDA device by name (returns the device index, or -1)
extern "C" int cuda_finddevice(char *name)
{
int num = cuda_num_devices();
// occurrence counter passed by reference to substringsearch() for every device
int match = 0;
for (int i=0; i < num; ++i)
{
cudaDeviceProp props;
// devices whose properties can't be read are skipped
if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
if (substringsearch(props.name, name, match)) return i;
}
return -1;
}
// Time synchronization routine from cudaminer, using a CPU sleep
// One smoothed wait-time estimate (in seconds) per thread index, 8 slots.
typedef struct { double value[8]; } tsumarray;
// Waits for the stream; when situation >= 0 it first sleeps on the CPU for 95%
// of the running wait-time estimate kept for (situation, thr_id), then updates
// that estimate. A negative situation synchronizes directly.
// NOTE(review): the static map is accessed without visible locking and thr_id
// indexes value[8] unchecked - confirm callers serialize and pass 0..7.
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{
cudaError_t result = cudaSuccess;
if (situation >= 0)
{
static std::map<int, tsumarray> tsum;
// weights of the running average: a for the old estimate, b for the new sample
double a = 0.95, b = 0.05;
if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence
double tsync = 0.0;
double tsleep = 0.95 * tsum[situation].value[thr_id];
if (cudaStreamQuery(stream) == cudaErrorNotReady)
{
usleep((useconds_t)(1e6*tsleep));
// measure how long the blocking synchronize still takes after the sleep
struct timeval tv_start, tv_end;
gettimeofday(&tv_start, NULL);
result = cudaStreamSynchronize(stream);
gettimeofday(&tv_end, NULL);
tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
}
// fold the observed total wait (sleep + synchronize) into the estimate
if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync);
}
else
result = cudaStreamSynchronize(stream);
return result;
}
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen); unsigned long *hashes_done, uint32_t maxvote, int blocklen);
extern "C" extern "C"
int scanhash_heavy(int thr_id, uint32_t *pdata, int scanhash_heavy(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen) unsigned long *hashes_done, uint32_t maxvote, int blocklen)
{ {
return scanhash_heavy_cpp(thr_id, pdata, return scanhash_heavy_cpp(thr_id, pdata,
ptarget, max_nonce, hashes_done, maxvote, blocklen); ptarget, max_nonce, hashes_done, maxvote, blocklen);
} }
extern bool opt_benchmark;
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen) unsigned long *hashes_done, uint32_t maxvote, int blocklen)
{ {
const uint32_t first_nonce = pdata[19]; /* to check */ const uint32_t first_nonce = pdata[19]; /* to check */
// CUDA will process thousands of threads. // CUDA will process thousands of threads.

View File

@ -470,6 +470,11 @@ void hashlog_purge_job(char* jobid);
void hashlog_purge_all(void); void hashlog_purge_all(void);
void hashlog_dump_job(char* jobid); void hashlog_dump_job(char* jobid);
void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate);
double stats_get_speed(int thr_id);
void stats_purge_old(void);
void stats_purge_all(void);
struct thread_q; struct thread_q;
extern struct thread_q *tq_new(void); extern struct thread_q *tq_new(void);

103
stats.cpp Normal file
View File

@ -0,0 +1,103 @@
/**
* Stats place holder
*
* Note: this source is C++ (requires std::map)
*
* tpruvot@github 2014
*/
#include <stdlib.h>
#include <memory.h>
#include <map>
#include "miner.h"
/* One recorded scan */
struct stats_data {
	uint32_t tm_stat;   // time(NULL) when the record was stored
	uint32_t hashcount; // hashes done during the scan
	double hashrate;    // speed reported for the scan
	uint8_t thr_id;     // thread the scan belongs to
};

/* Recorded scans; the key is (thread id << 56) + sequence number */
static std::map<uint64_t, stats_data> tlastscans;
/* Sequence counter used to build the map keys */
static uint64_t uid = 0;

/* Age in seconds after which a record is purged; parenthesised so the macro
 * keeps its value when used inside a larger expression */
#define STATS_PURGE_TIMEOUT (5*60)
/**
 * Store the speed of one scan for a thread (todo: compute here)
 *
 * Scans with fewer than 1000 hashes or a zero hashrate are not stored.
 *
 * @param thr_id    thread the scan belongs to (its low 8 bits form the key prefix)
 * @param hashcount hashes done during the scan
 * @param hashrate  speed measured for the scan
 */
extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate)
{
	// bitwise mask: the low 8 bits of the thread id become the top byte of the key
	const uint64_t thr = (uint64_t) (0xff & thr_id);
	// the sequence number is consumed even when the scan is rejected below
	const uint64_t key = (thr << 56) + (uid++ % UINT_MAX);

	if (hashcount < 1000 || !hashrate)
		return;

	stats_data data;
	memset(&data, 0, sizeof(data));
	data.thr_id = (uint8_t) thr;
	data.tm_stat = (uint32_t) time(NULL);
	data.hashcount = hashcount;
	data.hashrate = hashrate;
	tlastscans[key] = data;
}
/**
* Get the computed average speed
* @param thr_id int (-1 for all threads)
*/
extern "C" double stats_get_speed(int thr_id)
{
uint64_t thr = (0xff && thr_id);
uint64_t keypfx = (thr << 56);
double speed = 0.;
// uint64_t hashcount;
int records = 0;
stats_data data;
std::map<uint64_t, stats_data>::iterator i = tlastscans.end();
while (i != tlastscans.begin() && records < 50) {
if ((i->first & UINT_MAX) > 3) /* ignore firsts */
if (thr_id == -1 || (keypfx & i->first) == keypfx) {
if (i->second.hashcount > 1000) {
speed += i->second.hashrate;
records++;
}
}
i--;
}
if (!records)
return 0.;
return speed / (1.0 * records);
}
/**
 * Remove old entries to reduce memory usage
 *
 * Every record older than STATS_PURGE_TIMEOUT seconds is erased. When
 * opt_debug is set and something was removed, the number of purged records
 * is logged.
 */
extern "C" void stats_purge_old(void)
{
	int deleted = 0;
	const uint32_t now = (uint32_t) time(NULL);
	const uint32_t sz = (uint32_t) tlastscans.size(); // size before purging, for the log

	std::map<uint64_t, stats_data>::iterator i = tlastscans.begin();
	while (i != tlastscans.end()) {
		if ((now - i->second.tm_stat) > STATS_PURGE_TIMEOUT) {
			deleted++;
			// post-increment: advance before the erased node is invalidated
			tlastscans.erase(i++);
		}
		else ++i;
	}

	if (opt_debug && deleted) {
		// tagged "stats" so the line is not mistaken for a hashlog purge
		applog(LOG_DEBUG, "stats: %d/%u purged", deleted, sz);
	}
}
/**
 * Drop every recorded scan
 */
extern "C" void stats_purge_all(void)
{
	tlastscans.erase(tlastscans.begin(), tlastscans.end());
}