Browse Source

Rework benchmark mode and min/max range

This may have been my fault, but benchmark mode was
always recomputing from nonce 0.

Also fix blake when -d 1 is used (a single thread running on the second GPU).

stats: key records by GPU id instead of thread id.
2upstream
Tanguy Pruvot 10 years ago
parent
commit
438308b3a2
  1. 13
      blake32.cu
  2. 86
      ccminer.cpp
  3. 4
      ccminer.vcxproj
  4. 16
      stats.cpp
  5. 4
      x11/x11.cu
  6. 22
      x13/x13.cu
  7. 5
      x15/x14.cu
  8. 2
      x15/x15.cu

13
blake32.cu

@ -17,7 +17,7 @@ extern "C" {
/* threads per block and throughput (intensity) */ /* threads per block and throughput (intensity) */
#define TPB 128 #define TPB 128
extern int opt_n_threads; extern int num_processors;
/* added in sph_blake.c */ /* added in sph_blake.c */
extern "C" int blake256_rounds = 14; extern "C" int blake256_rounds = 14;
@ -416,8 +416,10 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
} }
#endif #endif
if (opt_benchmark) if (opt_benchmark) {
targetHigh = 0x1ULL << 32; targetHigh = 0x1ULL << 32;
((uint32_t*)ptarget)[6] = swab32(0xff);
}
if (opt_tracegpu) { if (opt_tracegpu) {
/* test call from util.c */ /* test call from util.c */
@ -427,9 +429,8 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
} }
if (!init[thr_id]) { if (!init[thr_id]) {
if (opt_n_threads > 1) { if (num_processors > 1)
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
}
CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)));
init[thr_id] = true; init[thr_id] = true;
@ -489,8 +490,8 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
goto exit_scan; goto exit_scan;
} }
else if (opt_debug) { else if (opt_debug) {
applog_hash((uint8_t*)ptarget); applog_hash((uchar*)ptarget);
applog_compare_hash((uint8_t*)vhashcpu,(uint8_t*)ptarget); applog_compare_hash((uchar*)vhashcpu, (uchar*)ptarget);
applog(LOG_DEBUG, "GPU #%d: result for nonce %08x does not validate on CPU!", thr_id, foundNonce); applog(LOG_DEBUG, "GPU #%d: result for nonce %08x does not validate on CPU!", thr_id, foundNonce);
} }
} }

86
ccminer.cpp

@ -816,7 +816,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
if (opt_benchmark) { if (opt_benchmark) {
memset(work->data, 0x55, 76); memset(work->data, 0x55, 76);
work->data[17] = swab32((uint32_t)time(NULL)); //work->data[17] = swab32((uint32_t)time(NULL));
memset(work->data + 19, 0x00, 52); memset(work->data + 19, 0x00, 52);
work->data[20] = 0x80000000; work->data[20] = 0x80000000;
work->data[31] = 0x00000280; work->data[31] = 0x00000280;
@ -1004,11 +1004,11 @@ static void *miner_thread(void *userdata)
} }
while (1) { while (1) {
struct timeval tv_start, tv_end, diff;
unsigned long hashes_done; unsigned long hashes_done;
uint32_t start_nonce; uint32_t start_nonce;
struct timeval tv_start, tv_end, diff; uint32_t scan_time = have_longpoll ? LP_SCANTIME : opt_scantime;
int64_t max64; uint64_t max64, minmax = 0x100000;
uint64_t umax64;
// &work.data[19] // &work.data[19]
int wcmplen = 76; int wcmplen = 76;
@ -1035,7 +1035,7 @@ static void *miner_thread(void *userdata)
stratum_gen_work(&stratum, &g_work); stratum_gen_work(&stratum, &g_work);
} }
} else { } else {
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime; int min_scantime = scan_time;
/* obtain new work from internal workio thread */ /* obtain new work from internal workio thread */
pthread_mutex_lock(&g_work_lock); pthread_mutex_lock(&g_work_lock);
if (time(NULL) - g_work_time >= min_scantime || if (time(NULL) - g_work_time >= min_scantime ||
@ -1065,7 +1065,7 @@ static void *miner_thread(void *userdata)
goto continue_scan; goto continue_scan;
} }
if (memcmp(work.target, g_work.target, sizeof(work.target))) { if (!opt_benchmark && memcmp(work.target, g_work.target, sizeof(work.target))) {
calc_diff(&g_work, 0); calc_diff(&g_work, 0);
if (opt_debug) { if (opt_debug) {
uint64_t target64 = g_work.target[7] * 0x100000000ULL + g_work.target[6]; uint64_t target64 = g_work.target[7] * 0x100000000ULL + g_work.target[6];
@ -1080,56 +1080,59 @@ static void *miner_thread(void *userdata)
} }
} }
if (memcmp(work.data, g_work.data, wcmplen)) { if (memcmp(work.data, g_work.data, wcmplen)) {
#if 0
if (opt_debug) { if (opt_debug) {
#if 0
for (int n=0; n <= (wcmplen-8); n+=8) { for (int n=0; n <= (wcmplen-8); n+=8) {
if (memcmp(work.data + n, g_work.data + n, 8)) { if (memcmp(work.data + n, g_work.data + n, 8)) {
applog(LOG_DEBUG, "job %s work updated at offset %d:", g_work.job_id, n); applog(LOG_DEBUG, "job %s work updated at offset %d:", g_work.job_id, n);
applog_hash((uint8_t*) work.data + n); applog_hash((uchar*) &work.data[n]);
applog_compare_hash((uint8_t*) g_work.data + n, (uint8_t*) work.data + n); applog_compare_hash((uchar*) &g_work.data[n], (uchar*) &work.data[n]);
} }
} }
#endif
} }
#endif
memcpy(&work, &g_work, sizeof(struct work)); memcpy(&work, &g_work, sizeof(struct work));
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr (*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
} else } else
(*nonceptr)++; //?? (*nonceptr)++; //??
work_restart[thr_id].restart = 0;
if (opt_debug) work_restart[thr_id].restart = 0;
applog(LOG_DEBUG, "job %s %08x", g_work.job_id, (*nonceptr));
pthread_mutex_unlock(&g_work_lock); pthread_mutex_unlock(&g_work_lock);
/* adjust max_nonce to meet target scan time */ /* adjust max_nonce to meet target scan time */
if (have_stratum) if (have_stratum)
max64 = LP_SCANTIME; max64 = LP_SCANTIME;
else else
max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime) max64 = max(1, scan_time + g_work_time - time(NULL));
- time(NULL);
max64 *= (int64_t)thr_hashrates[thr_id]; max64 *= (uint32_t)thr_hashrates[thr_id];
if (max64 <= 0) { /* on start, max64 should not be 0,
/* should not be set too high, * before hashrate is computed */
else you can miss multiple nounces */ if (max64 < minmax) {
switch (opt_algo) { switch (opt_algo) {
case ALGO_BLAKECOIN: case ALGO_BLAKECOIN:
max64 = 0x3ffffffLL; minmax = 0x4000000;
break; break;
case ALGO_BLAKE: case ALGO_BLAKE:
case ALGO_DOOM: case ALGO_DOOM:
case ALGO_JACKPOT: case ALGO_JACKPOT:
case ALGO_KECCAK: case ALGO_KECCAK:
case ALGO_LUFFA_DOOM: case ALGO_LUFFA_DOOM:
max64 = 0x1ffffffLL; minmax = 0x2000000;
break; break;
default: case ALGO_S3:
max64 = 0xfffffLL; case ALGO_X11:
case ALGO_X13:
minmax = 0x400000;
break; break;
} }
max64 = max(minmax-1, max64);
} }
if (opt_debug)
applog(LOG_DEBUG, "GPU #%d: start=%08x range=%llx", device_map[thr_id], *nonceptr, max64);
start_nonce = *nonceptr; start_nonce = *nonceptr;
/* do not recompute something already scanned */ /* do not recompute something already scanned */
@ -1140,7 +1143,7 @@ static void *miner_thread(void *userdata)
} range; } range;
range.data = hashlog_get_scan_range(work.job_id); range.data = hashlog_get_scan_range(work.job_id);
if (range.data) { if (range.data && !opt_benchmark) {
bool stall = false; bool stall = false;
if (range.scanned[0] == 1 && range.scanned[1] == 0xFFFFFFFFUL) { if (range.scanned[0] == 1 && range.scanned[1] == 0xFFFFFFFFUL) {
applog(LOG_WARNING, "detected a rescan of fully scanned job!"); applog(LOG_WARNING, "detected a rescan of fully scanned job!");
@ -1168,11 +1171,14 @@ static void *miner_thread(void *userdata)
} }
} }
umax64 = (uint64_t) max64; if ((max64 + start_nonce) >= end_nonce)
if ((umax64 + start_nonce) >= end_nonce)
max_nonce = end_nonce; max_nonce = end_nonce;
else else
max_nonce = (uint32_t) umax64 + start_nonce; max_nonce = (uint32_t) (max64 + start_nonce);
/* never let small ranges at end */
if (max_nonce >= UINT32_MAX - 256)
max_nonce = UINT32_MAX;
work.scanned_from = start_nonce; work.scanned_from = start_nonce;
(*nonceptr) = start_nonce; (*nonceptr) = start_nonce;
@ -1343,6 +1349,19 @@ continue_scan:
pthread_mutex_unlock(&stats_lock); pthread_mutex_unlock(&stats_lock);
} }
if (rc)
work.scanned_to = *nonceptr;
else {
work.scanned_to = max_nonce;
if (opt_debug && opt_benchmark) {
// to debug nonce ranges
applog(LOG_DEBUG, "GPU #%d: ends=%08x range=%llx", device_map[thr_id],
*nonceptr, ((*nonceptr) - start_nonce));
}
}
hashlog_remember_scan_range(work.job_id, work.scanned_from, work.scanned_to);
/* output */ /* output */
if (!opt_quiet && loopcnt) { if (!opt_quiet && loopcnt) {
sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f", sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
@ -1365,18 +1384,9 @@ continue_scan:
global_hashrate = llround(hashrate); global_hashrate = llround(hashrate);
} }
if (rc) {
work.scanned_to = *nonceptr;
} else {
work.scanned_to = max_nonce;
}
// could be used to store speeds too..
hashlog_remember_scan_range(work.job_id, work.scanned_from, work.scanned_to);
/* if nonce found, submit work */ /* if nonce found, submit work */
if (rc) { if (rc && !opt_benchmark) {
if (!opt_benchmark && !submit_work(mythr, &work)) if (!submit_work(mythr, &work))
break; break;
} }

4
ccminer.vcxproj

@ -172,7 +172,7 @@
<MaxRegCount>80</MaxRegCount> <MaxRegCount>80</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>false</Keep> <Keep>false</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
<AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions>
<Defines> <Defines>
</Defines> </Defines>
@ -473,4 +473,4 @@
<Target Name="AfterClean"> <Target Name="AfterClean">
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" /> <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
</Target> </Target>
</Project> </Project>

16
stats.cpp

@ -27,8 +27,8 @@ extern int device_map[8];
*/ */
extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found) extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found)
{ {
uint64_t thr = (0xff & thr_id); uint64_t gpu = device_map[thr_id];
uint64_t key = (thr << 56) + (uid++ % UINT_MAX); uint64_t key = (gpu << 56) + (uid++ % UINT_MAX);
stats_data data; stats_data data;
// to enough hashes to give right stats // to enough hashes to give right stats
if (hashcount < 1000 || hashrate < 0.01) if (hashcount < 1000 || hashrate < 0.01)
@ -39,8 +39,8 @@ extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hash
return; return;
memset(&data, 0, sizeof(data)); memset(&data, 0, sizeof(data));
data.gpu_id = device_map[thr_id]; data.gpu_id = (uint8_t)gpu;
data.thr_id = (uint8_t)thr; data.thr_id = (uint8_t)thr_id;
data.tm_stat = (uint32_t) time(NULL); data.tm_stat = (uint32_t) time(NULL);
data.hashcount = hashcount; data.hashcount = hashcount;
data.hashfound = found; data.hashfound = found;
@ -61,8 +61,8 @@ extern "C" void stats_remember_speed(int thr_id, uint32_t hashcount, double hash
*/ */
extern "C" double stats_get_speed(int thr_id, double def_speed) extern "C" double stats_get_speed(int thr_id, double def_speed)
{ {
uint64_t thr = (0xff & thr_id); uint64_t gpu = device_map[thr_id];
uint64_t keypfx = (thr << 56); uint64_t keypfx = (gpu << 56);
uint64_t keymsk = (0xffULL << 56); uint64_t keymsk = (0xffULL << 56);
double speed = 0.0; double speed = 0.0;
int records = 0; int records = 0;
@ -93,8 +93,8 @@ extern "C" double stats_get_speed(int thr_id, double def_speed)
extern "C" int stats_get_history(int thr_id, struct stats_data *data, int max_records) extern "C" int stats_get_history(int thr_id, struct stats_data *data, int max_records)
{ {
uint64_t thr = (0xff & thr_id); uint64_t gpu = device_map[thr_id];
uint64_t keypfx = (thr << 56); uint64_t keypfx = (gpu << 56);
uint64_t keymsk = (0xffULL << 56); uint64_t keymsk = (0xffULL << 56);
double speed = 0.0; double speed = 0.0;
int records = 0; int records = 0;

4
x11/x11.cu

@ -135,13 +135,13 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = { 0 };
int intensity = (device_sm[device_map[thr_id]] >= 500) ? 20 : 19; int intensity = (device_sm[device_map[thr_id]] >= 500) ? 20 : 19;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16; int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16;
throughput = min(throughput, max_nonce - first_nonce); throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000f; ((uint32_t*)ptarget)[7] = 0x000f;
if (!init[thr_id]) if (!init[thr_id])
{ {

22
x13/x13.cu

@ -154,20 +154,17 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
throughput = min(throughput, max_nonce - first_nonce); throughput = min(throughput, max_nonce - first_nonce);
static bool init[8] = {0,0,0,0,0,0,0,0}; if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x000f;
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 2 * 32 * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -216,11 +213,12 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], foundNonce);
x13hash(vhash64, endiandata); x13hash(vhash64, endiandata);
if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) ) {
pdata[19] = foundNonce; pdata[19] = foundNonce;
*hashes_done = foundNonce - first_nonce + 1; *hashes_done = foundNonce - first_nonce + 1;
return 1; return 1;
@ -233,9 +231,13 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
} }
} }
if ((uint64_t)pdata[19] + throughput > (uint64_t)max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput; pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1; *hashes_done = pdata[19] - first_nonce + 1;
return 0; return 0;

5
x15/x14.cu

@ -165,14 +165,13 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = { 0 };
uint32_t endiandata[20]; uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
throughput = min(throughput, max_nonce - first_nonce); throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0xff; ((uint32_t*)ptarget)[7] = 0x000f;
if (!init[thr_id]) if (!init[thr_id])
{ {

2
x15/x15.cu

@ -174,7 +174,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = { 0 };
uint32_t endiandata[20]; uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;

Loading…
Cancel
Save