1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-22 20:44:19 +00:00

Make poclbm use 4 vectors and decrease worksize to keep pipelines fullish.

Make it possible to have 0 CPU threads and update docs.
Fix counter with no cpu threads.
This commit is contained in:
Con Kolivas 2011-06-22 23:07:30 +10:00
parent b4d2733cfc
commit f54d2cc0ed
2 changed files with 99 additions and 43 deletions

View File

@ -207,7 +207,7 @@ static struct option_help options_help[] = {
#endif #endif
{ "threads N", { "threads N",
"(-t N) Number of miner threads (default: 1)" }, "(-t N) Number of miner CPU threads (default: number of processors)" },
{ "url URL", { "url URL",
"URL for bitcoin JSON-RPC server " "URL for bitcoin JSON-RPC server "
@ -500,22 +500,21 @@ static void hashmeter(int thr_id, struct timeval *diff,
{ {
struct timeval temp_tv_end, total_diff; struct timeval temp_tv_end, total_diff;
double khashes, secs; double khashes, secs;
double total_mhashes, total_secs;
/* Don't bother calculating anything if we're not displaying it */ /* Don't bother calculating anything if we're not displaying it */
if (opt_quiet) if (opt_quiet)
return; return;
khashes = hashes_done / 1000.0; khashes = hashes_done / 1000.0;
secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0); secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
if (opt_debug)
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
thr_id, hashes_done, hashes_done / secs);
gettimeofday(&temp_tv_end, NULL);
timeval_subtract(&total_diff, &temp_tv_end, &total_tv_end);
if (opt_n_threads + nDevs > 1) { if (opt_n_threads + nDevs > 1) {
double total_mhashes, total_secs;
if (opt_debug)
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
thr_id, hashes_done, hashes_done / secs);
gettimeofday(&temp_tv_end, NULL);
timeval_subtract(&total_diff, &temp_tv_end, &total_tv_end);
/* Totals are updated by all threads so can race without locking */ /* Totals are updated by all threads so can race without locking */
pthread_mutex_lock(&hash_lock); pthread_mutex_lock(&hash_lock);
total_hashes_done += hashes_done; total_hashes_done += hashes_done;
@ -533,10 +532,19 @@ static void hashmeter(int thr_id, struct timeval *diff,
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]", applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
total_mhashes / total_secs, accepted, rejected); total_mhashes / total_secs, accepted, rejected);
} else { } else {
if (opt_debug) total_hashes_done += hashes_done;
applog(LOG_DEBUG, "[%lu hashes]", hashes_done); if (total_diff.tv_sec < 5) {
applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]", /* Only update the total every 5 seconds */
khashes / secs, accepted, rejected); pthread_mutex_unlock(&hash_lock);
return;
}
gettimeofday(&total_tv_end, NULL);
timeval_subtract(&total_diff, &total_tv_end, &total_tv_start);
total_mhashes = total_hashes_done / 1000000.0;
total_secs = (double)total_diff.tv_sec +
((double)total_diff.tv_usec / 1000000.0);
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
total_mhashes / total_secs, accepted, rejected);
} }
} }
@ -608,6 +616,11 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
return submit_work(thr, work); return submit_work(thr, work);
} }
static inline int cpu_from_thr_id(int thr_id)
{
return (thr_id - nDevs) % num_processors;
}
static void *miner_thread(void *userdata) static void *miner_thread(void *userdata)
{ {
struct thr_info *mythr = userdata; struct thr_info *mythr = userdata;
@ -718,7 +731,7 @@ static void *miner_thread(void *userdata)
/* if nonce found, submit work */ /* if nonce found, submit work */
if (unlikely(rc)) { if (unlikely(rc)) {
applog(LOG_INFO, "CPU found something?"); applog(LOG_INFO, "CPU %d found something?", cpu_from_thr_id(thr_id));
if (!submit_work(mythr, &work)) if (!submit_work(mythr, &work))
break; break;
} }
@ -772,6 +785,11 @@ static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel
return status; return status;
} }
static inline int gpu_from_thr_id(int thr_id)
{
return thr_id;
}
static void *gpuminer_thread(void *userdata) static void *gpuminer_thread(void *userdata)
{ {
struct thr_info *mythr = userdata; struct thr_info *mythr = userdata;
@ -799,15 +817,19 @@ static void *gpuminer_thread(void *userdata)
struct work *work = malloc(sizeof(struct work)); struct work *work = malloc(sizeof(struct work));
bool need_work = true; bool need_work = true;
unsigned int threads = 1 << 22; unsigned int threads = 1 << 21;
unsigned int vectors = 4;
unsigned int hashes_done = threads * vectors;
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
globalThreads[0] = threads; globalThreads[0] = threads;
localThreads[0] = 128; localThreads[0] = 64;
while (1) { while (1) {
struct timeval tv_end, diff; struct timeval tv_end, diff;
int i; unsigned int i;
clFinish(clState->commandQueue);
if (need_work) { if (need_work) {
/* obtain new work from internal workio thread */ /* obtain new work from internal workio thread */
@ -821,7 +843,7 @@ static void *gpuminer_thread(void *userdata)
work->blk.nonce = 0; work->blk.nonce = 0;
status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer); status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer);
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); exit (1); } { applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); exit (1); }
work_restart[thr_id].restart = 0; work_restart[thr_id].restart = 0;
need_work = false; need_work = false;
@ -829,8 +851,11 @@ static void *gpuminer_thread(void *userdata)
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "getwork"); applog(LOG_DEBUG, "getwork");
} else {
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; }
} }
clFinish(clState->commandQueue);
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL, status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL); globalThreads, localThreads, 0, NULL, NULL);
@ -846,7 +871,7 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
for (i = 0; i < 127; i++) { for (i = 0; i < 127; i++) {
if (res[i]) { if (res[i]) {
applog(LOG_INFO, "GPU Found something?"); applog(LOG_INFO, "GPU %d found something?", gpu_from_thr_id(thr_id));
postcalc_hash(mythr, &work->blk, work, res[i]); postcalc_hash(mythr, &work->blk, work, res[i]);
} else } else
break; break;
@ -861,19 +886,14 @@ static void *gpuminer_thread(void *userdata)
gettimeofday(&tv_end, NULL); gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start); timeval_subtract(&diff, &tv_end, &tv_start);
hashmeter(thr_id, &diff, threads); hashmeter(thr_id, &diff, hashes_done);
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
work->blk.nonce += threads; work->blk.nonce += hashes_done;
if (unlikely(work->blk.nonce > MAXTHREADS - threads) || if (unlikely(work->blk.nonce > MAXTHREADS - hashes_done) ||
(work_restart[thr_id].restart)) (work_restart[thr_id].restart))
need_work = true; need_work = true;
clFinish(clState->commandQueue);
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); goto out; }
} }
out: out:
tq_freeze(mythr->q); tq_freeze(mythr->q);
@ -982,6 +1002,15 @@ static void parse_arg (int key, char *arg)
{ {
int v, i; int v, i;
#ifdef WIN32
if (!opt_n_threads)
opt_n_threads = 1;
#else
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
if (!opt_n_threads)
opt_n_threads = num_processors;
#endif /* !WIN32 */
switch(key) { switch(key) {
case 'a': case 'a':
for (i = 0; i < ARRAY_SIZE(algo_names); i++) { for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
@ -1041,7 +1070,7 @@ static void parse_arg (int key, char *arg)
break; break;
case 't': case 't':
v = atoi(arg); v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */ if (v < 0 || v > 9999) /* sanity check */
show_usage(); show_usage();
opt_n_threads = v; opt_n_threads = v;
@ -1074,15 +1103,6 @@ static void parse_arg (int key, char *arg)
default: default:
show_usage(); show_usage();
} }
#ifdef WIN32
if (!opt_n_threads)
opt_n_threads = 1;
#else
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
if (!opt_n_threads)
opt_n_threads = num_processors;
#endif /* !WIN32 */
} }
static void parse_config(void) static void parse_config(void)

View File

@ -1,8 +1,10 @@
// This file is taken and modified from the public-domain poclbm project, and // This file is taken and modified from the public-domain poclbm project, and
// we have therefore decided to keep it public-domain in Phoenix. // we have therefore decided to keep it public-domain in Phoenix.
#define VECTORS
#ifdef VECTORS #ifdef VECTORS
typedef uint2 u; typedef uint4 u;
#else #else
typedef uint u; typedef uint u;
#endif #endif
@ -35,6 +37,8 @@ __constant uint K[64] = {
// detected, use it for Ch. Otherwise, construct Ch out of simpler logical // detected, use it for Ch. Otherwise, construct Ch out of simpler logical
// primitives. // primitives.
#define BFI_INT
#ifdef BFI_INT #ifdef BFI_INT
// Well, slight problem... It turns out BFI_INT isn't actually exposed to // Well, slight problem... It turns out BFI_INT isn't actually exposed to
// OpenCL (or CAL IL for that matter) in any way. However, there is // OpenCL (or CAL IL for that matter) in any way. However, there is
@ -72,7 +76,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
uint it; uint it;
#ifdef VECTORS #ifdef VECTORS
nonce = ((base + get_global_id(0))<<1) + (uint2)(0, 1); nonce = ((base >> 2) + (get_global_id(0))<<2) + (uint4)(0, 1, 2, 3);
#else #else
nonce = base + get_global_id(0); nonce = base + get_global_id(0);
#endif #endif
@ -302,11 +306,43 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
#ifdef VECTORS #ifdef VECTORS
if (H.x == 0) if (H.x == 0)
{ {
output[OUTPUT_SIZE] = output[nonce.x & OUTPUT_MASK] = nonce.x; for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.x;
output[127] = 1;
break;
}
}
} }
else if (H.y == 0) if (H.y == 0)
{ {
output[OUTPUT_SIZE] = output[nonce.y & OUTPUT_MASK] = nonce.y; for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.y;
output[127] = 1;
break;
}
}
}
if (H.z == 0)
{
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.z;
output[127] = 1;
break;
}
}
}
if (H.w == 0)
{
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.w;
output[127] = 1;
break;
}
}
} }
#else #else
if (H == 0) if (H == 0)