Browse Source

Make poclbm use 4 vectors and decrease worksize to keep pipelines fullish.

Make it possible to have 0 CPU threads and update docs.
Fix counter with no cpu threads.
nfactor-troky
Con Kolivas 14 years ago
parent
commit
f54d2cc0ed
  1. 88
      cpu-miner.c
  2. 46
      poclbm.cl

88
cpu-miner.c

@ -207,7 +207,7 @@ static struct option_help options_help[] = { @@ -207,7 +207,7 @@ static struct option_help options_help[] = {
#endif
{ "threads N",
"(-t N) Number of miner threads (default: 1)" },
"(-t N) Number of miner CPU threads (default: number of processors)" },
{ "url URL",
"URL for bitcoin JSON-RPC server "
@ -500,22 +500,21 @@ static void hashmeter(int thr_id, struct timeval *diff, @@ -500,22 +500,21 @@ static void hashmeter(int thr_id, struct timeval *diff,
{
struct timeval temp_tv_end, total_diff;
double khashes, secs;
double total_mhashes, total_secs;
/* Don't bother calculating anything if we're not displaying it */
if (opt_quiet)
return;
khashes = hashes_done / 1000.0;
secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
if (opt_n_threads + nDevs > 1) {
double total_mhashes, total_secs;
if (opt_debug)
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
thr_id, hashes_done, hashes_done / secs);
gettimeofday(&temp_tv_end, NULL);
timeval_subtract(&total_diff, &temp_tv_end, &total_tv_end);
if (opt_n_threads + nDevs > 1) {
/* Totals are updated by all threads so can race without locking */
pthread_mutex_lock(&hash_lock);
total_hashes_done += hashes_done;
@ -533,10 +532,19 @@ static void hashmeter(int thr_id, struct timeval *diff, @@ -533,10 +532,19 @@ static void hashmeter(int thr_id, struct timeval *diff,
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
total_mhashes / total_secs, accepted, rejected);
} else {
if (opt_debug)
applog(LOG_DEBUG, "[%lu hashes]", hashes_done);
applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]",
khashes / secs, accepted, rejected);
total_hashes_done += hashes_done;
if (total_diff.tv_sec < 5) {
/* Only update the total every 5 seconds */
pthread_mutex_unlock(&hash_lock);
return;
}
gettimeofday(&total_tv_end, NULL);
timeval_subtract(&total_diff, &total_tv_end, &total_tv_start);
total_mhashes = total_hashes_done / 1000000.0;
total_secs = (double)total_diff.tv_sec +
((double)total_diff.tv_usec / 1000000.0);
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
total_mhashes / total_secs, accepted, rejected);
}
}
@ -608,6 +616,11 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce) @@ -608,6 +616,11 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
return submit_work(thr, work);
}
/*
 * Map a miner thread id onto a CPU index.
 *
 * The id is first offset by nDevs (presumably the GPU threads occupy the
 * lowest thread ids -- confirm against the thread creation code) and then
 * wrapped by num_processors.
 *
 * num_processors is only assigned from sysconf() on the non-WIN32 path in
 * parse_arg, and sysconf() may itself fail with -1, so the divisor cannot be
 * trusted to be positive. Taking a modulo by zero is undefined behaviour, so
 * in that case the unwrapped offset is returned instead.
 */
static inline int cpu_from_thr_id(int thr_id)
{
	int cpu_thr = thr_id - nDevs;

	if (num_processors <= 0)
		return cpu_thr;

	return cpu_thr % num_processors;
}
static void *miner_thread(void *userdata)
{
struct thr_info *mythr = userdata;
@ -718,7 +731,7 @@ static void *miner_thread(void *userdata) @@ -718,7 +731,7 @@ static void *miner_thread(void *userdata)
/* if nonce found, submit work */
if (unlikely(rc)) {
applog(LOG_INFO, "CPU found something?");
applog(LOG_INFO, "CPU %d found something?", cpu_from_thr_id(thr_id));
if (!submit_work(mythr, &work))
break;
}
@ -772,6 +785,11 @@ static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel @@ -772,6 +785,11 @@ static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel
return status;
}
/*
 * Translate a miner thread id into a GPU index.
 *
 * The mapping is the identity: the thread id is handed back unchanged.
 */
static inline int gpu_from_thr_id(int thr_id)
{
	const int gpu = thr_id;

	return gpu;
}
static void *gpuminer_thread(void *userdata)
{
struct thr_info *mythr = userdata;
@ -799,15 +817,19 @@ static void *gpuminer_thread(void *userdata) @@ -799,15 +817,19 @@ static void *gpuminer_thread(void *userdata)
struct work *work = malloc(sizeof(struct work));
bool need_work = true;
unsigned int threads = 1 << 22;
unsigned int threads = 1 << 21;
unsigned int vectors = 4;
unsigned int hashes_done = threads * vectors;
gettimeofday(&tv_start, NULL);
globalThreads[0] = threads;
localThreads[0] = 128;
localThreads[0] = 64;
while (1) {
struct timeval tv_end, diff;
int i;
unsigned int i;
clFinish(clState->commandQueue);
if (need_work) {
/* obtain new work from internal workio thread */
@ -821,7 +843,7 @@ static void *gpuminer_thread(void *userdata) @@ -821,7 +843,7 @@ static void *gpuminer_thread(void *userdata)
work->blk.nonce = 0;
status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); exit (1); }
{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); exit (1); }
work_restart[thr_id].restart = 0;
need_work = false;
@ -829,8 +851,11 @@ static void *gpuminer_thread(void *userdata) @@ -829,8 +851,11 @@ static void *gpuminer_thread(void *userdata)
if (opt_debug)
applog(LOG_DEBUG, "getwork");
} else {
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; }
}
clFinish(clState->commandQueue);
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL);
@ -846,7 +871,7 @@ static void *gpuminer_thread(void *userdata) @@ -846,7 +871,7 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
for (i = 0; i < 127; i++) {
if (res[i]) {
applog(LOG_INFO, "GPU Found something?");
applog(LOG_INFO, "GPU %d found something?", gpu_from_thr_id(thr_id));
postcalc_hash(mythr, &work->blk, work, res[i]);
} else
break;
@ -861,19 +886,14 @@ static void *gpuminer_thread(void *userdata) @@ -861,19 +886,14 @@ static void *gpuminer_thread(void *userdata)
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
hashmeter(thr_id, &diff, threads);
hashmeter(thr_id, &diff, hashes_done);
gettimeofday(&tv_start, NULL);
work->blk.nonce += threads;
work->blk.nonce += hashes_done;
if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
if (unlikely(work->blk.nonce > MAXTHREADS - hashes_done) ||
(work_restart[thr_id].restart))
need_work = true;
clFinish(clState->commandQueue);
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); goto out; }
}
out:
tq_freeze(mythr->q);
@ -982,6 +1002,15 @@ static void parse_arg (int key, char *arg) @@ -982,6 +1002,15 @@ static void parse_arg (int key, char *arg)
{
int v, i;
#ifdef WIN32
if (!opt_n_threads)
opt_n_threads = 1;
#else
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
if (!opt_n_threads)
opt_n_threads = num_processors;
#endif /* !WIN32 */
switch(key) {
case 'a':
for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
@ -1041,7 +1070,7 @@ static void parse_arg (int key, char *arg) @@ -1041,7 +1070,7 @@ static void parse_arg (int key, char *arg)
break;
case 't':
v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */
if (v < 0 || v > 9999) /* sanity check */
show_usage();
opt_n_threads = v;
@ -1074,15 +1103,6 @@ static void parse_arg (int key, char *arg) @@ -1074,15 +1103,6 @@ static void parse_arg (int key, char *arg)
default:
show_usage();
}
#ifdef WIN32
if (!opt_n_threads)
opt_n_threads = 1;
#else
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
if (!opt_n_threads)
opt_n_threads = num_processors;
#endif /* !WIN32 */
}
static void parse_config(void)

46
poclbm.cl

@ -1,8 +1,10 @@ @@ -1,8 +1,10 @@
// This file is taken and modified from the public-domain poclbm project, and
// we have therefore decided to keep it public-domain in Phoenix.
#define VECTORS
#ifdef VECTORS
typedef uint2 u;
typedef uint4 u;
#else
typedef uint u;
#endif
@ -35,6 +37,8 @@ __constant uint K[64] = { @@ -35,6 +37,8 @@ __constant uint K[64] = {
// detected, use it for Ch. Otherwise, construct Ch out of simpler logical
// primitives.
#define BFI_INT
#ifdef BFI_INT
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
// OpenCL (or CAL IL for that matter) in any way. However, there is
@ -72,7 +76,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c @@ -72,7 +76,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
uint it;
#ifdef VECTORS
nonce = ((base + get_global_id(0))<<1) + (uint2)(0, 1);
nonce = ((base >> 2) + (get_global_id(0))<<2) + (uint4)(0, 1, 2, 3);
#else
nonce = base + get_global_id(0);
#endif
@ -302,11 +306,43 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c @@ -302,11 +306,43 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
#ifdef VECTORS
if (H.x == 0)
{
output[OUTPUT_SIZE] = output[nonce.x & OUTPUT_MASK] = nonce.x;
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.x;
output[127] = 1;
break;
}
}
}
if (H.y == 0)
{
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.y;
output[127] = 1;
break;
}
}
}
if (H.z == 0)
{
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.z;
output[127] = 1;
break;
}
}
else if (H.y == 0)
}
if (H.w == 0)
{
output[OUTPUT_SIZE] = output[nonce.y & OUTPUT_MASK] = nonce.y;
for (it = 0; it != 127; it++) {
if (!output[it]) {
output[it] = nonce.w;
output[127] = 1;
break;
}
}
}
#else
if (H == 0)

Loading…
Cancel
Save