mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 20:44:19 +00:00
Make poclbm use 4 vectors and decrease worksize to keep pipelines fullish.
Make it possible to have 0 CPU threads and update docs. Fix counter with no cpu threads.
This commit is contained in:
parent
b4d2733cfc
commit
f54d2cc0ed
96
cpu-miner.c
96
cpu-miner.c
@ -207,7 +207,7 @@ static struct option_help options_help[] = {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
{ "threads N",
|
{ "threads N",
|
||||||
"(-t N) Number of miner threads (default: 1)" },
|
"(-t N) Number of miner CPU threads (default: number of processors)" },
|
||||||
|
|
||||||
{ "url URL",
|
{ "url URL",
|
||||||
"URL for bitcoin JSON-RPC server "
|
"URL for bitcoin JSON-RPC server "
|
||||||
@ -500,22 +500,21 @@ static void hashmeter(int thr_id, struct timeval *diff,
|
|||||||
{
|
{
|
||||||
struct timeval temp_tv_end, total_diff;
|
struct timeval temp_tv_end, total_diff;
|
||||||
double khashes, secs;
|
double khashes, secs;
|
||||||
|
double total_mhashes, total_secs;
|
||||||
|
|
||||||
/* Don't bother calculating anything if we're not displaying it */
|
/* Don't bother calculating anything if we're not displaying it */
|
||||||
if (opt_quiet)
|
if (opt_quiet)
|
||||||
return;
|
return;
|
||||||
khashes = hashes_done / 1000.0;
|
khashes = hashes_done / 1000.0;
|
||||||
secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
|
secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
|
||||||
|
if (opt_debug)
|
||||||
|
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
|
||||||
|
thr_id, hashes_done, hashes_done / secs);
|
||||||
|
gettimeofday(&temp_tv_end, NULL);
|
||||||
|
timeval_subtract(&total_diff, &temp_tv_end, &total_tv_end);
|
||||||
|
|
||||||
|
|
||||||
if (opt_n_threads + nDevs > 1) {
|
if (opt_n_threads + nDevs > 1) {
|
||||||
double total_mhashes, total_secs;
|
|
||||||
|
|
||||||
if (opt_debug)
|
|
||||||
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
|
|
||||||
thr_id, hashes_done, hashes_done / secs);
|
|
||||||
gettimeofday(&temp_tv_end, NULL);
|
|
||||||
timeval_subtract(&total_diff, &temp_tv_end, &total_tv_end);
|
|
||||||
|
|
||||||
/* Totals are updated by all threads so can race without locking */
|
/* Totals are updated by all threads so can race without locking */
|
||||||
pthread_mutex_lock(&hash_lock);
|
pthread_mutex_lock(&hash_lock);
|
||||||
total_hashes_done += hashes_done;
|
total_hashes_done += hashes_done;
|
||||||
@ -533,10 +532,19 @@ static void hashmeter(int thr_id, struct timeval *diff,
|
|||||||
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
|
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
|
||||||
total_mhashes / total_secs, accepted, rejected);
|
total_mhashes / total_secs, accepted, rejected);
|
||||||
} else {
|
} else {
|
||||||
if (opt_debug)
|
total_hashes_done += hashes_done;
|
||||||
applog(LOG_DEBUG, "[%lu hashes]", hashes_done);
|
if (total_diff.tv_sec < 5) {
|
||||||
applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]",
|
/* Only update the total every 5 seconds */
|
||||||
khashes / secs, accepted, rejected);
|
pthread_mutex_unlock(&hash_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
gettimeofday(&total_tv_end, NULL);
|
||||||
|
timeval_subtract(&total_diff, &total_tv_end, &total_tv_start);
|
||||||
|
total_mhashes = total_hashes_done / 1000000.0;
|
||||||
|
total_secs = (double)total_diff.tv_sec +
|
||||||
|
((double)total_diff.tv_usec / 1000000.0);
|
||||||
|
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
|
||||||
|
total_mhashes / total_secs, accepted, rejected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -608,6 +616,11 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
|
|||||||
return submit_work(thr, work);
|
return submit_work(thr, work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int cpu_from_thr_id(int thr_id)
|
||||||
|
{
|
||||||
|
return (thr_id - nDevs) % num_processors;
|
||||||
|
}
|
||||||
|
|
||||||
static void *miner_thread(void *userdata)
|
static void *miner_thread(void *userdata)
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = userdata;
|
struct thr_info *mythr = userdata;
|
||||||
@ -718,7 +731,7 @@ static void *miner_thread(void *userdata)
|
|||||||
|
|
||||||
/* if nonce found, submit work */
|
/* if nonce found, submit work */
|
||||||
if (unlikely(rc)) {
|
if (unlikely(rc)) {
|
||||||
applog(LOG_INFO, "CPU found something?");
|
applog(LOG_INFO, "CPU %d found something?", cpu_from_thr_id(thr_id));
|
||||||
if (!submit_work(mythr, &work))
|
if (!submit_work(mythr, &work))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -772,6 +785,11 @@ static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int gpu_from_thr_id(int thr_id)
|
||||||
|
{
|
||||||
|
return thr_id;
|
||||||
|
}
|
||||||
|
|
||||||
static void *gpuminer_thread(void *userdata)
|
static void *gpuminer_thread(void *userdata)
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = userdata;
|
struct thr_info *mythr = userdata;
|
||||||
@ -799,15 +817,19 @@ static void *gpuminer_thread(void *userdata)
|
|||||||
|
|
||||||
struct work *work = malloc(sizeof(struct work));
|
struct work *work = malloc(sizeof(struct work));
|
||||||
bool need_work = true;
|
bool need_work = true;
|
||||||
unsigned int threads = 1 << 22;
|
unsigned int threads = 1 << 21;
|
||||||
|
unsigned int vectors = 4;
|
||||||
|
unsigned int hashes_done = threads * vectors;
|
||||||
|
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
globalThreads[0] = threads;
|
globalThreads[0] = threads;
|
||||||
localThreads[0] = 128;
|
localThreads[0] = 64;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
struct timeval tv_end, diff;
|
struct timeval tv_end, diff;
|
||||||
int i;
|
unsigned int i;
|
||||||
|
|
||||||
|
clFinish(clState->commandQueue);
|
||||||
|
|
||||||
if (need_work) {
|
if (need_work) {
|
||||||
/* obtain new work from internal workio thread */
|
/* obtain new work from internal workio thread */
|
||||||
@ -821,7 +843,7 @@ static void *gpuminer_thread(void *userdata)
|
|||||||
work->blk.nonce = 0;
|
work->blk.nonce = 0;
|
||||||
status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer);
|
status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer);
|
||||||
if (unlikely(status != CL_SUCCESS))
|
if (unlikely(status != CL_SUCCESS))
|
||||||
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); exit (1); }
|
{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); exit (1); }
|
||||||
|
|
||||||
work_restart[thr_id].restart = 0;
|
work_restart[thr_id].restart = 0;
|
||||||
need_work = false;
|
need_work = false;
|
||||||
@ -829,8 +851,11 @@ static void *gpuminer_thread(void *userdata)
|
|||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog(LOG_DEBUG, "getwork");
|
applog(LOG_DEBUG, "getwork");
|
||||||
|
|
||||||
|
} else {
|
||||||
|
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
|
||||||
|
if (unlikely(status != CL_SUCCESS))
|
||||||
|
{ applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; }
|
||||||
}
|
}
|
||||||
clFinish(clState->commandQueue);
|
|
||||||
|
|
||||||
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
|
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL);
|
globalThreads, localThreads, 0, NULL, NULL);
|
||||||
@ -846,7 +871,7 @@ static void *gpuminer_thread(void *userdata)
|
|||||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||||
for (i = 0; i < 127; i++) {
|
for (i = 0; i < 127; i++) {
|
||||||
if (res[i]) {
|
if (res[i]) {
|
||||||
applog(LOG_INFO, "GPU Found something?");
|
applog(LOG_INFO, "GPU %d found something?", gpu_from_thr_id(thr_id));
|
||||||
postcalc_hash(mythr, &work->blk, work, res[i]);
|
postcalc_hash(mythr, &work->blk, work, res[i]);
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
@ -861,19 +886,14 @@ static void *gpuminer_thread(void *userdata)
|
|||||||
|
|
||||||
gettimeofday(&tv_end, NULL);
|
gettimeofday(&tv_end, NULL);
|
||||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||||
hashmeter(thr_id, &diff, threads);
|
hashmeter(thr_id, &diff, hashes_done);
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
|
|
||||||
work->blk.nonce += threads;
|
work->blk.nonce += hashes_done;
|
||||||
|
|
||||||
if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
|
if (unlikely(work->blk.nonce > MAXTHREADS - hashes_done) ||
|
||||||
(work_restart[thr_id].restart))
|
(work_restart[thr_id].restart))
|
||||||
need_work = true;
|
need_work = true;
|
||||||
|
|
||||||
clFinish(clState->commandQueue);
|
|
||||||
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
|
|
||||||
if (unlikely(status != CL_SUCCESS))
|
|
||||||
{ applog(LOG_ERR, "Error: clSetKernelArg failed."); goto out; }
|
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
tq_freeze(mythr->q);
|
tq_freeze(mythr->q);
|
||||||
@ -982,6 +1002,15 @@ static void parse_arg (int key, char *arg)
|
|||||||
{
|
{
|
||||||
int v, i;
|
int v, i;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
if (!opt_n_threads)
|
||||||
|
opt_n_threads = 1;
|
||||||
|
#else
|
||||||
|
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
|
||||||
|
if (!opt_n_threads)
|
||||||
|
opt_n_threads = num_processors;
|
||||||
|
#endif /* !WIN32 */
|
||||||
|
|
||||||
switch(key) {
|
switch(key) {
|
||||||
case 'a':
|
case 'a':
|
||||||
for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
|
for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
|
||||||
@ -1041,7 +1070,7 @@ static void parse_arg (int key, char *arg)
|
|||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
v = atoi(arg);
|
v = atoi(arg);
|
||||||
if (v < 1 || v > 9999) /* sanity check */
|
if (v < 0 || v > 9999) /* sanity check */
|
||||||
show_usage();
|
show_usage();
|
||||||
|
|
||||||
opt_n_threads = v;
|
opt_n_threads = v;
|
||||||
@ -1074,15 +1103,6 @@ static void parse_arg (int key, char *arg)
|
|||||||
default:
|
default:
|
||||||
show_usage();
|
show_usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
if (!opt_n_threads)
|
|
||||||
opt_n_threads = 1;
|
|
||||||
#else
|
|
||||||
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
|
|
||||||
if (!opt_n_threads)
|
|
||||||
opt_n_threads = num_processors;
|
|
||||||
#endif /* !WIN32 */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parse_config(void)
|
static void parse_config(void)
|
||||||
|
46
poclbm.cl
46
poclbm.cl
@ -1,8 +1,10 @@
|
|||||||
// This file is taken and modified from the public-domain poclbm project, and
|
// This file is taken and modified from the public-domain poclbm project, and
|
||||||
// we have therefore decided to keep it public-domain in Phoenix.
|
// we have therefore decided to keep it public-domain in Phoenix.
|
||||||
|
|
||||||
|
#define VECTORS
|
||||||
|
|
||||||
#ifdef VECTORS
|
#ifdef VECTORS
|
||||||
typedef uint2 u;
|
typedef uint4 u;
|
||||||
#else
|
#else
|
||||||
typedef uint u;
|
typedef uint u;
|
||||||
#endif
|
#endif
|
||||||
@ -35,6 +37,8 @@ __constant uint K[64] = {
|
|||||||
// detected, use it for Ch. Otherwise, construct Ch out of simpler logical
|
// detected, use it for Ch. Otherwise, construct Ch out of simpler logical
|
||||||
// primitives.
|
// primitives.
|
||||||
|
|
||||||
|
#define BFI_INT
|
||||||
|
|
||||||
#ifdef BFI_INT
|
#ifdef BFI_INT
|
||||||
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
||||||
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
||||||
@ -72,7 +76,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|||||||
uint it;
|
uint it;
|
||||||
|
|
||||||
#ifdef VECTORS
|
#ifdef VECTORS
|
||||||
nonce = ((base + get_global_id(0))<<1) + (uint2)(0, 1);
|
nonce = ((base >> 2) + (get_global_id(0))<<2) + (uint4)(0, 1, 2, 3);
|
||||||
#else
|
#else
|
||||||
nonce = base + get_global_id(0);
|
nonce = base + get_global_id(0);
|
||||||
#endif
|
#endif
|
||||||
@ -302,11 +306,43 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|||||||
#ifdef VECTORS
|
#ifdef VECTORS
|
||||||
if (H.x == 0)
|
if (H.x == 0)
|
||||||
{
|
{
|
||||||
output[OUTPUT_SIZE] = output[nonce.x & OUTPUT_MASK] = nonce.x;
|
for (it = 0; it != 127; it++) {
|
||||||
|
if (!output[it]) {
|
||||||
|
output[it] = nonce.x;
|
||||||
|
output[127] = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (H.y == 0)
|
if (H.y == 0)
|
||||||
{
|
{
|
||||||
output[OUTPUT_SIZE] = output[nonce.y & OUTPUT_MASK] = nonce.y;
|
for (it = 0; it != 127; it++) {
|
||||||
|
if (!output[it]) {
|
||||||
|
output[it] = nonce.y;
|
||||||
|
output[127] = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (H.z == 0)
|
||||||
|
{
|
||||||
|
for (it = 0; it != 127; it++) {
|
||||||
|
if (!output[it]) {
|
||||||
|
output[it] = nonce.z;
|
||||||
|
output[127] = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (H.w == 0)
|
||||||
|
{
|
||||||
|
for (it = 0; it != 127; it++) {
|
||||||
|
if (!output[it]) {
|
||||||
|
output[it] = nonce.w;
|
||||||
|
output[127] = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (H == 0)
|
if (H == 0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user