Browse Source

Optimise work loop to make OpenCL (cl*) calls asynchronous where possible.

nfactor-troky
Con Kolivas 14 years ago
parent
commit
f117675ac2
  1. 70
      cpu-miner.c
  2. 16
      findnonce.c
  3. 4
      findnonce.h

70
cpu-miner.c

@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata)
struct thr_info *mythr = userdata; struct thr_info *mythr = userdata;
struct timeval tv_start; struct timeval tv_start;
int thr_id = mythr->id; int thr_id = mythr->id;
uint32_t res[128]; uint32_t res[128], blank_res[128];
setpriority(PRIO_PROCESS, 0, 19); setpriority(PRIO_PROCESS, 0, 19);
memset(res, 0, BUFFERSIZE); memset(blank_res, 0, BUFFERSIZE);
size_t globalThreads[1]; size_t globalThreads[1];
size_t localThreads[1]; size_t localThreads[1];
@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; } { applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL); BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
struct work *work = malloc(sizeof(struct work)); struct work *work = malloc(sizeof(struct work));
bool need_work = true; bool need_work = true;
unsigned int threads = 1 << 22; unsigned int threads = 1 << 22;
unsigned int h0count = 0;
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
globalThreads[0] = threads;
localThreads[0] = 128;
while (1) { while (1) {
struct timeval tv_end, diff; struct timeval tv_end, diff;
int i; int i;
if (need_work) { if (need_work) {
work_restart[thr_id].restart = 0;
if (opt_debug)
applog(LOG_DEBUG, "getwork");
/* obtain new work from internal workio thread */ /* obtain new work from internal workio thread */
if (unlikely(!get_work(mythr, work))) { if (unlikely(!get_work(mythr, work))) {
applog(LOG_ERR, "work retrieval failed, exiting " applog(LOG_ERR, "work retrieval failed, exiting "
@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata)
} }
precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
work->blk.nonce = 0; work->blk.nonce = 0;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
work_restart[thr_id].restart = 0;
need_work = false; need_work = false;
}
globalThreads[0] = threads;
localThreads[0] = 128;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0, if (opt_debug)
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL); applog(LOG_DEBUG, "getwork");
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } }
clFinish(clState->commandQueue);
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL); globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; } { applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, /* 127 is used as a flag to say nonces exist */
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
if (unlikely(res[127])) { if (unlikely(res[127])) {
/* 127 is used as a flag to say nonces exist */ /* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
for (i = 0; i < 127; i++) { for (i = 0; i < 127; i++) {
if (res[i]) { if (res[i]) {
uint32_t start = res[i];
uint32_t my_g, my_nonce;
applog(LOG_INFO, "GPU Found something?"); applog(LOG_INFO, "GPU Found something?");
my_g = postcalc_hash(mythr, &work->blk, work, start, start + 1026, &my_nonce, &h0count); postcalc_hash(mythr, &work->blk, work, res[i]);
res[i] = 0;
} else } else
break; break;
} }
/* Clear the buffer again */ clFinish(clState->commandQueue);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
} }
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
gettimeofday(&tv_end, NULL); gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start); timeval_subtract(&diff, &tv_end, &tv_start);
hashmeter(thr_id, &diff, threads); hashmeter(thr_id, &diff, threads);
@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(work->blk.nonce > MAXTHREADS - threads) || if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
(work_restart[thr_id].restart)) (work_restart[thr_id].restart))
need_work = true; need_work = true;
clFinish(clState->commandQueue);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
} }
out: out:
tq_freeze(mythr->q); tq_freeze(mythr->q);

16
findnonce.c

@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \ R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]) R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start)
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count)
{ {
cl_uint A, B, C, D, E, F, G, H; cl_uint A, B, C, D, E, F, G, H;
cl_uint W[16]; cl_uint W[16];
cl_uint nonce; cl_uint nonce;
cl_uint best_g = ~0; cl_uint best_g = ~0;
uint32_t end = start + 1026;
for (nonce = start; nonce != end; nonce+=1) { for (nonce = start; nonce != end; nonce+=1) {
A = blk->cty_a; B = blk->cty_b; A = blk->cty_a; B = blk->cty_b;
@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
FR(48); PFR(56); FR(48); PFR(56);
if (unlikely(H == 0xA41F32E7)) { if (unlikely(H == 0xA41F32E7)) {
(*h0count)++;
if (unlikely(submit_nonce(thr, work, nonce) == false)) { if (unlikely(submit_nonce(thr, work, nonce) == false)) {
applog(LOG_ERR, "Failed to submit work, exiting"); applog(LOG_ERR, "Failed to submit work, exiting");
goto out; goto out;
@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
G += 0x1f83d9ab; G += 0x1f83d9ab;
G = ByteReverse(G); G = ByteReverse(G);
if (G < best_g) { if (G < best_g)
*best_nonce = nonce;
best_g = G; best_g = G;
}
} }
} }
out: out:
// if (unlikely(best_g == ~0)) applog(LOG_ERR, "No best_g found! Error in OpenCL code?"); if (unlikely(best_g == ~0))
applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
return best_g;
} }

4
findnonce.h

@ -19,6 +19,4 @@ typedef struct {
} dev_blk_ctx; } dev_blk_ctx;
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, extern void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start);
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count);

Loading…
Cancel
Save