Browse Source

Optimise work loop to make cl calls asynchronous where possible.

nfactor-troky
Con Kolivas 14 years ago
parent
commit
f117675ac2
  1. 72
      cpu-miner.c
  2. 16
      findnonce.c
  3. 4
      findnonce.h

72
cpu-miner.c

@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata) @@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata)
struct thr_info *mythr = userdata;
struct timeval tv_start;
int thr_id = mythr->id;
uint32_t res[128];
uint32_t res[128], blank_res[128];
setpriority(PRIO_PROCESS, 0, 19);
memset(res, 0, BUFFERSIZE);
memset(blank_res, 0, BUFFERSIZE);
size_t globalThreads[1];
size_t localThreads[1];
@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata) @@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
struct work *work = malloc(sizeof(struct work));
bool need_work = true;
unsigned int threads = 1 << 22;
unsigned int h0count = 0;
gettimeofday(&tv_start, NULL);
globalThreads[0] = threads;
localThreads[0] = 128;
while (1) {
struct timeval tv_end, diff;
int i;
if (need_work) {
work_restart[thr_id].restart = 0;
if (opt_debug)
applog(LOG_DEBUG, "getwork");
/* obtain new work from internal workio thread */
if (unlikely(!get_work(mythr, work))) {
applog(LOG_ERR, "work retrieval failed, exiting "
@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata) @@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata)
}
precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
work->blk.nonce = 0;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
work_restart[thr_id].restart = 0;
need_work = false;
}
globalThreads[0] = threads;
localThreads[0] = 128;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
if (opt_debug)
applog(LOG_DEBUG, "getwork");
}
clFinish(clState->commandQueue);
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
/* 127 is used as a flag to say nonces exist */
if (unlikely(res[127])) {
/* 127 is used as a flag to say nonces exist */
/* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
for (i = 0; i < 127; i++) {
if (res[i]) {
uint32_t start = res[i];
uint32_t my_g, my_nonce;
applog(LOG_INFO, "GPU Found something?");
my_g = postcalc_hash(mythr, &work->blk, work, start, start + 1026, &my_nonce, &h0count);
res[i] = 0;
postcalc_hash(mythr, &work->blk, work, res[i]);
} else
break;
}
/* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
clFinish(clState->commandQueue);
}
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
hashmeter(thr_id, &diff, threads);
@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata) @@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
(work_restart[thr_id].restart))
need_work = true;
clFinish(clState->commandQueue);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
}
out:
tq_freeze(mythr->q);

16
findnonce.c

@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { @@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count)
void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start)
{
cl_uint A, B, C, D, E, F, G, H;
cl_uint W[16];
cl_uint nonce;
cl_uint best_g = ~0;
uint32_t end = start + 1026;
for (nonce = start; nonce != end; nonce+=1) {
A = blk->cty_a; B = blk->cty_b;
@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, @@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
FR(48); PFR(56);
if (unlikely(H == 0xA41F32E7)) {
(*h0count)++;
if (unlikely(submit_nonce(thr, work, nonce) == false)) {
applog(LOG_ERR, "Failed to submit work, exiting");
goto out;
@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, @@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
G += 0x1f83d9ab;
G = ByteReverse(G);
if (G < best_g) {
*best_nonce = nonce;
if (G < best_g)
best_g = G;
}
}
}
out:
// if (unlikely(best_g == ~0)) applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
return best_g;
if (unlikely(best_g == ~0))
applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
}

4
findnonce.h

@ -19,6 +19,4 @@ typedef struct { @@ -19,6 +19,4 @@ typedef struct {
} dev_blk_ctx;
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count);
extern void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start);

Loading…
Cancel
Save