mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 12:34:27 +00:00
Optimise work loop to make cl calls asynchronous where possible.
This commit is contained in:
parent
f05270b8c8
commit
f117675ac2
74
cpu-miner.c
74
cpu-miner.c
@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata)
|
||||
struct thr_info *mythr = userdata;
|
||||
struct timeval tv_start;
|
||||
int thr_id = mythr->id;
|
||||
uint32_t res[128];
|
||||
uint32_t res[128], blank_res[128];
|
||||
|
||||
setpriority(PRIO_PROCESS, 0, 19);
|
||||
|
||||
memset(res, 0, BUFFERSIZE);
|
||||
memset(blank_res, 0, BUFFERSIZE);
|
||||
|
||||
size_t globalThreads[1];
|
||||
size_t localThreads[1];
|
||||
@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata)
|
||||
{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }
|
||||
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
|
||||
BUFFERSIZE, res, 0, NULL, NULL);
|
||||
BUFFERSIZE, blank_res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
|
||||
struct work *work = malloc(sizeof(struct work));
|
||||
bool need_work = true;
|
||||
unsigned int threads = 1 << 22;
|
||||
unsigned int h0count = 0;
|
||||
|
||||
gettimeofday(&tv_start, NULL);
|
||||
globalThreads[0] = threads;
|
||||
localThreads[0] = 128;
|
||||
|
||||
while (1) {
|
||||
struct timeval tv_end, diff;
|
||||
int i;
|
||||
|
||||
if (need_work) {
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "getwork");
|
||||
|
||||
/* obtain new work from internal workio thread */
|
||||
if (unlikely(!get_work(mythr, work))) {
|
||||
applog(LOG_ERR, "work retrieval failed, exiting "
|
||||
@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata)
|
||||
}
|
||||
|
||||
precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
|
||||
|
||||
work->blk.nonce = 0;
|
||||
need_work = false;
|
||||
}
|
||||
globalThreads[0] = threads;
|
||||
localThreads[0] = 128;
|
||||
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
|
||||
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
|
||||
work_restart[thr_id].restart = 0;
|
||||
need_work = false;
|
||||
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "getwork");
|
||||
|
||||
}
|
||||
clFinish(clState->commandQueue);
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
|
||||
|
||||
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
|
||||
BUFFERSIZE, res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
|
||||
/* 127 is used as a flag to say nonces exist */
|
||||
if (unlikely(res[127])) {
|
||||
/* 127 is used as a flag to say nonces exist */
|
||||
/* Clear the buffer again */
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
||||
BUFFERSIZE, blank_res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
for (i = 0; i < 127; i++) {
|
||||
if (res[i]) {
|
||||
uint32_t start = res[i];
|
||||
uint32_t my_g, my_nonce;
|
||||
|
||||
applog(LOG_INFO, "GPU Found something?");
|
||||
my_g = postcalc_hash(mythr, &work->blk, work, start, start + 1026, &my_nonce, &h0count);
|
||||
res[i] = 0;
|
||||
postcalc_hash(mythr, &work->blk, work, res[i]);
|
||||
} else
|
||||
break;
|
||||
}
|
||||
/* Clear the buffer again */
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
|
||||
BUFFERSIZE, res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
clFinish(clState->commandQueue);
|
||||
}
|
||||
|
||||
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
||||
BUFFERSIZE, res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
|
||||
|
||||
gettimeofday(&tv_end, NULL);
|
||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||
hashmeter(thr_id, &diff, threads);
|
||||
@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata)
|
||||
if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
|
||||
(work_restart[thr_id].restart))
|
||||
need_work = true;
|
||||
|
||||
clFinish(clState->commandQueue);
|
||||
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
|
||||
sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
|
||||
|
||||
}
|
||||
out:
|
||||
tq_freeze(mythr->q);
|
||||
|
16
findnonce.c
16
findnonce.c
@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
|
||||
R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
|
||||
R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
|
||||
|
||||
uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
|
||||
struct work *work, uint32_t start, uint32_t end,
|
||||
uint32_t *best_nonce, unsigned int *h0count)
|
||||
void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start)
|
||||
{
|
||||
cl_uint A, B, C, D, E, F, G, H;
|
||||
cl_uint W[16];
|
||||
cl_uint nonce;
|
||||
cl_uint best_g = ~0;
|
||||
uint32_t end = start + 1026;
|
||||
|
||||
for (nonce = start; nonce != end; nonce+=1) {
|
||||
A = blk->cty_a; B = blk->cty_b;
|
||||
@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
|
||||
FR(48); PFR(56);
|
||||
|
||||
if (unlikely(H == 0xA41F32E7)) {
|
||||
(*h0count)++;
|
||||
|
||||
if (unlikely(submit_nonce(thr, work, nonce) == false)) {
|
||||
applog(LOG_ERR, "Failed to submit work, exiting");
|
||||
goto out;
|
||||
@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
|
||||
G += 0x1f83d9ab;
|
||||
G = ByteReverse(G);
|
||||
|
||||
if (G < best_g) {
|
||||
*best_nonce = nonce;
|
||||
if (G < best_g)
|
||||
best_g = G;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
// if (unlikely(best_g == ~0)) applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
|
||||
|
||||
return best_g;
|
||||
if (unlikely(best_g == ~0))
|
||||
applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
|
||||
}
|
||||
|
@ -19,6 +19,4 @@ typedef struct {
|
||||
} dev_blk_ctx;
|
||||
|
||||
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
|
||||
extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
|
||||
struct work *work, uint32_t start, uint32_t end,
|
||||
uint32_t *best_nonce, unsigned int *h0count);
|
||||
extern void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start);
|
||||
|
Loading…
x
Reference in New Issue
Block a user