1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-09-11 13:42:17 +00:00

Use a buffer of up to 512 * 4 integers when retrieving work from the GPU.

This allows each local thread id to have one slot to put any positive results into, thus making overlapping results far less likely.
Thus races will be much rarer, allowing more threads.
It should also pick up blocks close to each other more reliably and hopefully decrease the number of rejects and opencl errors.
Do the search over the buffer entirely in a separate thread to allow the GPU to stay as busy as possible.
Detach threads from themselves to prevent unlucky even where dereferencing occurs by freeing the data that stores the thread info.
This commit is contained in:
Con Kolivas 2011-06-29 23:38:16 +10:00
parent 6af84770d0
commit 2b6e841673
6 changed files with 125 additions and 82 deletions

View File

@ -676,6 +676,7 @@ static void *submit_work(void *userdata)
err_out: err_out:
workio_cmd_free(wc); workio_cmd_free(wc);
out: out:
pthread_detach(pthread_self());
free(sd); free(sd);
return NULL; return NULL;
} }
@ -696,7 +697,6 @@ static bool submit_work_async(struct thr_info *thr, const struct work *work_in)
applog(LOG_ERR, "Failed to create submit_thread"); applog(LOG_ERR, "Failed to create submit_thread");
return false; return false;
} }
pthread_detach(sd->pth);
return true; return true;
} }
@ -900,12 +900,8 @@ static void *gpuminer_thread(void *userdata)
{ {
struct thr_info *mythr = userdata; struct thr_info *mythr = userdata;
struct timeval tv_start, diff; struct timeval tv_start, diff;
int thr_id = mythr->id; const int thr_id = mythr->id;
uint32_t res[128], blank_res[128]; uint32_t *res, *blank_res;
cl_kernel *kernel;
memset(res, 0, BUFFERSIZE);
memset(blank_res, 0, BUFFERSIZE);
size_t globalThreads[1]; size_t globalThreads[1];
size_t localThreads[1]; size_t localThreads[1];
@ -913,7 +909,7 @@ static void *gpuminer_thread(void *userdata)
cl_int status; cl_int status;
_clState *clState = clStates[thr_id]; _clState *clState = clStates[thr_id];
kernel = &clState->kernel; const cl_kernel *kernel = &clState->kernel;
struct work *work = malloc(sizeof(struct work)); struct work *work = malloc(sizeof(struct work));
unsigned const int threads = 1 << (15 + scan_intensity); unsigned const int threads = 1 << (15 + scan_intensity);
@ -921,6 +917,14 @@ static void *gpuminer_thread(void *userdata)
unsigned const int hashes = threads * vectors; unsigned const int hashes = threads * vectors;
unsigned int hashes_done = 0; unsigned int hashes_done = 0;
res = calloc(BUFFERSIZE, 1);
blank_res = calloc(BUFFERSIZE, 1);
if (!res || !blank_res) {
applog(LOG_ERR, "Failed to calloc in gpuminer_thread");
goto out;
}
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
globalThreads[0] = threads; globalThreads[0] = threads;
localThreads[0] = clState->work_size; localThreads[0] = clState->work_size;
@ -966,21 +970,17 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; } { applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; }
} }
/* 127 is used as a flag to say nonces exist */ /* MAXBUFFERS entry is used as a flag to say nonces exist */
if (unlikely(res[127])) { if (res[MAXBUFFERS]) {
/* Clear the buffer again */ /* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL); BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
for (i = 0; i < 127; i++) { if (opt_debug)
if (res[i]) { applog(LOG_DEBUG, "GPU %d found something?", gpu_from_thr_id(thr_id));
if (opt_debug) postcalc_hash_async(mythr, work, res);
applog(LOG_DEBUG, "GPU %d found something?", gpu_from_thr_id(thr_id)); memset(res, 0, BUFFERSIZE);
postcalc_hash_async(mythr, work, res[i]);
} else
break;
}
clFinish(clState->commandQueue); clFinish(clState->commandQueue);
} }

View File

@ -138,7 +138,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
struct pc_data { struct pc_data {
struct thr_info *thr; struct thr_info *thr;
struct work work; struct work work;
uint32_t start; uint32_t res[MAXBUFFERS];
pthread_t pth; pthread_t pth;
}; };
@ -148,13 +148,28 @@ static void *postcalc_hash(void *userdata)
struct thr_info *thr = pcd->thr; struct thr_info *thr = pcd->thr;
dev_blk_ctx *blk = &pcd->work.blk; dev_blk_ctx *blk = &pcd->work.blk;
struct work *work = &pcd->work; struct work *work = &pcd->work;
uint32_t start = pcd->start; uint32_t start;
cl_uint A, B, C, D, E, F, G, H; cl_uint A, B, C, D, E, F, G, H;
cl_uint W[16]; cl_uint W[16];
cl_uint nonce; cl_uint nonce;
cl_uint best_g = ~0; cl_uint best_g;
uint32_t end = start + 1026; uint32_t end;
int entry = 0;
cycle:
while (entry < MAXBUFFERS) {
if (pcd->res[entry]) {
start = pcd->res[entry++];
break;
}
entry++;
}
if (entry == MAXBUFFERS)
goto out;
best_g = ~0;
end = start + 1026;
for (nonce = start; nonce != end; nonce+=1) { for (nonce = start; nonce != end; nonce+=1) {
A = blk->cty_a; B = blk->cty_b; A = blk->cty_a; B = blk->cty_b;
@ -189,7 +204,7 @@ static void *postcalc_hash(void *userdata)
if (unlikely(H == 0xA41F32E7)) { if (unlikely(H == 0xA41F32E7)) {
if (unlikely(submit_nonce(thr, work, nonce) == false)) { if (unlikely(submit_nonce(thr, work, nonce) == false)) {
applog(LOG_ERR, "Failed to submit work, exiting"); applog(LOG_ERR, "Failed to submit work, exiting");
goto out; break;
} }
G += 0x1f83d9ab; G += 0x1f83d9ab;
@ -199,17 +214,22 @@ static void *postcalc_hash(void *userdata)
best_g = G; best_g = G;
} }
} }
out:
if (unlikely(best_g == ~0)) { if (unlikely(best_g == ~0)) {
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "No best_g found! Error in OpenCL code?"); applog(LOG_DEBUG, "No best_g found! Error in OpenCL code?");
hw_errors++; hw_errors++;
thr->cgpu->hw_errors++; thr->cgpu->hw_errors++;
} }
if (entry < MAXBUFFERS)
goto cycle;
out:
pthread_detach(pthread_self());
free(pcd); free(pcd);
return NULL;
} }
void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start) void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
{ {
struct pc_data *pcd = malloc(sizeof(struct pc_data)); struct pc_data *pcd = malloc(sizeof(struct pc_data));
if (unlikely(!pcd)) { if (unlikely(!pcd)) {
@ -219,11 +239,10 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start
pcd->thr = thr; pcd->thr = thr;
memcpy(&pcd->work, work, sizeof(struct work)); memcpy(&pcd->work, work, sizeof(struct work));
pcd->start = start; memcpy(&pcd->res, res, BUFFERSIZE);
if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) { if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
applog(LOG_ERR, "Failed to create postcalc_hash thread"); applog(LOG_ERR, "Failed to create postcalc_hash thread");
return; return;
} }
pthread_detach(pcd->pth);
} }

View File

@ -3,8 +3,10 @@
#include "miner.h" #include "miner.h"
#define MAXTHREADS (0xFFFFFFFEULL) #define MAXTHREADS (0xFFFFFFFEULL)
#define BUFFERSIZE (sizeof(uint32_t) * 128) /* Maximum worksize 512 * maximum vectors 4 plus one flag entry */
#define MAXBUFFERS (4 * 512)
#define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start); extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
#endif /*__FINDNONCE_H__*/ #endif /*__FINDNONCE_H__*/

2
ocl.c
View File

@ -513,7 +513,7 @@ retry:
return NULL; return NULL;
} }
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * 128, NULL, &status); clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
if(status != CL_SUCCESS) { if(status != CL_SUCCESS) {
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)"); applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
return NULL; return NULL;

View File

@ -141,7 +141,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
W[19] = P1(19) + P2(19) + P3(19); W[19] = P1(19) + P2(19) + P3(19);
W[18] = P1(18) + P3(18) + P4(18); W[18] = P1(18) + P3(18) + P4(18);
W[20] = P2(20) + P3(20) + P4(20); W[20] = P2(20) + P3(20) + P4(20);
uint it; uint it = get_local_id(0);
#ifdef VECTORS4 #ifdef VECTORS4
W[3] = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3); W[3] = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
@ -363,59 +363,70 @@ void search( const uint state0, const uint state1, const uint state2, const uint
partround(64 + 60); partround(64 + 60);
Vals[7] += H[7]; Vals[7] += H[7];
#define MAXBUFFERS (4 * 512)
#if defined(VECTORS4) || defined(VECTORS2) #if defined(VECTORS4) || defined(VECTORS2)
if (Vals[7].x == 0) if (Vals[7].x == 0)
{ {
for (it = 0; it != 127; it++) { // Unlikely event there is something here already !
if (!output[it]) { if (output[it]) {
output[it] = W[3].x; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = W[3].x;
output[MAXBUFFERS] = 1;
} }
if (Vals[7].y == 0) if (Vals[7].y == 0)
{ {
for (it = 0; it != 127; it++) { it += 512;
if (!output[it]) { if (output[it]) {
output[it] = W[3].y; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = W[3].y;
output[MAXBUFFERS] = 1;
} }
#ifdef VECTORS4 #ifdef VECTORS4
if (Vals[7].z == 0) if (Vals[7].z == 0)
{ {
for (it = 0; it != 127; it++) { it += 1024;
if (!output[it]) { if (output[it]) {
output[it] = W[3].z; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = W[3].z;
output[MAXBUFFERS] = 1;
} }
if (Vals[7].w == 0) if (Vals[7].w == 0)
{ {
for (it = 0; it != 127; it++) { it += 1536;
if (!output[it]) { if (output[it]) {
output[it] = W[3].w; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = W[3].w;
output[MAXBUFFERS] = 1;
} }
#endif #endif
#else #else
if (Vals[7] == 0) if (Vals[7] == 0)
{ {
for (it = 0; it != 127; it++) { if (output[it]) {
if (!output[it]) { for (it = 0; it < MAXBUFFERS; it++) {
output[it] = W[3]; if (!output[it])
output[127] = 1; break;
break;
} }
} }
output[it] = W[3];
output[MAXBUFFERS] = 1;
} }
#endif #endif

View File

@ -79,7 +79,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
u W[24]; u W[24];
u Vals[8]; u Vals[8];
u nonce; u nonce;
u it; uint it = get_local_id(0);
#ifdef VECTORS4 #ifdef VECTORS4
nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3); nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
@ -627,59 +627,70 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
Vals[7]+=0x5be0cd19U; Vals[7]+=0x5be0cd19U;
#define MAXBUFFERS (4 * 512)
#if defined(VECTORS4) || defined(VECTORS2) #if defined(VECTORS4) || defined(VECTORS2)
if (Vals[7].x == 0) if (Vals[7].x == 0)
{ {
for (it.x = 0; it.x != 127; it.x++) { // Unlikely event there is something here already !
if (!output[it.x]) { if (output[it]) {
output[it.x] = nonce.x; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = nonce.x;
output[MAXBUFFERS] = 1;
} }
if (Vals[7].y == 0) if (Vals[7].y == 0)
{ {
for (it.y = 0; it.y != 127; it.y++) { it += 512;
if (!output[it.y]) { if (output[it]) {
output[it.y] = nonce.y; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = nonce.y;
output[MAXBUFFERS] = 1;
} }
#ifdef VECTORS4 #ifdef VECTORS4
if (Vals[7].z == 0) if (Vals[7].z == 0)
{ {
for (it.z = 0; it.z != 127; it.z++) { it += 1024;
if (!output[it.z]) { if (output[it]) {
output[it.z] = nonce.z; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = nonce.z;
output[MAXBUFFERS] = 1;
} }
if (Vals[7].w == 0) if (Vals[7].w == 0)
{ {
for (it.w = 0; it.w != 127; it.w++) { it += 1536;
if (!output[it.w]) { if (output[it]) {
output[it.w] = nonce.w; for (it = 0; it < MAXBUFFERS; it++) {
output[127] = 1; if (!output[it])
break; break;
} }
} }
output[it] = nonce.w;
output[MAXBUFFERS] = 1;
} }
#endif #endif
#else #else
if (Vals[7] == 0) if (Vals[7] == 0)
{ {
for (it = 0; it != 127; it++) { if (output[it]) {
if (!output[it]) { for (it = 0; it < MAXBUFFERS; it++) {
output[it] = nonce; if (!output[it])
output[127] = 1; break;
break;
} }
} }
output[it] = nonce;
output[MAXBUFFERS] = 1;
} }
#endif #endif
} }