diff --git a/cpu-miner.c b/cpu-miner.c index fb4acb76..c87ecab8 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -676,6 +676,7 @@ static void *submit_work(void *userdata) err_out: workio_cmd_free(wc); out: + pthread_detach(pthread_self()); free(sd); return NULL; } @@ -696,7 +697,6 @@ static bool submit_work_async(struct thr_info *thr, const struct work *work_in) applog(LOG_ERR, "Failed to create submit_thread"); return false; } - pthread_detach(sd->pth); return true; } @@ -900,12 +900,8 @@ static void *gpuminer_thread(void *userdata) { struct thr_info *mythr = userdata; struct timeval tv_start, diff; - int thr_id = mythr->id; - uint32_t res[128], blank_res[128]; - cl_kernel *kernel; - - memset(res, 0, BUFFERSIZE); - memset(blank_res, 0, BUFFERSIZE); + const int thr_id = mythr->id; + uint32_t *res, *blank_res; size_t globalThreads[1]; size_t localThreads[1]; @@ -913,7 +909,7 @@ static void *gpuminer_thread(void *userdata) cl_int status; _clState *clState = clStates[thr_id]; - kernel = &clState->kernel; + const cl_kernel *kernel = &clState->kernel; struct work *work = malloc(sizeof(struct work)); unsigned const int threads = 1 << (15 + scan_intensity); @@ -921,6 +917,14 @@ static void *gpuminer_thread(void *userdata) unsigned const int hashes = threads * vectors; unsigned int hashes_done = 0; + res = calloc(BUFFERSIZE, 1); + blank_res = calloc(BUFFERSIZE, 1); + + if (!res || !blank_res) { + applog(LOG_ERR, "Failed to calloc in gpuminer_thread"); + goto out; + } + gettimeofday(&tv_start, NULL); globalThreads[0] = threads; localThreads[0] = clState->work_size; @@ -966,21 +970,17 @@ static void *gpuminer_thread(void *userdata) { applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; } } - /* 127 is used as a flag to say nonces exist */ - if (unlikely(res[127])) { + /* MAXBUFFERS entry is used as a flag to say nonces exist */ + if (res[MAXBUFFERS]) { /* Clear the buffer again */ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, BUFFERSIZE, blank_res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - for (i = 0; i < 127; i++) { - if (res[i]) { - if (opt_debug) - applog(LOG_DEBUG, "GPU %d found something?", gpu_from_thr_id(thr_id)); - postcalc_hash_async(mythr, work, res[i]); - } else - break; - } + if (opt_debug) + applog(LOG_DEBUG, "GPU %d found something?", gpu_from_thr_id(thr_id)); + postcalc_hash_async(mythr, work, res); + memset(res, 0, BUFFERSIZE); clFinish(clState->commandQueue); } diff --git a/findnonce.c b/findnonce.c index ca9b12fa..877e3f59 100644 --- a/findnonce.c +++ b/findnonce.c @@ -138,7 +138,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { struct pc_data { struct thr_info *thr; struct work work; - uint32_t start; + uint32_t res[MAXBUFFERS]; pthread_t pth; }; @@ -148,13 +148,28 @@ static void *postcalc_hash(void *userdata) struct thr_info *thr = pcd->thr; dev_blk_ctx *blk = &pcd->work.blk; struct work *work = &pcd->work; - uint32_t start = pcd->start; + uint32_t start; cl_uint A, B, C, D, E, F, G, H; cl_uint W[16]; cl_uint nonce; - cl_uint best_g = ~0; - uint32_t end = start + 1026; + cl_uint best_g; + uint32_t end; + int entry = 0; + +cycle: + while (entry < MAXBUFFERS) { + if (pcd->res[entry]) { + start = pcd->res[entry++]; + break; + } + entry++; + } + if (entry == MAXBUFFERS) + goto out; + + best_g = ~0; + end = start + 1026; for (nonce = start; nonce != end; nonce+=1) { A = blk->cty_a; B = blk->cty_b; @@ -189,7 +204,7 @@ static void *postcalc_hash(void *userdata) if (unlikely(H == 0xA41F32E7)) { if (unlikely(submit_nonce(thr, work, nonce) == false)) { applog(LOG_ERR, "Failed to submit work, exiting"); - goto out; + break; } G += 0x1f83d9ab; @@ -199,17 +214,22 @@ static void *postcalc_hash(void *userdata) best_g = G; } } -out: + if (unlikely(best_g == ~0)) { if (opt_debug) applog(LOG_DEBUG, "No best_g found! Error in OpenCL code?"); hw_errors++; thr->cgpu->hw_errors++; } + if (entry < MAXBUFFERS) + goto cycle; +out: + pthread_detach(pthread_self()); free(pcd); + return NULL; } -void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start) +void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) { struct pc_data *pcd = malloc(sizeof(struct pc_data)); if (unlikely(!pcd)) { @@ -219,11 +239,10 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start pcd->thr = thr; memcpy(&pcd->work, work, sizeof(struct work)); - pcd->start = start; + memcpy(&pcd->res, res, BUFFERSIZE); if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) { applog(LOG_ERR, "Failed to create postcalc_hash thread"); return; } - pthread_detach(pcd->pth); } diff --git a/findnonce.h b/findnonce.h index 34e28d09..bc5070f1 100644 --- a/findnonce.h +++ b/findnonce.h @@ -3,8 +3,10 @@ #include "miner.h" #define MAXTHREADS (0xFFFFFFFEULL) -#define BUFFERSIZE (sizeof(uint32_t) * 128) +/* Maximum worksize 512 * maximum vectors 4 plus one flag entry */ +#define MAXBUFFERS (4 * 512) +#define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1)) extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); -extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t start); +extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res); #endif /*__FINDNONCE_H__*/ diff --git a/ocl.c b/ocl.c index cd608756..ded65ea8 100644 --- a/ocl.c +++ b/ocl.c @@ -513,7 +513,7 @@ retry: return NULL; } - clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * 128, NULL, &status); + clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status); if(status != CL_SUCCESS) { applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)"); return NULL; diff --git a/phatk.cl b/phatk.cl index 2af85f5a..ed045415 100644 --- a/phatk.cl +++ b/phatk.cl @@ -141,7 +141,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint W[19] = P1(19) + P2(19) + P3(19); W[18] = P1(18) + P3(18) + P4(18); W[20] = P2(20) + P3(20) + P4(20); - uint it; + uint it = get_local_id(0); #ifdef VECTORS4 W[3] = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3); @@ -363,59 +363,70 @@ void search( const uint state0, const uint state1, const uint state2, const uint partround(64 + 60); Vals[7] += H[7]; +#define MAXBUFFERS (4 * 512) + #if defined(VECTORS4) || defined(VECTORS2) if (Vals[7].x == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = W[3].x; - output[127] = 1; - break; + // Unlikely event there is something here already ! + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = W[3].x; + output[MAXBUFFERS] = 1; } if (Vals[7].y == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = W[3].y; - output[127] = 1; - break; + it += 512; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = W[3].y; + output[MAXBUFFERS] = 1; } #ifdef VECTORS4 if (Vals[7].z == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = W[3].z; - output[127] = 1; - break; + it += 1024; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = W[3].z; + output[MAXBUFFERS] = 1; } if (Vals[7].w == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = W[3].w; - output[127] = 1; - break; + it += 1536; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = W[3].w; + output[MAXBUFFERS] = 1; } #endif #else if (Vals[7] == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = W[3]; - output[127] = 1; - break; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = W[3]; + output[MAXBUFFERS] = 1; } #endif diff --git a/poclbm.cl b/poclbm.cl index 4b959cfd..cf4f2408 100644 --- a/poclbm.cl +++ b/poclbm.cl @@ -79,7 +79,7 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c u W[24]; u Vals[8]; u nonce; - u it; + uint it = get_local_id(0); #ifdef VECTORS4 nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3); @@ -627,59 +627,70 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c Vals[7]+=0x5be0cd19U; +#define MAXBUFFERS (4 * 512) + #if defined(VECTORS4) || defined(VECTORS2) if (Vals[7].x == 0) { - for (it.x = 0; it.x != 127; it.x++) { - if (!output[it.x]) { - output[it.x] = nonce.x; - output[127] = 1; - break; + // Unlikely event there is something here already ! + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = nonce.x; + output[MAXBUFFERS] = 1; } if (Vals[7].y == 0) { - for (it.y = 0; it.y != 127; it.y++) { - if (!output[it.y]) { - output[it.y] = nonce.y; - output[127] = 1; - break; + it += 512; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = nonce.y; + output[MAXBUFFERS] = 1; } #ifdef VECTORS4 if (Vals[7].z == 0) { - for (it.z = 0; it.z != 127; it.z++) { - if (!output[it.z]) { - output[it.z] = nonce.z; - output[127] = 1; - break; + it += 1024; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = nonce.z; + output[MAXBUFFERS] = 1; } if (Vals[7].w == 0) { - for (it.w = 0; it.w != 127; it.w++) { - if (!output[it.w]) { - output[it.w] = nonce.w; - output[127] = 1; - break; + it += 1536; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = nonce.w; + output[MAXBUFFERS] = 1; } #endif #else if (Vals[7] == 0) { - for (it = 0; it != 127; it++) { - if (!output[it]) { - output[it] = nonce; - output[127] = 1; - break; + if (output[it]) { + for (it = 0; it < MAXBUFFERS; it++) { + if (!output[it]) + break; } } + output[it] = nonce; + output[MAXBUFFERS] = 1; } #endif } \ No newline at end of file