diff --git a/configure.ac b/configure.ac index ff3df97c..18490818 100644 --- a/configure.ac +++ b/configure.ac @@ -427,7 +427,7 @@ AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk121016"], [Filename for phatk kernel AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm130302"], [Filename for poclbm kernel]) AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn121016"], [Filename for diakgcn kernel]) AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo130302"], [Filename for diablo kernel]) -AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt130302"], [Filename for scrypt kernel]) +AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt130511"], [Filename for scrypt kernel]) AC_SUBST(OPENCL_LIBS) diff --git a/driver-opencl.c b/driver-opencl.c index acd5f221..8168aa95 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1325,9 +1325,10 @@ static bool opencl_thread_prepare(struct thr_info *thr) int virtual_gpu = cgpu->virtual_gpu; int i = thr->id; static bool failmessage = false; + int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE; if (!blank_res) - blank_res = calloc(BUFFERSIZE, 1); + blank_res = calloc(buffersize, 1); if (!blank_res) { applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); return false; @@ -1406,6 +1407,7 @@ static bool opencl_thread_init(struct thr_info *thr) cl_int status = 0; thrdata = calloc(1, sizeof(*thrdata)); thr->cgpu_data = thrdata; + int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE; if (!thrdata) { applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); @@ -1433,7 +1435,7 @@ static bool opencl_thread_init(struct thr_info *thr) break; } - thrdata->res = calloc(BUFFERSIZE, 1); + thrdata->res = calloc(buffersize, 1); if (!thrdata->res) { free(thrdata); @@ -1442,7 +1444,7 @@ static bool opencl_thread_init(struct thr_info *thr) } status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, - BUFFERSIZE, blank_res, 0, NULL, NULL); + buffersize, blank_res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); return false; @@ -1483,6 +1485,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, size_t globalThreads[1]; size_t localThreads[1] = { clState->wsize }; int64_t hashes; + int found = opt_scrypt ? SCRYPT_FOUND : FOUND; + int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE; /* Windows' timer resolution is only 15ms so oversample 5x */ if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) { @@ -1527,7 +1531,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, } status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - BUFFERSIZE, thrdata->res, 0, NULL, NULL); + buffersize, thrdata->res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status); return -1; @@ -1542,17 +1546,17 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, clFinish(clState->commandQueue); /* FOUND entry is used as a counter to say how many nonces exist */ - if (thrdata->res[FOUND]) { + if (thrdata->res[found]) { /* Clear the buffer again */ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - BUFFERSIZE, blank_res, 0, NULL, NULL); + buffersize, blank_res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); return -1; } applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id); postcalc_hash_async(thr, work, thrdata->res); - memset(thrdata->res, 0, BUFFERSIZE); + memset(thrdata->res, 0, buffersize); /* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */ clFinish(clState->commandQueue); } diff --git a/findnonce.c b/findnonce.c index 2f9d27a7..baa652f1 100644 --- a/findnonce.c +++ b/findnonce.c @@ -174,7 +174,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) struct pc_data { struct thr_info *thr; struct work *work; - uint32_t res[MAXBUFFERS]; + uint32_t res[SCRYPT_MAXBUFFERS]; pthread_t pth; int found; }; @@ -184,20 +184,21 @@ static void *postcalc_hash(void *userdata) struct pc_data *pcd = (struct pc_data *)userdata; struct thr_info *thr = pcd->thr; unsigned int entry = 0; + int found = opt_scrypt ? SCRYPT_FOUND : FOUND; pthread_detach(pthread_self()); /* To prevent corrupt values in FOUND from trying to read beyond the * end of the res[] array */ - if (unlikely(pcd->res[FOUND] & ~FOUND)) { + if (unlikely(pcd->res[found] & ~found)) { applog(LOG_WARNING, "%s%d: invalid nonce count - HW error", thr->cgpu->drv->name, thr->cgpu->device_id); hw_errors++; thr->cgpu->hw_errors++; - pcd->res[FOUND] &= FOUND; + pcd->res[found] &= found; } - for (entry = 0; entry < pcd->res[FOUND]; entry++) { + for (entry = 0; entry < pcd->res[found]; entry++) { uint32_t nonce = pcd->res[entry]; applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry); @@ -213,6 +214,8 @@ static void *postcalc_hash(void *userdata) void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) { struct pc_data *pcd = malloc(sizeof(struct pc_data)); + int buffersize; + if (unlikely(!pcd)) { applog(LOG_ERR, "Failed to malloc pc_data in postcalc_hash_async"); return; @@ -220,7 +223,8 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) pcd->thr = thr; pcd->work = copy_work(work); - memcpy(&pcd->res, res, BUFFERSIZE); + buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE; + memcpy(&pcd->res, res, buffersize); if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) { applog(LOG_ERR, "Failed to create postcalc_hash thread"); diff --git a/findnonce.h b/findnonce.h index 610f6f8d..fc5a157f 100644 --- a/findnonce.h +++ b/findnonce.h @@ -8,6 +8,10 @@ #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS) #define FOUND (0x0F) +#define SCRYPT_MAXBUFFERS (0x100) +#define SCRYPT_BUFFERSIZE (sizeof(uint32_t) * SCRYPT_MAXBUFFERS) +#define SCRYPT_FOUND (0xFF) + #ifdef HAVE_OPENCL extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res); diff --git a/ocl.c b/ocl.c index 8709f4f0..4a1724df 100644 --- a/ocl.c +++ b/ocl.c @@ -826,7 +826,8 @@ built: applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status); return NULL; } - } + clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status); + } else #endif clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status); if (status != CL_SUCCESS) { diff --git a/scrypt130302.cl b/scrypt130511.cl similarity index 96% rename from scrypt130302.cl rename to scrypt130511.cl index 6979458d..5ae0304d 100644 --- a/scrypt130302.cl +++ b/scrypt130511.cl @@ -808,8 +808,8 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) unshittify(X); } -#define FOUND (0x0F) -#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce +#define SCRYPT_FOUND (0xFF) +#define SETFOUND(Xnonce) output[output[SCRYPT_FOUND]++] = Xnonce __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global const uint4 * restrict input,