diff --git a/cpu-miner.c b/cpu-miner.c index e6d7d419..7fb9c461 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -738,12 +738,52 @@ enum { static _clState *clStates[16]; +/* queue kernel parameter */ +static inline int qkp(cl_kernel *kernel, void *param, int param_num) +{ + return clSetKernelArg(*kernel, param_num, sizeof(param), param); +} + +static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel, + struct _cl_mem *output) +{ + cl_int status = 0; + + status |= qkp(kernel, (void *)&blk->ctx_a, 0); + status |= qkp(kernel, (void *)&blk->ctx_b, 1); + status |= qkp(kernel, (void *)&blk->ctx_c, 2); + status |= qkp(kernel, (void *)&blk->ctx_d, 3); + status |= qkp(kernel, (void *)&blk->ctx_e, 4); + status |= qkp(kernel, (void *)&blk->ctx_f, 5); + status |= qkp(kernel, (void *)&blk->ctx_g, 6); + status |= qkp(kernel, (void *)&blk->ctx_h, 7); + status |= qkp(kernel, (void *)&blk->cty_b, 8); + status |= qkp(kernel, (void *)&blk->cty_c, 9); + status |= qkp(kernel, (void *)&blk->cty_d, 10); + status |= qkp(kernel, (void *)&blk->cty_f, 11); + status |= qkp(kernel, (void *)&blk->cty_g, 12); + status |= qkp(kernel, (void *)&blk->cty_h, 13); + status |= qkp(kernel, (void *)&blk->nonce, 14); + status |= qkp(kernel, (void *)&blk->fW0, 15); + status |= qkp(kernel, (void *)&blk->fW1, 16); + status |= qkp(kernel, (void *)&blk->fW2, 17); + status |= qkp(kernel, (void *)&blk->fW3, 18); + status |= qkp(kernel, (void *)&blk->fW15, 19); + status |= qkp(kernel, (void *)&blk->fW01r, 20); + status |= qkp(kernel, (void *)&blk->fcty_e, 21); + status |= qkp(kernel, (void *)&blk->fcty_e2, 22); + status |= qkp(kernel, (void *)output, 23); + + return status; +} + static void *gpuminer_thread(void *userdata) { struct thr_info *mythr = userdata; struct timeval tv_start; int thr_id = mythr->id; uint32_t res[128], blank_res[128]; + cl_kernel *kernel; setpriority(PRIO_PROCESS, 0, 19); @@ -755,14 +795,7 @@ static void *gpuminer_thread(void *userdata) cl_int status; _clState *clState = clStates[thr_id]; - - status = clSetKernelArg(clState->kernel, 0, sizeof(cl_mem), (void *)&clState->inputBuffer); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: Setting kernel argument 1.\n"); goto out; } - - status = clSetKernelArg(clState->kernel, 1, sizeof(cl_mem), (void *)&clState->outputBuffer); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; } + kernel = &clState->kernel; status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, BUFFERSIZE, blank_res, 0, NULL, NULL); @@ -791,10 +824,9 @@ static void *gpuminer_thread(void *userdata) precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); work->blk.nonce = 0; - status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0, - sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL); + status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer); if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } + { applog(LOG_ERR, "Error: clSetKernelArg failed."); exit (1); } work_restart[thr_id].restart = 0; need_work = false; @@ -805,7 +837,7 @@ static void *gpuminer_thread(void *userdata) } clFinish(clState->commandQueue); - status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, + status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; } @@ -844,12 +876,9 @@ static void *gpuminer_thread(void *userdata) need_work = true; clFinish(clState->commandQueue); - - status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0, - sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL); + status = qkp(kernel, (void *)&work->blk.nonce, 14); if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - + { applog(LOG_ERR, "Error: clSetKernelArg failed."); goto out; } } out: tq_freeze(mythr->q); diff --git a/ocl.c b/ocl.c index 326f303d..7cfb4000 100644 --- a/ocl.c +++ b/ocl.c @@ -374,7 +374,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { } /* get a kernel object handle for a kernel with the given name */ - clState->kernel = clCreateKernel(clState->program, "oclminer", &status); + clState->kernel = clCreateKernel(clState->program, "search", &status); if(status != CL_SUCCESS) { printf("Error: Creating Kernel from program. (clCreateKernel)\n"); @@ -391,12 +391,6 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { return NULL; } - clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, sizeof(dev_blk_ctx), NULL, &status); - if(status != CL_SUCCESS) { - printf("Error: clCreateBuffer (inputBuffer)\n"); - return NULL; - } - clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * 128, NULL, &status); if(status != CL_SUCCESS) { printf("Error: clCreateBuffer (outputBuffer)\n"); diff --git a/ocl.h b/ocl.h index 68c16702..eacb33c9 100644 --- a/ocl.h +++ b/ocl.h @@ -11,7 +11,6 @@ typedef struct { cl_kernel kernel; cl_command_queue commandQueue; cl_program program; - cl_mem inputBuffer; cl_mem outputBuffer; } _clState; diff --git a/oclminer.cl b/oclminer.cl index 54fd1a0d..209e4b1b 100644 --- a/oclminer.cl +++ b/oclminer.cl @@ -57,38 +57,21 @@ uint fcty_e; uint fcty_e2; } dev_blk_ctx; -__kernel __attribute__((vec_type_hint(uint))) WGS void oclminer( - __constant dev_blk_ctx *ctx, __global uint *output) +__kernel __attribute__((vec_type_hint(uint))) WGS void search( +const uint state0, const uint state1, const uint state2, const uint state3, +const uint state4, const uint state5, const uint state6, const uint state7, +const uint B1, const uint C1, const uint D1, +const uint F1, const uint G1, const uint H1, +const uint base, +const uint fW0, const uint fW1, const uint fW2, const uint fW3, const uint fW15, const uint fW01r, const uint fcty_e, const uint fcty_e2, +__global uint *output) { - const uint fW0 = ctx->fW0; - const uint fW1 = ctx->fW1; - const uint fW2 = ctx->fW2; - const uint fW3 = ctx->fW3; - const uint fW15 = ctx->fW15; - const uint fW01r = ctx->fW01r; - const uint fcty_e = ctx->fcty_e; - const uint fcty_e2 = ctx->fcty_e2; - const uint state0 = ctx->ctx_a; - const uint state1 = ctx->ctx_b; - const uint state2 = ctx->ctx_c; - const uint state3 = ctx->ctx_d; - const uint state4 = ctx->ctx_e; - const uint state5 = ctx->ctx_f; - const uint state6 = ctx->ctx_g; - const uint state7 = ctx->ctx_h; - const uint B1 = ctx->cty_b; - const uint C1 = ctx->cty_c; - const uint D1 = ctx->cty_d; - const uint F1 = ctx->cty_f; - const uint G1 = ctx->cty_g; - const uint H1 = ctx->cty_h; - uint A, B, C, D, E, F, G, H; uint W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15; uint it; const uint myid = get_global_id(0); - const uint tnonce = ctx->nonce + myid; + const uint tnonce = base + myid; W3 = 0 ^ tnonce; E = fcty_e + W3;