From 94c8be0574ddd8bd91db39c72df94f2ce9b25e53 Mon Sep 17 00:00:00 2001 From: Wolf Date: Fri, 20 May 2016 20:29:06 -0500 Subject: [PATCH] Added host code required to utilize a custom Lyra2REv2 AMD binary. --- algorithm.c | 49 +++++++++++++------------ ocl.c | 104 ---------------------------------------------------- 2 files changed, 25 insertions(+), 128 deletions(-) diff --git a/algorithm.c b/algorithm.c index aab68d95..90775e8c 100644 --- a/algorithm.c +++ b/algorithm.c @@ -880,48 +880,49 @@ static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ unsigned int num; cl_int status = 0; cl_ulong le_target; + uint32_t buf[11]; - // le_target = *(cl_uint *)(blk->work->device_target + 28); le_target = *(cl_ulong *)(blk->work->device_target + 24); - flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); // blake - search kernel = &clState->kernel; num = 0; - // CL_SET_ARG(clState->CLbuffer0); - CL_SET_ARG(clState->buffer1); - CL_SET_ARG(blk->work->blk.ctx_a); - CL_SET_ARG(blk->work->blk.ctx_b); - CL_SET_ARG(blk->work->blk.ctx_c); - CL_SET_ARG(blk->work->blk.ctx_d); - CL_SET_ARG(blk->work->blk.ctx_e); - CL_SET_ARG(blk->work->blk.ctx_f); - CL_SET_ARG(blk->work->blk.ctx_g); - CL_SET_ARG(blk->work->blk.ctx_h); - CL_SET_ARG(blk->work->blk.cty_a); - CL_SET_ARG(blk->work->blk.cty_b); - CL_SET_ARG(blk->work->blk.cty_c); + + buf[0] = blk->work->blk.ctx_a; + buf[1] = blk->work->blk.ctx_b; + buf[2] = blk->work->blk.ctx_c; + buf[3] = blk->work->blk.ctx_d; + buf[4] = blk->work->blk.ctx_e; + buf[5] = blk->work->blk.ctx_f; + buf[6] = blk->work->blk.ctx_g; + buf[7] = blk->work->blk.ctx_h; + buf[8] = blk->work->blk.cty_a; + buf[9] = blk->work->blk.cty_b; + buf[10] = blk->work->blk.cty_c; + + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 44, buf, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); // keccak - search1 kernel = clState->extra_kernels; - CL_SET_ARG_0(clState->buffer1); + CL_SET_ARG_0(clState->padbuffer8); // cubehash - search2 num = 0; - CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); // lyra - search3 num = 0; - CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); - CL_SET_ARG_N(1, clState->padbuffer8); + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); // skein -search4 num = 0; - CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); // cubehash - search5 num = 0; - CL_NEXTKERNEL_SET_ARG_0(clState->buffer1); + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); // bmw - search6 num = 0; - CL_NEXTKERNEL_SET_ARG(clState->buffer1); + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); CL_SET_ARG(clState->outputBuffer); CL_SET_ARG(le_target); @@ -1258,7 +1259,7 @@ static algorithm_settings_t algos[] = { { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, NULL, queue_fresh_kernel, gen_hash, NULL }, { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2re_kernel, gen_hash, NULL }, - { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, + { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, 2 * 8 * 4194304, 0, lyra2rev2_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2rev2_kernel, gen_hash, NULL }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ diff --git a/ocl.c b/ocl.c index 8eb83c78..e1a6b500 100644 --- a/ocl.c +++ b/ocl.c @@ -612,90 +612,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); } - - // Lyra2re v2 TC - else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) { - size_t glob_thread_count; - long max_int; - unsigned char type = 0; - - // determine which intensity type to use - if (cgpu->rawintensity > 0) { - glob_thread_count = cgpu->rawintensity; - max_int = glob_thread_count; - type = 2; - } - else if (cgpu->xintensity > 0) { - glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); - max_int = cgpu->xintensity; - type = 1; - } - else { - glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); - max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); - } - - glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count); - - // if TC * scratchbuf size is too big for memory... reduce to max - if ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { - - /* Selected intensity will not run on this GPU. Not enough memory. - * Adapt the memory setting. */ - // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc - switch (type) { - //raw intensity - case 2: - while ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { - --glob_thread_count; - } - - max_int = glob_thread_count; - cgpu->rawintensity = glob_thread_count; - break; - - //x intensity - case 1: - glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE; - max_int = glob_thread_count / clState->compute_shaders; - - while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_XINTENSITY) { - applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); - max_int = MIN_XINTENSITY; - } - - cgpu->xintensity = max_int; - glob_thread_count = clState->compute_shaders * (1UL << max_int); - break; - - default: - glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE; - while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_INTENSITY) { - applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); - max_int = MIN_INTENSITY; - } - - cgpu->intensity = max_int; - glob_thread_count = 1UL << max_int; - break; - } - } - - // TC is glob thread count - cgpu->thread_concurrency = glob_thread_count; - - applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); - } else if (!cgpu->opt_tc) { unsigned int sixtyfours; @@ -827,18 +743,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (algorithm->type == ALGO_LYRA2REV2) { - /* The scratch/pad-buffer needs 32kBytes memory per thread. */ - bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; - buf1size = 4* 8 * cgpu->thread_concurrency; //matrix - - /* This is the input buffer. For yescrypt this is guaranteed to be - * 80 bytes only. */ - readbufsize = 80; - - applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size); - // scrypt/n-scrypt - } else { size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0)); bufsize = 128 * ipt * cgpu->thread_concurrency; @@ -904,14 +808,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } } - else if (algorithm->type == ALGO_LYRA2REV2) { - // need additionnal buffers - clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); - if (status != CL_SUCCESS && !clState->buffer1) { - applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status); - return NULL; - } - } else { clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status); // we don't need that much just tired... if (status != CL_SUCCESS && !clState->buffer1) {