diff --git a/DiabloMiner120210.cl b/DiabloMiner120210.cl index 3025961a..3a5bcb82 100644 --- a/DiabloMiner120210.cl +++ b/DiabloMiner120210.cl @@ -25,7 +25,7 @@ typedef uint z; #define Zrotr(a, b) rotate((z)a, (z)b) #endif -#if BFIINT +#if BFI_INT #define ZCh(a, b, c) amd_bytealign(a, b, c) #define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a)) #else @@ -60,24 +60,8 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search( z ZG[4]; z ZH[4]; - #ifdef USEBASE - uint noncebase = base + get_global_id(0); - #else - uint noncebase = get_global_id(0); - #endif + z Znonce = base + get_global_id(0); - #ifdef DOLOOPS - noncebase *= LOOPS; - #endif - - z Znonce = noncebase; - uintzz nonce = (uintzz)0; - - #ifdef DOLOOPS - uintzz loopout = 0; - - for(int i = 0; i < LOOPS; i++) { - #endif ZA[0] = PreVal4_plus_state0 + Znonce; ZB[0] = PreVal4_plus_T1 + Znonce; @@ -539,30 +523,33 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search( ZF[2] = ZA[1] + ZH[0] + 0x8cc70208U + ZR15(ZH[1]) + ZR25(ZA[3]) + ZA[2] + ZC[2] + ZCh(ZE[2], ZB[2], ZF[1]) + ZR26(ZE[2]); ZG[2] = ZG[1] + ZF[1] + ZR26(ZF[2]) + ZCh(ZF[2], ZE[2], ZB[2]) + ZR15(ZD[2]) + ZH[2] + ZR25(ZH[3]) + ZA[3]; - bool Zio = any(ZG[2] == (z)0x136032EDU); - - bool io = false; - io = (Zio) ? Zio : io; - - nonce = Znonce; - - #ifdef DOLOOPS - loopout = (io) ? nonce : loopout; - - Znonce += (z)1; - } +#define FOUND (0x80) +#define NFLAG (0x7F) - nonce = loopout; - - bool io = any(nonce > (uintzz)0); - #endif - - #ifdef VSTORE - if(io) { vstorezz(nonce, 0, output); } - #else - if(io) { output[0] = (uintzz)nonce; } - #endif +#if defined(VECTORS4) + ZG[2] ^= 0x136032EDU; + bool result = ZG[2].x & ZG[2].y & ZG[2].z & ZG[2].w; + if (!result) { + if (!ZG[2].x) + output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; + if (!ZG[2].y) + output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + if (!ZG[2].z) + output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z; + if (!ZG[2].w) + output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w; + } +#elif defined(VECTORS2) + ZG[2] ^= 0x136032EDU; + bool result = ZG[2].x & ZG[2].y; + if (!result) { + if (!ZG[2].x) + output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; + if (!ZG[2].y) + output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + } +#else + if (ZG[2] == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce] = Znonce; +#endif } - -// vim: set ft=c - diff --git a/configure.ac b/configure.ac index b8a26b39..517559e2 100644 --- a/configure.ac +++ b/configure.ac @@ -296,6 +296,7 @@ AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install]) AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120203"], [Filename for phatk kernel]) AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120203"], [Filename for poclbm kernel]) AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120208"], [Filename for diakgcn kernel]) +AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["DiabloMiner120210"], [Filename for diablo kernel]) AC_SUBST(OPENCL_LIBS) diff --git a/device-gpu.c b/device-gpu.c index 7b219bc0..f0d7f875 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -806,6 +806,46 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) return status; } +static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk) +{ + cl_kernel *kernel = &clState->kernel; + cl_int status = 0; + int num = 0; + + CL_SET_BLKARG(nonce); + CL_SET_BLKARG(PreVal0); + CL_SET_BLKARG(PreVal4_2); + CL_SET_BLKARG(PreW18); + CL_SET_BLKARG(PreW19); + CL_SET_BLKARG(W16); + CL_SET_BLKARG(W17); + CL_SET_BLKARG(PreW31); + CL_SET_BLKARG(PreW32); + + CL_SET_BLKARG(cty_d); + CL_SET_BLKARG(cty_b); + CL_SET_BLKARG(cty_c); + CL_SET_BLKARG(cty_h); + CL_SET_BLKARG(cty_f); + CL_SET_BLKARG(cty_g); + + CL_SET_BLKARG(C1addK5); + CL_SET_BLKARG(B1addK6); + + CL_SET_BLKARG(ctx_a); + CL_SET_BLKARG(ctx_b); + CL_SET_BLKARG(ctx_c); + CL_SET_BLKARG(ctx_d); + CL_SET_BLKARG(ctx_e); + CL_SET_BLKARG(ctx_f); + CL_SET_BLKARG(ctx_g); + CL_SET_BLKARG(ctx_h); + + CL_SET_ARG(clState->outputBuffer); + + return status; +} + static void set_threads_hashes(unsigned int vectors, unsigned int *threads, unsigned int *hashes, size_t *globalThreads, unsigned int minthreads, int intensity) @@ -957,12 +997,17 @@ static void opencl_detect() return; if (opt_kernel) { - if (strcmp(opt_kernel, "poclbm") && strcmp(opt_kernel, "phatk") && strcmp(opt_kernel, "diakgcn")) - quit(1, "Invalid kernel name specified - must be poclbm, phatk or diakgcn"); + if (strcmp(opt_kernel, "poclbm") && + strcmp(opt_kernel, "phatk") && + strcmp(opt_kernel, "diakgcn") && + strcmp(opt_kernel, "diablo")) + quit(1, "Invalid kernel name specified - must be poclbm, phatk, diakgcn or diablo"); if (!strcmp(opt_kernel, "diakgcn")) chosen_kernel = KL_DIAKGCN; else if (!strcmp(opt_kernel, "poclbm")) chosen_kernel = KL_POCLBM; + else if (!strcmp(opt_kernel, "diablo")) + chosen_kernel = KL_DIABLO; else chosen_kernel = KL_PHATK; } else @@ -1100,6 +1145,9 @@ static bool opencl_thread_init(struct thr_info *thr) case KL_DIAKGCN: thrdata->queue_kernel_parameters = &queue_diakgcn_kernel; break; + case KL_DIABLO: + thrdata->queue_kernel_parameters = &queue_diablo_kernel; + break; } thrdata->res = calloc(BUFFERSIZE, 1); @@ -1184,7 +1232,6 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, } set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads, localThreads[0], gpu->intensity); - status = thrdata->queue_kernel_parameters(clState, &work->blk); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); diff --git a/miner.h b/miner.h index 0c11a326..42c0a16d 100644 --- a/miner.h +++ b/miner.h @@ -680,6 +680,7 @@ enum cl_kernels { KL_POCLBM, KL_PHATK, KL_DIAKGCN, + KL_DIABLO, }; extern void get_datestamp(char *, struct timeval *); diff --git a/ocl.c b/ocl.c index 3f9e29ce..677d6ca5 100644 --- a/ocl.c +++ b/ocl.c @@ -362,7 +362,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) if (chosen_kernel == KL_NONE) { if (strstr(name, "Tahiti")) - clState->chosen_kernel = KL_DIAKGCN; + clState->chosen_kernel = KL_DIABLO; else if (!clState->hasBitAlign) clState->chosen_kernel = KL_POCLBM; else @@ -371,6 +371,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) clState->chosen_kernel = chosen_kernel; switch (clState->chosen_kernel) { + case KL_DIABLO: + strcpy(filename, DIABLO_KERNNAME".cl"); + strcpy(binaryfilename, DIABLO_KERNNAME); + break; case KL_DIAKGCN: strcpy(filename, DIAKGCN_KERNNAME".cl"); strcpy(binaryfilename, DIAKGCN_KERNNAME);