Browse Source

Added host code required to utilize a custom Quark AMD binary.

tweaked
Wolf 9 years ago committed by elbandi
parent
commit
e5bd5f869d
  1. 1
      .gitignore
  2. 113
      algorithm.c
  3. 3
      algorithm.h
  4. 73
      driver-opencl.c
  5. 43
      ocl.c
  6. 2
      ocl.h

1
.gitignore vendored

@ -8,6 +8,7 @@ minerd.exe
autom4te.cache autom4te.cache
.deps .deps
m4
Makefile Makefile
Makefile.in Makefile.in

113
algorithm.c

@ -1007,6 +1007,108 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
return status; return status;
} }
static cl_int queue_quarkcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// search
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
num = 0;
kernel = clState->extra_kernels;
for(int i = 0; i < 3; ++i, kernel++)
{
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->BranchBuffer[i << 1]);
CL_SET_ARG(clState->BranchBuffer[(i << 1) + 1]);
CL_SET_ARG(clState->GlobalThreadCount);
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->BranchBuffer[i << 1]);
if(i == 2)
{
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
}
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->BranchBuffer[(i << 1) + 1]);
if(i < 2)
{
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
}
else
{
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
}
num = 0;
}
/*
// search1
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch1Nonces);
CL_SET_ARG(clState->Branch2Nonces);
CL_SET_ARG(clState->GlobalThreadCount);
num = 0;
// search2
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch1Nonces);
num = 0;
// search3
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch2Nonces);
num = 0;
// search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// search5
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch3Nonces);
CL_SET_ARG(clState->Branch4Nonces);
CL_SET_ARG(clState->GlobalThreadCount);
num = 0;
// search6
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch3Nonces);
num = 0;
//search7
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch4Nonces);
num = 0;
// search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// search9
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch5Nonces);
CL_SET_ARG(clState->Branch6Nonces);
CL_SET_ARG(clState->GlobalThreadCount);
num = 0;
// search10
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch5Nonces);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
num = 0;
// search11
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->Branch6Nonces);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
*/
return status;
}
static algorithm_settings_t algos[] = { static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using litecoin algorithm // kernels starting from this will have difficulty calculated by using litecoin algorithm
#define A_SCRYPT(a) \ #define A_SCRYPT(a) \
@ -1051,13 +1153,10 @@ static algorithm_settings_t algos[] = {
#undef A_YESCRYPT_MULTI #undef A_YESCRYPT_MULTI
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \ { "quarkcoin", ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 11, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, quarkcoin_regenhash, NULL, NULL, queue_quarkcoin_kernel, gen_hash, append_x11_compiler_options },
{ a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } { "qubitcoin", ALGO_QUBIT, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, qubitcoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
A_QUARK("quarkcoin", quarkcoin_regenhash), { "animecoin", ALGO_ANIME, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, animecoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
A_QUARK("qubitcoin", qubitcoin_regenhash), { "sifcoin", ALGO_SIF, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, sifcoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
A_QUARK("animecoin", animecoin_regenhash),
A_QUARK("sifcoin", sifcoin_regenhash),
#undef A_QUARK
// kernels starting from this will have difficulty calculated by using bitcoin algorithm // kernels starting from this will have difficulty calculated by using bitcoin algorithm
#define A_DARK(a, b) \ #define A_DARK(a, b) \

3
algorithm.h

@ -22,6 +22,9 @@ typedef enum {
ALGO_X15, ALGO_X15,
ALGO_KECCAK, ALGO_KECCAK,
ALGO_QUARK, ALGO_QUARK,
ALGO_QUBIT,
ALGO_ANIME,
ALGO_SIF,
ALGO_TWE, ALGO_TWE,
ALGO_FUGUE, ALGO_FUGUE,
ALGO_NIST, ALGO_NIST,

73
driver-opencl.c

@ -1425,21 +1425,86 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (clState->goffset) if (clState->goffset)
p_global_work_offset = (size_t *)&work->blk.nonce; p_global_work_offset = (size_t *)&work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset, if (gpu->algorithm.type == ALGO_QUARK) {
globalThreads, localThreads, 0, NULL, NULL); cl_event WaitEvents[2];
cl_uint zero = 0;
for (int i = 0; i < 6; ++i) {
status = clEnqueueWriteBuffer(clState->commandQueue, clState->BranchBuffer[i], CL_TRUE, sizeof(cl_uint) * clState->GlobalThreadCount, sizeof(cl_uint), &zero, 0, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while attempting to nuke BranchBuffer %d.", status, i);
return -1;
}
}
for (int i = 0, x = 0; i < 9; ++i, ++x) {
size_t BranchCount0, BranchCount1;
status = clEnqueueNDRangeKernel(clState->commandQueue, ((i) ? clState->extra_kernels[i] : clState->kernel), 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, WaitEvents);
if (status != CL_SUCCESS) {
if (i) applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
else applog(LOG_ERR, "Error %d while attempting to enqueue initial kernel.", status);
return -1;
}
clWaitForEvents(1, WaitEvents);
clReleaseEvent(WaitEvents[0]);
if (i) ++i;
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, WaitEvents);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
return -1;
}
clWaitForEvents(1, WaitEvents);
clReleaseEvent(WaitEvents[0]);
// Do a blocking read for the found counter in both buffers
// so we know how many threads to dispatch to each branch
status = clEnqueueReadBuffer(clState->commandQueue, clState->BranchBuffer[x << 1], CL_TRUE, sizeof(cl_uint) * globalThreads[0], sizeof(cl_uint), &BranchCount0, 0, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while attempting to read intermediate data from GPU.", status, i);
return -1;
}
BranchCount1 = globalThreads[0] - BranchCount0;
++i;
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, &BranchCount0, localThreads, 0, NULL, WaitEvents);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
return -1;
}
++i;
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, &BranchCount1, localThreads, 0, NULL, WaitEvents + 1);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
return -1;
}
clWaitForEvents(2, WaitEvents);
clReleaseEvent(WaitEvents[0]);
clReleaseEvent(WaitEvents[1]);
}
}
else {
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) { if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status); applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1; return -1;
} }
for (i = 0; i < clState->n_extra_kernels; i++) { for (i = 0; i < clState->n_extra_kernels; i++) {
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, NULL);
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) { if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status); applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1; return -1;
} }
} }
}
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL); buffersize, thrdata->res, 0, NULL, NULL);

43
ocl.c

@ -180,6 +180,35 @@ static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_co
return status; return status;
} }
// Copied from set_threads_hashes() in driver-opencl.c
static size_t CalcGlobalThreads(unsigned int compute_shaders, unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity, __maybe_unused int *rawintensity, algorithm_t *algorithm)
{
size_t threads = 0;
while (threads < minthreads) {
if (*rawintensity > 0) {
threads = *rawintensity;
}
else if (*xintensity > 0) {
threads = compute_shaders * ((algorithm->xintensity_shift)?(1 << (algorithm->xintensity_shift + *xintensity)):*xintensity);
}
else {
threads = 1 << (algorithm->intensity_shift + *intensity);
}
if (threads < minthreads) {
if (likely(*intensity < MAX_INTENSITY)) {
(*intensity)++;
}
else {
threads = minthreads;
}
}
}
return(threads);
}
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm) _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm)
{ {
cl_int status = 0; cl_int status = 0;
@ -837,7 +866,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
} }
if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { // By the way, if you change the intensity between now and opencl_scanhash()
// calculating the global thread count, God help you.
if (algorithm->type == ALGO_QUARK) {
clState->GlobalThreadCount = CalcGlobalThreads(clState->compute_shaders, clState->wsize, &cgpu->intensity, &cgpu->xintensity, &cgpu->rawintensity, &cgpu->algorithm);
for (int i = 0; i < 6; ++i) {
clState->BranchBuffer[i] = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(cl_uint) * (clState->GlobalThreadCount + 2), NULL, &status);
if (status != CL_SUCCESS && !clState->BranchBuffer[i]) {
applog(LOG_ERR, "Error %d while creating BranchBuffer %d.", status, i);
return NULL;
}
}
}
else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
// need additionnal buffers // need additionnal buffers
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer1) { if (status != CL_SUCCESS && !clState->buffer1) {

2
ocl.h

@ -13,6 +13,8 @@ typedef struct __clState {
cl_mem outputBuffer; cl_mem outputBuffer;
cl_mem CLbuffer0; cl_mem CLbuffer0;
cl_mem MidstateBuf; cl_mem MidstateBuf;
cl_mem BranchBuffer[6];
size_t GlobalThreadCount;
cl_mem padbuffer8; cl_mem padbuffer8;
cl_mem buffer1; cl_mem buffer1;
cl_mem buffer2; cl_mem buffer2;

Loading…
Cancel
Save