mirror of
https://github.com/GOSTSec/sgminer
synced 2025-03-13 06:01:03 +00:00
Added host code required to utilize a custom Quark AMD binary.
This commit is contained in:
parent
12fbc94288
commit
e5bd5f869d
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,6 +8,7 @@ minerd.exe
|
||||
|
||||
autom4te.cache
|
||||
.deps
|
||||
m4
|
||||
|
||||
Makefile
|
||||
Makefile.in
|
||||
|
113
algorithm.c
113
algorithm.c
@ -1007,6 +1007,108 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
|
||||
return status;
|
||||
}
|
||||
|
||||
static cl_int queue_quarkcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
	// Bind buffers and scalar arguments for the quarkcoin kernel pipeline:
	// the initial "search" kernel plus the chain in clState->extra_kernels.
	//
	// NOTE(review): CL_SET_ARG / CL_NEXTKERNEL_SET_ARG / CL_NEXTKERNEL_SET_ARG_0
	// are macros that implicitly use the locals `kernel` and `num` (argument
	// index) and fold clSetKernelArg() failures into `status` -- do not rename
	// or reorder these locals.
	cl_kernel *kernel = &clState->kernel;
	unsigned int num = 0;
	cl_ulong le_target;
	cl_int status = 0;

	// The share target lives in the last 8 bytes of the 32-byte device target.
	le_target = *(cl_ulong *)(blk->work->device_target + 24);
	// Byte-swap the 80-byte block header into cldata, then upload it
	// (blocking write) so the first kernel can read it.
	flip80(clState->cldata, blk->work->data);
	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);

	// search
	CL_SET_ARG(clState->CLbuffer0);
	CL_SET_ARG(clState->padbuffer8);

	num = 0;
	kernel = clState->extra_kernels;

	// Each iteration wires one branch stage: a router kernel that splits the
	// nonce set into BranchBuffer[2i] / BranchBuffer[2i+1], followed by one
	// kernel per branch. The ++kernel in the loop header plus the two
	// CL_NEXTKERNEL_SET_ARG calls advance through four kernels per iteration.
	for(int i = 0; i < 3; ++i, kernel++)
	{
		// Router kernel: hash state in/out, the two branch nonce buffers it
		// fills, and the total thread count.
		CL_SET_ARG(clState->padbuffer8);
		CL_SET_ARG(clState->BranchBuffer[i << 1]);
		CL_SET_ARG(clState->BranchBuffer[(i << 1) + 1]);
		CL_SET_ARG(clState->GlobalThreadCount);
		num = 0;

		// First branch kernel: consumes the even branch buffer.
		CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
		CL_SET_ARG(clState->BranchBuffer[i << 1]);

		// Final stage (i == 2): branch kernels are terminal, so they also get
		// the output buffer and target to report found shares directly.
		if(i == 2)
		{
			CL_SET_ARG(clState->outputBuffer);
			CL_SET_ARG(le_target);
		}

		num = 0;

		// Second branch kernel: consumes the odd branch buffer.
		CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
		CL_SET_ARG(clState->BranchBuffer[(i << 1) + 1]);

		if(i < 2)
		{
			// Non-terminal stage: a join kernel follows; it takes only the
			// hash-state buffer as argument 0.
			CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
		}
		else
		{
			// Terminal stage: second branch kernel also writes results.
			CL_SET_ARG(clState->outputBuffer);
			CL_SET_ARG(le_target);
		}
		num = 0;
	}
	// NOTE(review): a fully unrolled per-kernel (search1..search11) version of
	// the loop above was left here commented out; removed as dead code.
	return status;
}
|
||||
|
||||
static algorithm_settings_t algos[] = {
|
||||
// kernels starting from this will have difficulty calculated by using litecoin algorithm
|
||||
#define A_SCRYPT(a) \
|
||||
@ -1051,13 +1153,10 @@ static algorithm_settings_t algos[] = {
|
||||
#undef A_YESCRYPT_MULTI
|
||||
|
||||
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
||||
#define A_QUARK(a, b) \
|
||||
{ a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }
|
||||
A_QUARK("quarkcoin", quarkcoin_regenhash),
|
||||
A_QUARK("qubitcoin", qubitcoin_regenhash),
|
||||
A_QUARK("animecoin", animecoin_regenhash),
|
||||
A_QUARK("sifcoin", sifcoin_regenhash),
|
||||
#undef A_QUARK
|
||||
{ "quarkcoin", ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 11, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, quarkcoin_regenhash, NULL, NULL, queue_quarkcoin_kernel, gen_hash, append_x11_compiler_options },
|
||||
{ "qubitcoin", ALGO_QUBIT, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, qubitcoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
|
||||
{ "animecoin", ALGO_ANIME, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, animecoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
|
||||
{ "sifcoin", ALGO_SIF, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, sifcoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options },
|
||||
|
||||
// kernels starting from this will have difficulty calculated by using bitcoin algorithm
|
||||
#define A_DARK(a, b) \
|
||||
|
@ -22,6 +22,9 @@ typedef enum {
|
||||
ALGO_X15,
|
||||
ALGO_KECCAK,
|
||||
ALGO_QUARK,
|
||||
ALGO_QUBIT,
|
||||
ALGO_ANIME,
|
||||
ALGO_SIF,
|
||||
ALGO_TWE,
|
||||
ALGO_FUGUE,
|
||||
ALGO_NIST,
|
||||
|
@ -1425,20 +1425,85 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
||||
if (clState->goffset)
|
||||
p_global_work_offset = (size_t *)&work->blk.nonce;
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS)) {
|
||||
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
||||
return -1;
|
||||
}
|
||||
if (gpu->algorithm.type == ALGO_QUARK) {
|
||||
cl_event WaitEvents[2];
|
||||
cl_uint zero = 0;
|
||||
|
||||
for (i = 0; i < clState->n_extra_kernels; i++) {
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->BranchBuffer[i], CL_TRUE, sizeof(cl_uint) * clState->GlobalThreadCount, sizeof(cl_uint), &zero, 0, NULL, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d while attempting to nuke BranchBuffer %d.", status, i);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0, x = 0; i < 9; ++i, ++x) {
|
||||
size_t BranchCount0, BranchCount1;
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, ((i) ? clState->extra_kernels[i] : clState->kernel), 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, WaitEvents);
|
||||
if (status != CL_SUCCESS) {
|
||||
if (i) applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
|
||||
else applog(LOG_ERR, "Error %d while attempting to enqueue initial kernel.", status);
|
||||
return -1;
|
||||
}
|
||||
|
||||
clWaitForEvents(1, WaitEvents);
|
||||
clReleaseEvent(WaitEvents[0]);
|
||||
|
||||
if (i) ++i;
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, WaitEvents);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
clWaitForEvents(1, WaitEvents);
|
||||
clReleaseEvent(WaitEvents[0]);
|
||||
|
||||
// Do a blocking read for the found counter in both buffers
|
||||
// so we know how many threads to dispatch to each branch
|
||||
status = clEnqueueReadBuffer(clState->commandQueue, clState->BranchBuffer[x << 1], CL_TRUE, sizeof(cl_uint) * globalThreads[0], sizeof(cl_uint), &BranchCount0, 0, NULL, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d while attempting to read intermediate data from GPU.", status, i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
BranchCount1 = globalThreads[0] - BranchCount0;
|
||||
++i;
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, &BranchCount0, localThreads, 0, NULL, WaitEvents);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
++i;
|
||||
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, &BranchCount1, localThreads, 0, NULL, WaitEvents + 1);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d while attempting to enqueue kernel with index %d.", status, i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
clWaitForEvents(2, WaitEvents);
|
||||
clReleaseEvent(WaitEvents[0]);
|
||||
clReleaseEvent(WaitEvents[1]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS)) {
|
||||
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < clState->n_extra_kernels; i++) {
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset, globalThreads, localThreads, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS)) {
|
||||
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
||||
|
43
ocl.c
43
ocl.c
@ -180,6 +180,35 @@ static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_co
|
||||
return status;
|
||||
}
|
||||
|
||||
// Copied from set_threads_hashes() in driver-opencl.c
|
||||
static size_t CalcGlobalThreads(unsigned int compute_shaders, unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity, __maybe_unused int *rawintensity, algorithm_t *algorithm)
{
	// Compute the global work size from whichever intensity setting is active,
	// never returning less than minthreads. Mirrors set_threads_hashes() in
	// driver-opencl.c.
	//
	// Precedence: rawintensity > xintensity > intensity. Only the plain
	// intensity setting can be auto-raised; the raw/x settings are fixed
	// values, so when they fall short we clamp to minthreads directly instead
	// of (as the original loop did) pointlessly incrementing *intensity up to
	// MAX_INTENSITY -- that spin never changed the computed count and clobbered
	// the user's intensity setting as a side effect.
	size_t threads = 0;

	if (*rawintensity > 0) {
		// Raw intensity is the thread count verbatim.
		threads = *rawintensity;
	}
	else if (*xintensity > 0) {
		// Shader-relative intensity; algorithms may scale it by a shift.
		threads = compute_shaders * ((algorithm->xintensity_shift)?(1 << (algorithm->xintensity_shift + *xintensity)):*xintensity);
	}
	else {
		// Plain intensity: power-of-two thread count. Raise *intensity until
		// the count reaches minthreads or we hit the cap, then clamp.
		threads = 1 << (algorithm->intensity_shift + *intensity);
		while (threads < minthreads) {
			if (likely(*intensity < MAX_INTENSITY)) {
				(*intensity)++;
				threads = 1 << (algorithm->intensity_shift + *intensity);
			}
			else {
				threads = minthreads;
			}
		}
	}

	if (threads < minthreads)
		threads = minthreads;

	return(threads);
}
|
||||
|
||||
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm)
|
||||
{
|
||||
cl_int status = 0;
|
||||
@ -837,7 +866,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
|
||||
}
|
||||
|
||||
if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
|
||||
// By the way, if you change the intensity between now and opencl_scanhash()
|
||||
// calculating the global thread count, God help you.
|
||||
if (algorithm->type == ALGO_QUARK) {
|
||||
clState->GlobalThreadCount = CalcGlobalThreads(clState->compute_shaders, clState->wsize, &cgpu->intensity, &cgpu->xintensity, &cgpu->rawintensity, &cgpu->algorithm);
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
clState->BranchBuffer[i] = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(cl_uint) * (clState->GlobalThreadCount + 2), NULL, &status);
|
||||
if (status != CL_SUCCESS && !clState->BranchBuffer[i]) {
|
||||
applog(LOG_ERR, "Error %d while creating BranchBuffer %d.", status, i);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
|
||||
// need additionnal buffers
|
||||
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
|
||||
if (status != CL_SUCCESS && !clState->buffer1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user