Browse Source

Add improved Darkcoin (X11-mod) algorithm

djm34
Jan Berdajs 10 years ago
parent
commit
dca83c1112
  1. 93
      algorithm.c
  2. 3
      algorithm.h
  3. 21
      driver-opencl.c
  4. 8443
      kernel/darkcoin-mod.cl
  5. 73
      ocl.c
  6. 3
      ocl.h

93
algorithm.c

@ -62,39 +62,101 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx @@ -62,39 +62,101 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx
return status;
}
static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// groestl - search2
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// skein - search3
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// jh - search4
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// keccak - search5
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// luffa - search6
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// cubehash - search7
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// shavite - search8
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// simd - search9
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
// echo - search10
kernel++;
num = 0;
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */
double diff_multiplier1;
@ -106,6 +168,9 @@ typedef struct _algorithm_settings_t { @@ -106,6 +168,9 @@ typedef struct _algorithm_settings_t {
unsigned long long diff_nonce;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
cl_command_queue_properties cq_properties;
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
@ -114,7 +179,7 @@ typedef struct _algorithm_settings_t { @@ -114,7 +179,7 @@ typedef struct _algorithm_settings_t {
static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using litecoin algorithm
#define A_SCRYPT(a) \
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
A_SCRYPT( "ckolivas" ),
A_SCRYPT( "alexkarnew" ),
A_SCRYPT( "alexkarnold" ),
@ -125,7 +190,7 @@ static algorithm_settings_t algos[] = { @@ -125,7 +190,7 @@ static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, gen_hash}
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
A_QUARK( "quarkcoin", quarkcoin_regenhash),
A_QUARK( "qubitcoin", qubitcoin_regenhash),
A_QUARK( "animecoin", animecoin_regenhash),
@ -134,25 +199,26 @@ static algorithm_settings_t algos[] = { @@ -134,25 +199,26 @@ static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using bitcoin algorithm
#define A_DARK(a, b) \
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, gen_hash}
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
A_DARK( "darkcoin", darkcoin_regenhash),
A_DARK( "inkcoin", inkcoin_regenhash),
A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash),
A_DARK( "marucoin", marucoin_regenhash),
#undef A_DARK
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, twecoin_regenhash, queue_sph_kernel, sha256},
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256},
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
{ "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash},
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b) \
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, sha256}
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256}
A_FUGUE( "fuguecoin", fuguecoin_regenhash),
A_FUGUE( "groestlcoin", groestlcoin_regenhash),
#undef A_FUGUE
// Terminator (do not remove)
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
};
void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
@ -172,6 +238,9 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) { @@ -172,6 +238,9 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
dest->diff_nonce = src->diff_nonce;
dest->diff_numerator = src->diff_numerator;
dest->diff1targ = src->diff1targ;
dest->n_extra_kernels = src->n_extra_kernels;
dest->rw_buffer_size = src->rw_buffer_size;
dest->cq_properties = src->cq_properties;
dest->regenhash = src->regenhash;
dest->queue_kernel = src->queue_kernel;
dest->gen_hash = src->gen_hash;

3
algorithm.h

@ -32,6 +32,9 @@ typedef struct _algorithm_t { @@ -32,6 +32,9 @@ typedef struct _algorithm_t {
unsigned long long diff_nonce;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
cl_command_queue_properties cq_properties;
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);

21
driver-opencl.c

@ -1304,7 +1304,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1304,7 +1304,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data;
struct cgpu_info *gpu = thr->cgpu;
_clState *clState = clStates[thr_id];
const cl_kernel *kernel = &clState->kernel;
const int dynamic_us = opt_dynamic_interval * 1000;
cl_int status;
@ -1314,6 +1313,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1314,6 +1313,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
int64_t hashes;
int found = gpu->algorithm.found_idx;
int buffersize = BUFFERSIZE;
unsigned int i;
/* Windows' timer resolution is only 15ms so oversample 5x */
if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
@ -1346,13 +1346,23 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1346,13 +1346,23 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (clState->goffset)
p_global_work_offset = &work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, p_global_work_offset,
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
for (i = 0; i < clState->n_extra_kernels; i++) {
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
}
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
@ -1393,14 +1403,19 @@ static void opencl_thread_shutdown(struct thr_info *thr) @@ -1393,14 +1403,19 @@ static void opencl_thread_shutdown(struct thr_info *thr)
{
const int thr_id = thr->id;
_clState *clState = clStates[thr_id];
cl_kernel *kernel = clState->extra_kernels;
clStates[thr_id] = NULL;
unsigned int i;
if (clState) {
clFinish(clState->commandQueue);
clReleaseMemObject(clState->outputBuffer);
clReleaseMemObject(clState->CLbuffer0);
clReleaseMemObject(clState->padbuffer8);
if (clState->padbuffer8)
clReleaseMemObject(clState->padbuffer8);
clReleaseKernel(clState->kernel);
for (i = 0; i < clState->n_extra_kernels; i++)
clReleaseKernel(clState->extra_kernels[i]);
clReleaseProgram(clState->program);
clReleaseCommandQueue(clState->commandQueue);
clReleaseContext(clState->context);

8443
kernel/darkcoin-mod.cl

File diff suppressed because it is too large Load Diff

73
ocl.c

@ -329,7 +329,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -329,7 +329,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
cgpu->algorithm.cq_properties, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
if (status != CL_SUCCESS) {
@ -760,29 +760,56 @@ built: @@ -760,29 +760,56 @@ built:
return NULL;
}
size_t ipt = (algorithm->n / cgpu->lookup_gap +
(algorithm->n % cgpu->lookup_gap > 0));
size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
/* Use the max alloc value which has been rounded to a power of
* 2 greater >= required amount earlier */
if (bufsize > cgpu->max_alloc) {
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
gpu, (unsigned long)(cgpu->max_alloc));
applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
}
applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);
clState->padbufsize = bufsize;
/* This buffer is weird and might work to some degree even if
* the create buffer call has apparently failed, so check if we
* get anything back before we call it a failure. */
clState->padbuffer8 = NULL;
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
if (status != CL_SUCCESS && !clState->padbuffer8) {
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
return NULL;
}
clState->n_extra_kernels = algorithm->n_extra_kernels;
if (clState->n_extra_kernels > 0) {
unsigned int i;
char *kernel_name = (char *)malloc(9); // max: search99 + 0x0
clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel) * clState->n_extra_kernels);
for (i = 0; i < clState->n_extra_kernels; i++) {
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
return NULL;
}
}
free(kernel_name);
}
size_t bufsize;
if (algorithm->rw_buffer_size < 0) {
size_t ipt = (algorithm->n / cgpu->lookup_gap +
(algorithm->n % cgpu->lookup_gap > 0));
bufsize = 128 * ipt * cgpu->thread_concurrency;
} else
bufsize = (size_t) algorithm->rw_buffer_size;
clState->padbuffer8 = NULL;
if (bufsize > 0) {
/* Use the max alloc value which has been rounded to a power of
* 2 greater >= required amount earlier */
if (bufsize > cgpu->max_alloc) {
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
gpu, (unsigned long)(cgpu->max_alloc));
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
}
applog(LOG_DEBUG, "Creating buffer sized %lu", (unsigned long)bufsize);
/* This buffer is weird and might work to some degree even if
* the create buffer call has apparently failed, so check if we
* get anything back before we call it a failure. */
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
if (status != CL_SUCCESS && !clState->padbuffer8) {
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
return NULL;
}
}
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
if (status != CL_SUCCESS) {

3
ocl.h

@ -15,12 +15,13 @@ @@ -15,12 +15,13 @@
typedef struct __clState {
cl_context context;
cl_kernel kernel;
cl_kernel *extra_kernels;
size_t n_extra_kernels;
cl_command_queue commandQueue;
cl_program program;
cl_mem outputBuffer;
cl_mem CLbuffer0;
cl_mem padbuffer8;
size_t padbufsize;
unsigned char cldata[80];
bool hasBitAlign;
bool hasOpenCL11plus;

Loading…
Cancel
Save