mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-24 21:44:38 +00:00
Add improved Darkcoin (X11-mod) algorithm
This commit is contained in:
parent
d12cda0589
commit
dca83c1112
101
algorithm.c
101
algorithm.c
@ -62,6 +62,21 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
|
{
|
||||||
|
cl_kernel *kernel = &clState->kernel;
|
||||||
|
unsigned int num = 0;
|
||||||
|
cl_int status = 0;
|
||||||
|
|
||||||
|
flip80(clState->cldata, blk->work->data);
|
||||||
|
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
|
||||||
|
|
||||||
|
CL_SET_ARG(clState->CLbuffer0);
|
||||||
|
CL_SET_ARG(clState->outputBuffer);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -80,19 +95,66 @@ static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *b
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel;
|
||||||
unsigned int num = 0;
|
unsigned int num;
|
||||||
cl_int status = 0;
|
cl_ulong le_target;
|
||||||
|
cl_int status = 0;
|
||||||
|
|
||||||
flip80(clState->cldata, blk->work->data);
|
le_target = *(cl_ulong *)(blk->work->device_target + 24);
|
||||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
|
flip80(clState->cldata, blk->work->data);
|
||||||
|
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
|
||||||
|
|
||||||
CL_SET_ARG(clState->CLbuffer0);
|
// blake - search
|
||||||
CL_SET_ARG(clState->outputBuffer);
|
kernel = &clState->kernel;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->CLbuffer0);
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// bmw - search1
|
||||||
|
kernel = clState->extra_kernels;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// groestl - search2
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// skein - search3
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// jh - search4
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// keccak - search5
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// luffa - search6
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// cubehash - search7
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// shavite - search8
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// simd - search9
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
// echo - search10
|
||||||
|
kernel++;
|
||||||
|
num = 0;
|
||||||
|
CL_SET_ARG(clState->padbuffer8);
|
||||||
|
CL_SET_ARG(clState->outputBuffer);
|
||||||
|
CL_SET_ARG(le_target);
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct _algorithm_settings_t {
|
typedef struct _algorithm_settings_t {
|
||||||
@ -106,6 +168,9 @@ typedef struct _algorithm_settings_t {
|
|||||||
unsigned long long diff_nonce;
|
unsigned long long diff_nonce;
|
||||||
unsigned long long diff_numerator;
|
unsigned long long diff_numerator;
|
||||||
uint32_t diff1targ;
|
uint32_t diff1targ;
|
||||||
|
size_t n_extra_kernels;
|
||||||
|
long rw_buffer_size;
|
||||||
|
cl_command_queue_properties cq_properties;
|
||||||
void (*regenhash)(struct work *);
|
void (*regenhash)(struct work *);
|
||||||
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
||||||
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
||||||
@ -114,7 +179,7 @@ typedef struct _algorithm_settings_t {
|
|||||||
static algorithm_settings_t algos[] = {
|
static algorithm_settings_t algos[] = {
|
||||||
// kernels starting from this will have difficulty calculated by using litecoin algorithm
|
// kernels starting from this will have difficulty calculated by using litecoin algorithm
|
||||||
#define A_SCRYPT(a) \
|
#define A_SCRYPT(a) \
|
||||||
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
|
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
|
||||||
A_SCRYPT( "ckolivas" ),
|
A_SCRYPT( "ckolivas" ),
|
||||||
A_SCRYPT( "alexkarnew" ),
|
A_SCRYPT( "alexkarnew" ),
|
||||||
A_SCRYPT( "alexkarnold" ),
|
A_SCRYPT( "alexkarnold" ),
|
||||||
@ -125,7 +190,7 @@ static algorithm_settings_t algos[] = {
|
|||||||
|
|
||||||
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
||||||
#define A_QUARK(a, b) \
|
#define A_QUARK(a, b) \
|
||||||
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, gen_hash}
|
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
|
||||||
A_QUARK( "quarkcoin", quarkcoin_regenhash),
|
A_QUARK( "quarkcoin", quarkcoin_regenhash),
|
||||||
A_QUARK( "qubitcoin", qubitcoin_regenhash),
|
A_QUARK( "qubitcoin", qubitcoin_regenhash),
|
||||||
A_QUARK( "animecoin", animecoin_regenhash),
|
A_QUARK( "animecoin", animecoin_regenhash),
|
||||||
@ -134,25 +199,26 @@ static algorithm_settings_t algos[] = {
|
|||||||
|
|
||||||
// kernels starting from this will have difficulty calculated by using bitcoin algorithm
|
// kernels starting from this will have difficulty calculated by using bitcoin algorithm
|
||||||
#define A_DARK(a, b) \
|
#define A_DARK(a, b) \
|
||||||
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, gen_hash}
|
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
|
||||||
A_DARK( "darkcoin", darkcoin_regenhash),
|
A_DARK( "darkcoin", darkcoin_regenhash),
|
||||||
A_DARK( "inkcoin", inkcoin_regenhash),
|
A_DARK( "inkcoin", inkcoin_regenhash),
|
||||||
A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash),
|
A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash),
|
||||||
A_DARK( "marucoin", marucoin_regenhash),
|
A_DARK( "marucoin", marucoin_regenhash),
|
||||||
#undef A_DARK
|
#undef A_DARK
|
||||||
|
|
||||||
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, twecoin_regenhash, queue_sph_kernel, sha256},
|
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256},
|
||||||
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
|
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
|
||||||
|
{ "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash},
|
||||||
|
|
||||||
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
|
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
|
||||||
#define A_FUGUE(a, b) \
|
#define A_FUGUE(a, b) \
|
||||||
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, b, queue_sph_kernel, sha256}
|
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256}
|
||||||
A_FUGUE( "fuguecoin", fuguecoin_regenhash),
|
A_FUGUE( "fuguecoin", fuguecoin_regenhash),
|
||||||
A_FUGUE( "groestlcoin", groestlcoin_regenhash),
|
A_FUGUE( "groestlcoin", groestlcoin_regenhash),
|
||||||
#undef A_FUGUE
|
#undef A_FUGUE
|
||||||
|
|
||||||
// Terminator (do not remove)
|
// Terminator (do not remove)
|
||||||
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
|
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
|
void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
|
||||||
@ -172,6 +238,9 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
|
|||||||
dest->diff_nonce = src->diff_nonce;
|
dest->diff_nonce = src->diff_nonce;
|
||||||
dest->diff_numerator = src->diff_numerator;
|
dest->diff_numerator = src->diff_numerator;
|
||||||
dest->diff1targ = src->diff1targ;
|
dest->diff1targ = src->diff1targ;
|
||||||
|
dest->n_extra_kernels = src->n_extra_kernels;
|
||||||
|
dest->rw_buffer_size = src->rw_buffer_size;
|
||||||
|
dest->cq_properties = src->cq_properties;
|
||||||
dest->regenhash = src->regenhash;
|
dest->regenhash = src->regenhash;
|
||||||
dest->queue_kernel = src->queue_kernel;
|
dest->queue_kernel = src->queue_kernel;
|
||||||
dest->gen_hash = src->gen_hash;
|
dest->gen_hash = src->gen_hash;
|
||||||
|
@ -32,6 +32,9 @@ typedef struct _algorithm_t {
|
|||||||
unsigned long long diff_nonce;
|
unsigned long long diff_nonce;
|
||||||
unsigned long long diff_numerator;
|
unsigned long long diff_numerator;
|
||||||
uint32_t diff1targ;
|
uint32_t diff1targ;
|
||||||
|
size_t n_extra_kernels;
|
||||||
|
long rw_buffer_size;
|
||||||
|
cl_command_queue_properties cq_properties;
|
||||||
void (*regenhash)(struct work *);
|
void (*regenhash)(struct work *);
|
||||||
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
||||||
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
||||||
|
@ -1304,7 +1304,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data;
|
struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data;
|
||||||
struct cgpu_info *gpu = thr->cgpu;
|
struct cgpu_info *gpu = thr->cgpu;
|
||||||
_clState *clState = clStates[thr_id];
|
_clState *clState = clStates[thr_id];
|
||||||
const cl_kernel *kernel = &clState->kernel;
|
|
||||||
const int dynamic_us = opt_dynamic_interval * 1000;
|
const int dynamic_us = opt_dynamic_interval * 1000;
|
||||||
|
|
||||||
cl_int status;
|
cl_int status;
|
||||||
@ -1314,6 +1313,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
int64_t hashes;
|
int64_t hashes;
|
||||||
int found = gpu->algorithm.found_idx;
|
int found = gpu->algorithm.found_idx;
|
||||||
int buffersize = BUFFERSIZE;
|
int buffersize = BUFFERSIZE;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
/* Windows' timer resolution is only 15ms so oversample 5x */
|
/* Windows' timer resolution is only 15ms so oversample 5x */
|
||||||
if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
|
if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
|
||||||
@ -1346,13 +1346,23 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
|
|
||||||
if (clState->goffset)
|
if (clState->goffset)
|
||||||
p_global_work_offset = &work->blk.nonce;
|
p_global_work_offset = &work->blk.nonce;
|
||||||
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, p_global_work_offset,
|
|
||||||
|
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
|
||||||
globalThreads, localThreads, 0, NULL, NULL);
|
globalThreads, localThreads, 0, NULL, NULL);
|
||||||
if (unlikely(status != CL_SUCCESS)) {
|
if (unlikely(status != CL_SUCCESS)) {
|
||||||
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < clState->n_extra_kernels; i++) {
|
||||||
|
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
|
||||||
|
globalThreads, localThreads, 0, NULL, NULL);
|
||||||
|
if (unlikely(status != CL_SUCCESS)) {
|
||||||
|
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
||||||
buffersize, thrdata->res, 0, NULL, NULL);
|
buffersize, thrdata->res, 0, NULL, NULL);
|
||||||
if (unlikely(status != CL_SUCCESS)) {
|
if (unlikely(status != CL_SUCCESS)) {
|
||||||
@ -1393,14 +1403,19 @@ static void opencl_thread_shutdown(struct thr_info *thr)
|
|||||||
{
|
{
|
||||||
const int thr_id = thr->id;
|
const int thr_id = thr->id;
|
||||||
_clState *clState = clStates[thr_id];
|
_clState *clState = clStates[thr_id];
|
||||||
|
cl_kernel *kernel = clState->extra_kernels;
|
||||||
clStates[thr_id] = NULL;
|
clStates[thr_id] = NULL;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
if (clState) {
|
if (clState) {
|
||||||
clFinish(clState->commandQueue);
|
clFinish(clState->commandQueue);
|
||||||
clReleaseMemObject(clState->outputBuffer);
|
clReleaseMemObject(clState->outputBuffer);
|
||||||
clReleaseMemObject(clState->CLbuffer0);
|
clReleaseMemObject(clState->CLbuffer0);
|
||||||
clReleaseMemObject(clState->padbuffer8);
|
if (clState->padbuffer8)
|
||||||
|
clReleaseMemObject(clState->padbuffer8);
|
||||||
clReleaseKernel(clState->kernel);
|
clReleaseKernel(clState->kernel);
|
||||||
|
for (i = 0; i < clState->n_extra_kernels; i++)
|
||||||
|
clReleaseKernel(clState->extra_kernels[i]);
|
||||||
clReleaseProgram(clState->program);
|
clReleaseProgram(clState->program);
|
||||||
clReleaseCommandQueue(clState->commandQueue);
|
clReleaseCommandQueue(clState->commandQueue);
|
||||||
clReleaseContext(clState->context);
|
clReleaseContext(clState->context);
|
||||||
|
8443
kernel/darkcoin-mod.cl
Normal file
8443
kernel/darkcoin-mod.cl
Normal file
File diff suppressed because it is too large
Load Diff
71
ocl.c
71
ocl.c
@ -329,7 +329,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
// Create an OpenCL command queue
|
// Create an OpenCL command queue
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
|
||||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
|
cgpu->algorithm.cq_properties, &status);
|
||||||
if (status != CL_SUCCESS) /* Try again without OOE enable */
|
if (status != CL_SUCCESS) /* Try again without OOE enable */
|
||||||
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
|
||||||
if (status != CL_SUCCESS) {
|
if (status != CL_SUCCESS) {
|
||||||
@ -760,29 +760,56 @@ built:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ipt = (algorithm->n / cgpu->lookup_gap +
|
|
||||||
(algorithm->n % cgpu->lookup_gap > 0));
|
|
||||||
size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
|
|
||||||
|
|
||||||
/* Use the max alloc value which has been rounded to a power of
|
clState->n_extra_kernels = algorithm->n_extra_kernels;
|
||||||
* 2 greater >= required amount earlier */
|
if (clState->n_extra_kernels > 0) {
|
||||||
if (bufsize > cgpu->max_alloc) {
|
unsigned int i;
|
||||||
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
|
char *kernel_name = (char *)malloc(9); // max: search99 + 0x0
|
||||||
gpu, (unsigned long)(cgpu->max_alloc));
|
|
||||||
applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
|
|
||||||
}
|
|
||||||
applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);
|
|
||||||
clState->padbufsize = bufsize;
|
|
||||||
|
|
||||||
/* This buffer is weird and might work to some degree even if
|
clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel) * clState->n_extra_kernels);
|
||||||
* the create buffer call has apparently failed, so check if we
|
|
||||||
* get anything back before we call it a failure. */
|
for (i = 0; i < clState->n_extra_kernels; i++) {
|
||||||
clState->padbuffer8 = NULL;
|
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
|
||||||
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
|
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
|
||||||
if (status != CL_SUCCESS && !clState->padbuffer8) {
|
if (status != CL_SUCCESS) {
|
||||||
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
|
applog(LOG_ERR, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(kernel_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bufsize;
|
||||||
|
|
||||||
|
if (algorithm->rw_buffer_size < 0) {
|
||||||
|
size_t ipt = (algorithm->n / cgpu->lookup_gap +
|
||||||
|
(algorithm->n % cgpu->lookup_gap > 0));
|
||||||
|
bufsize = 128 * ipt * cgpu->thread_concurrency;
|
||||||
|
} else
|
||||||
|
bufsize = (size_t) algorithm->rw_buffer_size;
|
||||||
|
|
||||||
|
clState->padbuffer8 = NULL;
|
||||||
|
|
||||||
|
if (bufsize > 0) {
|
||||||
|
/* Use the max alloc value which has been rounded to a power of
|
||||||
|
* 2 greater >= required amount earlier */
|
||||||
|
if (bufsize > cgpu->max_alloc) {
|
||||||
|
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
|
||||||
|
gpu, (unsigned long)(cgpu->max_alloc));
|
||||||
|
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
|
||||||
|
}
|
||||||
|
applog(LOG_DEBUG, "Creating buffer sized %lu", (unsigned long)bufsize);
|
||||||
|
|
||||||
|
/* This buffer is weird and might work to some degree even if
|
||||||
|
* the create buffer call has apparently failed, so check if we
|
||||||
|
* get anything back before we call it a failure. */
|
||||||
|
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
|
||||||
|
if (status != CL_SUCCESS && !clState->padbuffer8) {
|
||||||
|
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
|
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
|
||||||
if (status != CL_SUCCESS) {
|
if (status != CL_SUCCESS) {
|
||||||
|
3
ocl.h
3
ocl.h
@ -15,12 +15,13 @@
|
|||||||
typedef struct __clState {
|
typedef struct __clState {
|
||||||
cl_context context;
|
cl_context context;
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
|
cl_kernel *extra_kernels;
|
||||||
|
size_t n_extra_kernels;
|
||||||
cl_command_queue commandQueue;
|
cl_command_queue commandQueue;
|
||||||
cl_program program;
|
cl_program program;
|
||||||
cl_mem outputBuffer;
|
cl_mem outputBuffer;
|
||||||
cl_mem CLbuffer0;
|
cl_mem CLbuffer0;
|
||||||
cl_mem padbuffer8;
|
cl_mem padbuffer8;
|
||||||
size_t padbufsize;
|
|
||||||
unsigned char cldata[80];
|
unsigned char cldata[80];
|
||||||
bool hasBitAlign;
|
bool hasBitAlign;
|
||||||
bool hasOpenCL11plus;
|
bool hasOpenCL11plus;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user