Browse Source

move algorithm-specific OpenCL compile options into algorithm_t

djm34
Jan Berdajs 11 years ago
parent
commit
e134ab75ad
  1. 48
      algorithm.c
  2. 3
      algorithm.h
  3. 53
      ocl.c
  4. 25
      ocl/build_kernel.c
  5. 2
      ocl/build_kernel.h

48
algorithm.c

@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
#include "algorithm.h"
#include "sha2.h"
#include "ocl.h"
#include "ocl/build_kernel.h"
#include "algorithm/scrypt.h"
#include "algorithm/animecoin.h"
@ -42,6 +43,28 @@ void gen_hash(const unsigned char *data, unsigned int len, unsigned char *hash) @@ -42,6 +43,28 @@ void gen_hash(const unsigned char *data, unsigned int len, unsigned char *hash)
#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
/*
 * Appends the scrypt-specific build settings to a kernel build description.
 *
 * data      - build description; " -D LOOKUP_GAP=.. -D CONCURRENT_THREADS=..
 *             -D NFACTOR=.." is appended to data->compiler_options and
 *             "lg<gap>tc<threads>nf<nfactor>" to data->binary_filename.
 * cgpu      - supplies lookup_gap and thread_concurrency.
 * algorithm - supplies nfactor.
 *
 * Both destination strings are extended in place with strcat(), so they must
 * already be NUL-terminated and have room for the appended text.
 */
static void append_scrypt_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
  char buf[255];

  /* Explicit casts keep every argument in agreement with its conversion
   * specifier; the same fields are printed as %d here and as %u below. */
  snprintf(buf, sizeof(buf), " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%u -D NFACTOR=%d",
    (int)cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)algorithm->nfactor);
  strcat(data->compiler_options, buf);

  snprintf(buf, sizeof(buf), "lg%utc%unf%u",
    (unsigned int)cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (unsigned int)algorithm->nfactor);
  strcat(data->binary_filename, buf);
}
/*
 * Appends the Hamsi-specific build settings to a kernel build description.
 *
 * data      - build description; " -D SPH_HAMSI_EXPAND_BIG=<n>" is appended to
 *             data->compiler_options and "big<n>" to data->binary_filename,
 *             where <n> is the global opt_hamsi_expand_big.
 * cgpu      - unused.
 * algorithm - unused.
 *
 * Both destination strings are extended in place with strcat(), so they must
 * already be NUL-terminated and have room for the appended text.
 */
static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
  char buf[255];

  /* Present only so the function fits the set_compile_options callback slot. */
  (void)cgpu;
  (void)algorithm;

  /* Casts make each argument match its conversion specifier exactly. */
  snprintf(buf, sizeof(buf), " -D SPH_HAMSI_EXPAND_BIG=%d", (int)opt_hamsi_expand_big);
  strcat(data->compiler_options, buf);

  snprintf(buf, sizeof(buf), "big%u", (unsigned int)opt_hamsi_expand_big);
  strcat(data->binary_filename, buf);
}
static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
unsigned char *midstate = blk->work->midstate;
@ -309,12 +332,13 @@ typedef struct _algorithm_settings_t { @@ -309,12 +332,13 @@ typedef struct _algorithm_settings_t {
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
} algorithm_settings_t;
static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using litecoin algorithm
#define A_SCRYPT(a) \
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
{ a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options}
A_SCRYPT( "ckolivas" ),
A_SCRYPT( "alexkarnew" ),
A_SCRYPT( "alexkarnold" ),
@ -325,7 +349,7 @@ static algorithm_settings_t algos[] = { @@ -325,7 +349,7 @@ static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
{ a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, NULL}
A_QUARK( "quarkcoin", quarkcoin_regenhash),
A_QUARK( "qubitcoin", qubitcoin_regenhash),
A_QUARK( "animecoin", animecoin_regenhash),
@ -334,28 +358,29 @@ static algorithm_settings_t algos[] = { @@ -334,28 +358,29 @@ static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using bitcoin algorithm
#define A_DARK(a, b) \
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
{ a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, NULL}
A_DARK( "darkcoin", darkcoin_regenhash),
A_DARK( "inkcoin", inkcoin_regenhash),
A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash),
A_DARK( "marucoin", marucoin_regenhash),
#undef A_DARK
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256},
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
{ "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash},
{ "marucoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash},
{ "marucoin-modold", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash},
{ "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL},
{ "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL},
{ "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, NULL},
{ "marucoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_hamsi_compiler_options},
{ "marucoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options},
{ "marucoin-modold", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options},
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b) \
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256}
{ a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL}
A_FUGUE( "fuguecoin", fuguecoin_regenhash),
A_FUGUE( "groestlcoin", groestlcoin_regenhash),
#undef A_FUGUE
// Terminator (do not remove)
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
{ NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL}
};
void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
@ -381,6 +406,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) { @@ -381,6 +406,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
dest->regenhash = src->regenhash;
dest->queue_kernel = src->queue_kernel;
dest->gen_hash = src->gen_hash;
dest->set_compile_options = src->set_compile_options;
break;
}
}

3
algorithm.h

@ -14,6 +14,8 @@ extern void gen_hash(const unsigned char *data, unsigned int len, unsigned char @@ -14,6 +14,8 @@ extern void gen_hash(const unsigned char *data, unsigned int len, unsigned char
struct __clState;
struct _dev_blk_ctx;
struct _build_kernel_data;
struct cgpu_info;
struct work;
/* Describes the Scrypt parameters and hashing functions used to mine
@ -38,6 +40,7 @@ typedef struct _algorithm_t { @@ -38,6 +40,7 @@ typedef struct _algorithm_t {
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
void (*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
} algorithm_t;
/* Set default parameters based on name. */

53
ocl.c

@ -295,13 +295,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -295,13 +295,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
* would have otherwise created. The filename is:
* algorithm name + platform name + g (if goffset) + w + work_size + l + sizeof(long) + algorithm-specific suffix (e.g. lg + lookup_gap + tc + thread_concurrency + nf + nfactor for scrypt) + .bin
*/
char binaryfilename[255];
char filename[255];
char strbuf[32];
sprintf(strbuf, "%s.cl", cgpu->algorithm.name);
strcpy(filename, strbuf);
strcpy(binaryfilename, cgpu->algorithm.name);
/* For some reason 2 vectors is still better even if the card says
* otherwise, and many cards lie about their max so use 256 as max
@ -364,40 +362,36 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -364,40 +362,36 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
slot = cpnd = 0;
strcat(binaryfilename, name);
if (clState->goffset)
strcat(binaryfilename, "g");
build_data->context = clState->context;
build_data->device = &devices[gpu];
sprintf(strbuf, "lg%utc%unf%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, algorithm->nfactor);
strcat(binaryfilename, strbuf);
// Build information
strcpy(build_data->source_filename, filename);
strcpy(build_data->platform, name);
strcpy(build_data->sgminer_path, sgminer_path);
if (opt_kernel_path && *opt_kernel_path)
build_data->kernel_path = opt_kernel_path;
sprintf(strbuf, "w%d", (int)clState->wsize);
strcat(binaryfilename, strbuf);
sprintf(strbuf, "l%d", (int)sizeof(long));
strcat(binaryfilename, strbuf);
strcat(binaryfilename, ".bin");
build_data->work_size = clState->wsize;
build_data->has_bit_align = clState->hasBitAlign;
strcpy(build_data->binary_filename, binaryfilename);
build_data->context = clState->context;
build_data->device = &devices[gpu];
if (!(clState->program = load_opencl_binary_kernel(build_data))) {
applog(LOG_NOTICE, "Building binary %s", binaryfilename);
build_data->opencl_version = get_opencl_version(devices[gpu]);
build_data->patch_bfi = needs_bfi_patch(build_data);
strcpy(build_data->source_filename, filename);
strcpy(build_data->platform, name);
strcpy(build_data->sgminer_path, sgminer_path);
if (opt_kernel_path && *opt_kernel_path)
build_data->kernel_path = opt_kernel_path;
strcpy(build_data->binary_filename, cgpu->algorithm.name);
strcat(build_data->binary_filename, name);
if (clState->goffset)
strcat(build_data->binary_filename, "g");
build_data->work_size = clState->wsize;
build_data->has_bit_align = clState->hasBitAlign;
set_base_compiler_options(build_data);
if (algorithm->set_compile_options)
algorithm->set_compile_options(build_data, cgpu, algorithm);
build_data->opencl_version = get_opencl_version(devices[gpu]);
build_data->patch_bfi = needs_bfi_patch(build_data);
strcat(build_data->binary_filename, ".bin");
set_base_compiler_options(build_data);
append_scrypt_compiler_options(build_data, cgpu->lookup_gap, cgpu->thread_concurrency, algorithm->nfactor);
append_hamsi_compiler_options(build_data, opt_hamsi_expand_big);
// Load program from file or build it if it doesn't exist
if (!(clState->program = load_opencl_binary_kernel(build_data))) {
applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
if (!(clState->program = build_opencl_kernel(build_data, filename)))
return NULL;
@ -414,6 +408,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -414,6 +408,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
}
}
// Load kernels
applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d",
filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un",
algorithm->nfactor, algorithm->n);

25
ocl/build_kernel.c

@ -48,11 +48,14 @@ static char *file_contents(const char *filename, int *length) @@ -48,11 +48,14 @@ static char *file_contents(const char *filename, int *length)
void set_base_compiler_options(build_kernel_data *data)
{
char buf[255];
sprintf(data->compiler_options, "-I \"%s\" -I \"%skernel\" -I \".\" -D WORKSIZE=%d",
data->sgminer_path, data->sgminer_path, (int)data->work_size);
applog(LOG_DEBUG, "Setting worksize to %d", (int)(data->work_size));
sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long));
strcat(data->binary_filename, buf);
if (data->has_bit_align) {
strcat(data->compiler_options, " -D BITALIGN");
applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
@ -97,26 +100,6 @@ bool needs_bfi_patch(build_kernel_data *data) @@ -97,26 +100,6 @@ bool needs_bfi_patch(build_kernel_data *data)
return false;
}
// TODO: move away, specific
/*
 * Appends the scrypt-specific preprocessor defines to data->compiler_options.
 *
 * data               - build description whose compiler_options string is
 *                      extended in place; it must already be NUL-terminated
 *                      and have room for the appended text.
 * lookup_gap         - value for -D LOOKUP_GAP.
 * thread_concurrency - value for -D CONCURRENT_THREADS.
 * nfactor            - value for -D NFACTOR.
 */
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor)
{
  char buf[255];

  /* thread_concurrency and nfactor are unsigned, so they are printed with %u;
   * %d would render values above INT_MAX as negative numbers. */
  snprintf(buf, sizeof(buf), " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%u -D NFACTOR=%u",
    lookup_gap, thread_concurrency, nfactor);
  strcat(data->compiler_options, buf);
}
// TODO: move away, specific
/*
 * Appends the Hamsi " -D SPH_HAMSI_EXPAND_BIG=<expand_big>" define to
 * data->compiler_options.
 *
 * data       - build description whose compiler_options string is extended in
 *              place; it must already be NUL-terminated and have room for the
 *              appended text.
 * expand_big - value substituted into the define.
 */
void append_hamsi_compiler_options(build_kernel_data *data, int expand_big)
{
  /* Format the define straight onto the end of the existing option string. */
  char *tail = data->compiler_options + strlen(data->compiler_options);

  sprintf(tail, " -D SPH_HAMSI_EXPAND_BIG=%d", expand_big);
}
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
{
int pl;

2
ocl/build_kernel.h

@ -26,7 +26,5 @@ bool needs_bfi_patch(build_kernel_data *data); @@ -26,7 +26,5 @@ bool needs_bfi_patch(build_kernel_data *data);
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename);
bool save_opencl_kernel(build_kernel_data *data, cl_program program);
void set_base_compiler_options(build_kernel_data *data);
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor);
void append_hamsi_compiler_options(build_kernel_data *data, int expand_big);
#endif /* BUILD_KERNEL_H */

Loading…
Cancel
Save