diff --git a/algorithm.c b/algorithm.c
index 53c2fff0..76733320 100644
--- a/algorithm.c
+++ b/algorithm.c
@@ -10,6 +10,7 @@
 #include "algorithm.h"
 #include "sha2.h"
 #include "ocl.h"
+#include "ocl/build_kernel.h"
 
 #include "algorithm/scrypt.h"
 #include "algorithm/animecoin.h"
@@ -42,6 +43,36 @@ void gen_hash(const unsigned char *data, unsigned int len, unsigned char *hash)
 #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
 #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
 
+/* Scrypt build hook: appends the LOOKUP_GAP, CONCURRENT_THREADS and NFACTOR
+ * defines to data->compiler_options and the matching "lg<n>tc<n>nf<n>" tag to
+ * data->binary_filename.
+ * NOTE(review): both strcat() calls are unbounded; confirm the destination
+ * buffers in build_kernel_data can hold the accumulated text. */
+static void append_scrypt_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
+{
+  char buf[255];
+  snprintf(buf, sizeof(buf), " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%u -D NFACTOR=%d",
+    (int)cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)algorithm->nfactor);
+  strcat(data->compiler_options, buf);
+
+  snprintf(buf, sizeof(buf), "lg%utc%unf%u", (unsigned int)cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (unsigned int)algorithm->nfactor);
+  strcat(data->binary_filename, buf);
+}
+
+/* Hamsi build hook: appends the SPH_HAMSI_EXPAND_BIG define (taken from
+ * opt_hamsi_expand_big) to data->compiler_options and a "big<n>" tag to
+ * data->binary_filename. cgpu and algorithm exist only to match the hook type. */
+static void append_hamsi_compiler_options(struct _build_kernel_data *data, __maybe_unused struct cgpu_info *cgpu, __maybe_unused struct _algorithm_t *algorithm)
+{
+  char buf[255];
+  snprintf(buf, sizeof(buf), " -D SPH_HAMSI_EXPAND_BIG=%d",
+    (int)opt_hamsi_expand_big);
+  strcat(data->compiler_options, buf);
+
+  snprintf(buf, sizeof(buf), "big%u", (unsigned int)opt_hamsi_expand_big);
+  strcat(data->binary_filename, buf);
+}
+
 static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
   unsigned char *midstate = blk->work->midstate;
@@ -309,12 +340,13 @@ typedef struct _algorithm_settings_t {
   void (*regenhash)(struct work *);
   cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
   void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
+  void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
 } algorithm_settings_t;
 
 static algorithm_settings_t algos[] = {
   // kernels starting from this will have difficulty calculated by using litecoin algorithm
 #define A_SCRYPT(a) \
-  { a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash}
+  { a, 1, 65536, 65536, 0, 0, 0xFF, 0x0000ffff00000000ULL, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options}
   A_SCRYPT( "ckolivas" ),
   A_SCRYPT( "alexkarnew" ),
   A_SCRYPT( "alexkarnold" ),
@@ -325,7 +357,7 @@ static algorithm_settings_t algos[] = {
 
   // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
-  { a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
+  { a, 256, 256, 256, 0, 0, 0xFF, 0x000000ffff000000ULL, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, NULL}
   A_QUARK( "quarkcoin", quarkcoin_regenhash),
   A_QUARK( "qubitcoin", qubitcoin_regenhash),
   A_QUARK( "animecoin", animecoin_regenhash),
@@ -334,28 +366,29 @@ static algorithm_settings_t algos[] = {
 
   // kernels starting from this will have difficulty calculated by using bitcoin algorithm
 #define A_DARK(a, b) \
-  { a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash}
+  { a, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, NULL}
   A_DARK( "darkcoin", darkcoin_regenhash),
   A_DARK( "inkcoin", inkcoin_regenhash),
   A_DARK( "myriadcoin-groestl", myriadcoin_groestl_regenhash),
-  A_DARK( "marucoin", marucoin_regenhash),
 #undef A_DARK
 
-  { "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256},
-  { "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256},
-  { "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash},
-  { "marucoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash},
-  { "marucoin-modold", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash},
+  { "twecoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL},
+  { "maxcoin", 1, 256, 1, 4, 15, 0x0F, 0x00000000ffff0000ULL, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL},
+  { "darkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, NULL},
+
+  { "marucoin", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_hamsi_compiler_options},
+  { "marucoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options},
+  { "marucoin-modold", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options},
 
   // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b) \
-  { a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256}
+  { a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL}
   A_FUGUE( "fuguecoin", fuguecoin_regenhash),
   A_FUGUE( "groestlcoin", groestlcoin_regenhash),
 #undef A_FUGUE
 
   // Terminator (do not remove)
-  { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL}
+  { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL}
 };
 
 void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
@@ -381,6 +414,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo) {
       dest->regenhash = src->regenhash;
       dest->queue_kernel = src->queue_kernel;
       dest->gen_hash = src->gen_hash;
+      dest->set_compile_options = src->set_compile_options;
       break;
     }
   }
diff --git a/algorithm.h b/algorithm.h
index 7376d9a5..6dbe2784 100644
--- a/algorithm.h
+++ b/algorithm.h
@@ -14,6 +14,8 @@ extern void gen_hash(const unsigned char *data, unsigned int len, unsigned char
 
 struct __clState;
 struct _dev_blk_ctx;
+struct _build_kernel_data;
+struct cgpu_info;
 struct work;
 
 /* Describes the Scrypt parameters and hashing functions used to mine
@@ -38,6 +40,7 @@ typedef struct _algorithm_t {
   void (*regenhash)(struct work *);
   cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
   void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
+  void (*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
 } algorithm_t;
 
 /* Set default parameters based on name. */
diff --git a/ocl.c b/ocl.c
index 126978dc..be4a90de 100644
--- a/ocl.c
+++ b/ocl.c
@@ -295,13 +295,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
    * would have otherwise created. The filename is:
-   * name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin
+   * algorithm + name + g + w + work_size + l + sizeof(long) + per-algorithm suffix (scrypt: lg/tc/nf, hamsi: big) + .bin
    */
-  char binaryfilename[255];
   char filename[255];
   char strbuf[32];
 
   sprintf(strbuf, "%s.cl", cgpu->algorithm.name);
   strcpy(filename, strbuf);
-  strcpy(binaryfilename, cgpu->algorithm.name);
 
   /* For some reason 2 vectors is still better even if the card says
    * otherwise, and many cards lie about their max so use 256 as max
@@ -364,40 +362,36 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
 
   slot = cpnd = 0;
 
-  strcat(binaryfilename, name);
-  if (clState->goffset)
-    strcat(binaryfilename, "g");
+  build_data->context = clState->context;
+  build_data->device = &devices[gpu];
 
-  sprintf(strbuf, "lg%utc%unf%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, algorithm->nfactor);
-  strcat(binaryfilename, strbuf);
+  // Build information
+  strcpy(build_data->source_filename, filename);
+  strcpy(build_data->platform, name);
+  strcpy(build_data->sgminer_path, sgminer_path);
+  if (opt_kernel_path && *opt_kernel_path)
+    build_data->kernel_path = opt_kernel_path;
 
-  sprintf(strbuf, "w%d", (int)clState->wsize);
-  strcat(binaryfilename, strbuf);
-  sprintf(strbuf, "l%d", (int)sizeof(long));
-  strcat(binaryfilename, strbuf);
-  strcat(binaryfilename, ".bin");
+  build_data->work_size = clState->wsize;
+  build_data->has_bit_align = clState->hasBitAlign;
 
-  strcpy(build_data->binary_filename, binaryfilename);
-  build_data->context = clState->context;
-  build_data->device = &devices[gpu];
-  if (!(clState->program = load_opencl_binary_kernel(build_data))) {
-    applog(LOG_NOTICE, "Building binary %s", binaryfilename);
+  build_data->opencl_version = get_opencl_version(devices[gpu]);
+  build_data->patch_bfi = needs_bfi_patch(build_data);
 
-    strcpy(build_data->source_filename, filename);
-    strcpy(build_data->platform, name);
-    strcpy(build_data->sgminer_path, sgminer_path);
-    if (opt_kernel_path && *opt_kernel_path)
-      build_data->kernel_path = opt_kernel_path;
+  strcpy(build_data->binary_filename, cgpu->algorithm.name);
+  strcat(build_data->binary_filename, name);
+  if (clState->goffset)
+    strcat(build_data->binary_filename, "g");
 
-    build_data->work_size = clState->wsize;
-    build_data->has_bit_align = clState->hasBitAlign;
+  set_base_compiler_options(build_data);
+  if (algorithm->set_compile_options)
+    algorithm->set_compile_options(build_data, cgpu, algorithm);
 
-    build_data->opencl_version = get_opencl_version(devices[gpu]);
-    build_data->patch_bfi = needs_bfi_patch(build_data);
+  strcat(build_data->binary_filename, ".bin");
 
-    set_base_compiler_options(build_data);
-    append_scrypt_compiler_options(build_data, cgpu->lookup_gap, cgpu->thread_concurrency, algorithm->nfactor);
-    append_hamsi_compiler_options(build_data, opt_hamsi_expand_big);
+  // Load program from file or build it if it doesn't exist
+  if (!(clState->program = load_opencl_binary_kernel(build_data))) {
+    applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
 
     if (!(clState->program = build_opencl_kernel(build_data, filename)))
       return NULL;
@@ -414,6 +408,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
     }
   }
 
+  // Load kernels
   applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d",
     filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un",
     algorithm->nfactor, algorithm->n);
diff --git a/ocl/build_kernel.c b/ocl/build_kernel.c
index a8969d90..bcab9239 100644
--- a/ocl/build_kernel.c
+++ b/ocl/build_kernel.c
@@ -48,11 +48,15 @@ static char *file_contents(const char *filename, int *length)
 
 void set_base_compiler_options(build_kernel_data *data)
 {
+  char buf[255];
   sprintf(data->compiler_options, "-I \"%s\" -I \"%skernel\" -I \".\" -D WORKSIZE=%d",
     data->sgminer_path,
     data->sgminer_path,
     (int)data->work_size);
-  applog(LOG_DEBUG, "Setting worksize to %d", (int)(data->work_size));
+  /* Side effect: also tags data->binary_filename with the work size and sizeof(long). */
+  snprintf(buf, sizeof(buf), "w%dl%d", (int)data->work_size, (int)sizeof(long));
+  strcat(data->binary_filename, buf);
+
   if (data->has_bit_align) {
     strcat(data->compiler_options, " -D BITALIGN");
     applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
@@ -97,26 +101,6 @@ bool needs_bfi_patch(build_kernel_data *data)
   return false;
 }
 
-// TODO: move away, specific
-void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor)
-{
-  char buf[255];
-  sprintf(buf, " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D NFACTOR=%d",
-    lookup_gap, thread_concurrency, nfactor);
-
-  strcat(data->compiler_options, buf);
-}
-
-// TODO: move away, specific
-void append_hamsi_compiler_options(build_kernel_data *data, int expand_big)
-{
-  char buf[255];
-  sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d",
-    expand_big);
-
-  strcat(data->compiler_options, buf);
-}
-
 cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
 {
   int pl;
diff --git a/ocl/build_kernel.h b/ocl/build_kernel.h
index 52c6233a..841ee017 100644
--- a/ocl/build_kernel.h
+++ b/ocl/build_kernel.h
@@ -26,7 +26,5 @@ bool needs_bfi_patch(build_kernel_data *data);
 cl_program build_opencl_kernel(build_kernel_data *data, const char *filename);
 bool save_opencl_kernel(build_kernel_data *data, cl_program program);
 void set_base_compiler_options(build_kernel_data *data);
-void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor);
-void append_hamsi_compiler_options(build_kernel_data *data, int expand_big);
 
 #endif /* BUILD_KERNEL_H */