mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-09 06:18:06 +00:00
Merge branch 'wolf-whirlpoolx'
This commit is contained in:
commit
137519dc16
60
algorithm.c
60
algorithm.c
@ -638,21 +638,36 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_
|
||||
|
||||
static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||
{
|
||||
cl_kernel *kernel;
|
||||
uint64_t midblock[8], key[8] = { 0 }, tmp[8] = { 0 };
|
||||
cl_ulong le_target;
|
||||
cl_int status = 0;
|
||||
cl_int status;
|
||||
|
||||
le_target = *(cl_ulong *)(blk->work->device_target + 24);
|
||||
flip80(clState->cldata, blk->work->data);
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
|
||||
|
||||
//clbuffer, hashes
|
||||
kernel = &clState->kernel;
|
||||
CL_SET_ARG_N(0, clState->CLbuffer0);
|
||||
CL_SET_ARG_N(1, clState->padbuffer8);
|
||||
memcpy(midblock, clState->cldata, 64);
|
||||
|
||||
CL_SET_ARG_N(2, clState->outputBuffer);
|
||||
CL_SET_ARG_N(3, le_target);
|
||||
// midblock = n, key = h
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
tmp[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
|
||||
whirlpool_round(key, tmp);
|
||||
tmp[0] = 0;
|
||||
whirlpool_round(midblock, tmp);
|
||||
|
||||
for (int x = 0; x < 8; ++x) {
|
||||
midblock[x] ^= key[x];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
midblock[i] ^= ((uint64_t *)(clState->cldata))[i];
|
||||
}
|
||||
|
||||
status = clSetKernelArg(clState->kernel, 0, sizeof(cl_ulong8), (cl_ulong8 *)&midblock);
|
||||
status |= clSetKernelArg(clState->kernel, 1, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 8));
|
||||
status |= clSetKernelArg(clState->kernel, 2, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 9));
|
||||
status |= clSetKernelArg(clState->kernel, 3, sizeof(cl_mem), (void *)&clState->outputBuffer);
|
||||
status |= clSetKernelArg(clState->kernel, 4, sizeof(cl_ulong), (void *)&le_target);
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -720,27 +735,6 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un
|
||||
return status;
|
||||
}
|
||||
|
||||
typedef struct _algorithm_settings_t {
|
||||
const char *name; /* Human-readable identifier */
|
||||
algorithm_type_t type; //common algorithm type
|
||||
const char *kernelfile; /* alternate kernel file */
|
||||
double diff_multiplier1;
|
||||
double diff_multiplier2;
|
||||
double share_diff_multiplier;
|
||||
uint32_t xintensity_shift;
|
||||
uint32_t intensity_shift;
|
||||
uint32_t found_idx;
|
||||
unsigned long long diff_numerator;
|
||||
uint32_t diff1targ;
|
||||
size_t n_extra_kernels;
|
||||
long rw_buffer_size;
|
||||
cl_command_queue_properties cq_properties;
|
||||
void(*regenhash)(struct work *);
|
||||
cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
||||
void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
||||
void(*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
|
||||
} algorithm_settings_t;
|
||||
|
||||
static algorithm_settings_t algos[] = {
|
||||
// kernels starting from this will have difficulty calculated by using litecoin algorithm
|
||||
#define A_SCRYPT(a) \
|
||||
@ -810,7 +804,7 @@ static algorithm_settings_t algos[] = {
|
||||
#undef A_FUGUE
|
||||
|
||||
{ "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL },
|
||||
{ "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL },
|
||||
{ "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
|
||||
|
||||
// Terminator (do not remove)
|
||||
{ NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL }
|
||||
@ -883,7 +877,6 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
|
||||
ALGO_ALIAS("nist5", "talkcoin-mod");
|
||||
ALGO_ALIAS("keccak", "maxcoin");
|
||||
ALGO_ALIAS("whirlpool", "whirlcoin");
|
||||
ALGO_ALIAS("whirlpoolx", "whirlpoolx");
|
||||
ALGO_ALIAS("Lyra2RE", "lyra2re");
|
||||
ALGO_ALIAS("lyra2", "lyra2re");
|
||||
|
||||
@ -945,8 +938,7 @@ void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor)
|
||||
}
|
||||
}
|
||||
|
||||
bool cmp_algorithm(algorithm_t* algo1, algorithm_t* algo2)
|
||||
bool cmp_algorithm(const algorithm_t* algo1, const algorithm_t* algo2)
|
||||
{
|
||||
// return (strcmp(algo1->name, algo2->name) == 0) && (algo1->nfactor == algo2->nfactor);
|
||||
return (!safe_cmp(algo1->name, algo2->name) && !safe_cmp(algo1->kernelfile, algo2->kernelfile) && (algo1->nfactor == algo2->nfactor));
|
||||
}
|
||||
|
26
algorithm.h
26
algorithm.h
@ -9,6 +9,7 @@
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include "ocl/build_kernel.h" // For the build_kernel_data type
|
||||
|
||||
typedef enum {
|
||||
ALGO_UNK,
|
||||
@ -26,6 +27,7 @@ typedef enum {
|
||||
ALGO_FRESH,
|
||||
ALGO_WHIRL,
|
||||
ALGO_NEOSCRYPT,
|
||||
ALGO_WHIRLPOOLX,
|
||||
ALGO_LYRA2RE,
|
||||
ALGO_PLUCK
|
||||
} algorithm_type_t;
|
||||
@ -66,6 +68,28 @@ typedef struct _algorithm_t {
|
||||
void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
|
||||
} algorithm_t;
|
||||
|
||||
typedef struct _algorithm_settings_t
|
||||
{
|
||||
const char *name;
|
||||
algorithm_type_t type;
|
||||
const char *kernelfile;
|
||||
double diff_multiplier1;
|
||||
double diff_multiplier2;
|
||||
double share_diff_multiplier;
|
||||
uint32_t xintensity_shift;
|
||||
uint32_t intensity_shift;
|
||||
uint32_t found_idx;
|
||||
unsigned long long diff_numerator;
|
||||
uint32_t diff1targ;
|
||||
size_t n_extra_kernels;
|
||||
long rw_buffer_size;
|
||||
cl_command_queue_properties cq_properties;
|
||||
void (*regenhash)(struct work *);
|
||||
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
|
||||
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
|
||||
void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
|
||||
} algorithm_settings_t;
|
||||
|
||||
/* Set default parameters based on name. */
|
||||
void set_algorithm(algorithm_t* algo, const char* name);
|
||||
|
||||
@ -73,6 +97,6 @@ void set_algorithm(algorithm_t* algo, const char* name);
|
||||
void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor);
|
||||
|
||||
/* Compare two algorithm parameters */
|
||||
bool cmp_algorithm(algorithm_t* algo1, algorithm_t* algo2);
|
||||
bool cmp_algorithm(const algorithm_t* algo1, const algorithm_t* algo2);
|
||||
|
||||
#endif /* ALGORITHM_H */
|
||||
|
@ -81,8 +81,8 @@ inline void whirlcoin_hash(void *state, const void *input)
|
||||
|
||||
memcpy(&ctx, &base_contexts, sizeof(base_contexts));
|
||||
|
||||
sph_whirlpool1 (&ctx.whirlpool1, input, 80);
|
||||
sph_whirlpool1_close (&ctx.whirlpool1, hashA);
|
||||
sph_whirlpool1(&ctx.whirlpool1, input, 80);
|
||||
sph_whirlpool1_close(&ctx.whirlpool1, hashA);
|
||||
|
||||
sph_whirlpool1(&ctx.whirlpool2, hashA, 64);
|
||||
sph_whirlpool1_close(&ctx.whirlpool2, hashB);
|
||||
@ -98,9 +98,8 @@ inline void whirlcoin_hash(void *state, const void *input)
|
||||
|
||||
static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
|
||||
/* Used externally as confirmation of correct OCL code */
|
||||
int whirlcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
int whirlcoin_test_old(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
{
|
||||
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||
uint32_t data[20], ohash[8];
|
||||
@ -132,7 +131,7 @@ void whirlcoin_regenhash(struct work *work)
|
||||
data[19] = htobe32(*nonce);
|
||||
whirlcoin_hash(ohash, data);
|
||||
}
|
||||
|
||||
/*
|
||||
bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
|
||||
unsigned char *pdata, unsigned char __maybe_unused *phash1,
|
||||
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
|
||||
@ -146,7 +145,7 @@ bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused
|
||||
|
||||
be32enc_vect(data, (const uint32_t *)pdata, 19);
|
||||
|
||||
while(1) {
|
||||
while (1) {
|
||||
uint32_t ostate[8];
|
||||
|
||||
*nonce = ++n;
|
||||
@ -172,3 +171,4 @@ bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused
|
||||
|
||||
return ret;
|
||||
}
|
||||
*/
|
@ -34,7 +34,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph/sph_whirlpool.h"
|
||||
#include "whirlpoolx.h"
|
||||
|
||||
/*
|
||||
* Encode a length len/4 vector of (uint32_t) into a length len vector of
|
||||
@ -49,56 +49,143 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
|
||||
dst[i] = htobe32(src[i]);
|
||||
}
|
||||
|
||||
inline void whirlpoolx_hash(void *state, const void *input)
|
||||
{
|
||||
sph_whirlpool1_context ctx;
|
||||
|
||||
sph_whirlpool1_init(&ctx);
|
||||
void whirlpool_compress(uint8_t state[64], const uint8_t block[64])
|
||||
{
|
||||
const int NUM_ROUNDS = 10;
|
||||
uint64_t tempState[8];
|
||||
uint64_t tempBlock[8];
|
||||
int i;
|
||||
|
||||
// Initialization
|
||||
for (i = 0; i < 8; i++) {
|
||||
tempState[i] =
|
||||
(uint64_t)state[i << 3]
|
||||
| (uint64_t)state[(i << 3) + 1] << 8
|
||||
| (uint64_t)state[(i << 3) + 2] << 16
|
||||
| (uint64_t)state[(i << 3) + 3] << 24
|
||||
| (uint64_t)state[(i << 3) + 4] << 32
|
||||
| (uint64_t)state[(i << 3) + 5] << 40
|
||||
| (uint64_t)state[(i << 3) + 6] << 48
|
||||
| (uint64_t)state[(i << 3) + 7] << 56;
|
||||
tempBlock[i] = (
|
||||
(uint64_t)block[i << 3]
|
||||
| (uint64_t)block[(i << 3) + 1] << 8
|
||||
| (uint64_t)block[(i << 3) + 2] << 16
|
||||
| (uint64_t)block[(i << 3) + 3] << 24
|
||||
| (uint64_t)block[(i << 3) + 4] << 32
|
||||
| (uint64_t)block[(i << 3) + 5] << 40
|
||||
| (uint64_t)block[(i << 3) + 6] << 48
|
||||
| (uint64_t)block[(i << 3) + 7] << 56) ^ tempState[i];
|
||||
}
|
||||
|
||||
// Hashing rounds
|
||||
uint64_t rcon[8];
|
||||
memset(rcon + 1, 0, sizeof(rcon[0]) * 7);
|
||||
for (i = 0; i < NUM_ROUNDS; i++) {
|
||||
rcon[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
|
||||
whirlpool_round(tempState, rcon);
|
||||
whirlpool_round(tempBlock, tempState);
|
||||
}
|
||||
|
||||
// Final combining
|
||||
for (i = 0; i < 64; i++)
|
||||
state[i] ^= block[i] ^ (uint8_t)(tempBlock[i >> 3] >> ((i & 7) << 3));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void whirlpool_round(uint64_t block[8], const uint64_t key[8]) {
|
||||
uint64_t a = block[0];
|
||||
uint64_t b = block[1];
|
||||
uint64_t c = block[2];
|
||||
uint64_t d = block[3];
|
||||
uint64_t e = block[4];
|
||||
uint64_t f = block[5];
|
||||
uint64_t g = block[6];
|
||||
uint64_t h = block[7];
|
||||
|
||||
uint64_t r;
|
||||
#define DOROW(i, s, t, u, v, w, x, y, z) \
|
||||
r = MAGIC_TABLE[(uint8_t)s]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(t >> 8)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(u >> 16)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(v >> 24)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(w >> 32)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(x >> 40)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(y >> 48)]; r = (r << 56) | (r >> 8); \
|
||||
r ^= MAGIC_TABLE[(uint8_t)(z >> 56)]; r = (r << 56) | (r >> 8); \
|
||||
block[i] = r ^ key[i];
|
||||
|
||||
DOROW(0, a, h, g, f, e, d, c, b)
|
||||
DOROW(1, b, a, h, g, f, e, d, c)
|
||||
DOROW(2, c, b, a, h, g, f, e, d)
|
||||
DOROW(3, d, c, b, a, h, g, f, e)
|
||||
DOROW(4, e, d, c, b, a, h, g, f)
|
||||
DOROW(5, f, e, d, c, b, a, h, g)
|
||||
DOROW(6, g, f, e, d, c, b, a, h)
|
||||
DOROW(7, h, g, f, e, d, c, b, a)
|
||||
}
|
||||
|
||||
/*
 * Hash len bytes of message into the 64-byte buffer hash.
 *
 * The state starts as all zeroes. Every full 64-byte block is compressed
 * directly. The remaining bytes are padded with a 0x80 byte and zeroes, and
 * the message length in bits is stored big-endian in the last 8 bytes of
 * the final block. If more than 32 bytes of the tail block are already
 * used after the 0x80 byte, the padding spills into one extra block.
 */
void whirlpool_hash(const uint8_t *message, uint32_t len, uint8_t hash[64])
{
  uint8_t tail[64];
  uint32_t offset = 0;

  memset(hash, 0, 64);

  /* Whole blocks */
  while (len - offset >= 64) {
    whirlpool_compress(hash, message + offset);
    offset += 64;
  }

  /* Leftover bytes plus the mandatory 0x80 marker */
  uint32_t used = len - offset;
  memcpy(tail, message + offset, used);
  tail[used] = 0x80;
  used++;

  if (used > 32) {
    /* Not enough room left: flush this block and start an empty one. */
    memset(tail + used, 0, 64 - used);
    whirlpool_compress(hash, tail);
    memset(tail, 0, 56);
  } else {
    memset(tail + used, 0, 56 - used);
  }

  /* Bit length, most significant byte first, in bytes 56..63 */
  const uint64_t bit_len = ((uint64_t)len) << 3;
  for (uint32_t k = 0; k < 8; k++)
    tail[63 - k] = (uint8_t)(bit_len >> (k * 8));

  whirlpool_compress(hash, tail);
}
|
||||
|
||||
/*
 * Compute the 32-byte WhirlpoolX result for an 80-byte input.
 *
 * state: output buffer; exactly 32 bytes are written.
 * input: 80 bytes to hash; not modified.
 *
 * The input is hashed with whirlpool_hash() into a 64-byte digest, which is
 * then folded: output byte i is digest[i] ^ digest[i + 16] for i in 0..31.
 * Digest bytes 48..63 therefore do not contribute to the result.
 */
void whirlpoolx_hash(void *state, const void *input)
{
  uint8_t digest[64];
  uint8_t digest_xored[32];

  whirlpool_hash((const uint8_t *)input, 80, digest);

  for (uint32_t i = 0; i < sizeof(digest_xored); i++)
    digest_xored[i] = digest[i] ^ digest[i + 16];

  memcpy(state, digest_xored, sizeof(digest_xored));
}
|
||||
|
||||
static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
|
||||
/* Used externally as confirmation of correct OCL code */
|
||||
int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
int whirlcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
{
|
||||
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||
uint32_t data[20], ohash[8];
|
||||
@ -131,7 +218,7 @@ void whirlpoolx_regenhash(struct work *work)
|
||||
whirlpoolx_hash(ohash, data);
|
||||
}
|
||||
|
||||
bool scanhash_whirlpoolx(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
|
||||
bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
|
||||
unsigned char *pdata, unsigned char __maybe_unused *phash1,
|
||||
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
|
||||
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
|
||||
|
@ -1,9 +1,58 @@
|
||||
#ifndef WHIRLPOOLX_H
|
||||
#define WHIRLPOOLX_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "miner.h"
|
||||
|
||||
// The combined effect of gamma (SubBytes) and theta (MixRows).
// Indexed by a single byte value. The table is read-only, so it is declared
// const: every translation unit that includes this header gets its own
// private copy, and none of them can modify it.
static const uint64_t MAGIC_TABLE[256] = {
  UINT64_C(0xD83078C018601818), UINT64_C(0x2646AF05238C2323), UINT64_C(0xB891F97EC63FC6C6), UINT64_C(0xFBCD6F13E887E8E8), UINT64_C(0xCB13A14C87268787), UINT64_C(0x116D62A9B8DAB8B8), UINT64_C(0x0902050801040101), UINT64_C(0x0D9E6E424F214F4F),
  UINT64_C(0x9B6CEEAD36D83636), UINT64_C(0xFF510459A6A2A6A6), UINT64_C(0x0CB9BDDED26FD2D2), UINT64_C(0x0EF706FBF5F3F5F5), UINT64_C(0x96F280EF79F97979), UINT64_C(0x30DECE5F6FA16F6F), UINT64_C(0x6D3FEFFC917E9191), UINT64_C(0xF8A407AA52555252),
  UINT64_C(0x47C0FD27609D6060), UINT64_C(0x35657689BCCABCBC), UINT64_C(0x372BCDAC9B569B9B), UINT64_C(0x8A018C048E028E8E), UINT64_C(0xD25B1571A3B6A3A3), UINT64_C(0x6C183C600C300C0C), UINT64_C(0x84F68AFF7BF17B7B), UINT64_C(0x806AE1B535D43535),
  UINT64_C(0xF53A69E81D741D1D), UINT64_C(0xB3DD4753E0A7E0E0), UINT64_C(0x21B3ACF6D77BD7D7), UINT64_C(0x9C99ED5EC22FC2C2), UINT64_C(0x435C966D2EB82E2E), UINT64_C(0x29967A624B314B4B), UINT64_C(0x5DE121A3FEDFFEFE), UINT64_C(0xD5AE168257415757),
  UINT64_C(0xBD2A41A815541515), UINT64_C(0xE8EEB69F77C17777), UINT64_C(0x926EEBA537DC3737), UINT64_C(0x9ED7567BE5B3E5E5), UINT64_C(0x1323D98C9F469F9F), UINT64_C(0x23FD17D3F0E7F0F0), UINT64_C(0x20947F6A4A354A4A), UINT64_C(0x44A9959EDA4FDADA),
  UINT64_C(0xA2B025FA587D5858), UINT64_C(0xCF8FCA06C903C9C9), UINT64_C(0x7C528D5529A42929), UINT64_C(0x5A1422500A280A0A), UINT64_C(0x507F4FE1B1FEB1B1), UINT64_C(0xC95D1A69A0BAA0A0), UINT64_C(0x14D6DA7F6BB16B6B), UINT64_C(0xD917AB5C852E8585),
  UINT64_C(0x3C677381BDCEBDBD), UINT64_C(0x8FBA34D25D695D5D), UINT64_C(0x9020508010401010), UINT64_C(0x07F503F3F4F7F4F4), UINT64_C(0xDD8BC016CB0BCBCB), UINT64_C(0xD37CC6ED3EF83E3E), UINT64_C(0x2D0A112805140505), UINT64_C(0x78CEE61F67816767),
  UINT64_C(0x97D55373E4B7E4E4), UINT64_C(0x024EBB25279C2727), UINT64_C(0x7382583241194141), UINT64_C(0xA70B9D2C8B168B8B), UINT64_C(0xF6530151A7A6A7A7), UINT64_C(0xB2FA94CF7DE97D7D), UINT64_C(0x4937FBDC956E9595), UINT64_C(0x56AD9F8ED847D8D8),
  UINT64_C(0x70EB308BFBCBFBFB), UINT64_C(0xCDC17123EE9FEEEE), UINT64_C(0xBBF891C77CED7C7C), UINT64_C(0x71CCE31766856666), UINT64_C(0x7BA78EA6DD53DDDD), UINT64_C(0xAF2E4BB8175C1717), UINT64_C(0x458E460247014747), UINT64_C(0x1A21DC849E429E9E),
  UINT64_C(0xD489C51ECA0FCACA), UINT64_C(0x585A99752DB42D2D), UINT64_C(0x2E637991BFC6BFBF), UINT64_C(0x3F0E1B38071C0707), UINT64_C(0xAC472301AD8EADAD), UINT64_C(0xB0B42FEA5A755A5A), UINT64_C(0xEF1BB56C83368383), UINT64_C(0xB666FF8533CC3333),
  UINT64_C(0x5CC6F23F63916363), UINT64_C(0x12040A1002080202), UINT64_C(0x93493839AA92AAAA), UINT64_C(0xDEE2A8AF71D97171), UINT64_C(0xC68DCF0EC807C8C8), UINT64_C(0xD1327DC819641919), UINT64_C(0x3B92707249394949), UINT64_C(0x5FAF9A86D943D9D9),
  UINT64_C(0x31F91DC3F2EFF2F2), UINT64_C(0xA8DB484BE3ABE3E3), UINT64_C(0xB9B62AE25B715B5B), UINT64_C(0xBC0D9234881A8888), UINT64_C(0x3E29C8A49A529A9A), UINT64_C(0x0B4CBE2D26982626), UINT64_C(0xBF64FA8D32C83232), UINT64_C(0x597D4AE9B0FAB0B0),
  UINT64_C(0xF2CF6A1BE983E9E9), UINT64_C(0x771E33780F3C0F0F), UINT64_C(0x33B7A6E6D573D5D5), UINT64_C(0xF41DBA74803A8080), UINT64_C(0x27617C99BEC2BEBE), UINT64_C(0xEB87DE26CD13CDCD), UINT64_C(0x8968E4BD34D03434), UINT64_C(0x3290757A483D4848),
  UINT64_C(0x54E324ABFFDBFFFF), UINT64_C(0x8DF48FF77AF57A7A), UINT64_C(0x643DEAF4907A9090), UINT64_C(0x9DBE3EC25F615F5F), UINT64_C(0x3D40A01D20802020), UINT64_C(0x0FD0D56768BD6868), UINT64_C(0xCA3472D01A681A1A), UINT64_C(0xB7412C19AE82AEAE),
  UINT64_C(0x7D755EC9B4EAB4B4), UINT64_C(0xCEA8199A544D5454), UINT64_C(0x7F3BE5EC93769393), UINT64_C(0x2F44AA0D22882222), UINT64_C(0x63C8E907648D6464), UINT64_C(0x2AFF12DBF1E3F1F1), UINT64_C(0xCCE6A2BF73D17373), UINT64_C(0x82245A9012481212),
  UINT64_C(0x7A805D3A401D4040), UINT64_C(0x4810284008200808), UINT64_C(0x959BE856C32BC3C3), UINT64_C(0xDFC57B33EC97ECEC), UINT64_C(0x4DAB9096DB4BDBDB), UINT64_C(0xC05F1F61A1BEA1A1), UINT64_C(0x9107831C8D0E8D8D), UINT64_C(0xC87AC9F53DF43D3D),
  UINT64_C(0x5B33F1CC97669797), UINT64_C(0x0000000000000000), UINT64_C(0xF983D436CF1BCFCF), UINT64_C(0x6E5687452BAC2B2B), UINT64_C(0xE1ECB39776C57676), UINT64_C(0xE619B06482328282), UINT64_C(0x28B1A9FED67FD6D6), UINT64_C(0xC33677D81B6C1B1B),
  UINT64_C(0x74775BC1B5EEB5B5), UINT64_C(0xBE432911AF86AFAF), UINT64_C(0x1DD4DF776AB56A6A), UINT64_C(0xEAA00DBA505D5050), UINT64_C(0x578A4C1245094545), UINT64_C(0x38FB18CBF3EBF3F3), UINT64_C(0xAD60F09D30C03030), UINT64_C(0xC4C3742BEF9BEFEF),
  UINT64_C(0xDA7EC3E53FFC3F3F), UINT64_C(0xC7AA1C9255495555), UINT64_C(0xDB591079A2B2A2A2), UINT64_C(0xE9C96503EA8FEAEA), UINT64_C(0x6ACAEC0F65896565), UINT64_C(0x036968B9BAD2BABA), UINT64_C(0x4A5E93652FBC2F2F), UINT64_C(0x8E9DE74EC027C0C0),
  UINT64_C(0x60A181BEDE5FDEDE), UINT64_C(0xFC386CE01C701C1C), UINT64_C(0x46E72EBBFDD3FDFD), UINT64_C(0x1F9A64524D294D4D), UINT64_C(0x7639E0E492729292), UINT64_C(0xFAEABC8F75C97575), UINT64_C(0x360C1E3006180606), UINT64_C(0xAE0998248A128A8A),
  UINT64_C(0x4B7940F9B2F2B2B2), UINT64_C(0x85D15963E6BFE6E6), UINT64_C(0x7E1C36700E380E0E), UINT64_C(0xE73E63F81F7C1F1F), UINT64_C(0x55C4F73762956262), UINT64_C(0x3AB5A3EED477D4D4), UINT64_C(0x814D3229A89AA8A8), UINT64_C(0x5231F4C496629696),
  UINT64_C(0x62EF3A9BF9C3F9F9), UINT64_C(0xA397F666C533C5C5), UINT64_C(0x104AB13525942525), UINT64_C(0xABB220F259795959), UINT64_C(0xD015AE54842A8484), UINT64_C(0xC5E4A7B772D57272), UINT64_C(0xEC72DDD539E43939), UINT64_C(0x1698615A4C2D4C4C),
  UINT64_C(0x94BC3BCA5E655E5E), UINT64_C(0x9FF085E778FD7878), UINT64_C(0xE570D8DD38E03838), UINT64_C(0x980586148C0A8C8C), UINT64_C(0x17BFB2C6D163D1D1), UINT64_C(0xE4570B41A5AEA5A5), UINT64_C(0xA1D94D43E2AFE2E2), UINT64_C(0x4EC2F82F61996161),
  UINT64_C(0x427B45F1B3F6B3B3), UINT64_C(0x3442A51521842121), UINT64_C(0x0825D6949C4A9C9C), UINT64_C(0xEE3C66F01E781E1E), UINT64_C(0x6186522243114343), UINT64_C(0xB193FC76C73BC7C7), UINT64_C(0x4FE52BB3FCD7FCFC), UINT64_C(0x2408142004100404),
  UINT64_C(0xE3A208B251595151), UINT64_C(0x252FC7BC995E9999), UINT64_C(0x22DAC44F6DA96D6D), UINT64_C(0x651A39680D340D0D), UINT64_C(0x79E93583FACFFAFA), UINT64_C(0x69A384B6DF5BDFDF), UINT64_C(0xA9FC9BD77EE57E7E), UINT64_C(0x1948B43D24902424),
  UINT64_C(0xFE76D7C53BEC3B3B), UINT64_C(0x9A4B3D31AB96ABAB), UINT64_C(0xF081D13ECE1FCECE), UINT64_C(0x9922558811441111), UINT64_C(0x8303890C8F068F8F), UINT64_C(0x049C6B4A4E254E4E), UINT64_C(0x667351D1B7E6B7B7), UINT64_C(0xE0CB600BEB8BEBEB),
  UINT64_C(0xC178CCFD3CF03C3C), UINT64_C(0xFD1FBF7C813E8181), UINT64_C(0x4035FED4946A9494), UINT64_C(0x1CF30CEBF7FBF7F7), UINT64_C(0x186F67A1B9DEB9B9), UINT64_C(0x8B265F98134C1313), UINT64_C(0x51589C7D2CB02C2C), UINT64_C(0x05BBB8D6D36BD3D3),
  UINT64_C(0x8CD35C6BE7BBE7E7), UINT64_C(0x39DCCB576EA56E6E), UINT64_C(0xAA95F36EC437C4C4), UINT64_C(0x1B060F18030C0303), UINT64_C(0xDCAC138A56455656), UINT64_C(0x5E88491A440D4444), UINT64_C(0xA0FE9EDF7FE17F7F), UINT64_C(0x884F3721A99EA9A9),
  UINT64_C(0x6754824D2AA82A2A), UINT64_C(0x0A6B6DB1BBD6BBBB), UINT64_C(0x879FE246C123C1C1), UINT64_C(0xF1A602A253515353), UINT64_C(0x72A58BAEDC57DCDC), UINT64_C(0x531627580B2C0B0B), UINT64_C(0x0127D39C9D4E9D9D), UINT64_C(0x2BD8C1476CAD6C6C),
  UINT64_C(0xA462F59531C43131), UINT64_C(0xF3E8B98774CD7474), UINT64_C(0x15F109E3F6FFF6F6), UINT64_C(0x4C8C430A46054646), UINT64_C(0xA5452609AC8AACAC), UINT64_C(0xB50F973C891E8989), UINT64_C(0xB42844A014501414), UINT64_C(0xBADF425BE1A3E1E1),
  UINT64_C(0xA62C4EB016581616), UINT64_C(0xF774D2CD3AE83A3A), UINT64_C(0x06D2D06F69B96969), UINT64_C(0x41122D4809240909), UINT64_C(0xD7E0ADA770DD7070), UINT64_C(0x6F7154D9B6E2B6B6), UINT64_C(0x1EBDB7CED067D0D0), UINT64_C(0xD6C77E3BED93EDED),
  UINT64_C(0xE285DB2ECC17CCCC), UINT64_C(0x6884572A42154242), UINT64_C(0x2C2DC2B4985A9898), UINT64_C(0xED550E49A4AAA4A4), UINT64_C(0x7550885D28A02828), UINT64_C(0x86B831DA5C6D5C5C), UINT64_C(0x6BED3F93F8C7F8F8), UINT64_C(0xC211A44486228686),
};
|
||||
|
||||
// Per-round constants, one 64-bit word per round index.
// The table is read-only, so it is declared const: every translation unit
// that includes this header gets its own private copy, and none of them can
// modify it.
static const uint64_t WHIRLPOOL_ROUND_CONSTANTS[32] = {
  UINT64_C(0x4F01B887E8C62318), UINT64_C(0x52916F79F5D2A636), UINT64_C(0x357B0CA38E9BBC60), UINT64_C(0x57FE4B2EC2D7E01D),
  UINT64_C(0xDA4AF09FE5377715), UINT64_C(0x856BA0B10A29C958), UINT64_C(0x67053ECBF4105DBD), UINT64_C(0xD8957DA78B4127E4),
  UINT64_C(0x9E4717DD667CEEFB), UINT64_C(0x33835AAD07BF2DCA), UINT64_C(0xD94919C871AA0263), UINT64_C(0xB032269A885BE3F2),
  UINT64_C(0x4834CDBE80D50FE9), UINT64_C(0xAE1A68205F907AFF), UINT64_C(0x1273F164229354B4), UINT64_C(0x3D8DA1DBECC30840),
  UINT64_C(0x1BD682762BCF0097), UINT64_C(0xEF30F345506AAFB5), UINT64_C(0xC02FBA65EAA2553F), UINT64_C(0x8A0675924DFD1CDE),
  UINT64_C(0x96A8D4621F0EE6B2), UINT64_C(0x4C3972845925C5F9), UINT64_C(0x61E2A5D18C38785E), UINT64_C(0x04FCC7431E9C21B3),
  UINT64_C(0x247EDFFA0D6D9951), UINT64_C(0xEBB74E8F11CEAB3B), UINT64_C(0xD32C13B9F794813C), UINT64_C(0xA97F445603C46EE7),
  UINT64_C(0x6C9D0BDC53C1BB2A), UINT64_C(0xE11489AC46F67431), UINT64_C(0xEDD0B67009693A16), UINT64_C(0x86F85C28A49842CC),
};
|
||||
|
||||
extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
|
||||
extern void whirlpoolx_regenhash(struct work *work);
|
||||
extern void whirlpool_round(uint64_t block[8], const uint64_t key[8]);
|
||||
|
||||
#endif /* W_H */
|
||||
#endif /* WHIRLPOOLX_H */
|
1525
kernel/whirlpoolx.cl
1525
kernel/whirlpoolx.cl
File diff suppressed because it is too large
Load Diff
119
ocl.c
119
ocl.c
@ -146,16 +146,6 @@ static cl_int create_opencl_context(cl_context *context, cl_platform_id *platfor
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Create a command queue for *device in *context and store it in
 * *command_queue.
 *
 * The queue is first requested with cq_properties. If that fails for any
 * reason, a second attempt is made with no properties at all (i.e. without
 * out-of-order execution). Returns the status of the last attempt.
 */
static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{
  cl_int err;
  cl_command_queue queue = clCreateCommandQueue(*context, *device, cq_properties, &err);

  if (err != CL_SUCCESS) {
    /* Try again without OOE enable */
    queue = clCreateCommandQueue(*context, *device, 0, &err);
  }

  *command_queue = queue;
  return err;
}
|
||||
|
||||
static float get_opencl_version(cl_device_id device)
|
||||
{
|
||||
/* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */
|
||||
@ -193,27 +183,42 @@ static bool get_opencl_bit_align_support(cl_device_id *device)
|
||||
return !!find;
|
||||
}
|
||||
|
||||
/*
 * Create a command queue for *device in *context and store it in
 * *command_queue.
 *
 * The queue is first requested with cq_properties. If that fails for any
 * reason, a second attempt is made with no properties at all (i.e. without
 * out-of-order execution). Returns the status of the last attempt.
 */
static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{
  cl_int err;
  cl_command_queue queue = clCreateCommandQueue(*context, *device, cq_properties, &err);

  if (err != CL_SUCCESS) {
    /* Try again without OOE enable */
    queue = clCreateCommandQueue(*context, *device, 0, &err);
  }

  *command_queue = queue;
  return err;
}
|
||||
|
||||
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm)
|
||||
{
|
||||
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
|
||||
struct cgpu_info *cgpu = &gpus[gpu];
|
||||
cl_int status = 0;
|
||||
size_t compute_units = 0;
|
||||
cl_platform_id platform = NULL;
|
||||
char pbuff[256];
|
||||
struct cgpu_info *cgpu = &gpus[gpu];
|
||||
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
|
||||
cl_uint preferred_vwidth, slot = 0, cpnd = 0, numDevices = clDevicesNum();
|
||||
cl_device_id *devices = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id));
|
||||
build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data));
|
||||
cl_uint preferred_vwidth;
|
||||
cl_device_id *devices;
|
||||
cl_uint numDevices;
|
||||
cl_int status;
|
||||
char **pbuff = (char **)alloca(sizeof(char *) * numDevices), filename[256];
|
||||
|
||||
// sanity check
|
||||
if (!get_opencl_platform(opt_platform_id, &platform)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
numDevices = clDevicesNum();
|
||||
if (numDevices <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (numDevices <= 0) return NULL;
|
||||
if (gpu >= numDevices) {
|
||||
applog(LOG_ERR, "Invalid GPU %i", gpu);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
devices = (cl_device_id *)alloca(numDevices*sizeof(cl_device_id));
|
||||
|
||||
/* Now, get the device list data */
|
||||
|
||||
@ -225,27 +230,26 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
|
||||
applog(LOG_INFO, "List of devices:");
|
||||
|
||||
unsigned int i;
|
||||
for (i = 0; i < numDevices; i++) {
|
||||
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Getting Device Info", status);
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
size_t tmpsize;
|
||||
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &tmpsize) != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error while getting the length of the name for GPU #%d.", i);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "\t%i\t%s", i, pbuff);
|
||||
|
||||
if (i == gpu) {
|
||||
applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
|
||||
strncpy(name, pbuff, nameSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu >= numDevices) {
|
||||
applog(LOG_ERR, "Invalid GPU %i", gpu);
|
||||
// Does the size include the NULL terminator? Who knows, just add one, it's faster than looking it up.
|
||||
pbuff[i] = (char *)alloca(sizeof(char) * (tmpsize + 1));
|
||||
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(char) * tmpsize, pbuff[i], NULL) != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error while attempting to get device information.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "\t%i\t%s", i, pbuff[i]);
|
||||
}
|
||||
|
||||
applog(LOG_INFO, "Selected %d: %s", gpu, pbuff[gpu]);
|
||||
strncpy(name, pbuff[gpu], nameSize);
|
||||
|
||||
status = create_opencl_context(&clState->context, &platform);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
|
||||
@ -274,7 +278,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));
|
||||
|
||||
size_t compute_units = 0;
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), (void *)&compute_units, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
|
||||
@ -282,8 +285,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
// AMD architecture has 64 compute shaders per compute unit.
|
||||
// Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf
|
||||
clState->compute_shaders = compute_units * 64;
|
||||
applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders));
|
||||
clState->compute_shaders = compute_units << 6;
|
||||
applog(LOG_INFO, "Maximum work size for this GPU (%d) is %d.", gpu, clState->max_work_size);
|
||||
applog(LOG_INFO, "Your GPU (#%d) has %d compute units, and all AMD cards in the 7 series or newer (GCN cards) \
|
||||
have 64 shaders per compute unit - this means it has %d shaders.", gpu, compute_units, clState->compute_shaders);
|
||||
|
||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
@ -297,12 +302,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
* would have otherwise created. The filename is:
|
||||
* name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin
|
||||
*/
|
||||
char filename[255];
|
||||
char strbuf[32];
|
||||
|
||||
sprintf(strbuf, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name));
|
||||
strcpy(filename, strbuf);
|
||||
|
||||
sprintf(filename, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name));
|
||||
applog(LOG_DEBUG, "Using source file %s", filename);
|
||||
|
||||
/* For some reason 2 vectors is still better even if the card says
|
||||
@ -326,10 +327,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
|
||||
clState->goffset = true;
|
||||
|
||||
if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
|
||||
clState->wsize = cgpu->work_size;
|
||||
else
|
||||
clState->wsize = 256;
|
||||
clState->wsize = (cgpu->work_size && cgpu->work_size <= clState->max_work_size) ? cgpu->work_size : 256;
|
||||
|
||||
if (!cgpu->opt_lg) {
|
||||
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
|
||||
@ -536,10 +534,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
cgpu->thread_concurrency = cgpu->opt_tc;
|
||||
}
|
||||
|
||||
cl_uint slot, cpnd;
|
||||
|
||||
slot = cpnd = 0;
|
||||
|
||||
build_data->context = clState->context;
|
||||
build_data->device = &devices[gpu];
|
||||
|
||||
@ -547,27 +541,25 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
strcpy(build_data->source_filename, filename);
|
||||
strcpy(build_data->platform, name);
|
||||
strcpy(build_data->sgminer_path, sgminer_path);
|
||||
if (opt_kernel_path && *opt_kernel_path) {
|
||||
build_data->kernel_path = opt_kernel_path;
|
||||
}
|
||||
else {
|
||||
build_data->kernel_path = NULL;
|
||||
}
|
||||
|
||||
build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL;
|
||||
build_data->work_size = clState->wsize;
|
||||
build_data->has_bit_align = clState->hasBitAlign;
|
||||
|
||||
build_data->opencl_version = get_opencl_version(devices[gpu]);
|
||||
build_data->patch_bfi = needs_bfi_patch(build_data);
|
||||
|
||||
strcpy(build_data->binary_filename, (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name));
|
||||
strcat(build_data->binary_filename, name);
|
||||
if (clState->goffset)
|
||||
strcpy(build_data->binary_filename, filename);
|
||||
build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix.
|
||||
strcat(build_data->binary_filename, pbuff[gpu]);
|
||||
|
||||
if (clState->goffset) {
|
||||
strcat(build_data->binary_filename, "g");
|
||||
}
|
||||
|
||||
set_base_compiler_options(build_data);
|
||||
if (algorithm->set_compile_options)
|
||||
if (algorithm->set_compile_options) {
|
||||
algorithm->set_compile_options(build_data, cgpu, algorithm);
|
||||
}
|
||||
|
||||
strcat(build_data->binary_filename, ".bin");
|
||||
applog(LOG_DEBUG, "Using binary file %s", build_data->binary_filename);
|
||||
@ -576,8 +568,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
if (!(clState->program = load_opencl_binary_kernel(build_data))) {
|
||||
applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
|
||||
|
||||
if (!(clState->program = build_opencl_kernel(build_data, filename)))
|
||||
if (!(clState->program = build_opencl_kernel(build_data, filename))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (save_opencl_kernel(build_data, clState->program)) {
|
||||
/* Program needs to be rebuilt, because the binary was patched */
|
||||
|
3
ocl.h
3
ocl.h
@ -10,7 +10,7 @@
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "miner.h"
|
||||
#include "algorithm.h"
|
||||
|
||||
typedef struct __clState {
|
||||
cl_context context;
|
||||
@ -21,6 +21,7 @@ typedef struct __clState {
|
||||
cl_program program;
|
||||
cl_mem outputBuffer;
|
||||
cl_mem CLbuffer0;
|
||||
cl_mem MidstateBuf;
|
||||
cl_mem padbuffer8;
|
||||
unsigned char cldata[80];
|
||||
bool hasBitAlign;
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include "binary_kernel.h"
|
||||
#include "miner.h"
|
||||
#include <sys/stat.h>
|
||||
#include <stdio.h>
|
||||
|
||||
cl_program load_opencl_binary_kernel(build_kernel_data *data)
|
||||
{
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include <stdio.h>
|
||||
#include "build_kernel.h"
|
||||
#include "patch_kernel.h"
|
||||
#include "miner.h"
|
||||
|
||||
static char *file_contents(const char *filename, int *length)
|
||||
{
|
||||
|
@ -1,8 +1,14 @@
|
||||
#ifndef BUILD_KERNEL_H
|
||||
#define BUILD_KERNEL_H
|
||||
|
||||
#include "ocl.h"
|
||||
#include <stdbool.h>
|
||||
#include "logging.h"
|
||||
|
||||
#ifdef __APPLE_CC__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
typedef struct _build_kernel_data {
|
||||
char source_filename[255];
|
||||
|
@ -1,6 +1,8 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual C++ Express 2010
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2013
|
||||
VisualStudioVersion = 12.0.31101.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sgminer", "sgminer.vcxproj", "{CCA64DCD-6401-42A3-ABC3-89E48A36D239}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jansson", "jansson\jansson.vcxproj", "{AFE7D2AA-025C-4837-B4B2-81117E010B3B}"
|
||||
@ -17,6 +19,7 @@ Global
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.Build.0 = Debug|x64
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.Deploy.0 = Debug|x64
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.Build.0 = Release|Win32
|
||||
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|x64.ActiveCfg = Release|x64
|
||||
|
Loading…
Reference in New Issue
Block a user