Browse Source

Merge remote-tracking branch 'sgminer-dev/master'

Getting updated with others, nothing more.
windows
netswift 10 years ago
parent
commit
632ea79431
  1. 60
      algorithm.c
  2. 26
      algorithm.h
  3. 130
      algorithm/whirlcoin.c
  4. 171
      algorithm/whirlpoolx.c
  5. 51
      algorithm/whirlpoolx.h
  6. 1539
      kernel/whirlpoolx.cl
  7. 129
      ocl.c
  8. 3
      ocl.h
  9. 2
      ocl/binary_kernel.c
  10. 2
      ocl/build_kernel.c
  11. 8
      ocl/build_kernel.h
  12. 7
      winbuild/sgminer.sln

60
algorithm.c

@ -638,21 +638,36 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_
static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{ {
cl_kernel *kernel; uint64_t midblock[8], key[8] = { 0 }, tmp[8] = { 0 };
cl_ulong le_target; cl_ulong le_target;
cl_int status = 0; cl_int status;
le_target = *(cl_ulong *)(blk->work->device_target + 24); le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data); flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
//clbuffer, hashes memcpy(midblock, clState->cldata, 64);
kernel = &clState->kernel;
CL_SET_ARG_N(0, clState->CLbuffer0);
CL_SET_ARG_N(1, clState->padbuffer8);
CL_SET_ARG_N(2, clState->outputBuffer); // midblock = n, key = h
CL_SET_ARG_N(3, le_target); for (int i = 0; i < 10; ++i) {
tmp[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
whirlpool_round(key, tmp);
tmp[0] = 0;
whirlpool_round(midblock, tmp);
for (int x = 0; x < 8; ++x) {
midblock[x] ^= key[x];
}
}
for (int i = 0; i < 8; ++i) {
midblock[i] ^= ((uint64_t *)(clState->cldata))[i];
}
status = clSetKernelArg(clState->kernel, 0, sizeof(cl_ulong8), (cl_ulong8 *)&midblock);
status |= clSetKernelArg(clState->kernel, 1, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 8));
status |= clSetKernelArg(clState->kernel, 2, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 9));
status |= clSetKernelArg(clState->kernel, 3, sizeof(cl_mem), (void *)&clState->outputBuffer);
status |= clSetKernelArg(clState->kernel, 4, sizeof(cl_ulong), (void *)&le_target);
return status; return status;
} }
@ -720,27 +735,6 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un
return status; return status;
} }
/*
 * Static per-algorithm defaults: the element type of the algos[] table below.
 * The table rows are written as positional initializers, so the order of the
 * fields here must stay in step with every row.
 */
typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */
algorithm_type_t type; //common algorithm type
const char *kernelfile; /* alternate kernel file */
/* NOTE(review): the three multipliers and diff_numerator/diff1targ look like
 * difficulty-scaling parameters - confirm against set_algorithm(). */
double diff_multiplier1;
double diff_multiplier2;
double share_diff_multiplier;
uint32_t xintensity_shift;
uint32_t intensity_shift;
uint32_t found_idx;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
/* OpenCL command-queue property bits (the whirlcoin row uses
 * CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE). */
cl_command_queue_properties cq_properties;
/* Host-side hash of a work item, e.g. whirlpoolx_regenhash. */
void(*regenhash)(struct work *);
/* Prepares the kernel arguments for one work item and returns the OpenCL
 * status, e.g. queue_whirlpoolx_kernel. */
cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
/* Optional hook; the rows visible here pass NULL. */
void(*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
} algorithm_settings_t;
static algorithm_settings_t algos[] = { static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using litecoin algorithm // kernels starting from this will have difficulty calculated by using litecoin algorithm
#define A_SCRYPT(a) \ #define A_SCRYPT(a) \
@ -810,7 +804,7 @@ static algorithm_settings_t algos[] = {
#undef A_FUGUE #undef A_FUGUE
{ "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL },
{ "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL }, { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
// Terminator (do not remove) // Terminator (do not remove)
{ NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL }
@ -883,7 +877,6 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
ALGO_ALIAS("nist5", "talkcoin-mod"); ALGO_ALIAS("nist5", "talkcoin-mod");
ALGO_ALIAS("keccak", "maxcoin"); ALGO_ALIAS("keccak", "maxcoin");
ALGO_ALIAS("whirlpool", "whirlcoin"); ALGO_ALIAS("whirlpool", "whirlcoin");
ALGO_ALIAS("whirlpoolx", "whirlpoolx");
ALGO_ALIAS("Lyra2RE", "lyra2re"); ALGO_ALIAS("Lyra2RE", "lyra2re");
ALGO_ALIAS("lyra2", "lyra2re"); ALGO_ALIAS("lyra2", "lyra2re");
@ -945,8 +938,7 @@ void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor)
} }
} }
bool cmp_algorithm(algorithm_t* algo1, algorithm_t* algo2) bool cmp_algorithm(const algorithm_t* algo1, const algorithm_t* algo2)
{ {
// return (strcmp(algo1->name, algo2->name) == 0) && (algo1->nfactor == algo2->nfactor);
return (!safe_cmp(algo1->name, algo2->name) && !safe_cmp(algo1->kernelfile, algo2->kernelfile) && (algo1->nfactor == algo2->nfactor)); return (!safe_cmp(algo1->name, algo2->name) && !safe_cmp(algo1->kernelfile, algo2->kernelfile) && (algo1->nfactor == algo2->nfactor));
} }

26
algorithm.h

@ -9,6 +9,7 @@
#include <inttypes.h> #include <inttypes.h>
#include <stdbool.h> #include <stdbool.h>
#include "ocl/build_kernel.h" // For the build_kernel_data type
typedef enum { typedef enum {
ALGO_UNK, ALGO_UNK,
@ -26,6 +27,7 @@ typedef enum {
ALGO_FRESH, ALGO_FRESH,
ALGO_WHIRL, ALGO_WHIRL,
ALGO_NEOSCRYPT, ALGO_NEOSCRYPT,
ALGO_WHIRLPOOLX,
ALGO_LYRA2RE, ALGO_LYRA2RE,
ALGO_PLUCK ALGO_PLUCK
} algorithm_type_t; } algorithm_type_t;
@ -66,6 +68,28 @@ typedef struct _algorithm_t {
void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
} algorithm_t; } algorithm_t;
/*
 * Static per-algorithm defaults: the element type of the algos[] table in
 * algorithm.c. That table uses positional initializers, so the order of the
 * fields here must stay in step with every row of the table.
 */
typedef struct _algorithm_settings_t
{
const char *name; /* Human-readable identifier, e.g. "whirlpoolx" */
algorithm_type_t type; /* Common algorithm type (ALGO_* enumerator) */
const char *kernelfile; /* Alternate kernel file; "" in the rows seen so far */
/* NOTE(review): the three multipliers and diff_numerator/diff1targ look like
 * difficulty-scaling parameters - confirm against set_algorithm(). */
double diff_multiplier1;
double diff_multiplier2;
double share_diff_multiplier;
uint32_t xintensity_shift;
uint32_t intensity_shift;
uint32_t found_idx;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
/* OpenCL command-queue property bits (the whirlcoin row uses
 * CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE). */
cl_command_queue_properties cq_properties;
/* Host-side hash of a work item, e.g. whirlpoolx_regenhash. */
void (*regenhash)(struct work *);
/* Prepares the kernel arguments for one work item and returns the OpenCL
 * status, e.g. queue_whirlpoolx_kernel. */
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
/* Optional hook; the rows visible in algorithm.c pass NULL. */
void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
} algorithm_settings_t;
/* Set default parameters based on name. */ /* Set default parameters based on name. */
void set_algorithm(algorithm_t* algo, const char* name); void set_algorithm(algorithm_t* algo, const char* name);
@ -73,6 +97,6 @@ void set_algorithm(algorithm_t* algo, const char* name);
void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor); void set_algorithm_nfactor(algorithm_t* algo, const uint8_t nfactor);
/* Compare two algorithm parameters */ /* Compare two algorithm parameters */
bool cmp_algorithm(algorithm_t* algo1, algorithm_t* algo2); bool cmp_algorithm(const algorithm_t* algo1, const algorithm_t* algo2);
#endif /* ALGORITHM_H */ #endif /* ALGORITHM_H */

130
algorithm/whirlcoin.c

@ -49,10 +49,10 @@ Whash_context_holder base_contexts;
void init_whirlcoin_hash_contexts() void init_whirlcoin_hash_contexts()
{ {
sph_whirlpool1_init(&base_contexts.whirlpool1); sph_whirlpool1_init(&base_contexts.whirlpool1);
sph_whirlpool1_init(&base_contexts.whirlpool2); sph_whirlpool1_init(&base_contexts.whirlpool2);
sph_whirlpool1_init(&base_contexts.whirlpool3); sph_whirlpool1_init(&base_contexts.whirlpool3);
sph_whirlpool1_init(&base_contexts.whirlpool4); sph_whirlpool1_init(&base_contexts.whirlpool4);
} }
/* /*
@ -62,10 +62,10 @@ void init_whirlcoin_hash_contexts()
static inline void static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{ {
uint32_t i; uint32_t i;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]); dst[i] = htobe32(src[i]);
} }
@ -74,33 +74,32 @@ static
#endif #endif
inline void whirlcoin_hash(void *state, const void *input) inline void whirlcoin_hash(void *state, const void *input)
{ {
init_whirlcoin_hash_contexts(); init_whirlcoin_hash_contexts();
Whash_context_holder ctx; Whash_context_holder ctx;
uint32_t hashA[16], hashB[16]; uint32_t hashA[16], hashB[16];
memcpy(&ctx, &base_contexts, sizeof(base_contexts)); memcpy(&ctx, &base_contexts, sizeof(base_contexts));
sph_whirlpool1 (&ctx.whirlpool1, input, 80); sph_whirlpool1(&ctx.whirlpool1, input, 80);
sph_whirlpool1_close (&ctx.whirlpool1, hashA); sph_whirlpool1_close(&ctx.whirlpool1, hashA);
sph_whirlpool1(&ctx.whirlpool2, hashA, 64); sph_whirlpool1(&ctx.whirlpool2, hashA, 64);
sph_whirlpool1_close(&ctx.whirlpool2, hashB); sph_whirlpool1_close(&ctx.whirlpool2, hashB);
sph_whirlpool1(&ctx.whirlpool3, hashB, 64); sph_whirlpool1(&ctx.whirlpool3, hashB, 64);
sph_whirlpool1_close(&ctx.whirlpool3, hashA); sph_whirlpool1_close(&ctx.whirlpool3, hashA);
sph_whirlpool1(&ctx.whirlpool4, hashA, 64); sph_whirlpool1(&ctx.whirlpool4, hashA, 64);
sph_whirlpool1_close(&ctx.whirlpool4, hashB); sph_whirlpool1_close(&ctx.whirlpool4, hashB);
memcpy(state, hashB, 32); memcpy(state, hashB, 32);
} }
static const uint32_t diff1targ = 0x0000ffff; static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */ /* Used externally as confirmation of correct OCL code */
int whirlcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) int whirlcoin_test_old(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{ {
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8]; uint32_t data[20], ohash[8];
@ -124,51 +123,52 @@ int whirlcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
void whirlcoin_regenhash(struct work *work) void whirlcoin_regenhash(struct work *work)
{ {
uint32_t data[20]; uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76); uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash); uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19); be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce); data[19] = htobe32(*nonce);
whirlcoin_hash(ohash, data); whirlcoin_hash(ohash, data);
} }
/*
bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1, unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget, unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{ {
uint32_t *nonce = (uint32_t *)(pdata + 76); uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20]; uint32_t data[20];
uint32_t tmp_hash7; uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false; bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19); be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1) { while (1) {
uint32_t ostate[8]; uint32_t ostate[8];
*nonce = ++n; *nonce = ++n;
data[19] = (n); data[19] = (n);
whirlcoin_hash(ostate, data); whirlcoin_hash(ostate, data);
tmp_hash7 = (ostate[7]); tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx", applog(LOG_INFO, "data7 %08lx",
(long unsigned int)data[7]); (long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg)) { if (unlikely(tmp_hash7 <= Htarg)) {
((uint32_t *)pdata)[19] = htobe32(n); ((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n; *last_nonce = n;
ret = true; ret = true;
break; break;
} }
if (unlikely((n >= max_nonce) || thr->work_restart)) { if (unlikely((n >= max_nonce) || thr->work_restart)) {
*last_nonce = n; *last_nonce = n;
break; break;
} }
} }
return ret; return ret;
} }
*/

171
algorithm/whirlpoolx.c

@ -34,7 +34,7 @@
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include "sph/sph_whirlpool.h" #include "whirlpoolx.h"
/* /*
* Encode a length len/4 vector of (uint32_t) into a length len vector of * Encode a length len/4 vector of (uint32_t) into a length len vector of
@ -49,56 +49,143 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
dst[i] = htobe32(src[i]); dst[i] = htobe32(src[i]);
} }
inline void whirlpoolx_hash(void *state, const void *input)
/* Assemble eight consecutive bytes into a 64-bit word, lowest byte first. */
static uint64_t whirlpool_load_le64(const uint8_t *bytes)
{
	uint64_t word = 0;
	int k;

	for (k = 7; k >= 0; k--)
		word = (word << 8) | bytes[k];
	return word;
}

/*
 * Whirlpool compression function: folds one 64-byte message block into the
 * 64-byte chaining state, in place.
 *
 * state - current chaining value; updated with the result.
 * block - message block to absorb; not modified.
 *
 * Both buffers are read as eight 64-bit words with the lowest byte first.
 */
void whirlpool_compress(uint8_t state[64], const uint8_t block[64])
{
	enum { ROUNDS = 10 };
	uint64_t chain[8];          /* key schedule, seeded from the state */
	uint64_t cipher[8];         /* block being encrypted under that schedule */
	uint64_t rcon[8] = { 0 };   /* only word 0 ever carries a round constant */
	int w, r, b;

	/* Load both inputs; the cipher input is pre-whitened with the state. */
	for (w = 0; w < 8; w++) {
		chain[w] = whirlpool_load_le64(state + 8 * w);
		cipher[w] = whirlpool_load_le64(block + 8 * w) ^ chain[w];
	}

	/* Each round first advances the key schedule, then the cipher state. */
	for (r = 0; r < ROUNDS; r++) {
		rcon[0] = WHIRLPOOL_ROUND_CONSTANTS[r];
		whirlpool_round(chain, rcon);
		whirlpool_round(cipher, chain);
	}

	/* Feed-forward: state ^= block ^ cipher output, byte by byte. */
	for (w = 0; w < 8; w++) {
		uint64_t word = cipher[w];

		for (b = 0; b < 8; b++) {
			state[8 * w + b] ^= block[8 * w + b] ^ (uint8_t)word;
			word >>= 8;
		}
	}
}
/*
 * One Whirlpool round applied in place to an 8-word state.
 *
 * block - state to transform; overwritten with the result.
 * key   - round key; word i is XORed into output word i.
 *
 * Output word `row` combines byte k of input word (row - k) mod 8, for
 * k = 0..7, through MAGIC_TABLE. The accumulator is rotated right by 8 bits
 * after every lookup, so the entry for byte k ends up rotated right by
 * 8 * (8 - k) bits in total.
 *
 * This replaces the former function-local DOROW macro, which was never
 * #undef'd and used its arguments unparenthesized.
 */
void whirlpool_round(uint64_t block[8], const uint64_t key[8])
{
	uint64_t in[8];
	int row, k;

	/* Snapshot the input: block[] is overwritten while rows are produced. */
	for (row = 0; row < 8; row++)
		in[row] = block[row];

	for (row = 0; row < 8; row++) {
		uint64_t r = 0;

		for (k = 0; k < 8; k++) {
			/* "+ 8" keeps the index non-negative before masking. */
			r ^= MAGIC_TABLE[(uint8_t)(in[(row + 8 - k) & 7] >> (8 * k))];
			r = (r << 56) | (r >> 8);
		}
		block[row] = r ^ key[row];
	}
}
/*
 * Compute the 64-byte Whirlpool digest of a message.
 *
 * message - input bytes.
 * len     - number of input bytes.
 * hash    - receives the 64-byte digest; it is zeroed first and then used as
 *           the chaining state.
 *
 * Padding: a single 0x80 byte, zeros, then the bit length stored most
 * significant byte first in the last 8 bytes of the final block. The rest of
 * the 32-byte length field stays zero.
 */
void whirlpool_hash(const uint8_t *message, uint32_t len, uint8_t hash[64])
{
	uint8_t tail[64];
	uint32_t off = 0;
	uint32_t used;
	uint64_t bits;
	int pos;

	memset(hash, 0, 64);

	/* Absorb every complete 64-byte block straight from the message. */
	while (len - off >= 64) {
		whirlpool_compress(hash, message + off);
		off += 64;
	}

	/* Copy the leftover bytes and append the mandatory 0x80 marker. */
	used = len - off;
	memcpy(tail, message + off, used);
	tail[used++] = 0x80;

	if (used > 32) {
		/* No room left for the 32-byte length field: flush this block
		 * and carry the length in an additional, otherwise empty one. */
		memset(tail + used, 0, 64 - used);
		whirlpool_compress(hash, tail);
		memset(tail, 0, 56);
	} else {
		memset(tail + used, 0, 56 - used);
	}

	/* Message length in bits, written from the last byte backwards. */
	bits = ((uint64_t)len) << 3;
	for (pos = 63; pos >= 56; pos--) {
		tail[pos] = (uint8_t)bits;
		bits >>= 8;
	}
	whirlpool_compress(hash, tail);
}
void whirlpoolx_hash(void *state, const void *input)
{ {
sph_whirlpool1_context ctx; //sph_whirlpool1_context ctx;
sph_whirlpool1_init(&ctx); //sph_whirlpool1_init(&ctx);
uint8_t digest[64]; uint8_t digest[64];
sph_whirlpool(&ctx, input, 80); //sph_whirlpool(&ctx, input, 80);
sph_whirlpool_close(&ctx, digest); //sph_whirlpool_close(&ctx, digest);
((uint8_t *)state)[0] = digest[0] ^ digest[16]; whirlpool_hash((uint8_t *)input, 80, digest);
((uint8_t *)state)[1] = digest[1] ^ digest[17];
((uint8_t *)state)[2] = digest[2] ^ digest[18]; uint8_t digest_xored[32];
((uint8_t *)state)[3] = digest[3] ^ digest[19];
((uint8_t *)state)[4] = digest[4] ^ digest[20]; for (uint32_t i = 0; i < (64 / 2); i++)
((uint8_t *)state)[5] = digest[5] ^ digest[21]; {
((uint8_t *)state)[6] = digest[6] ^ digest[22]; digest_xored[i] =
((uint8_t *)state)[7] = digest[7] ^ digest[23]; digest[i] ^ digest[i + ((64 / 2) / 2)]
((uint8_t *)state)[8] = digest[8] ^ digest[24]; ;
((uint8_t *)state)[9] = digest[9] ^ digest[25]; }
((uint8_t *)state)[10] = digest[10] ^ digest[26];
((uint8_t *)state)[11] = digest[11] ^ digest[27]; memcpy(state, digest_xored, sizeof(digest_xored));
((uint8_t *)state)[12] = digest[12] ^ digest[28];
((uint8_t *)state)[13] = digest[13] ^ digest[29];
((uint8_t *)state)[14] = digest[14] ^ digest[30];
((uint8_t *)state)[15] = digest[15] ^ digest[31];
((uint8_t *)state)[16] = digest[16] ^ digest[32];
((uint8_t *)state)[17] = digest[17] ^ digest[33];
((uint8_t *)state)[18] = digest[18] ^ digest[34];
((uint8_t *)state)[19] = digest[19] ^ digest[35];
((uint8_t *)state)[20] = digest[20] ^ digest[36];
((uint8_t *)state)[21] = digest[21] ^ digest[37];
((uint8_t *)state)[22] = digest[22] ^ digest[38];
((uint8_t *)state)[23] = digest[23] ^ digest[39];
((uint8_t *)state)[24] = digest[24] ^ digest[40];
((uint8_t *)state)[25] = digest[25] ^ digest[41];
((uint8_t *)state)[26] = digest[26] ^ digest[42];
((uint8_t *)state)[27] = digest[27] ^ digest[43];
((uint8_t *)state)[28] = digest[28] ^ digest[44];
((uint8_t *)state)[29] = digest[29] ^ digest[45];
((uint8_t *)state)[30] = digest[30] ^ digest[46];
((uint8_t *)state)[31] = digest[31] ^ digest[47];
} }
static const uint32_t diff1targ = 0x0000ffff; static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */ /* Used externally as confirmation of correct OCL code */
int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) int whirlcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{ {
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8]; uint32_t data[20], ohash[8];
@ -131,7 +218,7 @@ void whirlpoolx_regenhash(struct work *work)
whirlpoolx_hash(ohash, data); whirlpoolx_hash(ohash, data);
} }
bool scanhash_whirlpoolx(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, bool scanhash_whirlcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1, unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget, unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
@ -169,4 +256,4 @@ bool scanhash_whirlpoolx(struct thr_info *thr, const unsigned char __maybe_unuse
} }
return ret; return ret;
} }

51
algorithm/whirlpoolx.h

@ -1,9 +1,58 @@
#ifndef WHIRLPOOLX_H #ifndef WHIRLPOOLX_H
#define WHIRLPOOLX_H #define WHIRLPOOLX_H
#include <stdint.h>
#include "miner.h" #include "miner.h"
// The combined effect of gamma (SubBytes) and theta (MixRows).
// Indexed by one state byte; whirlpool_round() rotates the looked-up word
// into position. Read-only data, so it is const: every translation unit that
// includes this header gets its own private copy.
static const uint64_t MAGIC_TABLE[256] = {
	UINT64_C(0xD83078C018601818), UINT64_C(0x2646AF05238C2323), UINT64_C(0xB891F97EC63FC6C6), UINT64_C(0xFBCD6F13E887E8E8), UINT64_C(0xCB13A14C87268787), UINT64_C(0x116D62A9B8DAB8B8), UINT64_C(0x0902050801040101), UINT64_C(0x0D9E6E424F214F4F),
	UINT64_C(0x9B6CEEAD36D83636), UINT64_C(0xFF510459A6A2A6A6), UINT64_C(0x0CB9BDDED26FD2D2), UINT64_C(0x0EF706FBF5F3F5F5), UINT64_C(0x96F280EF79F97979), UINT64_C(0x30DECE5F6FA16F6F), UINT64_C(0x6D3FEFFC917E9191), UINT64_C(0xF8A407AA52555252),
	UINT64_C(0x47C0FD27609D6060), UINT64_C(0x35657689BCCABCBC), UINT64_C(0x372BCDAC9B569B9B), UINT64_C(0x8A018C048E028E8E), UINT64_C(0xD25B1571A3B6A3A3), UINT64_C(0x6C183C600C300C0C), UINT64_C(0x84F68AFF7BF17B7B), UINT64_C(0x806AE1B535D43535),
	UINT64_C(0xF53A69E81D741D1D), UINT64_C(0xB3DD4753E0A7E0E0), UINT64_C(0x21B3ACF6D77BD7D7), UINT64_C(0x9C99ED5EC22FC2C2), UINT64_C(0x435C966D2EB82E2E), UINT64_C(0x29967A624B314B4B), UINT64_C(0x5DE121A3FEDFFEFE), UINT64_C(0xD5AE168257415757),
	UINT64_C(0xBD2A41A815541515), UINT64_C(0xE8EEB69F77C17777), UINT64_C(0x926EEBA537DC3737), UINT64_C(0x9ED7567BE5B3E5E5), UINT64_C(0x1323D98C9F469F9F), UINT64_C(0x23FD17D3F0E7F0F0), UINT64_C(0x20947F6A4A354A4A), UINT64_C(0x44A9959EDA4FDADA),
	UINT64_C(0xA2B025FA587D5858), UINT64_C(0xCF8FCA06C903C9C9), UINT64_C(0x7C528D5529A42929), UINT64_C(0x5A1422500A280A0A), UINT64_C(0x507F4FE1B1FEB1B1), UINT64_C(0xC95D1A69A0BAA0A0), UINT64_C(0x14D6DA7F6BB16B6B), UINT64_C(0xD917AB5C852E8585),
	UINT64_C(0x3C677381BDCEBDBD), UINT64_C(0x8FBA34D25D695D5D), UINT64_C(0x9020508010401010), UINT64_C(0x07F503F3F4F7F4F4), UINT64_C(0xDD8BC016CB0BCBCB), UINT64_C(0xD37CC6ED3EF83E3E), UINT64_C(0x2D0A112805140505), UINT64_C(0x78CEE61F67816767),
	UINT64_C(0x97D55373E4B7E4E4), UINT64_C(0x024EBB25279C2727), UINT64_C(0x7382583241194141), UINT64_C(0xA70B9D2C8B168B8B), UINT64_C(0xF6530151A7A6A7A7), UINT64_C(0xB2FA94CF7DE97D7D), UINT64_C(0x4937FBDC956E9595), UINT64_C(0x56AD9F8ED847D8D8),
	UINT64_C(0x70EB308BFBCBFBFB), UINT64_C(0xCDC17123EE9FEEEE), UINT64_C(0xBBF891C77CED7C7C), UINT64_C(0x71CCE31766856666), UINT64_C(0x7BA78EA6DD53DDDD), UINT64_C(0xAF2E4BB8175C1717), UINT64_C(0x458E460247014747), UINT64_C(0x1A21DC849E429E9E),
	UINT64_C(0xD489C51ECA0FCACA), UINT64_C(0x585A99752DB42D2D), UINT64_C(0x2E637991BFC6BFBF), UINT64_C(0x3F0E1B38071C0707), UINT64_C(0xAC472301AD8EADAD), UINT64_C(0xB0B42FEA5A755A5A), UINT64_C(0xEF1BB56C83368383), UINT64_C(0xB666FF8533CC3333),
	UINT64_C(0x5CC6F23F63916363), UINT64_C(0x12040A1002080202), UINT64_C(0x93493839AA92AAAA), UINT64_C(0xDEE2A8AF71D97171), UINT64_C(0xC68DCF0EC807C8C8), UINT64_C(0xD1327DC819641919), UINT64_C(0x3B92707249394949), UINT64_C(0x5FAF9A86D943D9D9),
	UINT64_C(0x31F91DC3F2EFF2F2), UINT64_C(0xA8DB484BE3ABE3E3), UINT64_C(0xB9B62AE25B715B5B), UINT64_C(0xBC0D9234881A8888), UINT64_C(0x3E29C8A49A529A9A), UINT64_C(0x0B4CBE2D26982626), UINT64_C(0xBF64FA8D32C83232), UINT64_C(0x597D4AE9B0FAB0B0),
	UINT64_C(0xF2CF6A1BE983E9E9), UINT64_C(0x771E33780F3C0F0F), UINT64_C(0x33B7A6E6D573D5D5), UINT64_C(0xF41DBA74803A8080), UINT64_C(0x27617C99BEC2BEBE), UINT64_C(0xEB87DE26CD13CDCD), UINT64_C(0x8968E4BD34D03434), UINT64_C(0x3290757A483D4848),
	UINT64_C(0x54E324ABFFDBFFFF), UINT64_C(0x8DF48FF77AF57A7A), UINT64_C(0x643DEAF4907A9090), UINT64_C(0x9DBE3EC25F615F5F), UINT64_C(0x3D40A01D20802020), UINT64_C(0x0FD0D56768BD6868), UINT64_C(0xCA3472D01A681A1A), UINT64_C(0xB7412C19AE82AEAE),
	UINT64_C(0x7D755EC9B4EAB4B4), UINT64_C(0xCEA8199A544D5454), UINT64_C(0x7F3BE5EC93769393), UINT64_C(0x2F44AA0D22882222), UINT64_C(0x63C8E907648D6464), UINT64_C(0x2AFF12DBF1E3F1F1), UINT64_C(0xCCE6A2BF73D17373), UINT64_C(0x82245A9012481212),
	UINT64_C(0x7A805D3A401D4040), UINT64_C(0x4810284008200808), UINT64_C(0x959BE856C32BC3C3), UINT64_C(0xDFC57B33EC97ECEC), UINT64_C(0x4DAB9096DB4BDBDB), UINT64_C(0xC05F1F61A1BEA1A1), UINT64_C(0x9107831C8D0E8D8D), UINT64_C(0xC87AC9F53DF43D3D),
	UINT64_C(0x5B33F1CC97669797), UINT64_C(0x0000000000000000), UINT64_C(0xF983D436CF1BCFCF), UINT64_C(0x6E5687452BAC2B2B), UINT64_C(0xE1ECB39776C57676), UINT64_C(0xE619B06482328282), UINT64_C(0x28B1A9FED67FD6D6), UINT64_C(0xC33677D81B6C1B1B),
	UINT64_C(0x74775BC1B5EEB5B5), UINT64_C(0xBE432911AF86AFAF), UINT64_C(0x1DD4DF776AB56A6A), UINT64_C(0xEAA00DBA505D5050), UINT64_C(0x578A4C1245094545), UINT64_C(0x38FB18CBF3EBF3F3), UINT64_C(0xAD60F09D30C03030), UINT64_C(0xC4C3742BEF9BEFEF),
	UINT64_C(0xDA7EC3E53FFC3F3F), UINT64_C(0xC7AA1C9255495555), UINT64_C(0xDB591079A2B2A2A2), UINT64_C(0xE9C96503EA8FEAEA), UINT64_C(0x6ACAEC0F65896565), UINT64_C(0x036968B9BAD2BABA), UINT64_C(0x4A5E93652FBC2F2F), UINT64_C(0x8E9DE74EC027C0C0),
	UINT64_C(0x60A181BEDE5FDEDE), UINT64_C(0xFC386CE01C701C1C), UINT64_C(0x46E72EBBFDD3FDFD), UINT64_C(0x1F9A64524D294D4D), UINT64_C(0x7639E0E492729292), UINT64_C(0xFAEABC8F75C97575), UINT64_C(0x360C1E3006180606), UINT64_C(0xAE0998248A128A8A),
	UINT64_C(0x4B7940F9B2F2B2B2), UINT64_C(0x85D15963E6BFE6E6), UINT64_C(0x7E1C36700E380E0E), UINT64_C(0xE73E63F81F7C1F1F), UINT64_C(0x55C4F73762956262), UINT64_C(0x3AB5A3EED477D4D4), UINT64_C(0x814D3229A89AA8A8), UINT64_C(0x5231F4C496629696),
	UINT64_C(0x62EF3A9BF9C3F9F9), UINT64_C(0xA397F666C533C5C5), UINT64_C(0x104AB13525942525), UINT64_C(0xABB220F259795959), UINT64_C(0xD015AE54842A8484), UINT64_C(0xC5E4A7B772D57272), UINT64_C(0xEC72DDD539E43939), UINT64_C(0x1698615A4C2D4C4C),
	UINT64_C(0x94BC3BCA5E655E5E), UINT64_C(0x9FF085E778FD7878), UINT64_C(0xE570D8DD38E03838), UINT64_C(0x980586148C0A8C8C), UINT64_C(0x17BFB2C6D163D1D1), UINT64_C(0xE4570B41A5AEA5A5), UINT64_C(0xA1D94D43E2AFE2E2), UINT64_C(0x4EC2F82F61996161),
	UINT64_C(0x427B45F1B3F6B3B3), UINT64_C(0x3442A51521842121), UINT64_C(0x0825D6949C4A9C9C), UINT64_C(0xEE3C66F01E781E1E), UINT64_C(0x6186522243114343), UINT64_C(0xB193FC76C73BC7C7), UINT64_C(0x4FE52BB3FCD7FCFC), UINT64_C(0x2408142004100404),
	UINT64_C(0xE3A208B251595151), UINT64_C(0x252FC7BC995E9999), UINT64_C(0x22DAC44F6DA96D6D), UINT64_C(0x651A39680D340D0D), UINT64_C(0x79E93583FACFFAFA), UINT64_C(0x69A384B6DF5BDFDF), UINT64_C(0xA9FC9BD77EE57E7E), UINT64_C(0x1948B43D24902424),
	UINT64_C(0xFE76D7C53BEC3B3B), UINT64_C(0x9A4B3D31AB96ABAB), UINT64_C(0xF081D13ECE1FCECE), UINT64_C(0x9922558811441111), UINT64_C(0x8303890C8F068F8F), UINT64_C(0x049C6B4A4E254E4E), UINT64_C(0x667351D1B7E6B7B7), UINT64_C(0xE0CB600BEB8BEBEB),
	UINT64_C(0xC178CCFD3CF03C3C), UINT64_C(0xFD1FBF7C813E8181), UINT64_C(0x4035FED4946A9494), UINT64_C(0x1CF30CEBF7FBF7F7), UINT64_C(0x186F67A1B9DEB9B9), UINT64_C(0x8B265F98134C1313), UINT64_C(0x51589C7D2CB02C2C), UINT64_C(0x05BBB8D6D36BD3D3),
	UINT64_C(0x8CD35C6BE7BBE7E7), UINT64_C(0x39DCCB576EA56E6E), UINT64_C(0xAA95F36EC437C4C4), UINT64_C(0x1B060F18030C0303), UINT64_C(0xDCAC138A56455656), UINT64_C(0x5E88491A440D4444), UINT64_C(0xA0FE9EDF7FE17F7F), UINT64_C(0x884F3721A99EA9A9),
	UINT64_C(0x6754824D2AA82A2A), UINT64_C(0x0A6B6DB1BBD6BBBB), UINT64_C(0x879FE246C123C1C1), UINT64_C(0xF1A602A253515353), UINT64_C(0x72A58BAEDC57DCDC), UINT64_C(0x531627580B2C0B0B), UINT64_C(0x0127D39C9D4E9D9D), UINT64_C(0x2BD8C1476CAD6C6C),
	UINT64_C(0xA462F59531C43131), UINT64_C(0xF3E8B98774CD7474), UINT64_C(0x15F109E3F6FFF6F6), UINT64_C(0x4C8C430A46054646), UINT64_C(0xA5452609AC8AACAC), UINT64_C(0xB50F973C891E8989), UINT64_C(0xB42844A014501414), UINT64_C(0xBADF425BE1A3E1E1),
	UINT64_C(0xA62C4EB016581616), UINT64_C(0xF774D2CD3AE83A3A), UINT64_C(0x06D2D06F69B96969), UINT64_C(0x41122D4809240909), UINT64_C(0xD7E0ADA770DD7070), UINT64_C(0x6F7154D9B6E2B6B6), UINT64_C(0x1EBDB7CED067D0D0), UINT64_C(0xD6C77E3BED93EDED),
	UINT64_C(0xE285DB2ECC17CCCC), UINT64_C(0x6884572A42154242), UINT64_C(0x2C2DC2B4985A9898), UINT64_C(0xED550E49A4AAA4A4), UINT64_C(0x7550885D28A02828), UINT64_C(0x86B831DA5C6D5C5C), UINT64_C(0x6BED3F93F8C7F8F8), UINT64_C(0xC211A44486228686),
};
// Per-round constants; entry i is XORed into word 0 of the key schedule in
// round i. The host code shown with this table runs 10 rounds, so only the
// first 10 entries are read there. Read-only data, hence const.
static const uint64_t WHIRLPOOL_ROUND_CONSTANTS[32] = {
	UINT64_C(0x4F01B887E8C62318), UINT64_C(0x52916F79F5D2A636), UINT64_C(0x357B0CA38E9BBC60), UINT64_C(0x57FE4B2EC2D7E01D),
	UINT64_C(0xDA4AF09FE5377715), UINT64_C(0x856BA0B10A29C958), UINT64_C(0x67053ECBF4105DBD), UINT64_C(0xD8957DA78B4127E4),
	UINT64_C(0x9E4717DD667CEEFB), UINT64_C(0x33835AAD07BF2DCA), UINT64_C(0xD94919C871AA0263), UINT64_C(0xB032269A885BE3F2),
	UINT64_C(0x4834CDBE80D50FE9), UINT64_C(0xAE1A68205F907AFF), UINT64_C(0x1273F164229354B4), UINT64_C(0x3D8DA1DBECC30840),
	UINT64_C(0x1BD682762BCF0097), UINT64_C(0xEF30F345506AAFB5), UINT64_C(0xC02FBA65EAA2553F), UINT64_C(0x8A0675924DFD1CDE),
	UINT64_C(0x96A8D4621F0EE6B2), UINT64_C(0x4C3972845925C5F9), UINT64_C(0x61E2A5D18C38785E), UINT64_C(0x04FCC7431E9C21B3),
	UINT64_C(0x247EDFFA0D6D9951), UINT64_C(0xEBB74E8F11CEAB3B), UINT64_C(0xD32C13B9F794813C), UINT64_C(0xA97F445603C46EE7),
	UINT64_C(0x6C9D0BDC53C1BB2A), UINT64_C(0xE11489AC46F67431), UINT64_C(0xEDD0B67009693A16), UINT64_C(0x86F85C28A49842CC),
};
extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
extern void whirlpoolx_regenhash(struct work *work); extern void whirlpoolx_regenhash(struct work *work);
extern void whirlpool_round(uint64_t block[8], const uint64_t key[8]);
#endif /* W_H */ #endif /* WHIRLPOOLX_H */

1539
kernel/whirlpoolx.cl

File diff suppressed because it is too large Load Diff

129
ocl.c

@ -146,16 +146,6 @@ static cl_int create_opencl_context(cl_context *context, cl_platform_id *platfor
return status; return status;
} }
static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{
cl_int status;
*command_queue = clCreateCommandQueue(*context, *device,
cq_properties, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
*command_queue = clCreateCommandQueue(*context, *device, 0, &status);
return status;
}
static float get_opencl_version(cl_device_id device) static float get_opencl_version(cl_device_id device)
{ {
/* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */ /* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */
@ -193,27 +183,42 @@ static bool get_opencl_bit_align_support(cl_device_id *device)
return !!find; return !!find;
} }
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm) static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{ {
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
struct cgpu_info *cgpu = &gpus[gpu];
cl_platform_id platform = NULL;
char pbuff[256];
build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data));
cl_uint preferred_vwidth;
cl_device_id *devices;
cl_uint numDevices;
cl_int status; cl_int status;
*command_queue = clCreateCommandQueue(*context, *device,
cq_properties, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
*command_queue = clCreateCommandQueue(*context, *device, 0, &status);
return status;
}
_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm)
{
cl_int status = 0;
size_t compute_units = 0;
cl_platform_id platform = NULL;
struct cgpu_info *cgpu = &gpus[gpu];
_clState *clState = (_clState *)calloc(1, sizeof(_clState));
cl_uint preferred_vwidth, slot = 0, cpnd = 0, numDevices = clDevicesNum();
cl_device_id *devices = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id));
build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data));
char **pbuff = (char **)alloca(sizeof(char *) * numDevices), filename[256];
// sanity check
if (!get_opencl_platform(opt_platform_id, &platform)) { if (!get_opencl_platform(opt_platform_id, &platform)) {
return NULL; return NULL;
} }
numDevices = clDevicesNum(); if (numDevices <= 0) {
return NULL;
}
if (numDevices <= 0) return NULL; if (gpu >= numDevices) {
applog(LOG_ERR, "Invalid GPU %i", gpu);
return NULL;
}
devices = (cl_device_id *)alloca(numDevices*sizeof(cl_device_id));
/* Now, get the device list data */ /* Now, get the device list data */
@ -225,27 +230,26 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_INFO, "List of devices:"); applog(LOG_INFO, "List of devices:");
unsigned int i; for (int i = 0; i < numDevices; ++i) {
for (i = 0; i < numDevices; i++) { size_t tmpsize;
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &tmpsize) != CL_SUCCESS) {
if (status != CL_SUCCESS) { applog(LOG_ERR, "Error while getting the length of the name for GPU #%d.", i);
applog(LOG_ERR, "Error %d: Getting Device Info", status);
return NULL; return NULL;
} }
applog(LOG_INFO, "\t%i\t%s", i, pbuff); // Does the size include the NULL terminator? Who knows, just add one, it's faster than looking it up.
pbuff[i] = (char *)alloca(sizeof(char) * (tmpsize + 1));
if (i == gpu) { if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(char) * tmpsize, pbuff[i], NULL) != CL_SUCCESS) {
applog(LOG_INFO, "Selected %i: %s", gpu, pbuff); applog(LOG_ERR, "Error while attempting to get device information.");
strncpy(name, pbuff, nameSize); return NULL;
} }
}
if (gpu >= numDevices) {
applog(LOG_ERR, "Invalid GPU %i", gpu);
return NULL;
}
applog(LOG_INFO, "\t%i\t%s", i, pbuff[i]);
}
applog(LOG_INFO, "Selected %d: %s", gpu, pbuff[gpu]);
strncpy(name, pbuff[gpu], nameSize);
status = create_opencl_context(&clState->context, &platform); status = create_opencl_context(&clState->context, &platform);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status); applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
@ -274,7 +278,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
} }
applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size)); applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));
size_t compute_units = 0;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), (void *)&compute_units, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), (void *)&compute_units, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status); applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
@ -282,8 +285,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
} }
// AMD architechture got 64 compute shaders per compute unit. // AMD architechture got 64 compute shaders per compute unit.
// Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf // Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf
clState->compute_shaders = compute_units * 64; clState->compute_shaders = compute_units << 6;
applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders)); applog(LOG_INFO, "Maximum work size for this GPU (%d) is %d.", gpu, clState->max_work_size);
applog(LOG_INFO, "Your GPU (#%d) has %d compute units, and all AMD cards in the 7 series or newer (GCN cards) \
have 64 shaders per compute unit - this means it has %d shaders.", gpu, compute_units, clState->compute_shaders);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
@ -297,12 +302,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
* would have otherwise created. The filename is: * would have otherwise created. The filename is:
* name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin * name + g + lg + lookup_gap + tc + thread_concurrency + nf + nfactor + w + work_size + l + sizeof(long) + .bin
*/ */
char filename[255];
char strbuf[32];
sprintf(strbuf, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name));
strcpy(filename, strbuf);
sprintf(filename, "%s.cl", (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name));
applog(LOG_DEBUG, "Using source file %s", filename); applog(LOG_DEBUG, "Using source file %s", filename);
/* For some reason 2 vectors is still better even if the card says /* For some reason 2 vectors is still better even if the card says
@ -326,10 +327,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
clState->goffset = true; clState->goffset = true;
if (cgpu->work_size && cgpu->work_size <= clState->max_work_size) clState->wsize = (cgpu->work_size && cgpu->work_size <= clState->max_work_size) ? cgpu->work_size : 256;
clState->wsize = cgpu->work_size;
else
clState->wsize = 256;
if (!cgpu->opt_lg) { if (!cgpu->opt_lg) {
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu); applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
@ -536,38 +534,32 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
cgpu->thread_concurrency = cgpu->opt_tc; cgpu->thread_concurrency = cgpu->opt_tc;
} }
cl_uint slot, cpnd;
slot = cpnd = 0;
build_data->context = clState->context; build_data->context = clState->context;
build_data->device = &devices[gpu]; build_data->device = &devices[gpu];
// Build information // Build information
strcpy(build_data->source_filename, filename); strcpy(build_data->source_filename, filename);
strcpy(build_data->platform, name); strcpy(build_data->platform, name);
strcpy(build_data->sgminer_path, sgminer_path); strcpy(build_data->sgminer_path, sgminer_path);
if (opt_kernel_path && *opt_kernel_path) {
build_data->kernel_path = opt_kernel_path;
}
else {
build_data->kernel_path = NULL;
}
build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL;
build_data->work_size = clState->wsize; build_data->work_size = clState->wsize;
build_data->has_bit_align = clState->hasBitAlign; build_data->has_bit_align = clState->hasBitAlign;
build_data->opencl_version = get_opencl_version(devices[gpu]); build_data->opencl_version = get_opencl_version(devices[gpu]);
build_data->patch_bfi = needs_bfi_patch(build_data); build_data->patch_bfi = needs_bfi_patch(build_data);
strcpy(build_data->binary_filename, (!empty_string(cgpu->algorithm.kernelfile) ? cgpu->algorithm.kernelfile : cgpu->algorithm.name)); strcpy(build_data->binary_filename, filename);
strcat(build_data->binary_filename, name); build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix.
if (clState->goffset) strcat(build_data->binary_filename, pbuff[gpu]);
if (clState->goffset) {
strcat(build_data->binary_filename, "g"); strcat(build_data->binary_filename, "g");
}
set_base_compiler_options(build_data); set_base_compiler_options(build_data);
if (algorithm->set_compile_options) if (algorithm->set_compile_options) {
algorithm->set_compile_options(build_data, cgpu, algorithm); algorithm->set_compile_options(build_data, cgpu, algorithm);
}
strcat(build_data->binary_filename, ".bin"); strcat(build_data->binary_filename, ".bin");
applog(LOG_DEBUG, "Using binary file %s", build_data->binary_filename); applog(LOG_DEBUG, "Using binary file %s", build_data->binary_filename);
@ -576,8 +568,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
if (!(clState->program = load_opencl_binary_kernel(build_data))) { if (!(clState->program = load_opencl_binary_kernel(build_data))) {
applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename); applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename);
if (!(clState->program = build_opencl_kernel(build_data, filename))) if (!(clState->program = build_opencl_kernel(build_data, filename))) {
return NULL; return NULL;
}
if (save_opencl_kernel(build_data, clState->program)) { if (save_opencl_kernel(build_data, clState->program)) {
/* Program needs to be rebuilt, because the binary was patched */ /* Program needs to be rebuilt, because the binary was patched */

3
ocl.h

@ -10,7 +10,7 @@
#include <CL/cl.h> #include <CL/cl.h>
#endif #endif
#include "miner.h" #include "algorithm.h"
typedef struct __clState { typedef struct __clState {
cl_context context; cl_context context;
@ -21,6 +21,7 @@ typedef struct __clState {
cl_program program; cl_program program;
cl_mem outputBuffer; cl_mem outputBuffer;
cl_mem CLbuffer0; cl_mem CLbuffer0;
cl_mem MidstateBuf;
cl_mem padbuffer8; cl_mem padbuffer8;
unsigned char cldata[80]; unsigned char cldata[80];
bool hasBitAlign; bool hasBitAlign;

2
ocl/binary_kernel.c

@ -1,5 +1,7 @@
#include "binary_kernel.h" #include "binary_kernel.h"
#include "miner.h"
#include <sys/stat.h> #include <sys/stat.h>
#include <stdio.h>
cl_program load_opencl_binary_kernel(build_kernel_data *data) cl_program load_opencl_binary_kernel(build_kernel_data *data)
{ {

2
ocl/build_kernel.c

@ -1,5 +1,7 @@
#include <stdio.h>
#include "build_kernel.h" #include "build_kernel.h"
#include "patch_kernel.h" #include "patch_kernel.h"
#include "miner.h"
static char *file_contents(const char *filename, int *length) static char *file_contents(const char *filename, int *length)
{ {

8
ocl/build_kernel.h

@ -1,8 +1,14 @@
#ifndef BUILD_KERNEL_H #ifndef BUILD_KERNEL_H
#define BUILD_KERNEL_H #define BUILD_KERNEL_H
#include "ocl.h"
#include <stdbool.h> #include <stdbool.h>
#include "logging.h"
#ifdef __APPLE_CC__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
typedef struct _build_kernel_data { typedef struct _build_kernel_data {
char source_filename[255]; char source_filename[255];

7
winbuild/sgminer.sln

@ -1,6 +1,8 @@
 
Microsoft Visual Studio Solution File, Format Version 11.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual C++ Express 2010 # Visual Studio 2013
VisualStudioVersion = 12.0.31101.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sgminer", "sgminer.vcxproj", "{CCA64DCD-6401-42A3-ABC3-89E48A36D239}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sgminer", "sgminer.vcxproj", "{CCA64DCD-6401-42A3-ABC3-89E48A36D239}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jansson", "jansson\jansson.vcxproj", "{AFE7D2AA-025C-4837-B4B2-81117E010B3B}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jansson", "jansson\jansson.vcxproj", "{AFE7D2AA-025C-4837-B4B2-81117E010B3B}"
@ -17,6 +19,7 @@ Global
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|Win32.Build.0 = Debug|Win32 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|Win32.Build.0 = Debug|Win32
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.ActiveCfg = Debug|x64 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.ActiveCfg = Debug|x64
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.Build.0 = Debug|x64 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.Build.0 = Debug|x64
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Debug|x64.Deploy.0 = Debug|x64
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.ActiveCfg = Release|Win32 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.ActiveCfg = Release|Win32
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.Build.0 = Release|Win32 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|Win32.Build.0 = Release|Win32
{CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|x64.ActiveCfg = Release|x64 {CCA64DCD-6401-42A3-ABC3-89E48A36D239}.Release|x64.ActiveCfg = Release|x64

Loading…
Cancel
Save