Browse Source

allium algo

pull/5/head
Tanguy Pruvot 7 years ago
parent
commit
b97567a451
  1. 1
      Makefile.am
  2. 2
      algos.h
  3. 1
      bench.cpp
  4. 5
      ccminer.cpp
  5. 1
      ccminer.vcxproj
  6. 3
      ccminer.vcxproj.filters
  7. 2
      configure.ac
  8. 213
      lyra2/allium.cu
  9. 3
      miner.h
  10. 3
      util.cpp

1
Makefile.am

@ -39,6 +39,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
lyra2/lyra2RE.cu lyra2/cuda_lyra2.cu \ lyra2/lyra2RE.cu lyra2/cuda_lyra2.cu \
lyra2/lyra2REv2.cu lyra2/cuda_lyra2v2.cu \ lyra2/lyra2REv2.cu lyra2/cuda_lyra2v2.cu \
lyra2/Lyra2Z.c lyra2/lyra2Z.cu lyra2/cuda_lyra2Z.cu \ lyra2/Lyra2Z.c lyra2/lyra2Z.cu lyra2/cuda_lyra2Z.cu \
lyra2/allium.cu \
Algo256/cuda_bmw256.cu Algo256/cuda_cubehash256.cu \ Algo256/cuda_bmw256.cu Algo256/cuda_cubehash256.cu \
Algo256/cuda_blake256.cu Algo256/cuda_groestl256.cu \ Algo256/cuda_blake256.cu Algo256/cuda_groestl256.cu \
Algo256/cuda_keccak256_sm3.cu Algo256/cuda_keccak256.cu Algo256/cuda_skein256.cu \ Algo256/cuda_keccak256_sm3.cu Algo256/cuda_keccak256.cu Algo256/cuda_skein256.cu \

2
algos.h

@ -8,6 +8,7 @@ enum sha_algos {
ALGO_BLAKECOIN = 0, ALGO_BLAKECOIN = 0,
ALGO_BLAKE, ALGO_BLAKE,
ALGO_BLAKE2S, ALGO_BLAKE2S,
ALGO_ALLIUM,
ALGO_BMW, ALGO_BMW,
ALGO_BASTION, ALGO_BASTION,
ALGO_C11, ALGO_C11,
@ -80,6 +81,7 @@ static const char *algo_names[] = {
"blakecoin", "blakecoin",
"blake", "blake",
"blake2s", "blake2s",
"allium",
"bmw", "bmw",
"bastion", "bastion",
"c11", "c11",

1
bench.cpp

@ -49,6 +49,7 @@ void bench_free()
void algo_free_all(int thr_id) void algo_free_all(int thr_id)
{ {
// only initialized algos will be freed // only initialized algos will be freed
free_allium(thr_id);
free_bastion(thr_id); free_bastion(thr_id);
free_bitcore(thr_id); free_bitcore(thr_id);
free_blake256(thr_id); free_blake256(thr_id);

5
ccminer.cpp

@ -1698,6 +1698,7 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
case ALGO_SCRYPT_JANE: case ALGO_SCRYPT_JANE:
work_set_target(work, sctx->job.diff / (65536.0 * opt_difficulty)); work_set_target(work, sctx->job.diff / (65536.0 * opt_difficulty));
break; break;
case ALGO_ALLIUM:
case ALGO_DMD_GR: case ALGO_DMD_GR:
case ALGO_FRESH: case ALGO_FRESH:
case ALGO_FUGUE256: case ALGO_FUGUE256:
@ -2234,6 +2235,7 @@ static void *miner_thread(void *userdata)
case ALGO_TRIBUS: case ALGO_TRIBUS:
minmax = 0x1000000; minmax = 0x1000000;
break; break;
case ALGO_ALLIUM:
case ALGO_C11: case ALGO_C11:
case ALGO_DEEP: case ALGO_DEEP:
case ALGO_HEAVY: case ALGO_HEAVY:
@ -2323,6 +2325,9 @@ static void *miner_thread(void *userdata)
/* scan nonces for a proof-of-work hash */ /* scan nonces for a proof-of-work hash */
switch (opt_algo) { switch (opt_algo) {
case ALGO_ALLIUM:
rc = scanhash_allium(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_BASTION: case ALGO_BASTION:
rc = scanhash_bastion(thr_id, &work, max_nonce, &hashes_done); rc = scanhash_bastion(thr_id, &work, max_nonce, &hashes_done);
break; break;

1
ccminer.vcxproj

@ -519,6 +519,7 @@
<CudaCompile Include="qubit\luffa.cu" /> <CudaCompile Include="qubit\luffa.cu" />
<CudaCompile Include="qubit\qubit.cu" /> <CudaCompile Include="qubit\qubit.cu" />
<CudaCompile Include="qubit\qubit_luffa512.cu" /> <CudaCompile Include="qubit\qubit_luffa512.cu" />
<CudaCompile Include="lyra2\allium.cu" />
<CudaCompile Include="lyra2\lyra2RE.cu" /> <CudaCompile Include="lyra2\lyra2RE.cu" />
<CudaCompile Include="lyra2\cuda_lyra2.cu" /> <CudaCompile Include="lyra2\cuda_lyra2.cu" />
<CudaCompile Include="lyra2\lyra2REv2.cu" /> <CudaCompile Include="lyra2\lyra2REv2.cu" />

3
ccminer.vcxproj.filters

@ -910,6 +910,9 @@
<CudaCompile Include="Algo256\cuda_bmw.cu"> <CudaCompile Include="Algo256\cuda_bmw.cu">
<Filter>Source Files\CUDA\Algo256</Filter> <Filter>Source Files\CUDA\Algo256</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="lyra2\allium.cu">
<Filter>Source Files\CUDA\lyra2</Filter>
</CudaCompile>
<CudaCompile Include="lyra2\cuda_lyra2.cu"> <CudaCompile Include="lyra2\cuda_lyra2.cu">
<Filter>Source Files\CUDA\lyra2</Filter> <Filter>Source Files\CUDA\lyra2</Filter>
</CudaCompile> </CudaCompile>

2
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2.2.5], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_INIT([ccminer], [2.2.6], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

213
lyra2/allium.cu

@ -0,0 +1,213 @@
extern "C" {
#include "sph/sph_blake.h"
#include "sph/sph_keccak.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_skein.h"
#include "sph/sph_groestl.h"
#include "lyra2/Lyra2.h"
}
#include <miner.h>
#include <cuda_helper.h>
// Per-thread device buffers, indexed by thr_id.
// d_hash: allocated in scanhash_allium() with 32 bytes per GPU thread and handed to every hashing stage.
static uint64_t* d_hash[MAX_GPUS];
// d_matrix: buffer passed to lyra2_cpu_init(); scanhash_allium() only allocates it when device_sm >= 500.
static uint64_t* d_matrix[MAX_GPUS];

// Host-side launchers used by the Allium chain. They are only declared here;
// none of them is defined in this file.

// Blake-256 setup (the standalone blake/keccak launchers below are disabled in favour of the fused one).
extern void blake256_cpu_init(int thr_id, uint32_t threads);
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
//extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
//extern void keccak256_sm3_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
//extern void keccak256_sm3_init(int thr_id, uint32_t threads);
//extern void keccak256_sm3_free(int thr_id);
// Launcher used as the first stage by scanhash_allium() (presumably blake256 + keccak256 fused, going by the name).
extern void blakeKeccak256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
// 32-byte-input stages.
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void skein256_cpu_init(int thr_id, uint32_t threads);
// Lyra2 stage; the last argument of lyra2_cpu_hash_32 is the "750 Ti" flag detected during setup.
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
// Final Groestl-256 stage: takes the target, returns a candidate nonce and exposes a second one.
extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id);
extern void groestl256_setTarget(const void *ptarget);
extern uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
extern uint32_t groestl256_getSecNonce(int thr_id, int num);
/**
 * CPU reference implementation of the Allium hash chain:
 * Blake-256 (14 rounds) -> Keccak-256 -> Lyra2 -> CubeHash-256 -> Lyra2
 * -> Skein-256 -> Groestl-256.
 *
 * @param state  output buffer; receives the final 32-byte digest
 * @param input  input buffer; exactly 80 bytes are read from it
 */
extern "C" void allium_hash(void *state, const void *input)
{
	// Two 256-bit scratch digests; each stage reads one and writes the other.
	uint32_t bufA[8];
	uint32_t bufB[8];

	// Stage 1: Blake-256 over the 80 input bytes -> bufA
	sph_blake256_context blake;
	sph_blake256_set_rounds(14);
	sph_blake256_init(&blake);
	sph_blake256(&blake, input, 80);
	sph_blake256_close(&blake, bufA);

	// Stage 2: Keccak-256(bufA) -> bufB
	sph_keccak256_context keccak;
	sph_keccak256_init(&keccak);
	sph_keccak256(&keccak, bufA, 32);
	sph_keccak256_close(&keccak, bufB);

	// Stage 3: Lyra2 with bufB as both password and salt -> bufA
	LYRA2(bufA, 32, bufB, 32, bufB, 32, 1, 8, 8);

	// Stage 4: CubeHash-256(bufA) -> bufB
	sph_cubehash256_context cube;
	sph_cubehash256_init(&cube);
	sph_cubehash256(&cube, bufA, 32);
	sph_cubehash256_close(&cube, bufB);

	// Stage 5: second Lyra2 pass, same parameters -> bufA
	LYRA2(bufA, 32, bufB, 32, bufB, 32, 1, 8, 8);

	// Stage 6: Skein-256(bufA) -> bufB
	sph_skein256_context skein;
	sph_skein256_init(&skein);
	sph_skein256(&skein, bufA, 32);
	sph_skein256_close(&skein, bufB);

	// Stage 7: Groestl-256(bufB) -> bufA
	sph_groestl256_context groestl;
	sph_groestl256_init(&groestl);
	sph_groestl256(&groestl, bufB, 32);
	sph_groestl256_close(&groestl, bufA);

	// Publish the final digest to the caller.
	memcpy(state, bufA, sizeof(bufA));
}
// Per-thread "GPU resources are allocated" flag, indexed by thr_id: set at the end of the
// one-time setup in scanhash_allium() and cleared again by free_allium().
static bool init[MAX_GPUS] = { 0 };
// Thread-local batch size: the number of GPU threads (one nonce each) launched per pass.
// Assigned once during setup from cuda_default_throughput().
static __thread uint32_t throughput = 0;
/**
 * Scan a nonce range for Allium proof-of-work candidates on the GPU.
 *
 * On the first call for a given thr_id the device is selected, the batch size
 * (throughput) is chosen and the device buffers are allocated. Each loop pass
 * then hashes `throughput` nonces starting at pdata[19]; any candidate reported
 * by the final Groestl stage is re-hashed on the CPU with allium_hash() before
 * it is accepted.
 *
 * @param thr_id       miner thread index (selects the device via device_map)
 * @param work         work item; data[19] is the nonce cursor and is advanced,
 *                     nonces[] / valid_nonces receive the results
 * @param max_nonce    upper bound of the nonce range to scan
 * @param hashes_done  receives the number of nonces covered by this call
 * @return number of validated nonces stored in work->nonces (1 or 2), or 0 when
 *         the range was exhausted or a work restart was requested
 */
extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];

	if (opt_benchmark)
		ptarget[7] = 0x00ff;

	// Forwarded to lyra2_cpu_hash_32(); detected once per thread during setup.
	static __thread bool gtx750ti;

	if (!init[thr_id])
	{
		int dev_id = device_map[thr_id];
		cudaSetDevice(dev_id);
		CUDA_LOG_ERROR();

		int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 17 : 16;
		if (device_sm[dev_id] == 500) intensity = 15;
		throughput = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4;
		// Note: the former "if (init[thr_id]) throughput = min(...)" clamp was
		// unreachable inside this !init[thr_id] branch and has been dropped.

		// Only inspect props.name when the query succeeded; otherwise the
		// structure is uninitialised and strstr() would read garbage.
		cudaDeviceProp props;
		gtx750ti = false;
		if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess)
			gtx750ti = (strstr(props.name, "750 Ti") != NULL);

		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		blake256_cpu_init(thr_id, throughput);
		//keccak256_sm3_init(thr_id, throughput);
		skein256_cpu_init(thr_id, throughput);
		groestl256_cpu_init(thr_id, throughput);

		//cuda_get_arch(thr_id);
		if (device_sm[dev_id] >= 500)
		{
			// Per-thread Lyra2 matrix size differs between SM 5.0 and newer devices.
			size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4;
			CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput));
			lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
		}

		// One 32-byte digest per GPU thread.
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));

		init[thr_id] = true;
	}

	// Big-endian copy of the header, used only for the CPU re-hash below;
	// the GPU side is given pdata as-is.
	uint32_t _ALIGN(128) endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	blake256_cpu_setBlock_80(pdata);
	groestl256_setTarget(ptarget);

	do {
		int order = 0;

		//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
		cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		*hashes_done = pdata[19] - first_nonce + throughput;

		// UINT32_MAX is treated as "no candidate in this batch".
		work->nonces[0] = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		if (work->nonces[0] != UINT32_MAX)
		{
			const uint32_t Htarg = ptarget[7];
			uint32_t _ALIGN(64) vhash[8];

			// Re-hash the candidate on the CPU before reporting it.
			be32enc(&endiandata[19], work->nonces[0]);
			allium_hash(vhash, endiandata);

			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
				work->valid_nonces = 1;
				work_set_target_ratio(work, vhash);
				work->nonces[1] = groestl256_getSecNonce(thr_id, 1);
				if (work->nonces[1] != UINT32_MAX) {
					be32enc(&endiandata[19], work->nonces[1]);
					allium_hash(vhash, endiandata);
					bn_set_target_ratio(work, vhash, 1);
					work->valid_nonces++;
					// Resume after the higher of the two nonces.
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}
			else if (vhash[7] > Htarg) {
				// GPU and CPU disagree: count it as a reject and resume right after it.
				gpu_increment_reject(thr_id);
				if (!opt_quiet)
					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
				pdata[19] = work->nonces[0] + 1;
				continue;
			}
		}

		// 64-bit sum so the end-of-range test cannot wrap around.
		if ((uint64_t)throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;
	return 0;
}
/**
 * Release the GPU resources that scanhash_allium() allocated for this thread.
 *
 * Does nothing when the thread was never initialised. Afterwards init[thr_id]
 * is cleared so the next scanhash_allium() call runs its setup again.
 *
 * @param thr_id  miner thread index whose buffers should be released
 */
extern "C" void free_allium(int thr_id)
{
	if (!init[thr_id])
		return;

	// Let pending work finish before freeing its buffers. This replaces the
	// deprecated cudaThreadSynchronize() with its documented successor.
	cudaDeviceSynchronize();

	cudaFree(d_hash[thr_id]);
	d_hash[thr_id] = NULL;

	// d_matrix is only allocated on SM >= 5.0 devices; cudaFree(NULL) is a no-op.
	cudaFree(d_matrix[thr_id]);
	d_matrix[thr_id] = NULL;

	//keccak256_sm3_free(thr_id);
	groestl256_cpu_free(thr_id);

	init[thr_id] = false;

	cudaDeviceSynchronize();
}

3
miner.h

@ -273,6 +273,7 @@ void sha256d(unsigned char *hash, const unsigned char *data, int len);
struct work; struct work;
extern int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_bastion(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_bastion(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds); extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds);
extern int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
@ -339,6 +340,7 @@ extern int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonc
/* free device allocated memory per algo */ /* free device allocated memory per algo */
void algo_free_all(int thr_id); void algo_free_all(int thr_id);
extern void free_allium(int thr_id);
extern void free_bastion(int thr_id); extern void free_bastion(int thr_id);
extern void free_bitcore(int thr_id); extern void free_bitcore(int thr_id);
extern void free_blake256(int thr_id); extern void free_blake256(int thr_id);
@ -887,6 +889,7 @@ void applog_hash64(void *hash);
void applog_compare_hash(void *hash, void *hash_ref); void applog_compare_hash(void *hash, void *hash_ref);
void print_hash_tests(void); void print_hash_tests(void);
void allium_hash(void *state, const void *input);
void bastionhash(void* output, const unsigned char* input); void bastionhash(void* output, const unsigned char* input);
void blake256hash(void *output, const void *input, int8_t rounds); void blake256hash(void *output, const void *input, int8_t rounds);
void blake2b_hash(void *output, const void *input); void blake2b_hash(void *output, const void *input);

3
util.cpp

@ -2164,6 +2164,9 @@ void print_hash_tests(void)
printf(CL_WHT "CPU HASH ON EMPTY BUFFER RESULTS:" CL_N "\n"); printf(CL_WHT "CPU HASH ON EMPTY BUFFER RESULTS:" CL_N "\n");
allium_hash(&hash[0], &buf[0]);
printpfx("allium", hash);
bastionhash(&hash[0], &buf[0]); bastionhash(&hash[0], &buf[0]);
printpfx("bastion", hash); printpfx("bastion", hash);

Loading…
Cancel
Save