GOSTcoin support for ccminer CUDA miner project, compatible with most nvidia cards
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

355 lines
12 KiB

/*
* m7 algorithm
*
*/
extern "C"
{
#include "sph/sph_sha2.h"
#include "sph/sph_keccak.h"
#include "sph/sph_ripemd.h"
#include "sph/sph_haval.h"
#include "sph/sph_tiger.h"
#include "sph/sph_whirlpool.h"
#include "sph/sph_blake.h"
#include "miner.h"
}
#include "cuda_helper.h"
// configure with --with-mpir-src=...
#include "mpir.h"
// from cpu-miner.c
// device_map[thr_id] is the value scanhash_m7 hands to cudaSetDevice().
extern int device_map[8];
// When set, scanhash_m7 overwrites ptarget[7] with 0x0000ff before scanning.
extern bool opt_benchmark;
//static uint64_t *d_hash[8];
// Buffers below are indexed by thr_id and allocated with cudaMalloc()
// the first time scanhash_m7 runs for that thr_id.
// FinalHash is allocated there but not otherwise referenced by m7_hash or scanhash_m7.
static uint64_t *FinalHash[8];
// One output buffer per hash stage; each is filled by the matching
// *_cpu_hash* call at the top of every scan iteration.
static uint64_t *KeccakH[8];
static uint64_t *WhirlpoolH[8];
static uint64_t *Sha512H[8];
static uint64_t *Sha256H[8];
static uint64_t *HavalH[8];
static uint64_t *TigerH[8];
static uint64_t *RipemdH[8];
// The multiplication chain in scanhash_m7 alternates between these two buffers:
// each step reads the previous product from one and writes the new product to the other.
static uint64_t *d_prod0[8];
static uint64_t *d_prod1[8];
// Called by scanhash_m7 after every multiplication step, always with stream 0.
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#if 0
/*
 * Loads the 32 bytes at u into r.
 * The buffer is read as unsigned-long-sized words, least significant word
 * first, each word stored least significant byte first.
 */
static void mpz_set_uint256(mpz_t r, uint8_t *u)
{
    const size_t word_bytes = sizeof(unsigned long);
    const size_t word_count = 32 / word_bytes;

    // order = -1 (low word first), endian = -1 (low byte first), nails = 0
    mpz_import(r, word_count, -1, word_bytes, -1, 0, u);
}
/*
 * Stores r into the 32-byte buffer at u, least significant byte first.
 *
 * r : value to export (its sign is ignored by mpz_export).
 * u : caller-owned buffer of at least 32 bytes; always fully overwritten.
 *
 * The previous version assigned 0 to u before exporting, which made
 * mpz_export allocate its own buffer: the caller's buffer was never written
 * and the allocation was leaked.
 *
 * Bytes above the value's own length are zero. A value that needs more than
 * 256 bits is not exported (it would overrun u); u is left all-zero instead.
 */
static void mpz_get_uint256(mpz_t r, uint8_t *u)
{
    memset(u, 0, 32);

    // Refuse anything that cannot fit into the 32-byte destination.
    if (mpz_sizeinbase(r, 2) > 256) {
        return;
    }

    // order = -1 (low word first), endian = -1 (low byte first), nails = 0
    mpz_export(u, NULL, -1, sizeof(unsigned long), -1, 0, r);
}
#endif
/*
 * Loads the 64 bytes at u into r.
 * The buffer is read as unsigned-long-sized words, least significant word
 * first, each word stored least significant byte first.
 */
static void mpz_set_uint512(mpz_t r, uint8_t *u)
{
    const size_t word_bytes = sizeof(unsigned long);
    const size_t word_count = 64 / word_bytes;

    // order = -1 (low word first), endian = -1 (low byte first), nails = 0
    mpz_import(r, word_count, -1, word_bytes, -1, 0, u);
}
/*
 * If the first 32 bytes of hash512 are all zero, sets hash512[0] to 1;
 * otherwise leaves the buffer untouched.
 * Only bytes 0..31 are inspected, whatever the buffer's real length.
 */
static void set_one_if_zero(uint8_t *hash512) {
    int pos = 0;

    // Walk forward until a non-zero byte or the end of the inspected window.
    while (pos < 32 && hash512[pos] == 0) {
        ++pos;
    }

    if (pos == 32) {
        hash512[0] = 1;
    }
}
// Host-side entry points implemented in other translation units.
// Usage pattern in scanhash_m7: every *_cpu_init runs once per thr_id, every
// *_setBlock_120 runs once per call with the work data, and every *_cpu_hash*
// runs once per scan iteration.

// SHA-256 stage.
// m7_sha256_cpu_hash_300 is the last step of each iteration; scanhash_m7
// treats a return value of 0xffffffff as "no candidate nonce".
extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector, uint64_t *d_hash, int order);
extern void m7_sha256_setBlock_120(void *data,const void *ptarget);
extern void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
extern void m7_sha256_cpu_init(int thr_id, int threads);
// SHA-512 stage.
extern void m7_sha512_cpu_init(int thr_id, int threads);
extern void m7_sha512_setBlock_120(void *pdata);
extern void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
// RIPEMD-160 stage.
extern void ripemd160_cpu_init(int thr_id, int threads);
extern void ripemd160_setBlock_120(void *pdata);
extern void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
// Tiger stage.
extern void tiger192_cpu_init(int thr_id, int threads);
extern void tiger192_setBlock_120(void *pdata);
extern void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
// Big-number multiplication helpers.
// scanhash_m7 uses m7_bigmul_init, cpu_mulT4, m7_bigmul_unroll1_cpu and
// m7_bigmul_unroll2_cpu; m7_bigmul_cpu, m7_bigmul1_cpu, cpu_mul and mul_init
// are not called by m7_hash or scanhash_m7 (two appear only in commented-out code).
extern void m7_bigmul_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t* Hash3, uint64_t* Hash4,
uint64_t *Hash5, uint64_t* Hash6, uint64_t *Hash7, uint32_t foundNonce, uint32_t StartNonce,int order);
extern void m7_bigmul1_cpu(int thr_id, int threads, int len1, int len2, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order);
extern void m7_bigmul_init(int thr_id, int threads);
extern void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
extern void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
extern void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
extern void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
extern void mul_init();
// Keccak-512 stage.
extern void m7_keccak512_setBlock_120(void *pdata);
extern void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void m7_keccak512_cpu_init(int thr_id, int threads);
// Whirlpool stage (scanhash_m7 passes flag = 0 to the init function).
extern void m7_whirlpool512_cpu_init(int thr_id, int threads, int flag);
extern void m7_whirlpool512_setBlock_120(void *pdata);
extern void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
// HAVAL-256 stage.
extern void m7_haval256_cpu_init(int thr_id, int threads);
extern void m7_haval256_setBlock_120(void *data);
extern void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
// Target check helpers: initialised once per thr_id, target set once per scanhash_m7 call.
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
// m7 hash function
/*
 * CPU implementation of the M7 hash:
 *
 *   sha256( sha256 * sha512 * keccak512 * whirlpool * haval256_5 * tiger * ripemd160 )
 *
 * Each inner digest is computed over the first 122 bytes of `input`, with
 * 32-bit word 29 replaced by TheNonce. Each digest is zero-padded to 64 bytes,
 * passed through set_one_if_zero(), loaded as a little-endian integer and
 * multiplied into one product. The product is serialised least significant
 * byte first and hashed with SHA-256.
 *
 * state    : receives the 32-byte result.
 * input    : work data; 122 bytes are read.
 * TheNonce : nonce written into word 29 before hashing.
 * debug    : when 1, the seven intermediate digests are logged through applog.
 *
 * If the temporary product buffer cannot be allocated, `state` is filled with
 * 0xff bytes and a message is logged.
 *
 * Changes from the earlier version: the big-number array is initialised once
 * (it used to be initialised twice), every mpz_t is cleared before returning,
 * the product buffer is freed, and the unused `product` variable is gone.
 */
extern "C" void m7_hash(void *state, const void *input,uint32_t TheNonce, int debug)
{
    // sha256(sha256*sha512*keccak512*ripemd160*haval*tiger1*whirlpool)
    const size_t M7_DATA_LEN = 122;   // bytes of work data fed to every inner hash

    uint32_t data[32];
    uint8_t bhash[7][64];
    uint32_t hash[8];
    mpz_t bns[7];

    memcpy(data, input, M7_DATA_LEN);
    data[29] = TheNonce;              // byte offset 116: the nonce word
    memset(bhash, 0, sizeof(bhash));  // digests shorter than 64 bytes stay zero-padded

    // The nonce is already in place, so each digest is computed in one pass
    // over the 122 bytes (equivalent to hashing 116 bytes and then the last 6).
    sph_sha256_context ctx_sha256;
    sph_sha256_init(&ctx_sha256);
    sph_sha256(&ctx_sha256, data, M7_DATA_LEN);
    sph_sha256_close(&ctx_sha256, (void*)(bhash[0]));

    sph_sha512_context ctx_sha512;
    sph_sha512_init(&ctx_sha512);
    sph_sha512(&ctx_sha512, data, M7_DATA_LEN);
    sph_sha512_close(&ctx_sha512, (void*)(bhash[1]));

    sph_keccak512_context ctx_keccak;
    sph_keccak512_init(&ctx_keccak);
    sph_keccak512(&ctx_keccak, data, M7_DATA_LEN);
    sph_keccak512_close(&ctx_keccak, (void*)(bhash[2]));

    sph_whirlpool_context ctx_whirlpool;
    sph_whirlpool_init(&ctx_whirlpool);
    sph_whirlpool(&ctx_whirlpool, data, M7_DATA_LEN);
    sph_whirlpool_close(&ctx_whirlpool, (void*)(bhash[3]));

    sph_haval256_5_context ctx_haval;
    sph_haval256_5_init(&ctx_haval);
    sph_haval256_5(&ctx_haval, data, M7_DATA_LEN);
    sph_haval256_5_close(&ctx_haval, (void*)(bhash[4]));

    sph_tiger_context ctx_tiger;
    sph_tiger_init(&ctx_tiger);
    sph_tiger(&ctx_tiger, data, M7_DATA_LEN);
    sph_tiger_close(&ctx_tiger, (void*)(bhash[5]));

    sph_ripemd160_context ctx_ripemd;
    sph_ripemd160_init(&ctx_ripemd);
    sph_ripemd160(&ctx_ripemd, data, M7_DATA_LEN);
    sph_ripemd160_close(&ctx_ripemd, (void*)(bhash[6]));

    if (debug == 1) {
        // Dump every digest as 16 words, bytes printed high-to-low within each word.
        for (int i=0; i<16; i++) {
            applog(LOG_INFO,"sha256[%d]=%02x %02x %02x %02x sha512[%d]=%02x %02x %02x %02x keccak[%d]=%02x %02x %02x %02x whirlpool[2][%d]=%02x %02x %02x %02x "
                "haval[%d]=%02x %02x %02x %02x tiger[%d]=%02x %02x %02x %02x ripemd[%d]=%02x %02x %02x %02x\n",
                i,bhash[0][4*i+3],bhash[0][4*i+2],bhash[0][4*i+1],bhash[0][4*i+0],
                i,bhash[1][4*i+3],bhash[1][4*i+2],bhash[1][4*i+1],bhash[1][4*i+0],
                i,bhash[2][4*i+3],bhash[2][4*i+2],bhash[2][4*i+1],bhash[2][4*i+0],
                i,bhash[3][4*i+3],bhash[3][4*i+2],bhash[3][4*i+1],bhash[3][4*i+0],
                i,bhash[4][4*i+3],bhash[4][4*i+2],bhash[4][4*i+1],bhash[4][4*i+0],
                i,bhash[5][4*i+3],bhash[5][4*i+2],bhash[5][4*i+1],bhash[5][4*i+0],
                i,bhash[6][4*i+3],bhash[6][4*i+2],bhash[6][4*i+1],bhash[6][4*i+0]
            );
        }
    }

    // Load each digest as a big integer (initialised exactly once).
    for (int i = 0; i < 7; i++) {
        mpz_init(bns[i]);
        set_one_if_zero(bhash[i]);
        mpz_set_uint512(bns[i], bhash[i]);
    }

    // Fold from the back so that bns[0] ends up holding the full product.
    for (int i = 6; i > 0; i--) {
        mpz_mul(bns[i-1], bns[i-1], bns[i]);
    }

    // Serialise the product least significant byte first and hash it.
    const size_t bytes = mpz_sizeinbase(bns[0], 256);
    uint8_t *bdata = (uint8_t *)malloc(bytes);
    if (bdata != NULL) {
        sph_sha256_context ctx_final_sha256;

        mpz_export((void *)bdata, NULL, -1, 1, 0, 0, bns[0]);
        sph_sha256_init(&ctx_final_sha256);
        sph_sha256(&ctx_final_sha256, bdata, bytes);
        sph_sha256_close(&ctx_final_sha256, (void*)(hash));
        memcpy(state, hash, 32);
        free(bdata);
    } else {
        // No buffer, no hash: report the largest possible value instead of
        // leaving `state` undefined.
        applog(LOG_INFO, "m7_hash: unable to allocate %lu bytes for the product", (unsigned long)bytes);
        memset(state, 0xff, 32);
    }

    // Release the big-number storage; it used to leak on every call.
    for (int i = 0; i < 7; i++) {
        mpz_clear(bns[i]);
    }
}
/*
 * Scans a range of nonces on the GPU for an M7 hash that meets the target.
 *
 * thr_id      : miner thread index; selects the device (device_map[thr_id])
 *               and the per-thread device buffers.
 * pdata       : work data; pdata[29] is the nonce. It holds the first nonce
 *               to try on entry and is advanced as batches are scanned.
 * ptarget     : target words; ptarget[7] is compared against the last word of
 *               the CPU-verified hash. Overwritten with 0x0000ff in benchmark mode.
 * max_nonce   : scanning stops once pdata[29] reaches this value.
 * hashes_done : receives the number of nonces covered by this call.
 *
 * Returns 1 when a nonce was found and confirmed by m7_hash (pdata[29] then
 * holds that nonce), 0 otherwise.
 *
 * Changes from the earlier version:
 *  - The nonce increment is computed in 64 bits. Previously pdata[29] could
 *    wrap past 0xffffffff, after which "pdata[29] < max_nonce" was true again
 *    and scanning restarted from low nonces instead of stopping.
 *  - cudaSetDevice/cudaMalloc results are checked. On failure the partial
 *    allocations are released, a message is logged, *hashes_done is set to 0
 *    and 0 is returned; initialisation is retried on the next call.
 *    NOTE(review): confirm the caller tolerates a 0-hash return.
 */
extern "C" int scanhash_m7(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
{
    const int throughput = 256*256*8*2;
    const uint32_t FirstNonce = pdata[29];
    static bool init[8] = {0,0,0,0,0,0,0,0};

    if (opt_benchmark)
        ((uint32_t*)ptarget)[7] = 0x0000ff;

    if (!init[thr_id])
    {
        // Device buffers and their size in 64-bit words per nonce,
        // listed in the order they have always been allocated.
        const struct { uint64_t **slot; size_t words; } bufs[] = {
            { &d_prod0[thr_id],    38 },
            { &d_prod1[thr_id],    38 },
            { &FinalHash[thr_id],   8 },
            { &KeccakH[thr_id],    38 },
            { &WhirlpoolH[thr_id],  8 },
            { &Sha256H[thr_id],     8 },
            { &Sha512H[thr_id],     8 },
            { &HavalH[thr_id],      8 },
            { &RipemdH[thr_id],     8 },
            { &TigerH[thr_id],      8 },
        };
        const size_t buf_count = sizeof(bufs) / sizeof(bufs[0]);

        cudaError_t err = cudaSetDevice(device_map[thr_id]);
        for (size_t b = 0; err == cudaSuccess && b < buf_count; b++) {
            err = cudaMalloc(bufs[b].slot, bufs[b].words * sizeof(uint64_t) * throughput);
        }

        if (err != cudaSuccess) {
            applog(LOG_INFO, "GPU #%d: m7 initialisation failed: %s", thr_id, cudaGetErrorString(err));
            // Give back whatever was allocated so a retry does not leak;
            // cudaFree accepts null pointers.
            for (size_t b = 0; b < buf_count; b++) {
                cudaFree(*bufs[b].slot);
                *bufs[b].slot = NULL;
            }
            *hashes_done = 0;
            return 0;
        }

        m7_sha256_cpu_init(thr_id, throughput);
        m7_sha512_cpu_init(thr_id, throughput);
        m7_keccak512_cpu_init(thr_id, throughput);
        m7_haval256_cpu_init(thr_id, throughput);
        tiger192_cpu_init(thr_id, throughput);
        m7_whirlpool512_cpu_init(thr_id, throughput,0);
        ripemd160_cpu_init(thr_id, throughput);
        cuda_check_cpu_init(thr_id, throughput);
        m7_bigmul_init(thr_id, throughput);
        //mul_init();
        init[thr_id] = true;
    }

    // Read after the benchmark override so both paths compare against the same value.
    uint32_t Htarg = ptarget[7];

    m7_whirlpool512_setBlock_120((void*)pdata);
    m7_sha256_setBlock_120((void*)pdata, ptarget);
    m7_sha512_setBlock_120((void*)pdata);
    m7_haval256_setBlock_120((void*)pdata);
    m7_keccak512_setBlock_120((void*)pdata);
    ripemd160_setBlock_120((void*)pdata);
    tiger192_setBlock_120((void*)pdata);
    cuda_check_cpu_setTarget(ptarget);

    do {
        int order = 0;
        uint32_t foundNonce;

        // Stage 1: the seven digests for this batch of nonces.
        m7_sha256_cpu_hash_120(thr_id, throughput, pdata[29], Sha256H[thr_id], order++);
        m7_sha512_cpu_hash_120(thr_id, throughput, pdata[29], Sha512H[thr_id], order++);
        m7_keccak512_cpu_hash(thr_id, throughput, pdata[29], KeccakH[thr_id], order++);
        m7_haval256_cpu_hash_120(thr_id, throughput, pdata[29], HavalH[thr_id], order++);
        m7_tiger192_cpu_hash_120(thr_id, throughput, pdata[29], TigerH[thr_id], order++);
        m7_ripemd160_cpu_hash_120(thr_id, throughput, pdata[29], RipemdH[thr_id], order++);
        m7_whirlpool512_cpu_hash_120(thr_id, throughput, pdata[29], WhirlpoolH[thr_id], order++);

        // Stage 2: multiply the digests together, alternating between
        // d_prod0 and d_prod1 as source and destination.
        // NOTE(review): these launchers receive a literal 0 as thr_id rather
        // than thr_id; kept as-is because their implementations are not
        // visible here - confirm this is safe with more than one GPU.
        cpu_mulT4(0, throughput, 8, 8, Sha512H[thr_id], KeccakH[thr_id], d_prod0[thr_id],order); //64
        MyStreamSynchronize(0,order++,thr_id);
        cpu_mulT4(0, throughput,8, 16, WhirlpoolH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //128
        MyStreamSynchronize(0,order++,thr_id);
        cpu_mulT4(0, throughput, 4, 24, Sha256H[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); //96
        MyStreamSynchronize(0,order++,thr_id);
        cpu_mulT4(0, throughput, 4, 28, HavalH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //112
        MyStreamSynchronize(0,order++,thr_id);
        m7_bigmul_unroll1_cpu(0, throughput, TigerH[thr_id], d_prod1[thr_id], d_prod0[thr_id],order);
        MyStreamSynchronize(0,order++,thr_id);
        m7_bigmul_unroll2_cpu(0, throughput, RipemdH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order);
        MyStreamSynchronize(0,order++,thr_id);

        // Stage 3: final SHA-256 over the product; 0xffffffff means no candidate.
        foundNonce = m7_sha256_cpu_hash_300(thr_id, throughput, pdata[29], NULL, d_prod1[thr_id], order);
        if (foundNonce != 0xffffffff)
        {
            // Re-check the candidate on the CPU before reporting it.
            uint32_t vhash64[8];
            m7_hash(vhash64, pdata, foundNonce, 0);
            if (vhash64[7] <= Htarg)
            {
                pdata[29] = foundNonce;
                *hashes_done = foundNonce - FirstNonce + 1;
                return 1;
            } else {
                applog(LOG_INFO, "GPU #%d: result for nonce %08x does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce, vhash64[7], Htarg);
                ////////////////////////////////////////////
                // m7_bigmul_cpu(thr_id,throughput,Sha256H[thr_id],Sha512H[thr_id],KeccakH[thr_id],WhirlpoolH[thr_id],HavalH[thr_id],TigerH[thr_id],RipemdH[thr_id],foundNonce,FirstNonce,order++);
                // m7_hash(vhash64, pdata, foundNonce, 1);
                ////////////////////////////////////////////
            }
        }

        // Advance in 64 bits: a 32-bit add could wrap to a small value that
        // passes the "< max_nonce" test and restarts the scan.
        const uint64_t next_nonce = (uint64_t)pdata[29] + (uint64_t)throughput;
        if (next_nonce > 0xffffffffULL) {
            pdata[29] = 0xffffffff;   // nonce space exhausted
            break;
        }
        pdata[29] = (uint32_t)next_nonce;
    } while (pdata[29] < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = pdata[29] - FirstNonce + 1;
    return 0;
}