/* * m7 algorithm * */ extern "C" { #include "sph/sph_sha2.h" #include "sph/sph_keccak.h" #include "sph/sph_ripemd.h" #include "sph/sph_haval.h" #include "sph/sph_tiger.h" #include "sph/sph_whirlpool.h" #include "sph/sph_blake.h" #include "miner.h" } #include "cuda_helper.h" // configure with --with-mpir-src=... #include "mpir.h" // from cpu-miner.c extern int device_map[8]; extern bool opt_benchmark; //static uint64_t *d_hash[8]; static uint64_t *FinalHash[8]; static uint64_t *KeccakH[8]; static uint64_t *WhirlpoolH[8]; static uint64_t *Sha512H[8]; static uint64_t *Sha256H[8]; static uint64_t *HavalH[8]; static uint64_t *TigerH[8]; static uint64_t *RipemdH[8]; static uint64_t *d_prod0[8]; static uint64_t *d_prod1[8]; extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); #if 0 static void mpz_set_uint256(mpz_t r, uint8_t *u) { mpz_import(r, 32 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u); } static void mpz_get_uint256(mpz_t r, uint8_t *u) { u=0; mpz_export(u, 0, -1, sizeof(unsigned long), -1, 0, r); } #endif static void mpz_set_uint512(mpz_t r, uint8_t *u) { mpz_import(r, 64 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u); } static void set_one_if_zero(uint8_t *hash512) { for (int i = 0; i < 32; i++) { if (hash512[i] != 0) { return; } } hash512[0] = 1; } extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector, uint64_t *d_hash, int order); extern void m7_sha256_setBlock_120(void *data,const void *ptarget); extern void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); extern void m7_sha256_cpu_init(int thr_id, int threads); extern void m7_sha512_cpu_init(int thr_id, int threads); extern void m7_sha512_setBlock_120(void *pdata); extern void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); extern void ripemd160_cpu_init(int thr_id, int threads); extern void ripemd160_setBlock_120(void *pdata); extern void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); extern void tiger192_cpu_init(int thr_id, int threads); extern void tiger192_setBlock_120(void *pdata); extern void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); extern void m7_bigmul_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t* Hash3, uint64_t* Hash4, uint64_t *Hash5, uint64_t* Hash6, uint64_t *Hash7, uint32_t foundNonce, uint32_t StartNonce,int order); extern void m7_bigmul1_cpu(int thr_id, int threads, int len1, int len2, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order); extern void m7_bigmul_init(int thr_id, int threads); extern void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order); extern void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order); extern void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order); extern void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order); extern void mul_init(); extern void m7_keccak512_setBlock_120(void *pdata); extern void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); extern void m7_keccak512_cpu_init(int thr_id, int threads); extern void m7_whirlpool512_cpu_init(int thr_id, int threads, int flag); extern void m7_whirlpool512_setBlock_120(void *pdata); extern void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); extern void m7_haval256_cpu_init(int thr_id, int threads); extern void m7_haval256_setBlock_120(void *data); extern void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); extern void cuda_check_cpu_init(int thr_id, int threads); extern void cuda_check_cpu_setTarget(const void *ptarget); // m7 Hashfunktion extern "C" void m7_hash(void *state, const void *input,uint32_t TheNonce, int debug) { // sha256(sha256*sha512*keccak512*ripemd160*haval*tiger1*whirlpool) uint8_t *bdata = 0; mpz_t bns[7]; mpz_t product; for(int i=0; i < 7; i++) { mpz_init(bns[i]); } mpz_init(product); uint32_t data[32] ; uint32_t *data_p64 = data + (116 / sizeof(data[0])); uint8_t bhash[7][64]; uint32_t hash[8]; memcpy(data,input,122); int M7_MIDSTATE_LEN = 116; for(int i=0; i < 7; i++) { mpz_init(bns[i]); } sph_sha256_context ctx_final_sha256; sph_sha256_context ctx_sha256; sph_sha512_context ctx_sha512; sph_keccak512_context ctx_keccak; sph_whirlpool_context ctx_whirlpool; sph_haval256_5_context ctx_haval; sph_tiger_context ctx_tiger; sph_ripemd160_context ctx_ripemd; sph_sha256_init(&ctx_sha256); sph_sha256(&ctx_sha256, data, M7_MIDSTATE_LEN); sph_sha512_init(&ctx_sha512); sph_sha512(&ctx_sha512, data, M7_MIDSTATE_LEN); sph_keccak512_init(&ctx_keccak); sph_keccak512(&ctx_keccak, data, M7_MIDSTATE_LEN); sph_whirlpool_init(&ctx_whirlpool); sph_whirlpool(&ctx_whirlpool, data, M7_MIDSTATE_LEN); sph_haval256_5_init(&ctx_haval); sph_haval256_5(&ctx_haval, data, M7_MIDSTATE_LEN); sph_tiger_init(&ctx_tiger); sph_tiger(&ctx_tiger, data, M7_MIDSTATE_LEN); sph_ripemd160_init(&ctx_ripemd); sph_ripemd160(&ctx_ripemd, data, M7_MIDSTATE_LEN); sph_sha256_context ctx2_sha256; sph_sha512_context ctx2_sha512; sph_keccak512_context ctx2_keccak; sph_whirlpool_context ctx2_whirlpool; sph_haval256_5_context ctx2_haval; sph_tiger_context ctx2_tiger; sph_ripemd160_context ctx2_ripemd; data[29] = TheNonce; memset(bhash, 0, 7 * 64); ctx2_sha256 = ctx_sha256; sph_sha256(&ctx2_sha256, data_p64, 122 - M7_MIDSTATE_LEN); sph_sha256_close(&ctx2_sha256, (void*)(bhash[0])); ctx2_sha512 = ctx_sha512; sph_sha512(&ctx2_sha512, data_p64, 122 - M7_MIDSTATE_LEN); sph_sha512_close(&ctx2_sha512, (void*)(bhash[1])); ctx2_keccak = ctx_keccak; sph_keccak512(&ctx2_keccak, data_p64, 122 - M7_MIDSTATE_LEN); sph_keccak512_close(&ctx2_keccak, (void*)(bhash[2])); ctx2_whirlpool = ctx_whirlpool; sph_whirlpool(&ctx2_whirlpool, data_p64, 122 - M7_MIDSTATE_LEN); sph_whirlpool_close(&ctx2_whirlpool, (void*)(bhash[3])); ctx2_haval = ctx_haval; sph_haval256_5(&ctx2_haval, data_p64, 122 - M7_MIDSTATE_LEN); sph_haval256_5_close(&ctx2_haval, (void*)(bhash[4])); ctx2_tiger = ctx_tiger; sph_tiger(&ctx2_tiger, data_p64, 122 - M7_MIDSTATE_LEN); sph_tiger_close(&ctx2_tiger, (void*)(bhash[5])); ctx2_ripemd = ctx_ripemd; sph_ripemd160(&ctx2_ripemd, data_p64, 122 - M7_MIDSTATE_LEN); sph_ripemd160_close(&ctx2_ripemd, (void*)(bhash[6])); if (debug == 1) { for (int i=0; i<16; i++) { applog(LOG_INFO,"sha256[%d]=%02x %02x %02x %02x sha512[%d]=%02x %02x %02x %02x keccak[%d]=%02x %02x %02x %02x whirlpool[2][%d]=%02x %02x %02x %02x " "haval[%d]=%02x %02x %02x %02x tiger[%d]=%02x %02x %02x %02x ripemd[%d]=%02x %02x %02x %02x\n", i,bhash[0][4*i+3],bhash[0][4*i+2],bhash[0][4*i+1],bhash[0][4*i+0], i,bhash[1][4*i+3],bhash[1][4*i+2],bhash[1][4*i+1],bhash[1][4*i+0], i,bhash[2][4*i+3],bhash[2][4*i+2],bhash[2][4*i+1],bhash[2][4*i+0], i,bhash[3][4*i+3],bhash[3][4*i+2],bhash[3][4*i+1],bhash[3][4*i+0], i,bhash[4][4*i+3],bhash[4][4*i+2],bhash[4][4*i+1],bhash[4][4*i+0], i,bhash[5][4*i+3],bhash[5][4*i+2],bhash[5][4*i+1],bhash[5][4*i+0], i,bhash[6][4*i+3],bhash[6][4*i+2],bhash[6][4*i+1],bhash[6][4*i+0] ); } } for(int i=0; i < 7; i++){ set_one_if_zero(bhash[i]); mpz_set_uint512(bns[i],bhash[i]); } for(int i=6; i > 0; i--){ mpz_mul(bns[i-1], bns[i-1], bns[i]); } int bytes = mpz_sizeinbase(bns[0], 256); bdata = (uint8_t *)realloc(bdata, bytes); mpz_export((void *)bdata, NULL, -1, 1, 0, 0, bns[0]); sph_sha256_init(&ctx_final_sha256); sph_sha256(&ctx_final_sha256, bdata, bytes); sph_sha256_close(&ctx_final_sha256, (void*)(hash)); memcpy(state, hash, 32); } extern "C" int scanhash_m7(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) { const int throughput = 256*256*8*2; const uint32_t FirstNonce = pdata[29]; static bool init[8] = {0,0,0,0,0,0,0,0}; if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); cudaMalloc(&d_prod0[thr_id], 38 *sizeof(uint64_t) * throughput); cudaMalloc(&d_prod1[thr_id], 38 *sizeof(uint64_t) * throughput); cudaMalloc(&FinalHash[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&KeccakH[thr_id], 38 *sizeof(uint64_t) * throughput); cudaMalloc(&WhirlpoolH[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&Sha256H[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&Sha512H[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&HavalH[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&RipemdH[thr_id], 8 *sizeof(uint64_t) * throughput); cudaMalloc(&TigerH[thr_id], 8 *sizeof(uint64_t) * throughput); m7_sha256_cpu_init(thr_id, throughput); m7_sha512_cpu_init(thr_id, throughput); m7_keccak512_cpu_init(thr_id, throughput); m7_haval256_cpu_init(thr_id, throughput); tiger192_cpu_init(thr_id, throughput); m7_whirlpool512_cpu_init(thr_id, throughput,0); ripemd160_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput); m7_bigmul_init(thr_id, throughput); //mul_init(); init[thr_id] = true; } uint32_t Htarg = ptarget[7]; m7_whirlpool512_setBlock_120((void*)pdata); m7_sha256_setBlock_120((void*)pdata, ptarget); m7_sha512_setBlock_120((void*)pdata); m7_haval256_setBlock_120((void*)pdata); m7_keccak512_setBlock_120((void*)pdata); ripemd160_setBlock_120((void*)pdata); tiger192_setBlock_120((void*)pdata); cuda_check_cpu_setTarget(ptarget); do { int order = 0; uint32_t foundNonce; m7_sha256_cpu_hash_120(thr_id, throughput, pdata[29], Sha256H[thr_id], order++); m7_sha512_cpu_hash_120(thr_id, throughput, pdata[29], Sha512H[thr_id], order++); m7_keccak512_cpu_hash(thr_id, throughput, pdata[29], KeccakH[thr_id], order++); m7_haval256_cpu_hash_120(thr_id, throughput, pdata[29], HavalH[thr_id], order++); m7_tiger192_cpu_hash_120(thr_id, throughput, pdata[29], TigerH[thr_id], order++); m7_ripemd160_cpu_hash_120(thr_id, throughput, pdata[29], RipemdH[thr_id], order++); m7_whirlpool512_cpu_hash_120(thr_id, throughput, pdata[29], WhirlpoolH[thr_id], order++); cpu_mulT4(0, throughput, 8, 8, Sha512H[thr_id], KeccakH[thr_id], d_prod0[thr_id],order); //64 MyStreamSynchronize(0,order++,thr_id); cpu_mulT4(0, throughput,8, 16, WhirlpoolH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //128 MyStreamSynchronize(0,order++,thr_id); cpu_mulT4(0, throughput, 4, 24, Sha256H[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); //96 MyStreamSynchronize(0,order++,thr_id); cpu_mulT4(0, throughput, 4, 28, HavalH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //112 MyStreamSynchronize(0,order++,thr_id); m7_bigmul_unroll1_cpu(0, throughput, TigerH[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); MyStreamSynchronize(0,order++,thr_id); m7_bigmul_unroll2_cpu(0, throughput, RipemdH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); MyStreamSynchronize(0,order++,thr_id); foundNonce = m7_sha256_cpu_hash_300(thr_id, throughput, pdata[29], NULL, d_prod1[thr_id], order); if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; m7_hash(vhash64, pdata, foundNonce, 0); if (vhash64[7] <= Htarg) { pdata[29] = foundNonce; *hashes_done = foundNonce - FirstNonce + 1; return 1; } else { applog(LOG_INFO, "GPU #%d: result for nonce %08x does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce, vhash64[7], Htarg); //////////////////////////////////////////// // m7_bigmul_cpu(thr_id,throughput,Sha256H[thr_id],Sha512H[thr_id],KeccakH[thr_id],WhirlpoolH[thr_id],HavalH[thr_id],TigerH[thr_id],RipemdH[thr_id],foundNonce,FirstNonce,order++); // m7_hash(vhash64, pdata, foundNonce, 1); //////////////////////////////////////////// } } pdata[29] += throughput; } while (pdata[29] < max_nonce && !work_restart[thr_id].restart); *hashes_done = pdata[29] - FirstNonce + 1; return 0; }