Browse Source

blake: also add blakecoin (8-round) variant

master
Tanguy Pruvot 10 years ago
parent
commit
5682b7d241
  1. 33
      blake32.cu
  2. 3
      ccminer.vcxproj
  3. 13
      cpu-miner.c
  4. 8
      miner.h
  5. 4
      sph/blake.c
  6. 5
      sph/sph_blake.h
  7. 8
      util.c

33
blake32.cu

@ -15,11 +15,17 @@ extern "C" {
/* threads per block */ /* threads per block */
#define TPB 128 #define TPB 128
extern "C" int blake256_rounds = 14;
/* hash by cpu with blake 256 */ /* hash by cpu with blake 256 */
extern "C" void blake32hash(void *output, const void *input) extern "C" void blake256hash(void *output, const void *input, int rounds = 14)
{ {
unsigned char hash[64]; unsigned char hash[64];
sph_blake256_context ctx; sph_blake256_context ctx;
/* in sph_blake.c */
blake256_rounds = rounds;
sph_blake256_init(&ctx); sph_blake256_init(&ctx);
sph_blake256(&ctx, input, 80); sph_blake256(&ctx, input, 80);
sph_blake256_close(&ctx, hash); sph_blake256_close(&ctx, hash);
@ -133,10 +139,8 @@ static const uint32_t __align__(32) c_u256[16] = {
v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \ v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
} }
#define BLAKE256_ROUNDS 14
__device__ static __device__ static
void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0) void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0, int blakerounds)
{ {
uint32_t /* __align__(8) */ v[16]; uint32_t /* __align__(8) */ v[16];
uint32_t /* __align__(8) */ m[16]; uint32_t /* __align__(8) */ m[16];
@ -162,8 +166,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
v[14] = u256[6]; v[14] = u256[6];
v[15] = u256[7]; v[15] = u256[7];
//#pragma unroll for (int i = 0; i < blakerounds; i++) {
for (int i = 0; i < BLAKE256_ROUNDS; i++) {
/* column step */ /* column step */
GS(0, 4, 0x8, 0xC, 0); GS(0, 4, 0x8, 0xC, 0);
GS(1, 5, 0x9, 0xD, 2); GS(1, 5, 0x9, 0xD, 2);
@ -182,7 +185,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
} }
__global__ __global__
void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce) void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, int blakerounds)
{ {
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
@ -195,7 +198,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
for(int i=0; i<8; i++) for(int i=0; i<8; i++)
h[i] = c_IV256[i]; h[i] = c_IV256[i];
blake256_compress(h, c_PaddedMessage80, 0x200); /* 512 = 0x200 */ blake256_compress(h, c_PaddedMessage80, 0x200, blakerounds); /* 512 = 0x200 */
// ------ Close: Bytes 64 to 80 ------ // ------ Close: Bytes 64 to 80 ------
@ -218,7 +221,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
msg[14] = 0; msg[14] = 0;
msg[15] = 0x280; msg[15] = 0x280;
blake256_compress(h, msg, 0x280); blake256_compress(h, msg, 0x280, blakerounds);
for (int i = 7; i >= 0; i--) { for (int i = 7; i >= 0; i--) {
uint32_t hash = cuda_swab32(h[i]); uint32_t hash = cuda_swab32(h[i]);
@ -237,7 +240,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
} }
__host__ __host__
uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce) uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, int blakerounds)
{ {
const int threadsperblock = TPB; const int threadsperblock = TPB;
uint32_t result = MAXU; uint32_t result = MAXU;
@ -250,7 +253,7 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess) if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
return result; return result;
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]); blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id], blakerounds);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) { if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
cudaThreadSynchronize(); cudaThreadSynchronize();
@ -270,8 +273,8 @@ void blake256_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
} }
extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) uint32_t max_nonce, unsigned long *hashes_done, uint32_t blakerounds=14)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
@ -297,7 +300,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta
do { do {
// GPU HASH // GPU HASH
uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19]); uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19], blakerounds);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
uint32_t endiandata[20]; uint32_t endiandata[20];
@ -315,7 +318,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], foundNonce);
blake32hash(vhashcpu, endiandata); blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
{ {

3
ccminer.vcxproj

@ -400,8 +400,9 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
</CudaCompile> </CudaCompile>
<CudaCompile Include="blake32.cu"> <CudaCompile Include="blake32.cu">
<MaxRegCount>64</MaxRegCount> <MaxRegCount>64</MaxRegCount>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\animecoin.cu"> <CudaCompile Include="quark\animecoin.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>

13
cpu-miner.c

@ -128,6 +128,7 @@ struct workio_cmd {
typedef enum { typedef enum {
ALGO_ANIME, ALGO_ANIME,
ALGO_BLAKE, ALGO_BLAKE,
ALGO_BLAKECOIN,
ALGO_FRESH, ALGO_FRESH,
ALGO_FUGUE256, /* Fugue256 */ ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL, ALGO_GROESTL,
@ -149,6 +150,7 @@ typedef enum {
static const char *algo_names[] = { static const char *algo_names[] = {
"anime", "anime",
"blake", "blake",
"blakecoin",
"fresh", "fresh",
"fugue256", "fugue256",
"groestl", "groestl",
@ -231,6 +233,7 @@ Options:\n\
-a, --algo=ALGO specify the algorithm to use\n\ -a, --algo=ALGO specify the algorithm to use\n\
anime Animecoin hash\n\ anime Animecoin hash\n\
blake Blake 256 (like NEOS blake)\n\ blake Blake 256 (like NEOS blake)\n\
blakecoin Old Blake 256 (8 rounds)\n\
fresh Freshcoin hash (shavite 80)\n\ fresh Freshcoin hash (shavite 80)\n\
fugue256 Fuguecoin hash\n\ fugue256 Fuguecoin hash\n\
groestl Groestlcoin hash\n\ groestl Groestlcoin hash\n\
@ -961,6 +964,7 @@ static void *miner_thread(void *userdata)
case ALGO_JACKPOT: case ALGO_JACKPOT:
max64 = 0x1fffLL; max64 = 0x1fffLL;
break; break;
case ALGO_BLAKECOIN:
case ALGO_BLAKE: case ALGO_BLAKE:
/* based on the 750Ti hashrate (100kH) */ /* based on the 750Ti hashrate (100kH) */
max64 = 0x3ffffffLL; max64 = 0x3ffffffLL;
@ -1065,9 +1069,14 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_BLAKECOIN:
rc = scanhash_blake256(thr_id, work.data, work.target,
max_nonce, &hashes_done, 8);
break;
case ALGO_BLAKE: case ALGO_BLAKE:
rc = scanhash_blake32(thr_id, work.data, work.target, rc = scanhash_blake256(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done, 14);
break; break;
case ALGO_FRESH: case ALGO_FRESH:

8
miner.h

@ -237,11 +237,11 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_fresh(int thr_id, uint32_t *pdata, extern int scanhash_blake256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done, uint32_t blakerounds);
extern int scanhash_blake32(int thr_id, uint32_t *pdata, extern int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
@ -420,7 +420,7 @@ void applog_hash(unsigned char *hash);
void print_hash_tests(void); void print_hash_tests(void);
void animehash(void *state, const void *input); void animehash(void *state, const void *input);
void blake32hash(void *output, const void *input); void blake256hash(void *output, const void *input, int rounds);
void fresh_hash(void *state, const void *input); void fresh_hash(void *state, const void *input);
void fugue256_hash(unsigned char* output, const unsigned char* input, int len); void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);

4
sph/blake.c

@ -548,7 +548,7 @@ static const sph_u64 CB[16] = {
M[0xD] = sph_dec32be_aligned(buf + 52); \ M[0xD] = sph_dec32be_aligned(buf + 52); \
M[0xE] = sph_dec32be_aligned(buf + 56); \ M[0xE] = sph_dec32be_aligned(buf + 56); \
M[0xF] = sph_dec32be_aligned(buf + 60); \ M[0xF] = sph_dec32be_aligned(buf + 60); \
for (r = 0; r < 14; r ++) \ for (r = 0; r < blake256_rounds; r ++) \
ROUND_S(r); \ ROUND_S(r); \
H0 ^= S0 ^ V0 ^ V8; \ H0 ^= S0 ^ V0 ^ V8; \
H1 ^= S1 ^ V1 ^ V9; \ H1 ^= S1 ^ V1 ^ V9; \
@ -592,6 +592,7 @@ static const sph_u64 CB[16] = {
M6 = sph_dec32be_aligned(buf + 24); \ M6 = sph_dec32be_aligned(buf + 24); \
M7 = sph_dec32be_aligned(buf + 28); \ M7 = sph_dec32be_aligned(buf + 28); \
M8 = sph_dec32be_aligned(buf + 32); \ M8 = sph_dec32be_aligned(buf + 32); \
if (blake256_rounds == 14) { \
M9 = sph_dec32be_aligned(buf + 36); \ M9 = sph_dec32be_aligned(buf + 36); \
MA = sph_dec32be_aligned(buf + 40); \ MA = sph_dec32be_aligned(buf + 40); \
MB = sph_dec32be_aligned(buf + 44); \ MB = sph_dec32be_aligned(buf + 44); \
@ -599,6 +600,7 @@ static const sph_u64 CB[16] = {
MD = sph_dec32be_aligned(buf + 52); \ MD = sph_dec32be_aligned(buf + 52); \
ME = sph_dec32be_aligned(buf + 56); \ ME = sph_dec32be_aligned(buf + 56); \
MF = sph_dec32be_aligned(buf + 60); \ MF = sph_dec32be_aligned(buf + 60); \
} \
ROUND_S(0); \ ROUND_S(0); \
ROUND_S(1); \ ROUND_S(1); \
ROUND_S(2); \ ROUND_S(2); \

5
sph/sph_blake.h

@ -181,6 +181,11 @@ void sph_blake224_close(void *cc, void *dst);
void sph_blake224_addbits_and_close( void sph_blake224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst); void *cc, unsigned ub, unsigned n, void *dst);
/**
* Switch for the number of rounds (old blake was 8)
*/
extern int blake256_rounds;
/** /**
* Initialize a BLAKE-256 context. This process performs no memory allocation. * Initialize a BLAKE-256 context. This process performs no memory allocation.
* *

8
util.c

@ -1042,7 +1042,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
/* store stratum server time diff */ /* store stratum server time diff */
hex2bin((unsigned char *)&ntime, stime, 4); hex2bin((unsigned char *)&ntime, stime, 4);
ntime = swab32(ntime) - time(0); ntime = swab32(ntime) - (uint32_t) time(0);
if (ntime > sctx->srvtime_diff) { if (ntime > sctx->srvtime_diff) {
sctx->srvtime_diff = ntime; sctx->srvtime_diff = ntime;
if (!opt_quiet) if (!opt_quiet)
@ -1420,7 +1420,11 @@ void print_hash_tests(void)
printpfx("anime", hash); printpfx("anime", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
blake32hash(&hash[0], &buf[0]); blake256hash(&hash[0], &buf[0], 8);
printpfx("blakecoin", hash);
memset(hash, 0, sizeof hash);
blake256hash(&hash[0], &buf[0], 14);
printpfx("blake", hash); printpfx("blake", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);

Loading…
Cancel
Save