Browse Source

blake: also add blakecoin (8-round) variant

master
Tanguy Pruvot 10 years ago
parent
commit
5682b7d241
  1. 33
      blake32.cu
  2. 5
      ccminer.vcxproj
  3. 13
      cpu-miner.c
  4. 8
      miner.h
  5. 4
      sph/blake.c
  6. 5
      sph/sph_blake.h
  7. 8
      util.c

33
blake32.cu

@ -15,11 +15,17 @@ extern "C" { @@ -15,11 +15,17 @@ extern "C" {
/* threads per block */
#define TPB 128
extern "C" int blake256_rounds = 14;
/* hash by cpu with blake 256 */
extern "C" void blake32hash(void *output, const void *input)
extern "C" void blake256hash(void *output, const void *input, int rounds = 14)
{
unsigned char hash[64];
sph_blake256_context ctx;
/* in sph_blake.c */
blake256_rounds = rounds;
sph_blake256_init(&ctx);
sph_blake256(&ctx, input, 80);
sph_blake256_close(&ctx, hash);
@ -133,10 +139,8 @@ static const uint32_t __align__(32) c_u256[16] = { @@ -133,10 +139,8 @@ static const uint32_t __align__(32) c_u256[16] = {
v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
}
#define BLAKE256_ROUNDS 14
__device__ static
void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0, int blakerounds)
{
uint32_t /* __align__(8) */ v[16];
uint32_t /* __align__(8) */ m[16];
@ -162,8 +166,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0) @@ -162,8 +166,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
v[14] = u256[6];
v[15] = u256[7];
//#pragma unroll
for (int i = 0; i < BLAKE256_ROUNDS; i++) {
for (int i = 0; i < blakerounds; i++) {
/* column step */
GS(0, 4, 0x8, 0xC, 0);
GS(1, 5, 0x9, 0xD, 2);
@ -182,7 +185,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0) @@ -182,7 +185,7 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
}
__global__
void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, int blakerounds)
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
@ -195,7 +198,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN @@ -195,7 +198,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
for(int i=0; i<8; i++)
h[i] = c_IV256[i];
blake256_compress(h, c_PaddedMessage80, 0x200); /* 512 = 0x200 */
blake256_compress(h, c_PaddedMessage80, 0x200, blakerounds); /* 512 = 0x200 */
// ------ Close: Bytes 64 to 80 ------
@ -218,7 +221,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN @@ -218,7 +221,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
msg[14] = 0;
msg[15] = 0x280;
blake256_compress(h, msg, 0x280);
blake256_compress(h, msg, 0x280, blakerounds);
for (int i = 7; i >= 0; i--) {
uint32_t hash = cuda_swab32(h[i]);
@ -237,7 +240,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN @@ -237,7 +240,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
}
__host__
uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce)
uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, int blakerounds)
{
const int threadsperblock = TPB;
uint32_t result = MAXU;
@ -250,7 +253,7 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce @@ -250,7 +253,7 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
return result;
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]);
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id], blakerounds);
cudaDeviceSynchronize();
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
cudaThreadSynchronize();
@ -270,8 +273,8 @@ void blake256_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget) @@ -270,8 +273,8 @@ void blake256_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
}
extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done, uint32_t blakerounds=14)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
@ -297,7 +300,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta @@ -297,7 +300,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta
do {
// GPU HASH
uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19]);
uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19], blakerounds);
if (foundNonce != 0xffffffff)
{
uint32_t endiandata[20];
@ -315,7 +318,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta @@ -315,7 +318,7 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta
be32enc(&endiandata[19], foundNonce);
blake32hash(vhashcpu, endiandata);
blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
{

5
ccminer.vcxproj

@ -400,8 +400,9 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command> @@ -400,8 +400,9 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
</CudaCompile>
<CudaCompile Include="blake32.cu">
<MaxRegCount>64</MaxRegCount>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath>
</CudaCompile>
<CudaCompile Include="quark\animecoin.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
@ -562,4 +563,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command> @@ -562,4 +563,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
</Project>
</Project>

13
cpu-miner.c

@ -128,6 +128,7 @@ struct workio_cmd { @@ -128,6 +128,7 @@ struct workio_cmd {
typedef enum {
ALGO_ANIME,
ALGO_BLAKE,
ALGO_BLAKECOIN,
ALGO_FRESH,
ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL,
@ -149,6 +150,7 @@ typedef enum { @@ -149,6 +150,7 @@ typedef enum {
static const char *algo_names[] = {
"anime",
"blake",
"blakecoin",
"fresh",
"fugue256",
"groestl",
@ -231,6 +233,7 @@ Options:\n\ @@ -231,6 +233,7 @@ Options:\n\
-a, --algo=ALGO specify the algorithm to use\n\
anime Animecoin hash\n\
blake Blake 256 (like NEOS blake)\n\
blakecoin Old Blake 256 (8 rounds)\n\
fresh Freshcoin hash (shavite 80)\n\
fugue256 Fuguecoin hash\n\
groestl Groestlcoin hash\n\
@ -961,6 +964,7 @@ static void *miner_thread(void *userdata) @@ -961,6 +964,7 @@ static void *miner_thread(void *userdata)
case ALGO_JACKPOT:
max64 = 0x1fffLL;
break;
case ALGO_BLAKECOIN:
case ALGO_BLAKE:
/* based on the 750Ti hashrate (100kH) */
max64 = 0x3ffffffLL;
@ -1065,9 +1069,14 @@ static void *miner_thread(void *userdata) @@ -1065,9 +1069,14 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done);
break;
case ALGO_BLAKECOIN:
rc = scanhash_blake256(thr_id, work.data, work.target,
max_nonce, &hashes_done, 8);
break;
case ALGO_BLAKE:
rc = scanhash_blake32(thr_id, work.data, work.target,
max_nonce, &hashes_done);
rc = scanhash_blake256(thr_id, work.data, work.target,
max_nonce, &hashes_done, 14);
break;
case ALGO_FRESH:

8
miner.h

@ -237,11 +237,11 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata, @@ -237,11 +237,11 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_fresh(int thr_id, uint32_t *pdata,
extern int scanhash_blake256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
unsigned long *hashes_done, uint32_t blakerounds);
extern int scanhash_blake32(int thr_id, uint32_t *pdata,
extern int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
@ -420,7 +420,7 @@ void applog_hash(unsigned char *hash); @@ -420,7 +420,7 @@ void applog_hash(unsigned char *hash);
void print_hash_tests(void);
void animehash(void *state, const void *input);
void blake32hash(void *output, const void *input);
void blake256hash(void *output, const void *input, int rounds);
void fresh_hash(void *state, const void *input);
void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);

4
sph/blake.c

@ -548,7 +548,7 @@ static const sph_u64 CB[16] = { @@ -548,7 +548,7 @@ static const sph_u64 CB[16] = {
M[0xD] = sph_dec32be_aligned(buf + 52); \
M[0xE] = sph_dec32be_aligned(buf + 56); \
M[0xF] = sph_dec32be_aligned(buf + 60); \
for (r = 0; r < 14; r ++) \
for (r = 0; r < blake256_rounds; r ++) \
ROUND_S(r); \
H0 ^= S0 ^ V0 ^ V8; \
H1 ^= S1 ^ V1 ^ V9; \
@ -592,6 +592,7 @@ static const sph_u64 CB[16] = { @@ -592,6 +592,7 @@ static const sph_u64 CB[16] = {
M6 = sph_dec32be_aligned(buf + 24); \
M7 = sph_dec32be_aligned(buf + 28); \
M8 = sph_dec32be_aligned(buf + 32); \
if (blake256_rounds == 14) { \
M9 = sph_dec32be_aligned(buf + 36); \
MA = sph_dec32be_aligned(buf + 40); \
MB = sph_dec32be_aligned(buf + 44); \
@ -599,6 +600,7 @@ static const sph_u64 CB[16] = { @@ -599,6 +600,7 @@ static const sph_u64 CB[16] = {
MD = sph_dec32be_aligned(buf + 52); \
ME = sph_dec32be_aligned(buf + 56); \
MF = sph_dec32be_aligned(buf + 60); \
} \
ROUND_S(0); \
ROUND_S(1); \
ROUND_S(2); \

5
sph/sph_blake.h

@ -181,6 +181,11 @@ void sph_blake224_close(void *cc, void *dst); @@ -181,6 +181,11 @@ void sph_blake224_close(void *cc, void *dst);
void sph_blake224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Global switch selecting the number of BLAKE-256 rounds to run:
* 14 for standard BLAKE-256, 8 for the older Blakecoin variant.
*/
extern int blake256_rounds;
/**
* Initialize a BLAKE-256 context. This process performs no memory allocation.
*

8
util.c

@ -1042,7 +1042,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) @@ -1042,7 +1042,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
/* store stratum server time diff */
hex2bin((unsigned char *)&ntime, stime, 4);
ntime = swab32(ntime) - time(0);
ntime = swab32(ntime) - (uint32_t) time(0);
if (ntime > sctx->srvtime_diff) {
sctx->srvtime_diff = ntime;
if (!opt_quiet)
@ -1420,7 +1420,11 @@ void print_hash_tests(void) @@ -1420,7 +1420,11 @@ void print_hash_tests(void)
printpfx("anime", hash);
memset(hash, 0, sizeof hash);
blake32hash(&hash[0], &buf[0]);
blake256hash(&hash[0], &buf[0], 8);
printpfx("blakecoin", hash);
memset(hash, 0, sizeof hash);
blake256hash(&hash[0], &buf[0], 14);
printpfx("blake", hash);
memset(hash, 0, sizeof hash);

Loading…
Cancel
Save