whirlpool: remove dead code, win 2ms in final hash
This commit is contained in:
parent
52dade56b5
commit
10314d844f
@ -40,7 +40,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlcoin.cu \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \
|
||||
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu
|
||||
|
||||
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
||||
|
@ -523,7 +523,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
<MaxRegCount>80</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\whirlcoin.cu">
|
||||
<CudaCompile Include="x15\whirlpool.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
@ -548,4 +548,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
@ -298,6 +298,12 @@
|
||||
<CudaCompile Include="cuda_groestlcoin.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="groestl_functions_quad.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="bitslice_transformations_quad.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="JHA\cuda_jha_keccak512.cu">
|
||||
<Filter>Source Files\CUDA\JHA</Filter>
|
||||
</CudaCompile>
|
||||
@ -406,21 +412,15 @@
|
||||
<CudaCompile Include="x15\cuda_x14_shabal512.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\whirlpool.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\x15.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\cuda_x15_whirlpool.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="groestl_functions_quad.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="bitslice_transformations_quad.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\whirlcoin.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x17\cuda_x17_haval512.cu">
|
||||
<Filter>Source Files\CUDA\x17</Filter>
|
||||
</CudaCompile>
|
||||
@ -431,4 +431,4 @@
|
||||
<Filter>Source Files\CUDA\x17</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
@ -2214,7 +2214,7 @@ static uint64_t table_skew(uint64_t val, int num) {
|
||||
}
|
||||
|
||||
__device__ __forceinline__
|
||||
static uint64_t ROUND_ELT(const uint64_t* __restrict__ sharedMemory, uint64_t in[8],
|
||||
static uint64_t ROUND_ELT(const uint64_t* sharedMemory, uint64_t* __restrict__ in,
|
||||
int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7)
|
||||
{
|
||||
uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7;
|
||||
@ -2242,7 +2242,7 @@ static uint64_t ROUND_ELT(const uint64_t* __restrict__ sharedMemory, uint64_t in
|
||||
#else
|
||||
|
||||
__device__ __forceinline__
|
||||
static uint64_t ROUND_ELT(const uint64_t* __restrict__ sharedMemory, uint64_t in[8],
|
||||
static uint64_t ROUND_ELT(const uint64_t* sharedMemory, uint64_t* __restrict__ in,
|
||||
int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7)
|
||||
{
|
||||
uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7;
|
||||
@ -2317,7 +2317,6 @@ void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHas
|
||||
for (int i=0; i<8; i++) {
|
||||
n[i] = c_PaddedMessage80[i]; // read data
|
||||
h[i] = 0; // read state
|
||||
//n[i] = xor1(n[i], h[i]);
|
||||
}
|
||||
|
||||
#pragma unroll 10
|
||||
@ -2330,7 +2329,6 @@ void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHas
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++) {
|
||||
state[i] = xor1(n[i],c_PaddedMessage80[i]);
|
||||
n[i]=0;
|
||||
}
|
||||
|
||||
/// round 2 ///////
|
||||
@ -2338,10 +2336,14 @@ void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHas
|
||||
n[0] = c_PaddedMessage80[8]; //read data
|
||||
n[1] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); //whirlpool
|
||||
n[2] = 0x0000000000000080; //whirlpool
|
||||
n[3] = 0;
|
||||
n[4] = 0;
|
||||
n[5] = 0;
|
||||
n[6] = 0;
|
||||
n[7] = 0x8002000000000000;
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0;i<8;i++) {
|
||||
for (int i=0; i<8; i++) {
|
||||
h[i] = state[i]; //read state
|
||||
n[i] = xor1(n[i],h[i]);
|
||||
}
|
||||
@ -2396,15 +2398,14 @@ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_ha
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
||||
uint32_t hashPosition = (nounce - startNounce) << 3;
|
||||
uint64_t hash[8], state[8], n[8], h[8] = { 0 };
|
||||
uint8_t i;
|
||||
|
||||
#pragma unroll 8
|
||||
for (i=0; i<8; i++) {
|
||||
for (i=0; i<8; i++)
|
||||
n[i] = hash[i] = g_hash[hashPosition + i];
|
||||
}
|
||||
|
||||
#pragma unroll 10
|
||||
for (i=0; i < 10; i++) {
|
||||
@ -2414,10 +2415,12 @@ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_ha
|
||||
}
|
||||
|
||||
#pragma unroll 8
|
||||
for (i=0; i<8; i++) {
|
||||
for (i=0; i<8; i++)
|
||||
state[i] = xor1(n[i], hash[i]);
|
||||
|
||||
#pragma unroll 6
|
||||
for (i=1; i<7; i++)
|
||||
n[i]=0;
|
||||
}
|
||||
|
||||
n[0] = 0x80;
|
||||
n[7] = 0x2000000000000;
|
||||
@ -2472,18 +2475,15 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
||||
|
||||
int hashPosition = nounce - startNounce;
|
||||
uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
|
||||
union {
|
||||
uint32_t h4[16];
|
||||
uint64_t h8[8];
|
||||
} hash;
|
||||
uint64_t *inpHash = (uint64_t*) &g_hash[8 * hashPosition];
|
||||
uint64_t h8[8];
|
||||
|
||||
#pragma unroll 16
|
||||
for (int i=0; i<16; i++) {
|
||||
hash.h4[i]= inpHash[i];
|
||||
#pragma unroll 8
|
||||
for (int i=0; i<8; i++) {
|
||||
h8[i] = inpHash[i];
|
||||
}
|
||||
|
||||
uint64_t state[8];
|
||||
@ -2492,9 +2492,8 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i<8; i++) {
|
||||
n[i] = hash.h8[i];
|
||||
n[i] = h8[i];
|
||||
h[i] = 0;
|
||||
n[i] = xor1(n[i], h[i]);
|
||||
}
|
||||
|
||||
#pragma unroll 10
|
||||
@ -2506,7 +2505,7 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i<8; i++) {
|
||||
state[i] = xor1(n[i], hash.h8[i]);
|
||||
state[i] = xor1(n[i], h8[i]);
|
||||
n[i]=0;
|
||||
}
|
||||
|
||||
@ -2516,7 +2515,7 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *
|
||||
#pragma unroll 8
|
||||
for (int i=0; i<8; i++) {
|
||||
h[i] = state[i];
|
||||
n[i] = xor1(n[i],h[i]);
|
||||
n[i] = xor1(n[i], h[i]);
|
||||
}
|
||||
|
||||
#pragma unroll 10
|
||||
@ -2535,22 +2534,7 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *
|
||||
state[6] = xor1(state[6], n[6]);
|
||||
state[7] = xor3(state[7], n[7], 0x2000000000000);
|
||||
|
||||
#pragma unroll 8
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
hash.h8[i] = state[i];
|
||||
|
||||
bool rc = true;
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (hash.h4[i] > pTarget[i]) {
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
if (hash.h4[i] < pTarget[i]) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool rc = (state[3] <= ((uint64_t*)pTarget)[3]);
|
||||
if (rc && resNounce[0] > nounce)
|
||||
resNounce[0] = nounce;
|
||||
}
|
||||
@ -2574,7 +2558,7 @@ extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode)
|
||||
#endif
|
||||
break;
|
||||
|
||||
case 1: /* old (whirlcoin?) */
|
||||
case 1: /* old whirlpool */
|
||||
cudaMemcpyToSymbol(InitVector_RC, old1_RC, sizeof(plain_RC), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob0Tox, old1_T0, sizeof(plain_T0), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob1Tox, old1_T1, (256*8), 0, cudaMemcpyHostToDevice);
|
||||
@ -2653,6 +2637,7 @@ void whirlpool512_setBlock_80(void *pdata, const void *ptarget)
|
||||
unsigned char PaddedMessage[128];
|
||||
memcpy(PaddedMessage, pdata, 80);
|
||||
memset(PaddedMessage+80, 0, 48);
|
||||
PaddedMessage[80] = 0x80; /* ending */
|
||||
cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
@ -29,24 +29,27 @@ extern "C" void wcoinhash(void *state, const void *input)
|
||||
{
|
||||
sph_whirlpool_context ctx_whirlpool;
|
||||
|
||||
uint32_t hash[16];
|
||||
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
||||
#define hashB hash+64
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
|
||||
// shavite 1
|
||||
sph_whirlpool1_init(&ctx_whirlpool);
|
||||
sph_whirlpool1(&ctx_whirlpool, input, 80);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, (void*) hash);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, hash);
|
||||
|
||||
sph_whirlpool1_init(&ctx_whirlpool);
|
||||
sph_whirlpool1(&ctx_whirlpool, (const void*) hash, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, (void*) hash);
|
||||
sph_whirlpool1(&ctx_whirlpool, hash, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, hashB);
|
||||
|
||||
sph_whirlpool1_init(&ctx_whirlpool);
|
||||
sph_whirlpool1(&ctx_whirlpool, (const void*) hash, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, (void*) hash);
|
||||
sph_whirlpool1(&ctx_whirlpool, hashB, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, hash);
|
||||
|
||||
sph_whirlpool1_init(&ctx_whirlpool);
|
||||
sph_whirlpool1(&ctx_whirlpool, (const void*) hash, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, (void*) hash);
|
||||
sph_whirlpool1(&ctx_whirlpool, hash, 64);
|
||||
sph_whirlpool1_close(&ctx_whirlpool, hash);
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
@ -68,7 +71,7 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
// Konstanten kopieren, Speicher belegen
|
||||
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
|
||||
x15_whirlpool_cpu_init(thr_id, throughput,1);
|
||||
x15_whirlpool_cpu_init(thr_id, throughput, 1 /* old whirlpool */);
|
||||
|
||||
init[thr_id] = true;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user