mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 08:54:38 +00:00
m7: fine-tune threads and max registers
TODO: on the 750 Ti, this version of ripemd seems very slow: it could run in about 3 ms but actually takes 25 ms. Whirlpool can also be optimized, using the latest djm code.
This commit is contained in:
parent
fcd381cda2
commit
c9ce05a82b
12
Makefile.am
12
Makefile.am
@ -82,6 +82,18 @@ x17/cuda_x17_sha512.o: x17/cuda_x17_sha512.cu
|
||||
m7/cuda_tiger192.o: m7/cuda_tiger192.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=64 -o $@ -c $<
|
||||
|
||||
m7/cuda_m7_sha256.o: m7/cuda_m7_sha256.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=80 -o $@ -c $<
|
||||
|
||||
m7/m7_keccak512.o: m7/m7_keccak512.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=80 -o $@ -c $<
|
||||
|
||||
m7/cuda_m7_whirlpool.o: m7/cuda_m7_whirlpool.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=64 -o $@ -c $<
|
||||
|
||||
m7/cuda_mul.o: m7/cuda_mul.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=32 -o $@ -c $<
|
||||
|
||||
# ABI requiring code modules
|
||||
quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
|
||||
$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" --maxrregcount=80 -o $@ -c $<
|
||||
|
@ -282,7 +282,7 @@ static const uint32_t IV[5] = {
|
||||
(h)[0] = tmp; \
|
||||
}
|
||||
|
||||
__global__
|
||||
__global__ __launch_bounds__(256, 4)
|
||||
void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
|
||||
{
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
@ -307,10 +307,9 @@ void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outp
|
||||
#define F4(x, y, z) xandx(z,x,y)
|
||||
#define F5(x, y, z) xornt64(x,y,z)
|
||||
|
||||
uint32_t in2[16],in3[16];
|
||||
uint32_t in[16],buf[5];
|
||||
uint32_t buf[5], in2[16], in3[16];
|
||||
#pragma unroll 16
|
||||
for (int i=0;i<16;i++) {
|
||||
for (int i=0; i<16; i++) {
|
||||
if ((i+16) < 29)
|
||||
in2[i] = c_PaddedMessage80[i+16];
|
||||
else if ((i+16)==29)
|
||||
|
Loading…
x
Reference in New Issue
Block a user