Patch summary (free text ahead of the first "diff --git" header):

  * Makefile.am: add explicit nvcc rules for four m7 CUDA objects, each built
    with -O2 and a per-file register cap (--maxrregcount):
      m7/cuda_m7_sha256.o     80
      m7/m7_keccak512.o       80
      m7/cuda_m7_whirlpool.o  64
      m7/cuda_mul.o           32
  * m7/cuda_ripemd160.cu: annotate the m7_ripemd160_gpu_hash_120 kernel with
    __launch_bounds__(256, 4), merge the local array declarations into one
    statement (the in[16] declaration is dropped; presumably it was unused,
    confirm against the full kernel body), and tidy spacing in the for header.

NOTE(review): this patch was rebuilt from a copy whose newlines and tabs had
been collapsed to single spaces. Line boundaries follow the hunk headers'
line counts; recipe lines use a hard TAB as make requires. Leading whitespace
in the .cu hunks (and alignment inside macro lines) is a best guess, so if
the context does not match, apply with whitespace-insensitive matching
(e.g. "git apply --ignore-whitespace" or "patch -l").

diff --git a/Makefile.am b/Makefile.am
index 21918a0..c39d821 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -82,6 +82,18 @@ x17/cuda_x17_sha512.o: x17/cuda_x17_sha512.cu
 m7/cuda_tiger192.o: m7/cuda_tiger192.cu
 	$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=64 -o $@ -c $<
 
+m7/cuda_m7_sha256.o: m7/cuda_m7_sha256.cu
+	$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=80 -o $@ -c $<
+
+m7/m7_keccak512.o: m7/m7_keccak512.cu
+	$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=80 -o $@ -c $<
+
+m7/cuda_m7_whirlpool.o: m7/cuda_m7_whirlpool.cu
+	$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=64 -o $@ -c $<
+
+m7/cuda_mul.o: m7/cuda_mul.cu
+	$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=32 -o $@ -c $<
+
 # ABI requiring code modules
 quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
 	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" --maxrregcount=80 -o $@ -c $<
diff --git a/m7/cuda_ripemd160.cu b/m7/cuda_ripemd160.cu
index 240ef14..20b67f3 100644
--- a/m7/cuda_ripemd160.cu
+++ b/m7/cuda_ripemd160.cu
@@ -282,7 +282,7 @@ static const uint32_t IV[5] = {
 		(h)[0] = tmp; \
 	}
 
-__global__
+__global__ __launch_bounds__(256, 4)
 void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -307,10 +307,9 @@ void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outp
 #define F4(x, y, z) xandx(z,x,y)
 #define F5(x, y, z) xornt64(x,y,z)
 
-		uint32_t in2[16],in3[16];
-		uint32_t in[16],buf[5];
+		uint32_t buf[5], in2[16], in3[16];
 		#pragma unroll 16
-		for (int i=0;i<16;i++) {
+		for (int i=0; i<16; i++) {
 			if ((i+16) < 29)
 				in2[i] = c_PaddedMessage80[i+16];
 			else if ((i+16)==29)