mirror of
https://github.com/GOSTSec/ccminer
synced 2025-02-05 19:34:18 +00:00
scrypt: remove some unused functions/algo checks
there was remains of blake algo
This commit is contained in:
parent
1ad34dc13d
commit
4f379803d3
@ -127,7 +127,7 @@ JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
|
|||||||
|
|
||||||
# This kernel need also an older SM to be able to autotune kernels
|
# This kernel need also an older SM to be able to autotune kernels
|
||||||
scrypt/salsa_kernel.o: scrypt/salsa_kernel.cu
|
scrypt/salsa_kernel.o: scrypt/salsa_kernel.cu
|
||||||
$(NVCC) $(nvcc_FLAGS) -gencode=arch=compute_20,code=\"sm_21,compute_20\" --maxrregcount=80 -o $@ -c $<
|
$(NVCC) -I . -gencode=arch=compute_20,code=\"sm_21,compute_20\" -o $@ -c $<
|
||||||
|
|
||||||
skein.o: skein.cu
|
skein.o: skein.cu
|
||||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
|
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
|
||||||
|
@ -1,10 +1,7 @@
|
|||||||
|
|
||||||
//
|
//
|
||||||
// Contains the autotuning logic and some utility functions.
|
// Contains the autotuning logic and some utility functions.
|
||||||
// Note that all CUDA kernels have been moved to other .cu files
|
// Note that all CUDA kernels have been moved to other .cu files
|
||||||
//
|
//
|
||||||
// NOTE: compile this .cu module for compute_20,sm_21 with --maxrregcount=64
|
|
||||||
//
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -152,13 +149,6 @@ std::map<int, uint32_t *> context_hash[2];
|
|||||||
|
|
||||||
int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurrent, int &wpb);
|
int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurrent, int &wpb);
|
||||||
|
|
||||||
void cuda_shutdown(int thr_id)
|
|
||||||
{
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
cudaDeviceReset();
|
|
||||||
cudaThreadExit();
|
|
||||||
}
|
|
||||||
|
|
||||||
int cuda_throughput(int thr_id)
|
int cuda_throughput(int thr_id)
|
||||||
{
|
{
|
||||||
int GRID_BLOCKS, WARPS_PER_BLOCK;
|
int GRID_BLOCKS, WARPS_PER_BLOCK;
|
||||||
@ -215,7 +205,7 @@ int cuda_throughput(int thr_id)
|
|||||||
checkCudaErrors(cudaMalloc((void **) &tmp, state_size)); context_hash[1][thr_id] = tmp;
|
checkCudaErrors(cudaMalloc((void **) &tmp, state_size)); context_hash[1][thr_id] = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IS_SCRYPT_JANE())
|
else /* if (IS_SCRYPT_JANE()) */
|
||||||
{
|
{
|
||||||
// allocate pinned host memory for scrypt_core input/output
|
// allocate pinned host memory for scrypt_core input/output
|
||||||
checkCudaErrors(cudaHostAlloc((void **) &tmp, mem_size, cudaHostAllocDefault)); context_X[0][thr_id] = tmp;
|
checkCudaErrors(cudaHostAlloc((void **) &tmp, mem_size, cudaHostAllocDefault)); context_X[0][thr_id] = tmp;
|
||||||
@ -502,9 +492,7 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
|
|||||||
}
|
}
|
||||||
MAXWARPS[thr_id] = warp;
|
MAXWARPS[thr_id] = warp;
|
||||||
}
|
}
|
||||||
if (IS_SCRYPT() || IS_SCRYPT_JANE()) {
|
kernel->set_scratchbuf_constants(MAXWARPS[thr_id], h_V[thr_id]);
|
||||||
kernel->set_scratchbuf_constants(MAXWARPS[thr_id], h_V[thr_id]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (validate_config(device_config[thr_id], optimal_blocks, WARPS_PER_BLOCK))
|
if (validate_config(device_config[thr_id], optimal_blocks, WARPS_PER_BLOCK))
|
||||||
{
|
{
|
||||||
@ -531,28 +519,16 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
|
|||||||
|
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
uint32_t *d_idata = NULL, *d_odata = NULL;
|
uint32_t *d_idata = NULL, *d_odata = NULL;
|
||||||
if (IS_SCRYPT() || IS_SCRYPT_JANE()) {
|
unsigned int mem_size = MAXWARPS[thr_id] * WU_PER_WARP * sizeof(uint32_t) * 32;
|
||||||
unsigned int mem_size = MAXWARPS[thr_id] * WU_PER_WARP * sizeof(uint32_t) * 32;
|
checkCudaErrors(cudaMalloc((void **) &d_idata, mem_size));
|
||||||
checkCudaErrors(cudaMalloc((void **) &d_idata, mem_size));
|
checkCudaErrors(cudaMalloc((void **) &d_odata, mem_size));
|
||||||
checkCudaErrors(cudaMalloc((void **) &d_odata, mem_size));
|
|
||||||
|
// pre-initialize some device memory
|
||||||
|
uint32_t *h_idata = (uint32_t*)malloc(mem_size);
|
||||||
|
for (unsigned int i=0; i < mem_size/sizeof(uint32_t); ++i) h_idata[i] = i*2654435761UL; // knuth's method
|
||||||
|
checkCudaErrors(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));
|
||||||
|
free(h_idata);
|
||||||
|
|
||||||
// pre-initialize some device memory
|
|
||||||
uint32_t *h_idata = (uint32_t*)malloc(mem_size);
|
|
||||||
for (unsigned int i=0; i < mem_size/sizeof(uint32_t); ++i) h_idata[i] = i*2654435761UL; // knuth's method
|
|
||||||
checkCudaErrors(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));
|
|
||||||
free(h_idata);
|
|
||||||
}
|
|
||||||
#if 0
|
|
||||||
else if (opt_algo == ALGO_KECCAK) {
|
|
||||||
uint32_t pdata[20] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
|
||||||
uint32_t ptarget[8] = {0,0,0,0,0,0,0,0};
|
|
||||||
kernel->prepare_keccak256(thr_id, pdata, ptarget);
|
|
||||||
} else if (opt_algo == ALGO_BLAKE) {
|
|
||||||
uint32_t pdata[20] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
|
||||||
uint32_t ptarget[8] = {0,0,0,0,0,0,0,0};
|
|
||||||
kernel->prepare_blake256(thr_id, pdata, ptarget);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
double best_hash_sec = 0.0;
|
double best_hash_sec = 0.0;
|
||||||
int best_wpb = 0;
|
int best_wpb = 0;
|
||||||
|
|
||||||
@ -592,11 +568,10 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
|
|||||||
int repeat = 0;
|
int repeat = 0;
|
||||||
do // average several measurements for better exactness
|
do // average several measurements for better exactness
|
||||||
{
|
{
|
||||||
if (IS_SCRYPT() || IS_SCRYPT_JANE())
|
kernel->run_kernel(
|
||||||
kernel->run_kernel(
|
grid, threads, WARPS_PER_BLOCK, thr_id, NULL, d_idata, d_odata, N,
|
||||||
grid, threads, WARPS_PER_BLOCK, thr_id, NULL,
|
LOOKUP_GAP, device_interactive[thr_id], true, device_texturecache[thr_id]
|
||||||
d_idata, d_odata, N, LOOKUP_GAP, device_interactive[thr_id], true, device_texturecache[thr_id]
|
);
|
||||||
);
|
|
||||||
if(cudaDeviceSynchronize() != cudaSuccess)
|
if(cudaDeviceSynchronize() != cudaSuccess)
|
||||||
break;
|
break;
|
||||||
++repeat;
|
++repeat;
|
||||||
@ -609,8 +584,11 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
|
|||||||
|
|
||||||
// for scrypt: in interactive mode only find launch configs where kernel launch times are short enough
|
// for scrypt: in interactive mode only find launch configs where kernel launch times are short enough
|
||||||
// TODO: instead we could reduce the batchsize parameter to meet the launch time requirement.
|
// TODO: instead we could reduce the batchsize parameter to meet the launch time requirement.
|
||||||
if (IS_SCRYPT() && device_interactive[thr_id] && GRID_BLOCKS > 2*props.multiProcessorCount && tdelta > 1.0/30)
|
if (IS_SCRYPT() && device_interactive[thr_id]
|
||||||
|
&& GRID_BLOCKS > 2*props.multiProcessorCount && tdelta > 1.0/30)
|
||||||
|
{
|
||||||
if (WARPS_PER_BLOCK == 1) goto skip; else goto skip2;
|
if (WARPS_PER_BLOCK == 1) goto skip; else goto skip2;
|
||||||
|
}
|
||||||
|
|
||||||
hash_sec = (double)WU_PER_LAUNCH / tdelta;
|
hash_sec = (double)WU_PER_LAUNCH / tdelta;
|
||||||
Hash[WARPS_PER_BLOCK] = hash_sec;
|
Hash[WARPS_PER_BLOCK] = hash_sec;
|
||||||
@ -668,10 +646,8 @@ skip2: ;
|
|||||||
skip: ;
|
skip: ;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_SCRYPT() || IS_SCRYPT_JANE()) {
|
checkCudaErrors(cudaFree(d_odata));
|
||||||
checkCudaErrors(cudaFree(d_odata));
|
checkCudaErrors(cudaFree(d_idata));
|
||||||
checkCudaErrors(cudaFree(d_idata));
|
|
||||||
}
|
|
||||||
|
|
||||||
WARPS_PER_BLOCK = best_wpb;
|
WARPS_PER_BLOCK = best_wpb;
|
||||||
applog(LOG_INFO, "GPU #%d: %7.2f hash/s with configuration %c%dx%d", device_map[thr_id], best_hash_sec, kernel->get_identifier(), optimal_blocks, WARPS_PER_BLOCK);
|
applog(LOG_INFO, "GPU #%d: %7.2f hash/s with configuration %c%dx%d", device_map[thr_id], best_hash_sec, kernel->get_identifier(), optimal_blocks, WARPS_PER_BLOCK);
|
||||||
@ -775,9 +751,7 @@ skip: ;
|
|||||||
}
|
}
|
||||||
|
|
||||||
// update pointers to scratch buffer in constant memory after reallocation
|
// update pointers to scratch buffer in constant memory after reallocation
|
||||||
if (IS_SCRYPT() || IS_SCRYPT_JANE()) {
|
kernel->set_scratchbuf_constants(MAXWARPS[thr_id], h_V[thr_id]);
|
||||||
kernel->set_scratchbuf_constants(MAXWARPS[thr_id], h_V[thr_id]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -48,10 +48,7 @@ static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_S
|
|||||||
static __inline bool IS_SCRYPT_JANE() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT_JANE); }
|
static __inline bool IS_SCRYPT_JANE() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT_JANE); }
|
||||||
|
|
||||||
// CUDA externals
|
// CUDA externals
|
||||||
extern int cuda_num_devices();
|
|
||||||
extern void cuda_shutdown(int thr_id);
|
|
||||||
extern int cuda_throughput(int thr_id);
|
extern int cuda_throughput(int thr_id);
|
||||||
|
|
||||||
extern uint32_t *cuda_transferbuffer(int thr_id, int stream);
|
extern uint32_t *cuda_transferbuffer(int thr_id, int stream);
|
||||||
extern uint32_t *cuda_hashbuffer(int thr_id, int stream);
|
extern uint32_t *cuda_hashbuffer(int thr_id, int stream);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user