1.7.1 release

set schedule flags to reduce linux cpu usage without MyStreamSynchronize()
This commit is contained in:
Tanguy Pruvot 2016-01-26 20:38:17 +01:00
parent 2e16d00f63
commit a237601747
18 changed files with 107 additions and 9 deletions

View File

@ -256,7 +256,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui
return result;
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
MyStreamSynchronize(NULL, 0, thr_id);
//MyStreamSynchronize(NULL, 0, thr_id);
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
result = h_resNonce[thr_id][0];
for (int n=0; n < (NBN-1); n++)
@ -343,7 +343,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
return result;
blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
MyStreamSynchronize(NULL, 0, thr_id);
//MyStreamSynchronize(NULL, 0, thr_id);
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
result = h_resNonce[thr_id][0];
for (int n=0; n < (NBN-1); n++)
@ -413,7 +413,12 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
CUDA_LOG_ERROR();
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage (linux)
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t));
cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t));

View File

@ -94,6 +94,12 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cuda_get_arch(thr_id);
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");

View File

@ -44,6 +44,12 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_LOG_ERROR();
groestlcoin_cpu_init(thr_id, throughput);
init[thr_id] = true;

View File

@ -172,6 +172,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
hefty_cpu_init(thr_id, throughput);
sha256_cpu_init(thr_id, throughput);

View File

@ -49,6 +49,12 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
if(!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
myriadgroestl_cpu_init(thr_id, throughput);
init[thr_id] = true;
}

View File

@ -63,7 +63,12 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
CUDA_LOG_ERROR();
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

View File

@ -139,6 +139,12 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaGetLastError();
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

View File

@ -62,6 +62,11 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
CUDA_LOG_ERROR();
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

View File

@ -73,6 +73,12 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
qubit_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);

View File

@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico"
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,7,0,0
PRODUCTVERSION 1,7,0,0
FILEVERSION 1,7,1,0
PRODUCTVERSION 1,7,1,0
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x21L
@ -76,10 +76,10 @@ BEGIN
BEGIN
BLOCK "040904e4"
BEGIN
VALUE "FileVersion", "1.7"
VALUE "FileVersion", "1.7.1"
VALUE "LegalCopyright", "Copyright (C) 2015"
VALUE "ProductName", "ccminer"
VALUE "ProductVersion", "1.7"
VALUE "ProductVersion", "1.7.1"
END
END
BLOCK "VarFileInfo"

View File

@ -22,7 +22,7 @@ extern void skeincoin_free(int thr_id);
extern void skeincoin_setBlock_80(int thr_id, void *pdata);
extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
static __device__ __constant__ uint32_t sha256_hashTable[] = {
static __device__ uint32_t sha256_hashTable[] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
@ -372,6 +372,12 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
if (sm5) {
skeincoin_init(thr_id);

View File

@ -53,6 +53,12 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);

View File

@ -119,6 +119,12 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);

View File

@ -127,6 +127,12 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

View File

@ -53,6 +53,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1);

View File

@ -141,6 +141,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

View File

@ -169,6 +169,11 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

6
zr5.cu
View File

@ -351,6 +351,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
// constants
cudaMemcpyToSymbol(c_permut, permut, 24*4, 0, cudaMemcpyHostToDevice);