
1.7.1 release

set schedule flags to reduce linux cpu usage without MyStreamSynchronize()
upstream
Tanguy Pruvot 9 years ago
commit a237601747
18 changed files (changed line counts in parentheses):

 1. Algo256/blake256.cu (9)
 2. JHA/jackpotcoin.cu (6)
 3. groestlcoin.cpp (6)
 4. heavy/heavy.cu (6)
 5. myriadgroestl.cpp (6)
 6. pentablake.cu (5)
 7. quark/quarkcoin.cu (6)
 8. qubit/deep.cu (5)
 9. qubit/qubit.cu (6)
10. res/ccminer.rc (8)
11. skein.cu (8)
12. skein2.cpp (6)
13. x11/c11.cu (6)
14. x13/x13.cu (6)
15. x15/whirlpoolx.cu (6)
16. x15/x14.cu (6)
17. x17/x17.cu (5)
18. zr5.cu (6)
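
Every .cu/.cpp hunk below applies the same pattern: when the user has not forced a schedule mode (opt_cudaschedule == -1) and only one host thread drives the GPU, the init path resets the device and sets the blocking-sync scheduling flag before any other runtime call creates the CUDA context. A minimal standalone sketch of that pattern, assuming only the CUDA runtime API (the error reporting here is illustrative; ccminer uses its CUDA_LOG_ERROR macro instead):

#include <cuda_runtime.h>
#include <stdio.h>

static void set_blocking_schedule(int dev_id)
{
	cudaSetDevice(dev_id);
	// Scheduling flags only apply to a fresh context, hence the reset first.
	cudaDeviceReset();
	// Block the host thread on synchronizations instead of spin-waiting,
	// so a waiting miner thread no longer pins a CPU core at 100% on Linux.
	cudaError_t err = cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
	if (err != cudaSuccess)
		fprintf(stderr, "cudaSetDeviceFlags: %s\n", cudaGetErrorString(err));
}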

Algo256/blake256.cu

@@ -256,7 +256,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui
 		return result;
 	blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
-	MyStreamSynchronize(NULL, 0, thr_id);
+	//MyStreamSynchronize(NULL, 0, thr_id);
 	if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		result = h_resNonce[thr_id][0];
 		for (int n=0; n < (NBN-1); n++)
@@ -343,7 +343,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
 		return result;
 	blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
-	MyStreamSynchronize(NULL, 0, thr_id);
+	//MyStreamSynchronize(NULL, 0, thr_id);
 	if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		result = h_resNonce[thr_id][0];
 		for (int n=0; n < (NBN-1); n++)
@@ -413,7 +413,12 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 	if (!init[thr_id]) {
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage (linux)
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			CUDA_LOG_ERROR();
+		}
 		cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t));
 		cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t));
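
With blocking sync enabled, the explicit MyStreamSynchronize() polling call above becomes redundant: the cudaMemcpy that follows the kernel launch already performs an implicit synchronization on the default stream, and that wait now blocks instead of busy-polling. A short sketch of the idea (kernel and buffer names are placeholders, not ccminer code):

	some_kernel<<<grid, block>>>(d_result);  // asynchronous launch
	// implicit sync: the copy waits for the kernel, yielding the CPU
	cudaMemcpy(h_result, d_result, bytes, cudaMemcpyDeviceToHost);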

JHA/jackpotcoin.cu

@@ -94,6 +94,12 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
 	if (!init[thr_id])
 	{
 		cudaSetDevice(dev_id);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		cuda_get_arch(thr_id);
 		if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
 			gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");

groestlcoin.cpp

@@ -44,6 +44,12 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		CUDA_LOG_ERROR();
 		groestlcoin_cpu_init(thr_id, throughput);
 		init[thr_id] = true;

heavy/heavy.cu

@@ -172,6 +172,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		hefty_cpu_init(thr_id, throughput);
 		sha256_cpu_init(thr_id, throughput);

myriadgroestl.cpp

@@ -49,6 +49,12 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
 	if(!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		myriadgroestl_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}

pentablake.cu

@@ -63,7 +63,12 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
 	if (!init[thr_id]) {
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			CUDA_LOG_ERROR();
+		}
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

quark/quarkcoin.cu

@@ -139,6 +139,12 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
 	if (!init[thr_id])
 	{
 		cudaSetDevice(dev_id);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		cudaGetLastError();
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

qubit/deep.cu

@@ -62,6 +62,11 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		}
 		CUDA_LOG_ERROR();
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

qubit/qubit.cu

@@ -73,6 +73,12 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		qubit_luffa512_cpu_init(thr_id, throughput);
 		x11_cubehash512_cpu_init(thr_id, throughput);

res/ccminer.rc

@@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico"
 //
 VS_VERSION_INFO VERSIONINFO
-FILEVERSION 1,7,0,0
-PRODUCTVERSION 1,7,0,0
+FILEVERSION 1,7,1,0
+PRODUCTVERSION 1,7,1,0
 FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
 FILEFLAGS 0x21L
@@ -76,10 +76,10 @@ BEGIN
 	BEGIN
 		BLOCK "040904e4"
 		BEGIN
-			VALUE "FileVersion", "1.7"
+			VALUE "FileVersion", "1.7.1"
 			VALUE "LegalCopyright", "Copyright (C) 2015"
 			VALUE "ProductName", "ccminer"
-			VALUE "ProductVersion", "1.7"
+			VALUE "ProductVersion", "1.7.1"
 		END
 	END
 	BLOCK "VarFileInfo"

skein.cu

@@ -22,7 +22,7 @@ extern void skeincoin_free(int thr_id);
 extern void skeincoin_setBlock_80(int thr_id, void *pdata);
 extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
-static __device__ __constant__ uint32_t sha256_hashTable[] = {
+static __device__ uint32_t sha256_hashTable[] = {
 	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
 };
@@ -372,6 +372,12 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		if (sm5) {
 			skeincoin_init(thr_id);

skein2.cpp

@@ -53,6 +53,12 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
 	if (!init[thr_id])
 	{
 		cudaSetDevice(dev_id);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);

x11/c11.cu

@@ -119,6 +119,12 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_bmw512_cpu_init(thr_id, throughput);

x13/x13.cu

@@ -127,6 +127,12 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);

x15/whirlpoolx.cu

@@ -53,6 +53,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
 	if (!init[thr_id]) {
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1);

x15/x14.cu

@@ -141,6 +141,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);

x17/x17.cu

@@ -169,6 +169,11 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);

zr5.cu

@@ -351,6 +351,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
 		// constants
 		cudaMemcpyToSymbol(c_permut, permut, 24*4, 0, cudaMemcpyHostToDevice);
