Browse Source

1.7.1 release

set schedule flags to reduce linux cpu usage without MyStreamSynchronize()
2upstream
Tanguy Pruvot 8 years ago
parent
commit
a237601747
  1. 11
      Algo256/blake256.cu
  2. 6
      JHA/jackpotcoin.cu
  3. 6
      groestlcoin.cpp
  4. 6
      heavy/heavy.cu
  5. 6
      myriadgroestl.cpp
  6. 7
      pentablake.cu
  7. 6
      quark/quarkcoin.cu
  8. 5
      qubit/deep.cu
  9. 6
      qubit/qubit.cu
  10. 8
      res/ccminer.rc
  11. 8
      skein.cu
  12. 6
      skein2.cpp
  13. 6
      x11/c11.cu
  14. 6
      x13/x13.cu
  15. 6
      x15/whirlpoolx.cu
  16. 6
      x15/x14.cu
  17. 5
      x17/x17.cu
  18. 6
      zr5.cu

11
Algo256/blake256.cu

@ -256,7 +256,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui @@ -256,7 +256,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui
return result;
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
MyStreamSynchronize(NULL, 0, thr_id);
//MyStreamSynchronize(NULL, 0, thr_id);
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
result = h_resNonce[thr_id][0];
for (int n=0; n < (NBN-1); n++)
@ -343,7 +343,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c @@ -343,7 +343,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
return result;
blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
MyStreamSynchronize(NULL, 0, thr_id);
//MyStreamSynchronize(NULL, 0, thr_id);
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
result = h_resNonce[thr_id][0];
for (int n=0; n < (NBN-1); n++)
@ -413,7 +413,12 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non @@ -413,7 +413,12 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
CUDA_LOG_ERROR();
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage (linux)
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t));
cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t));

6
JHA/jackpotcoin.cu

@ -94,6 +94,12 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc @@ -94,6 +94,12 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cuda_get_arch(thr_id);
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");

6
groestlcoin.cpp

@ -44,6 +44,12 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi @@ -44,6 +44,12 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_LOG_ERROR();
groestlcoin_cpu_init(thr_id, throughput);
init[thr_id] = true;

6
heavy/heavy.cu

@ -172,6 +172,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l @@ -172,6 +172,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
hefty_cpu_init(thr_id, throughput);
sha256_cpu_init(thr_id, throughput);

6
myriadgroestl.cpp

@ -49,6 +49,12 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned @@ -49,6 +49,12 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
if(!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
myriadgroestl_cpu_init(thr_id, throughput);
init[thr_id] = true;
}

7
pentablake.cu

@ -63,7 +63,12 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n @@ -63,7 +63,12 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
CUDA_LOG_ERROR();
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

6
quark/quarkcoin.cu

@ -139,6 +139,12 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, @@ -139,6 +139,12 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaGetLastError();
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

5
qubit/deep.cu

@ -62,6 +62,11 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, @@ -62,6 +62,11 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
CUDA_LOG_ERROR();
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

6
qubit/qubit.cu

@ -73,6 +73,12 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, @@ -73,6 +73,12 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
qubit_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);

8
res/ccminer.rc

@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico" @@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico"
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,7,0,0
PRODUCTVERSION 1,7,0,0
FILEVERSION 1,7,1,0
PRODUCTVERSION 1,7,1,0
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x21L
@ -76,10 +76,10 @@ BEGIN @@ -76,10 +76,10 @@ BEGIN
BEGIN
BLOCK "040904e4"
BEGIN
VALUE "FileVersion", "1.7"
VALUE "FileVersion", "1.7.1"
VALUE "LegalCopyright", "Copyright (C) 2015"
VALUE "ProductName", "ccminer"
VALUE "ProductVersion", "1.7"
VALUE "ProductVersion", "1.7.1"
END
END
BLOCK "VarFileInfo"

8
skein.cu

@ -22,7 +22,7 @@ extern void skeincoin_free(int thr_id); @@ -22,7 +22,7 @@ extern void skeincoin_free(int thr_id);
extern void skeincoin_setBlock_80(int thr_id, void *pdata);
extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
static __device__ __constant__ uint32_t sha256_hashTable[] = {
static __device__ uint32_t sha256_hashTable[] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
@ -372,6 +372,12 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no @@ -372,6 +372,12 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
if (sm5) {
skeincoin_init(thr_id);

6
skein2.cpp

@ -53,6 +53,12 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned @@ -53,6 +53,12 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);

6
x11/c11.cu

@ -119,6 +119,12 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u @@ -119,6 +119,12 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);

6
x13/x13.cu

@ -127,6 +127,12 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u @@ -127,6 +127,12 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

6
x15/whirlpoolx.cu

@ -53,6 +53,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc @@ -53,6 +53,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1);

6
x15/x14.cu

@ -141,6 +141,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, @@ -141,6 +141,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

5
x17/x17.cu

@ -169,6 +169,11 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u @@ -169,6 +169,11 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

6
zr5.cu

@ -351,6 +351,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, @@ -351,6 +351,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
// constants
cudaMemcpyToSymbol(c_permut, permut, 24*4, 0, cudaMemcpyHostToDevice);

Loading…
Cancel
Save