diff --git a/api.cpp b/api.cpp index 44320de..f15f117 100644 --- a/api.cpp +++ b/api.cpp @@ -101,8 +101,7 @@ extern int num_cpus; extern float cpu_temp(int); extern uint32_t cpu_clock(int); // cuda.cpp -int cuda_num_devices(); -int cuda_gpu_clocks(struct cgpu_info *gpu); +extern int cuda_gpu_clocks(struct cgpu_info *gpu); char driver_version[32] = { 0 }; diff --git a/cuda.cpp b/cuda.cpp index 64dd2ac..4c8d748 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -23,6 +23,11 @@ #include "cuda_runtime.h" +#ifdef __cplusplus +/* miner.h functions are declared in C type, not C++ */ +extern "C" { +#endif + // CUDA Devices on the System int cuda_num_devices() { @@ -150,49 +155,6 @@ uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount) return throughput; } -// Zeitsynchronisations-Routine von cudaminer mit CPU sleep -// Note: if you disable all of these calls, CPU usage will hit 100% -typedef struct { double value[8]; } tsumarray; -cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) -{ - cudaError_t result = cudaSuccess; - if (situation >= 0) - { - static std::map tsum; - - double a = 0.95, b = 0.05; - if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence - - double tsync = 0.0; - double tsleep = 0.95 * tsum[situation].value[thr_id]; - if (cudaStreamQuery(stream) == cudaErrorNotReady) - { - usleep((useconds_t)(1e6*tsleep)); - struct timeval tv_start, tv_end; - gettimeofday(&tv_start, NULL); - result = cudaStreamSynchronize(stream); - gettimeofday(&tv_end, NULL); - tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec); - } - if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync); - } - else - result = cudaStreamSynchronize(stream); - return result; -} - -int cuda_gpu_clocks(struct cgpu_info *gpu) -{ - cudaDeviceProp props; - if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) { - gpu->gpu_clock = props.clockRate; - gpu->gpu_memclock = props.memoryClockRate; - gpu->gpu_mem = props.totalGlobalMem; - return 0; - } - return -1; -} - // if we use 2 threads on the same gpu, we need to reinit the threads void cuda_reset_device(int thr_id, bool *init) { @@ -228,6 +190,53 @@ int cuda_available_memory(int thr_id) return (int) (mfree / (1024 * 1024)); } +#ifdef __cplusplus +} /* extern "C" */ +#endif + +int cuda_gpu_clocks(struct cgpu_info *gpu) +{ + cudaDeviceProp props; + if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) { + gpu->gpu_clock = props.clockRate; + gpu->gpu_memclock = props.memoryClockRate; + gpu->gpu_mem = props.totalGlobalMem; + return 0; + } + return -1; +} + +// Zeitsynchronisations-Routine von cudaminer mit CPU sleep +// Note: if you disable all of these calls, CPU usage will hit 100% +typedef struct { double value[8]; } tsumarray; +cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) +{ + cudaError_t result = cudaSuccess; + if (situation >= 0) + { + static std::map tsum; + + double a = 0.95, b = 0.05; + if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence + + double tsync = 0.0; + double tsleep = 0.95 * tsum[situation].value[thr_id]; + if (cudaStreamQuery(stream) == cudaErrorNotReady) + { + usleep((useconds_t)(1e6*tsleep)); + struct timeval tv_start, tv_end; + gettimeofday(&tv_start, NULL); + result = cudaStreamSynchronize(stream); + gettimeofday(&tv_end, NULL); + tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec); + } + if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync); + } + else + result = cudaStreamSynchronize(stream); + return result; +} + void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func) { struct cgpu_info *gpu = &thr_info[thr_id].gpu; diff --git a/miner.h b/miner.h index 4058b83..a4dc10b 100644 --- a/miner.h +++ b/miner.h @@ -39,9 +39,6 @@ extern "C" { # include # define alloca _alloca # elif !defined HAVE_ALLOCA -# ifdef __cplusplus -extern "C" -# endif void *alloca (size_t); # endif #endif @@ -483,7 +480,7 @@ extern long device_sm[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS]; extern int opt_cudaschedule; -// decl. from cuda.cpp +// cuda.cpp int cuda_num_devices(); void cuda_devicenames(); void cuda_reset_device(int thr_id, bool *init); diff --git a/nvml.cpp b/nvml.cpp index 417a86a..2691057 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -25,9 +25,6 @@ #include "nvml.h" #include "cuda_runtime.h" -// cuda.cpp -int cuda_num_devices(); - #ifdef USE_WRAPNVML extern nvml_handle *hnvml;