// ccminer/cuda.cpp

#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <unistd.h>
#include <map>

#ifndef _WIN32
#include <unistd.h>
#endif

// include thrust
#ifndef __cplusplus
#include <thrust/version.h>
#include <thrust/remove.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#else
#include <ctype.h>
#endif

#include "miner.h"

#include "cuda_runtime.h"

// Number of CUDA devices on the system.
//
// First checks that the installed driver reports at least CUDA 5.5, then asks
// the runtime for the device count. Every failure is logged through applog()
// and ends the process with exit(1), so the function only returns on success.
int cuda_num_devices()
{
	int version = 0;
	cudaError_t err = cudaDriverGetVersion(&version);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
		exit(1);
	}

	// The driver version is encoded as 1000 * major + 10 * minor (5050 == 5.5),
	// so the minor part must be divided by 10, as the deviceQuery sample does.
	const int ver_major = version / 1000;
	const int ver_minor = (version % 100) / 10;
	if (ver_major < 5 || (ver_major == 5 && ver_minor < 5))
	{
		applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
		exit(1);
	}

	int GPU_N = 0;
	err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}
	return GPU_N;
}

void cuda_devicenames()
{
	cudaError_t err;
	int GPU_N;
	err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}

	GPU_N = min(MAX_GPUS, GPU_N);
	for (int i=0; i < GPU_N; i++)
	{
		cudaDeviceProp props;
		cudaGetDeviceProperties(&props, device_map[i]);

		device_name[i] = strdup(props.name);
		device_sm[i] = (props.major * 100 + props.minor * 10);
	}
}

void cuda_print_devices()
{
	int ngpus = cuda_num_devices();
	for (int n=0; n < ngpus; n++) {
		int m = device_map[n];
		cudaDeviceProp props;
		cudaGetDeviceProperties(&props, m);
		if (!opt_n_threads || n < opt_n_threads)
			fprintf(stderr, "GPU #%d: SM %d.%d %s\n", m, props.major, props.minor, props.name);
	}
}

// Shutdown helper: waits for the current device with cudaDeviceSynchronize()
// and then calls cudaDeviceReset(). Both return codes are deliberately
// discarded.
void cuda_shutdown()
{
	(void) cudaDeviceSynchronize();
	(void) cudaDeviceReset();
}

// Case-insensitive substring search that ignores blanks in both strings.
//
// haystack: text to search in (a device name).
// needle:   pattern to look for. A '#' in the pattern acts as an ordinal
//           selector: once everything before it has matched, `match` is
//           incremented and the result is whether it now equals the single
//           digit following the '#'.
// match:    running hit counter shared between successive calls; it is only
//           modified when a '#' selector is reached.
//
// Returns true when the pattern (or the selected occurrence) was found.
static bool substringsearch(const char *haystack, const char *needle, int &match)
{
	int hlen = (int) strlen(haystack);
	int nlen = (int) strlen(needle);
	for (int i=0; i < hlen; ++i)
	{
		// a match never starts on a blank
		if (haystack[i] == ' ') continue;
		int j=0, x = 0;
		while(j < nlen)
		{
			// blanks are skipped independently on each side
			if (haystack[i+x] == ' ') {++x; continue;}
			if (needle[j] == ' ') {++j; continue;}
			if (needle[j] == '#') return ++match == needle[j+1]-'0';
			// tolower() is only defined for unsigned char values (or EOF):
			// cast first so bytes >= 0x80 in a plain (signed) char are safe.
			// The haystack's terminating NUL never equals a pattern character,
			// so the scan cannot run past the end of the haystack.
			if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
			++j; ++x;
		}
		if (j == nlen) return true;
	}
	return false;
}

// Find a CUDA device by name (returns the device index, or -1 when no
// device matches). The comparison is delegated to substringsearch(), with
// one hit counter shared across all devices.
int cuda_finddevice(char *name)
{
	const int count = cuda_num_devices();
	int hits = 0;
	int dev = 0;
	while (dev < count)
	{
		cudaDeviceProp props;
		// devices whose properties cannot be read are never matched
		if (cudaGetDeviceProperties(&props, dev) == cudaSuccess
			&& substringsearch(props.name, name, hits))
			return dev;
		++dev;
	}
	return -1;
}

// Throughput to use for thread `thr_id`: the value stored in
// gpus_intensity[thr_id] when it is non-zero, otherwise `defcount`.
// The chosen value is also passed to api_set_throughput().
// `func` is not used by this function.
uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
{
	uint32_t throughput = defcount;
	if (gpus_intensity[thr_id])
		throughput = gpus_intensity[thr_id];
	api_set_throughput(thr_id, throughput);
	return throughput;
}

// Time synchronisation routine from cudaminer, using a CPU sleep.
// Note: if you disable all of these calls, CPU usage will hit 100%
//
// For situation >= 0 the function keeps a moving average of how long the
// stream took to finish for each (situation, thr_id) pair. When the stream is
// still busy it sleeps for 95% of that average before calling
// cudaStreamSynchronize(), then folds the measured time back into the average.
// For a negative situation, or a thr_id that does not fit the per-situation
// table, it simply calls cudaStreamSynchronize().
//
// Returns the result of cudaStreamSynchronize(), or cudaSuccess when the
// stream was already idle and no synchronize call was needed.
//
// NOTE(review): the table is a function-local static std::map used without
// locking; confirm whether several threads can call this concurrently.
typedef struct { double value[8]; } tsumarray;
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{
	// number of per-thread slots in a tsumarray
	const int slots = (int) (sizeof(tsumarray) / sizeof(double));

	// Without this check a thr_id outside [0, slots) would read and write
	// past the end of value[].
	if (situation < 0 || thr_id < 0 || thr_id >= slots)
		return cudaStreamSynchronize(stream);

	cudaError_t result = cudaSuccess;
	static std::map<int, tsumarray> tsum;

	double a = 0.95, b = 0.05;
	// must be tested before operator[] below creates the entry
	if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence

	// a new entry is value-initialised, so the average starts at 0.0
	double &avg = tsum[situation].value[thr_id];

	double tsync = 0.0;
	double tsleep = 0.95 * avg;
	if (cudaStreamQuery(stream) == cudaErrorNotReady)
	{
		usleep((useconds_t)(1e6*tsleep));
		struct timeval tv_start, tv_end;
		gettimeofday(&tv_start, NULL);
		result = cudaStreamSynchronize(stream);
		gettimeofday(&tv_end, NULL);
		tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
	}
	// a negative tsync is not folded into the average
	if (tsync >= 0) avg = a * avg + b * (tsleep+tsync);
	return result;
}

// Copy clock and memory figures of device gpu->gpu_id into `gpu`.
//
// On success gpu_clock, gpu_memclock and gpu_mem receive the clockRate,
// memoryClockRate and totalGlobalMem fields of the device properties and 0 is
// returned. When the properties cannot be queried, `gpu` is left unchanged and
// -1 is returned.
int cuda_gpu_clocks(struct cgpu_info *gpu)
{
	cudaDeviceProp props;
	if (cudaGetDeviceProperties(&props, gpu->gpu_id) != cudaSuccess)
		return -1;

	gpu->gpu_clock = props.clockRate;
	gpu->gpu_memclock = props.memoryClockRate;
	gpu->gpu_mem = props.totalGlobalMem;
	return 0;
}

// Reset the device mapped to thread `thr_id`.
//
// If two threads share the same GPU, both need to be re-initialised: when an
// `init` array is supplied (the case used from an algo's scan code), the flag
// of every slot mapped to the same device is cleared, the mining threads are
// restarted and the device is drained before the reset. With init == NULL the
// device is selected and reset directly.
void cuda_reset_device(int thr_id, bool *init)
{
	const int dev_id = device_map[thr_id];
	cudaSetDevice(dev_id);

	if (init != NULL)
	{
		// clear the init flag of every thread slot bound to this device
		for (int slot = 0; slot < MAX_GPUS; ++slot)
			if (device_map[slot] == dev_id)
				init[slot] = false;

		// force the algos out of their scan loops/functions
		restart_threads();
		cudaDeviceSynchronize();
		// poll the default stream until it no longer reports pending work
		while (cudaStreamQuery(NULL) == cudaErrorNotReady)
			usleep(1000);
	}

	cudaDeviceReset();
}

// Record and report a CUDA failure for thread `thr_id`.
//
// Increments the hw_errors counter of the thread's gpu record, logs the device
// id, the caller-supplied `func` label and the CUDA error text, then sleeps
// for one second.
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
	thr_info[thr_id].gpu.hw_errors++;
	applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err));
	sleep(1);
}
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the last 5 minutes

Also move common CUDA functions into a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <stdio.h>
 								#include <memory.h>
 								#include <string.h>
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											9 years ago
+								#include <unistd.h>
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <map>
 								#ifndef _WIN32
 								#include <unistd.h>
 								#endif
 								// include thrust
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix the Makefile to support standard C++ files
  This prevents nvcc from being used on files without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#ifndef __cplusplus
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <thrust/version.h>
 								#include <thrust/remove.h>
 								#include <thrust/device_vector.h>
 								#include <thrust/iterator/constant_iterator.h>
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#else
 								#include <ctype.h>
 								#endif
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
 								#include "miner.h"
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include "cuda_runtime.h"
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
 								// CUDA Devices on the System
-												ccminer: rename main file and switch to C++

The behavior differed between Linux and Visual Studio

That made it hard to link functions correctly

This removes some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								int cuda_num_devices()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									int version;
 									cudaError_t err = cudaDriverGetVersion(&version);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
 										exit(1);
 									}
 									int maj = version / 1000, min = version % 100; // same as in deviceQuery sample
 									if (maj < 5 || (maj == 5 && min < 5))
 									{
 										applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
 										exit(1);
 									}
 									int GPU_N;
 									err = cudaGetDeviceCount(&GPU_N);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 										exit(1);
 									}
 									return GPU_N;
 								}
-												ccminer: rename main file and switch to C++

There was a different behavior on linux and visual studio

That was making it hard to link functions correctly

That remove some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								void cuda_devicenames()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									cudaError_t err;
 									int GPU_N;
 									err = cudaGetDeviceCount(&GPU_N);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 										exit(1);
 									}
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											9 years ago
+									GPU_N = min(MAX_GPUS, GPU_N);
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									for (int i=0; i < GPU_N; i++)
 									{
 										cudaDeviceProp props;
 										cudaGetDeviceProperties(&props, device_map[i]);
 										device_name[i] = strdup(props.name);
-												various extern cleanup + api history uids and gpu SM

uids could be useful to create graphs from history data

Note: please do a clean build after this commit (changes in miner.h)

											
										
										
											10 years ago
+										device_sm[i] = (props.major * 100 + props.minor * 10);
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									}
 								}
-												Add the -n (--ndevs) option like cgminer

											
										
										
											9 years ago
+								void cuda_print_devices()
 								{
 									int ngpus = cuda_num_devices();
 									for (int n=0; n < ngpus; n++) {
 										int m = device_map[n];
 										cudaDeviceProp props;
 										cudaGetDeviceProperties(&props, m);
 										if (!opt_n_threads || n < opt_n_threads)
 											fprintf(stderr, "GPU #%d: SM %d.%d %s\n", m, props.major, props.minor, props.name);
 									}
 								}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											9 years ago
+								void cuda_shutdown()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											9 years ago
+									cudaDeviceSynchronize();
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									cudaDeviceReset();
 								}
 								static bool substringsearch(const char *haystack, const char *needle, int &match)
 								{
 									int hlen = (int) strlen(haystack);
 									int nlen = (int) strlen(needle);
 									for (int i=0; i < hlen; ++i)
 									{
 										if (haystack[i] == ' ') continue;
 										int j=0, x = 0;
 										while(j < nlen)
 										{
 											if (haystack[i+x] == ' ') {++x; continue;}
 											if (needle[j] == ' ') {++j; continue;}
 											if (needle[j] == '#') return ++match == needle[j+1]-'0';
 											if (tolower(haystack[i+x]) != tolower(needle[j])) break;
 											++j; ++x;
 										}
 										if (j == nlen) return true;
 									}
 									return false;
 								}
 								// CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1)
-												ccminer: rename main file and switch to C++

There was a different behavior on linux and visual studio

That was making it hard to link functions correctly

That remove some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								int cuda_finddevice(char *name)
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									int num = cuda_num_devices();
 									int match = 0;
 									for (int i=0; i < num; ++i)
 									{
 										cudaDeviceProp props;
 										if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
 											if (substringsearch(props.name, name, match)) return i;
 									}
 									return -1;
 								}
-												Allow different intensity per device

and clean the old variables, no more required

											
										
										
											10 years ago
+								uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
 								{
 									uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
 									api_set_throughput(thr_id, throughput);
 									return throughput;
 								}
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								// Zeitsynchronisations-Routine von cudaminer mit CPU sleep
-												debug: x11 algo traces for cuda 7 problem

											
										
										
											9 years ago
+								// Note: if you disable all of these calls, CPU usage will hit 100%
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								typedef struct { double value[8]; } tsumarray;
 								cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 								{
 									cudaError_t result = cudaSuccess;
 									if (situation >= 0)
 									{
 										static std::map<int, tsumarray> tsum;
 										double a = 0.95, b = 0.05;
 										if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence
 										double tsync = 0.0;
 										double tsleep = 0.95 * tsum[situation].value[thr_id];
 										if (cudaStreamQuery(stream) == cudaErrorNotReady)
 										{
 											usleep((useconds_t)(1e6*tsleep));
 											struct timeval tv_start, tv_end;
 											gettimeofday(&tv_start, NULL);
 											result = cudaStreamSynchronize(stream);
 											gettimeofday(&tv_end, NULL);
 											tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
 										}
 										if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync);
 									}
 									else
 										result = cudaStreamSynchronize(stream);
 									return result;
 								}
-												Prepare trap of hardware/mem failures

											
										
										
											10 years ago
-												various small changes

heavy: reduce by 256 threads default intensity to all -i 20
cuda: put static thread init bools outside the code (made once)
api: fix nvml header to build without

											
										
										
											10 years ago
+								int cuda_gpu_clocks(struct cgpu_info *gpu)
 								{
 									cudaDeviceProp props;
 									if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
 										gpu->gpu_clock = props.clockRate;
 										gpu->gpu_memclock = props.memoryClockRate;
 										gpu->gpu_mem = props.totalGlobalMem;
 										return 0;
 									}
 									return -1;
 								}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											9 years ago
+								// if we use 2 threads on the same gpu, we need to reinit the threads
 								void cuda_reset_device(int thr_id, bool *init)
 								{
 									int dev_id = device_map[thr_id];
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											9 years ago
+									cudaSetDevice(dev_id);
 									if (init != NULL) {
 										// with init array, its meant to be used in algo's scan code...
 										for (int i=0; i < MAX_GPUS; i++) {
 											if (device_map[i] == dev_id) {
 												init[i] = false;
 											}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											9 years ago
+										}
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											9 years ago
+										// force exit from algo's scan loops/function
 										restart_threads();
 										cudaDeviceSynchronize();
 										while (cudaStreamQuery(NULL) == cudaErrorNotReady)
 											usleep(1000);
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											9 years ago
+									}
 									cudaDeviceReset();
 								}
-												Prepare trap of hardware/mem failures

											
										
										
											10 years ago
+								void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
 								{
 									struct cgpu_info *gpu = &thr_info[thr_id].gpu;
 									gpu->hw_errors++;
 									applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err));
 									sleep(1);
 								}