ccminer/cuda.cpp

#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <unistd.h>
#include <map>

// include thrust
#ifndef __cplusplus
#include <thrust/version.h>
#include <thrust/remove.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#else
#include <ctype.h>
#endif

#include "miner.h"
#include "nvml.h"

#include "cuda_runtime.h"

#ifdef __cplusplus
/* miner.h functions are declared in C type, not C++ */
extern "C" {
#endif

// CUDA Devices on the System
//
// Checks that an usable driver is present, then returns the number of CUDA
// devices reported by cudaGetDeviceCount().
// The process is terminated with exit(1), after logging the reason, when
// the driver version cannot be queried, when the driver supports less than
// the CUDA 5.5 API, or when the device count cannot be queried.
int cuda_num_devices()
{
	int version;
	cudaError_t err = cudaDriverGetVersion(&version);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
		exit(1);
	}

	// the version is encoded as 1000 * major + 10 * minor, so the minor
	// number needs the division by 10 (same as in deviceQuery sample)
	int ver_major = version / 1000;
	int ver_minor = (version % 100) / 10;
	if (ver_major < 5 || (ver_major == 5 && ver_minor < 5))
	{
		applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
		exit(1);
	}

	int GPU_N;
	err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}
	return GPU_N;
}

void cuda_devicenames()
{
	cudaError_t err;
	int GPU_N;
	err = cudaGetDeviceCount(&GPU_N);
	if (err != cudaSuccess)
	{
		applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
		exit(1);
	}

	if (opt_n_threads)
		GPU_N = min(MAX_GPUS, opt_n_threads);
	for (int i=0; i < GPU_N; i++)
	{
		char vendorname[32] = { 0 };
		cudaDeviceProp props;
		cudaGetDeviceProperties(&props, device_map[i]);

		device_sm[i] = (props.major * 100 + props.minor * 10);

		if (device_name[i]) {
			free(device_name[i]);
			device_name[i] = NULL;
		}
#ifdef USE_WRAPNVML
		if (gpu_vendor((uint8_t)props.pciBusID, vendorname) > 0 && strlen(vendorname)) {
			device_name[i] = (char*) calloc(1, strlen(vendorname) + strlen(props.name) + 2);
			if (!strncmp(props.name, "GeForce ", 8))
				sprintf(device_name[i], "%s %s", vendorname, &props.name[8]);
			else
				sprintf(device_name[i], "%s %s", vendorname, props.name);
		} else
#endif
			device_name[i] = strdup(props.name);
	}
}

void cuda_print_devices()
{
	int ngpus = cuda_num_devices();
	cuda_devicenames();
	for (int n=0; n < ngpus; n++) {
		int m = device_map[n % MAX_GPUS];
		cudaDeviceProp props;
		cudaGetDeviceProperties(&props, m);
		if (!opt_n_threads || n < opt_n_threads) {
			fprintf(stderr, "GPU #%d: SM %d.%d %s\n", m, props.major, props.minor, device_name[n]);
		}
	}
}

// Synchronize with the current CUDA device, then reset it.
// Best effort: the return codes of both calls are not inspected.
void cuda_shutdown()
{
	(void) cudaDeviceSynchronize();
	(void) cudaDeviceReset();
}

// Case-insensitive search of needle inside haystack, ignoring spaces on
// both sides ("gtx960" matches "GeForce GTX 960").
//
// A needle may end with '#' and a digit (e.g. "GTX 960 #2"): once the text
// before the '#' has matched, the match counter is incremented and the
// result tells whether the counter now equals that digit. Only the single
// character following '#' is read.
//
// haystack: string to search in
// needle:   pattern to look for
// match:    counter of '#' hits, incremented in place; the caller keeps it
//           across calls to select the Nth matching string
// Returns true when needle was found (or when the requested '#' occurrence
// was reached), false otherwise.
static bool substringsearch(const char *haystack, const char *needle, int &match)
{
	int hlen = (int) strlen(haystack);
	int nlen = (int) strlen(needle);
	for (int i=0; i < hlen; ++i)
	{
		if (haystack[i] == ' ') continue;
		int j=0, x = 0;
		while(j < nlen)
		{
			// haystack[i+x] reaches the terminating NUL at most: the NUL
			// never equals a needle character, which ends the inner loop
			if (haystack[i+x] == ' ') {++x; continue;}
			if (needle[j] == ' ') {++j; continue;}
			if (needle[j] == '#') return ++match == needle[j+1]-'0';
			// tolower() is undefined for negative values, so non-ASCII
			// bytes have to go through unsigned char
			if (tolower((unsigned char) haystack[i+x]) != tolower((unsigned char) needle[j])) break;
			++j; ++x;
		}
		if (j == nlen) return true;
	}
	return false;
}

// CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1)
int cuda_finddevice(char *name)
{
	int num = cuda_num_devices();
	int match = 0;
	for (int i=0; i < num; ++i)
	{
		cudaDeviceProp props;
		if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
			if (substringsearch(props.name, name, match)) return i;
	}
	return -1;
}

// since 1.7
// Select the throughput of a thread: gpus_intensity[thr_id] when it is
// non-zero, defcount otherwise. The selected value is also handed to
// api_set_throughput() when api_thr_id is set (!= -1).
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount)
{
	uint32_t count = defcount;
	if (gpus_intensity[thr_id])
		count = gpus_intensity[thr_id];

	// several threads configured: scale down the default count
	// NOTE(review): the divisor is gpu_threads-1, not gpu_threads - confirm this is intended
	if (gpu_threads > 1 && count == defcount)
		count /= (gpu_threads-1);

	if (api_thr_id != -1)
		api_set_throughput(thr_id, count);

	return count;
}

// if we use 2 threads on the same gpu, we need to reinit the threads
//
// Reset the CUDA device used by thread thr_id.
// thr_id: thread index, mapped to a device id through device_map
//         (modulo MAX_GPUS)
// init:   optional array of per-slot "initialized" flags. When non-NULL,
//         the flag of every slot mapped to the same device is cleared,
//         restart_threads() is called and pending work is waited for
//         before the reset. When NULL the device is reset directly.
//         It is indexed like device_map, so it presumably holds MAX_GPUS
//         entries - TODO confirm against the callers.
void cuda_reset_device(int thr_id, bool *init)
{
	int dev_id = device_map[thr_id % MAX_GPUS];
	cudaSetDevice(dev_id);
	if (init != NULL) {
		// with init array, its meant to be used in algo's scan code...
		// every slot sharing this device has to initialize again
		for (int i=0; i < MAX_GPUS; i++) {
			if (device_map[i] == dev_id) {
				init[i] = false;
			}
		}
		// force exit from algo's scan loops/function
		restart_threads();
		cudaDeviceSynchronize();
		// poll the NULL (default) stream every millisecond until it no
		// longer reports pending work
		while (cudaStreamQuery(NULL) == cudaErrorNotReady)
			usleep(1000);
	}
	cudaDeviceReset();
	// apply the scheduling flags again after the reset, when
	// opt_cudaschedule is set (>= 0)
	if (opt_cudaschedule >= 0) {
		cudaSetDevice(dev_id);
		cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
	}
}

// Free memory of the device mapped to thr_id, in megabytes.
// The device is made current as a side effect. The result is 0 when
// cudaMemGetInfo() leaves the free counter untouched.
int cuda_available_memory(int thr_id)
{
	const size_t bytes_per_mb = 1024 * 1024;
	size_t free_bytes = 0, total_bytes; // the total is fetched but not used

	cudaSetDevice(device_map[thr_id % MAX_GPUS]);
	cudaMemGetInfo(&free_bytes, &total_bytes);

	return (int) (free_bytes / bytes_per_mb);
}

// Check (and reset) last cuda error, and report it in logs
void cuda_log_lasterror(int thr_id, const char* func, int line)
{
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess && !opt_quiet)
		gpulog(LOG_WARNING, thr_id, "%s:%d %s", func, line, cudaGetErrorString(err));
}

#ifdef __cplusplus
} /* extern "C" */
#endif

int cuda_gpu_clocks(struct cgpu_info *gpu)
{
	cudaDeviceProp props;
	if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
		gpu->gpu_clock = props.clockRate;
		gpu->gpu_memclock = props.memoryClockRate;
		gpu->gpu_mem = props.totalGlobalMem;
		return 0;
	}
	return -1;
}

// Time synchronisation routine from cudaminer, with CPU sleep
// Note: if you disable all of these calls, CPU usage will hit 100%
//
// Waits for a stream like cudaStreamSynchronize(), but first sleeps for
// 95% of the wait time averaged over the previous calls, so that less time
// is spent inside the blocking call.
//
// stream:    stream to wait for
// situation: key of the averaged timing (one average per situation and
//            thread); a negative value requests a plain blocking sync
// thr_id:    thread index, selects the slot inside the situation's averages;
//            a thread without a slot (thr_id outside the tsumarray range)
//            gets a plain blocking sync
// Returns the result of cudaStreamSynchronize(), or cudaSuccess when the
// stream was already idle.

// per-situation averages, one slot per thread
typedef struct { double value[8]; } tsumarray;
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{
	// number of slots available in a tsumarray
	const int nslots = (int) (sizeof(((tsumarray*) 0)->value) / sizeof(double));

	// no averaging requested, or no slot for this thread (indexing the
	// array with such a thr_id would be out of bounds)
	if (situation < 0 || thr_id < 0 || thr_id >= nslots)
		return cudaStreamSynchronize(stream);

	// NOTE(review): this map is shared by all calling threads and is not
	// protected by a lock - confirm how callers serialize access
	static std::map<int, tsumarray> tsum;

	double a = 0.95, b = 0.05;
	if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence

	// operator[] creates a zeroed entry on first use of a situation
	double &average = tsum[situation].value[thr_id];

	cudaError_t result = cudaSuccess;
	double tsync = 0.0;
	double tsleep = 0.95 * average;
	if (cudaStreamQuery(stream) == cudaErrorNotReady)
	{
		usleep((useconds_t)(1e6*tsleep));
		// measure how long the blocking wait still takes after the sleep
		struct timeval tv_start, tv_end;
		gettimeofday(&tv_start, NULL);
		result = cudaStreamSynchronize(stream);
		gettimeofday(&tv_end, NULL);
		tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
	}
	// a negative duration is not folded into the average
	if (tsync >= 0) average = a * average + b * (tsleep+tsync);
	return result;
}

// Count one more hardware error on the gpu of thread thr_id, log the cuda
// error together with the reporting function name, then pause one second.
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
	thr_info[thr_id].gpu.hw_errors++;
	gpulog(LOG_ERR, thr_id, "%s %s", func, cudaGetErrorString(err));
	sleep(1);
}
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <stdio.h>
 								#include <memory.h>
 								#include <string.h>
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											10 years ago
+								#include <unistd.h>
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <map>
 								// include thrust
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#ifndef __cplusplus
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include <thrust/version.h>
 								#include <thrust/remove.h>
 								#include <thrust/device_vector.h>
 								#include <thrust/iterator/constant_iterator.h>
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#else
 								#include <ctype.h>
 								#endif
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
 								#include "miner.h"
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include "nvml.h"
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
-												Add nvml for GPU monitoring (squashed)

  Based on mwhite73 <marvin.white@gmail.com> implementation

  Linked to the api system

  Also fix Makefile to support standard c++ files
  This prevent nvcc use without device code

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								#include "cuda_runtime.h"
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
-												Fix windows linkage, C/C++ mismatch

											
										
										
											9 years ago
+								#ifdef __cplusplus
 								/* miner.h functions are declared in C type, not C++ */
 								extern "C" {
 								#endif
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								// CUDA Devices on the System
-												ccminer: rename main file and switch to C++

There was a different behavior on linux and visual studio

That was making it hard to link functions correctly

That removes some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								int cuda_num_devices()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									int version;
 									cudaError_t err = cudaDriverGetVersion(&version);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query CUDA driver version! Is an nVidia driver installed?");
 										exit(1);
 									}
 									int maj = version / 1000, min = version % 100; // same as in deviceQuery sample
 									if (maj < 5 || (maj == 5 && min < 5))
 									{
 										applog(LOG_ERR, "Driver does not support CUDA %d.%d API! Update your nVidia driver!", 5, 5);
 										exit(1);
 									}
 									int GPU_N;
 									err = cudaGetDeviceCount(&GPU_N);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 										exit(1);
 									}
 									return GPU_N;
 								}
-												ccminer: rename main file and switch to C++

There was a different behavior on linux and visual studio

That was making it hard to link functions correctly

That removes some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								void cuda_devicenames()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									cudaError_t err;
 									int GPU_N;
 									err = cudaGetDeviceCount(&GPU_N);
 									if (err != cudaSuccess)
 									{
 										applog(LOG_ERR, "Unable to query number of CUDA devices! Is an nVidia driver installed?");
 										exit(1);
 									}
-												benchmark: show mem and default throughput in results

and prepare a new function to get the default intensity

also, take care of multiple threads per gpu...

											
										
										
											9 years ago
+									if (opt_n_threads)
 										GPU_N = min(MAX_GPUS, opt_n_threads);
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									for (int i=0; i < GPU_N; i++)
 									{
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+										char vendorname[32] = { 0 };
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+										cudaDeviceProp props;
 										cudaGetDeviceProperties(&props, device_map[i]);
-												various extern cleanup + api history uids and gpu SM

uids could be useful to create graphs from history data

Note: please do a clean build after this commit (changes in miner.h)

											
										
										
											10 years ago
+										device_sm[i] = (props.major * 100 + props.minor * 10);
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
 										if (device_name[i]) {
 											free(device_name[i]);
 											device_name[i] = NULL;
 										}
-												nvml: add missing ifdef for vendors

											
										
										
											10 years ago
+								#ifdef USE_WRAPNVML
 										if (gpu_vendor((uint8_t)props.pciBusID, vendorname) > 0 && strlen(vendorname)) {
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+											device_name[i] = (char*) calloc(1, strlen(vendorname) + strlen(props.name) + 2);
 											if (!strncmp(props.name, "GeForce ", 8))
 												sprintf(device_name[i], "%s %s", vendorname, &props.name[8]);
 											else
 												sprintf(device_name[i], "%s %s", vendorname, props.name);
 										} else
-												nvml: add missing ifdef for vendors

											
										
										
											10 years ago
+								#endif
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+											device_name[i] = strdup(props.name);
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									}
 								}
-												Add the -n (--ndevs) option like cgminer

											
										
										
											10 years ago
+								void cuda_print_devices()
 								{
 									int ngpus = cuda_num_devices();
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									cuda_devicenames();
-												Add the -n (--ndevs) option like cgminer

											
										
										
											10 years ago
+									for (int n=0; n < ngpus; n++) {
-												benchmark: show mem and default throughput in results

and prepare a new function to get the default intensity

also, take care of multiple threads per gpu...

											
										
										
											9 years ago
+										int m = device_map[n % MAX_GPUS];
-												Add the -n (--ndevs) option like cgminer

											
										
										
											10 years ago
+										cudaDeviceProp props;
 										cudaGetDeviceProperties(&props, m);
-												nvml: get devices vendor names with libpci

made for linux and require libpci-dev (optional)

if libpci is not installed, card's vendor names are not handled...

Note: only a few vendor names were added, common GeForce vendors.

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+										if (!opt_n_threads || n < opt_n_threads) {
 											fprintf(stderr, "GPU #%d: SM %d.%d %s\n", m, props.major, props.minor, device_name[n]);
 										}
-												Add the -n (--ndevs) option like cgminer

											
										
										
											10 years ago
+									}
 								}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+								void cuda_shutdown()
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+									cudaDeviceSynchronize();
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+									cudaDeviceReset();
 								}
 								static bool substringsearch(const char *haystack, const char *needle, int &match)
 								{
 									int hlen = (int) strlen(haystack);
 									int nlen = (int) strlen(needle);
 									for (int i=0; i < hlen; ++i)
 									{
 										if (haystack[i] == ' ') continue;
 										int j=0, x = 0;
 										while(j < nlen)
 										{
 											if (haystack[i+x] == ' ') {++x; continue;}
 											if (needle[j] == ' ') {++j; continue;}
 											if (needle[j] == '#') return ++match == needle[j+1]-'0';
 											if (tolower(haystack[i+x]) != tolower(needle[j])) break;
 											++j; ++x;
 										}
 										if (j == nlen) return true;
 									}
 									return false;
 								}
 								// CUDA Gerät nach Namen finden (gibt Geräte-Index zurück oder -1)
-												ccminer: rename main file and switch to C++

There was a different behavior on linux and visual studio

That was making it hard to link functions correctly

That removes some ifdef / extern "C" requirements

note about x86 releases, x86 nvml.dll is not installed on Windows x64!

											
										
										
											10 years ago
+								int cuda_finddevice(char *name)
-												Store and display average hashrate (benchmark + on share)

Displayed data is the average of the last 50 scans in the 5 last minutes

Also move cuda common functions in a new file (cuda.cu)

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+								{
 									int num = cuda_num_devices();
 									int match = 0;
 									for (int i=0; i < num; ++i)
 									{
 										cudaDeviceProp props;
 										if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
 											if (substringsearch(props.name, name, match)) return i;
 									}
 									return -1;
 								}
-												benchmark: show mem and default throughput in results

and prepare a new function to get the default intensity

also, take care of multiple threads per gpu...

											
										
										
											9 years ago
+								// since 1.7
 								uint32_t cuda_default_throughput(int thr_id, uint32_t defcount)
 								{
 									//int dev_id = device_map[thr_id % MAX_GPUS];
 									uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
-												intensity: fix typo and drop old function

											
										
										
											9 years ago
+									if (gpu_threads > 1 && throughput == defcount) throughput /= (gpu_threads-1);
-												pool switch: add thr_id param to handle a future barrier

Switching to a pool with a different algo will require a barrier
to free resources, like what was done in the global benchmark.

add also the algo in pool structure...

											
										
										
											9 years ago
+									if (api_thr_id != -1) api_set_throughput(thr_id, throughput);
-												warn on cuda errors + various small changes

The full benchmark can now be launched with "ccminer --benchmark"

add a new helper function which log a warning with last cuda error
(not shown with the quiet option) : CUDA_LOG_ERROR();
it can be used where miner.h is included (.c/.cpp/.cu)

fix x14 (in ccminer.cpp), a break was missing in switch..case

											
										
										
											9 years ago
+									//gpulog(LOG_INFO, thr_id, "throughput %u", throughput);
-												Allow different intensity per device

and clean the old variables, no more required

											
										
										
											10 years ago
+									return throughput;
 								}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+								// if we use 2 threads on the same gpu, we need to reinit the threads
 								void cuda_reset_device(int thr_id, bool *init)
 								{
-												benchmark: allow -a auto to bench all algos at once

											
										
										
											9 years ago
+									int dev_id = device_map[thr_id % MAX_GPUS];
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											10 years ago
+									cudaSetDevice(dev_id);
 									if (init != NULL) {
 										// with init array, its meant to be used in algo's scan code...
 										for (int i=0; i < MAX_GPUS; i++) {
 											if (device_map[i] == dev_id) {
 												init[i] = false;
 											}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+										}
-												cuda: reduce possible segfaults on exit

not perfect but helps...

											
										
										
											10 years ago
+										// force exit from algo's scan loops/function
 										restart_threads();
 										cudaDeviceSynchronize();
 										while (cudaStreamQuery(NULL) == cudaErrorNotReady)
 											usleep(1000);
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+									}
 									cudaDeviceReset();
-												refactor: create bench.cpp and algos.h

Also enhance multi-thread benchmark synchro. with pthread barriers

											
										
										
											9 years ago
+									if (opt_cudaschedule >= 0) {
 										cudaSetDevice(dev_id);
-												Add a new cuda-schedule parameter

0: cudaDeviceScheduleAuto
1: cudaDeviceScheduleSpin
2: cudaDeviceScheduleYield
4: cudaDeviceScheduleBlockingSync

Also set the best one (4) for luffa algo by default...

											
										
										
											9 years ago
+										cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
-												refactor: create bench.cpp and algos.h

Also enhance multi-thread benchmark synchro. with pthread barriers

											
										
										
											9 years ago
+									}
-												reset: take care of multi-threaded gpus (-d 0,0)

to be tested... could create problems when reset in a chain like x11...

											
										
										
											10 years ago
+								}
-												benchmark: allow -a auto to bench all algos at once

											
										
										
											9 years ago
+								// return free memory in megabytes
 								int cuda_available_memory(int thr_id)
 								{
 									int dev_id = device_map[thr_id % MAX_GPUS];
 									size_t mtotal, mfree = 0;
 									cudaSetDevice(dev_id);
 									cudaMemGetInfo(&mfree, &mtotal);
 									return (int) (mfree / (1024 * 1024));
 								}
-												warn on cuda errors + various small changes

The full benchmark can now be launched with "ccminer --benchmark"

add a new helper function which log a warning with last cuda error
(not shown with the quiet option) : CUDA_LOG_ERROR();
it can be used where miner.h is included (.c/.cpp/.cu)

fix x14 (in ccminer.cpp), a break was missing in switch..case

											
										
										
											9 years ago
+								// Check (and reset) last cuda error, and report it in logs
 								void cuda_log_lasterror(int thr_id, const char* func, int line)
 								{
 									cudaError_t err = cudaGetLastError();
 									if (err != cudaSuccess && !opt_quiet)
 										gpulog(LOG_WARNING, thr_id, "%s:%d %s", func, line, cudaGetErrorString(err));
 								}
-												Fix windows linkage, C/C++ mismatch

											
										
										
											9 years ago
+								#ifdef __cplusplus
 								} /* extern "C" */
 								#endif
 								int cuda_gpu_clocks(struct cgpu_info *gpu)
 								{
 									cudaDeviceProp props;
 									if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
 										gpu->gpu_clock = props.clockRate;
 										gpu->gpu_memclock = props.memoryClockRate;
 										gpu->gpu_mem = props.totalGlobalMem;
 										return 0;
 									}
 									return -1;
 								}
 								// Zeitsynchronisations-Routine von cudaminer mit CPU sleep
 								// Note: if you disable all of these calls, CPU usage will hit 100%
 								typedef struct { double value[8]; } tsumarray;
 								cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 								{
 									cudaError_t result = cudaSuccess;
 									if (situation >= 0)
 									{
 										static std::map<int, tsumarray> tsum;
 										double a = 0.95, b = 0.05;
 										if (tsum.find(situation) == tsum.end()) { a = 0.5; b = 0.5; } // faster initial convergence
 										double tsync = 0.0;
 										double tsleep = 0.95 * tsum[situation].value[thr_id];
 										if (cudaStreamQuery(stream) == cudaErrorNotReady)
 										{
 											usleep((useconds_t)(1e6*tsleep));
 											struct timeval tv_start, tv_end;
 											gettimeofday(&tv_start, NULL);
 											result = cudaStreamSynchronize(stream);
 											gettimeofday(&tv_end, NULL);
 											tsync = 1e-6 * (tv_end.tv_usec-tv_start.tv_usec) + (tv_end.tv_sec-tv_start.tv_sec);
 										}
 										if (tsync >= 0) tsum[situation].value[thr_id] = a * tsum[situation].value[thr_id] + b * (tsleep+tsync);
 									}
 									else
 										result = cudaStreamSynchronize(stream);
 									return result;
 								}
-												Prepare trap of hardware/mem failures

											
										
										
											10 years ago
+								void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
 								{
 									struct cgpu_info *gpu = &thr_info[thr_id].gpu;
 									gpu->hw_errors++;
-												add gpulog() function helper, simple and multi-threads

when using multiple cpu threads per gpu, use the T prefix, ex:

[2015-10-11 09:52:49] GPU #0: app clocks set to P0 (3600/1228)
 vs
[2015-10-11 09:52:51] GPU T0: MSI GTX 960, 5953.35 kH/s

Only thr_id is required, the function take care of the dev id

											
										
										
											9 years ago
+									gpulog(LOG_ERR, thr_id, "%s %s", func, cudaGetErrorString(err));
-												Prepare trap of hardware/mem failures

											
										
										
											10 years ago
+									sleep(1);
 								}