1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-24 05:24:16 +00:00

various small changes

heavy: reduce the default intensity by 256 threads to allow -i 20
cuda: move the static per-thread init flags out of the function bodies (initialized once)
api: fix the nvml header so the project builds without NVML
This commit is contained in:
Tanguy Pruvot 2014-11-28 20:15:41 +01:00
parent f524917bef
commit 8ad180cc70
30 changed files with 76 additions and 79 deletions

View File

@ -83,6 +83,8 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
return round;
}
static bool init[8] = { 0 };
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
@ -95,7 +97,6 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
throughput = min(throughput, (int)(max_nonce - first_nonce));
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);

View File

@ -19,7 +19,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
compat/sys/time.h compat/getopt/getopt.h \
crc32.c hefty1.c scrypt.c \
ccminer.cpp util.cpp \
api.cpp hashlog.cpp stats.cpp sysinfos.cpp cuda.cpp \
api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -53,7 +53,6 @@ ccminer_SOURCES = elist.h miner.h compat.h \
x11/s3.cu
if HAVE_NVML
ccminer_SOURCES += nvml.cpp
nvml_defs = -DUSE_WRAPNVML
nvml_libs = -ldl
endif

27
api.cpp
View File

@ -31,13 +31,9 @@
#include <sys/stat.h>
#include <sys/types.h>
#include "compat.h"
#include "miner.h"
#ifdef USE_WRAPNVML
#include "nvml.h"
#endif
#ifndef WIN32
# include <errno.h>
@ -83,18 +79,11 @@ struct IP4ACCESS {
static int ips = 1;
static struct IP4ACCESS *ipaccess = NULL;
// Big enough for largest API request
// though a PC with 100s of CPUs may exceed the size ...
// Current code assumes it can socket send this size also
#define MYBUFSIZ 16384
#define SOCK_REC_BUFSZ 256
// Socket is on 127.0.0.1
#define QUEUE 10
#define ALLIP4 "0.0.0.0"
#define MYBUFSIZ 16384
#define SOCK_REC_BUFSZ 256
#define QUEUE 10
#define ALLIP4 "0.0.0.0"
static const char *localaddr = "127.0.0.1";
static const char *UNAVAILABLE = " - API will not be available";
static char *buffer = NULL;
@ -106,7 +95,6 @@ extern int opt_api_listen; /* port */
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern int num_cpus;
extern char driver_version[32];
extern struct stratum_ctx stratum;
extern char* rpc_user;
@ -115,6 +103,9 @@ extern float cpu_temp(int);
extern uint32_t cpu_clock(int);
// cuda.cpp
int cuda_num_devices();
int cuda_gpu_clocks(struct cgpu_info *gpu);
char driver_version[32] = { 0 };
/***************************************************************/
@ -132,7 +123,7 @@ static void gpustatus(int thr_id)
cgpu->gpu_temp = gpu_temp(cgpu);
cgpu->gpu_fan = gpu_fanpercent(cgpu);
#endif
gpu_clocks(cgpu);
cuda_gpu_clocks(cgpu);
// todo: can be 0 if set by algo (auto)
if (opt_intensity == 0 && opt_work_size) {
@ -260,7 +251,7 @@ static void gpuhwinfos(int gpu_id)
gpu_info(cgpu);
#endif
gpu_clocks(cgpu);
cuda_gpu_clocks(cgpu);
memset(pstate, 0, sizeof(pstate));
if (cgpu->gpu_pstate != -1)

View File

@ -385,11 +385,12 @@ void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, cons
}
#endif
static bool init[8] = { 0 };
extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
uint64_t targetHigh = ((uint64_t*)ptarget)[3];
uint32_t _ALIGN(64) endiandata[20];
#if PRECALC64

View File

@ -61,8 +61,8 @@ void cuda_devicenames();
void cuda_devicereset();
int cuda_finddevice(char *name);
#ifdef USE_WRAPNVML
#include "nvml.h"
#ifdef USE_WRAPNVML
wrap_nvml_handle *hnvml = NULL;
#endif

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.5-git])
AC_INIT([ccminer], [1.5.1-git])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@ -156,7 +156,7 @@
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 1.5.0"
#define PACKAGE_STRING "ccminer 1.5.1-git"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.5.0"
#define PACKAGE_VERSION "1.5.1-git"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
#define USE_XOP 1
/* Version number of package */
#define VERSION "1.5.0"
#define VERSION "1.5.1-git"
/* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */

View File

@ -145,6 +145,18 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
return result;
}
/**
 * Refresh the clock and memory fields of a GPU descriptor from the CUDA
 * runtime device properties.
 *
 * Calls cudaGetDeviceProperties() for the device numbered gpu->gpu_id and,
 * when that succeeds, copies clockRate, memoryClockRate and totalGlobalMem
 * into gpu->gpu_clock, gpu->gpu_memclock and gpu->gpu_mem respectively.
 *
 * Returns 0 on success, or -1 when the properties query fails; in the
 * failure case the descriptor is left unmodified.
 */
int cuda_gpu_clocks(struct cgpu_info *gpu)
{
	cudaDeviceProp devprop;
	const cudaError_t status = cudaGetDeviceProperties(&devprop, gpu->gpu_id);

	// Bail out early: nothing is written to *gpu unless the query worked.
	if (status != cudaSuccess)
		return -1;

	gpu->gpu_clock = devprop.clockRate;
	gpu->gpu_memclock = devprop.memoryClockRate;
	gpu->gpu_mem = devprop.totalGlobalMem;
	return 0;
}
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
struct cgpu_info *gpu = &thr_info[thr_id].gpu;

View File

@ -65,6 +65,8 @@ extern "C" void nist5hash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
@ -77,18 +79,19 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
throughput = min(throughput, (int) (max_nonce - first_nonce));
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}

View File

@ -20,6 +20,8 @@ sph_fugue256_context ctx_fugue_const[8];
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
static bool init[8] = { 0 };
extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
@ -32,7 +34,6 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
((uint32_t*)ptarget)[7] = 0xf;
// init
static bool init[8] = { false, false, false, false, false, false, false, false };
if(!init[thr_id])
{
fugue256_cpu_init(thr_id, throughPut);

View File

@ -58,7 +58,7 @@ extern "C" void groestlhash(void *state, const void *input)
memcpy(state, hashB, 32);
}
extern bool opt_benchmark;
static bool init[8] = { 0 };
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
@ -73,7 +73,6 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
((uint32_t*)ptarget)[7] = 0x000000ff;
// init
static bool init[8] = { false, false, false, false, false, false, false, false };
if(!init[thr_id])
{
groestlcoin_cpu_init(thr_id, throughPut);

View File

@ -134,9 +134,9 @@ int scanhash_heavy(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen)
{
const uint32_t first_nonce = pdata[19]; /* to check */
const uint32_t first_nonce = pdata[19];
// CUDA will process thousands of threads.
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
int throughput = opt_work_size ? opt_work_size : (1 << 19) - 256; // 256*2048
throughput = min(throughput, (int)(max_nonce - first_nonce));
int rc = 0;

View File

@ -34,6 +34,8 @@ extern "C" void keccak256_hash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
@ -46,7 +48,6 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
int throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
throughput = min(throughput, (int)(max_nonce - first_nonce));
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) {
cudaSetDevice(device_map[thr_id]);

View File

@ -32,7 +32,7 @@ extern "C" void myriadhash(void *state, const void *input)
memcpy(state, hashB, 32);
}
extern bool opt_benchmark;
static bool init[8] = { 0 };
extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
@ -51,7 +51,6 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
((uint32_t*)ptarget)[7] = 0x0000ff;
// init
static bool init[8] = { false, false, false, false, false, false, false, false };
if(!init[thr_id])
{
#if BIG_DEBUG

View File

@ -23,19 +23,16 @@
#endif
#include "miner.h"
#include "nvml.h"
#include "cuda_runtime.h"
// cuda.cpp
int cuda_num_devices();
// geforce driver version
char driver_version[32] = { 0 };
#ifdef USE_WRAPNVML
#include "nvml.h"
extern wrap_nvml_handle *hnvml;
extern char driver_version[32];
static uint32_t device_bus_ids[8] = { 0 };
@ -770,15 +767,3 @@ int gpu_info(struct cgpu_info *gpu)
}
#endif /* USE_WRAPNVML */
/**
 * Populate the clock and memory fields of a GPU descriptor through the
 * CUDA runtime.
 *
 * Looks up the properties of device gpu->gpu_id with
 * cudaGetDeviceProperties(); on success stores clockRate in gpu->gpu_clock,
 * memoryClockRate in gpu->gpu_memclock and totalGlobalMem in gpu->gpu_mem.
 *
 * Returns 0 when the fields were updated, -1 when the lookup failed (the
 * descriptor is not touched in that case).
 */
int gpu_clocks(struct cgpu_info *gpu)
{
	int rc = -1; // pessimistic default, cleared once the query succeeds
	cudaDeviceProp info;

	if (cudaSuccess == cudaGetDeviceProperties(&info, gpu->gpu_id)) {
		gpu->gpu_clock = info.clockRate;
		gpu->gpu_memclock = info.memoryClockRate;
		gpu->gpu_mem = info.totalGlobalMem;
		rc = 0;
	}

	return rc;
}

18
nvml.h
View File

@ -13,6 +13,9 @@
* John E. Stone - john.stone@gmail.com
*
*/
#ifdef USE_WRAPNVML
#include "miner.h"
/*
* Ugly hacks to avoid dependencies on the real nvml.h until it starts
@ -130,17 +133,8 @@ int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
int gpuindex,
unsigned int *milliwatts);
/* nvapi functions */
#ifdef WIN32
int wrap_nvapi_init();
#endif
/* api functions */
#include "miner.h"
#ifdef USE_WRAPNVML
int gpu_fanpercent(struct cgpu_info *gpu);
float gpu_temp(struct cgpu_info *gpu);
unsigned int gpu_power(struct cgpu_info *gpu);
@ -151,7 +145,9 @@ int gpu_busid(struct cgpu_info *gpu);
/* pid/vid, sn and bios rev */
int gpu_info(struct cgpu_info *gpu);
/* nvapi functions */
#ifdef WIN32
int wrap_nvapi_init();
#endif
// cuda api based
int gpu_clocks(struct cgpu_info *gpu);
#endif /* USE_WRAPNVML */

View File

@ -486,11 +486,12 @@ void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
}
static bool init[8] = { 0 };
extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
uint32_t endiandata[20];
int rc = 0;
int throughput = opt_work_size ? opt_work_size : (128 * 2560); // 18.5

View File

@ -158,12 +158,13 @@ struct HashPredicate
};
*/
static bool init[8] = { 0 };
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -128,12 +128,13 @@ extern "C" void quarkhash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -52,13 +52,13 @@ extern "C" void deephash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -32,13 +32,13 @@ extern void doomhash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -72,11 +72,12 @@ extern "C" void qubithash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20];
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8

View File

@ -68,12 +68,13 @@ extern "C" void fresh_hash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;

View File

@ -49,13 +49,14 @@ extern "C" void s3hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
/* Main S3 entry point */
extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int intensity = 20; // 256*256*8*2;
#ifdef WIN32
// reduce by one the intensity on windows

View File

@ -129,12 +129,13 @@ extern "C" void x11hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16;
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -144,13 +144,13 @@ extern "C" void x13hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -49,12 +49,13 @@ extern "C" void wcoinhash(void *state, const void *input)
memcpy(state, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -155,13 +155,13 @@ extern "C" void x14hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));

View File

@ -165,12 +165,13 @@ extern "C" void x15hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;

View File

@ -184,13 +184,13 @@ extern "C" void x17hash(void *output, const void *input)
memcpy(output, hash, 32);
}
static bool init[8] = { 0 };
extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));