mirror of
https://github.com/GOSTSec/ccminer
synced 2025-08-26 05:42:02 +00:00
Some work on data alignment
linux: add -march=native (we build it ourselves) and some other flags + remove unused vars (seen with -Wall)
This commit is contained in:
parent
93bb428bdf
commit
5bc969fa57
@ -49,10 +49,13 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
|||||||
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
|
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
|
||||||
x11/s3.cu
|
x11/s3.cu
|
||||||
|
|
||||||
|
if HAVE_WINDOWS
|
||||||
|
ccminer_SOURCES += compat/winansi.c
|
||||||
|
endif
|
||||||
|
|
||||||
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
||||||
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
|
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
|
||||||
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
|
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
|
||||||
|
|
||||||
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
||||||
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
|
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
|
||||||
|
@ -386,7 +386,7 @@ static void blake256mid(uint32_t *output, const uint32_t *input, int8_t rounds =
|
|||||||
__host__
|
__host__
|
||||||
void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget)
|
void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget)
|
||||||
{
|
{
|
||||||
uint32_t data[11];
|
uint32_t _ALIGN(64) data[11];
|
||||||
memcpy(data, midstate, 32);
|
memcpy(data, midstate, 32);
|
||||||
data[8] = penddata[0];
|
data[8] = penddata[0];
|
||||||
data[9] = penddata[1];
|
data[9] = penddata[1];
|
||||||
@ -402,9 +402,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
uint64_t targetHigh = ((uint64_t*)ptarget)[3]; // 0x00000000.0fffffff
|
uint64_t targetHigh = ((uint64_t*)ptarget)[3]; // 0x00000000.0fffffff
|
||||||
uint32_t endiandata[20];
|
uint32_t _ALIGN(64) endiandata[20];
|
||||||
#if PRECALC64
|
#if PRECALC64
|
||||||
uint32_t midstate[8];
|
uint32_t _ALIGN(64) midstate[8];
|
||||||
#else
|
#else
|
||||||
uint32_t crcsum;
|
uint32_t crcsum;
|
||||||
#endif
|
#endif
|
||||||
|
@ -5,5 +5,7 @@
|
|||||||
|
|
||||||
#--ptxas-options=\"-v -dlcm=cg\""
|
#--ptxas-options=\"-v -dlcm=cg\""
|
||||||
|
|
||||||
CUDA_CFLAGS="-O3" ./configure "CFLAGS=-O3" "CXXFLAGS=-O3" --with-cuda=/usr/local/cuda
|
extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||||
|
|
||||||
|
CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda
|
||||||
|
|
||||||
|
20
cpu-miner.c
20
cpu-miner.c
@ -383,7 +383,7 @@ struct work {
|
|||||||
uint32_t scanned_to;
|
uint32_t scanned_to;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct work g_work;
|
static struct work _ALIGN(64) g_work;
|
||||||
static time_t g_work_time;
|
static time_t g_work_time;
|
||||||
static pthread_mutex_t g_work_lock;
|
static pthread_mutex_t g_work_lock;
|
||||||
|
|
||||||
@ -484,11 +484,10 @@ static int share_result(int result, const char *reason)
|
|||||||
{
|
{
|
||||||
char s[345];
|
char s[345];
|
||||||
double hashrate;
|
double hashrate;
|
||||||
int i, ret = 0;
|
|
||||||
|
|
||||||
hashrate = 0.;
|
hashrate = 0.;
|
||||||
pthread_mutex_lock(&stats_lock);
|
pthread_mutex_lock(&stats_lock);
|
||||||
for (i = 0; i < opt_n_threads; i++)
|
for (int i = 0; i < opt_n_threads; i++)
|
||||||
hashrate += thr_hashrates[i];
|
hashrate += thr_hashrates[i];
|
||||||
result ? accepted_count++ : rejected_count++;
|
result ? accepted_count++ : rejected_count++;
|
||||||
pthread_mutex_unlock(&stats_lock);
|
pthread_mutex_unlock(&stats_lock);
|
||||||
@ -651,8 +650,8 @@ static bool get_upstream_work(CURL *curl, struct work *work)
|
|||||||
if (opt_protocol && rc) {
|
if (opt_protocol && rc) {
|
||||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||||
/* show time because curl can be slower against versions/config */
|
/* show time because curl can be slower against versions/config */
|
||||||
applog(LOG_DEBUG, "got new work in %u µs",
|
applog(LOG_DEBUG, "got new work in %.2f ms",
|
||||||
diff.tv_sec * 1000000 + diff.tv_usec);
|
(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
|
||||||
}
|
}
|
||||||
|
|
||||||
json_decref(val);
|
json_decref(val);
|
||||||
@ -667,7 +666,7 @@ static void workio_cmd_free(struct workio_cmd *wc)
|
|||||||
|
|
||||||
switch (wc->cmd) {
|
switch (wc->cmd) {
|
||||||
case WC_SUBMIT_WORK:
|
case WC_SUBMIT_WORK:
|
||||||
free(wc->u.work);
|
aligned_free(wc->u.work);
|
||||||
break;
|
break;
|
||||||
default: /* do nothing */
|
default: /* do nothing */
|
||||||
break;
|
break;
|
||||||
@ -682,7 +681,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
|||||||
struct work *ret_work;
|
struct work *ret_work;
|
||||||
int failures = 0;
|
int failures = 0;
|
||||||
|
|
||||||
ret_work = (struct work*)calloc(1, sizeof(*ret_work));
|
ret_work = (struct work*)aligned_calloc(sizeof(*ret_work));
|
||||||
if (!ret_work)
|
if (!ret_work)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -690,7 +689,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
|||||||
while (!get_upstream_work(curl, ret_work)) {
|
while (!get_upstream_work(curl, ret_work)) {
|
||||||
if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) {
|
if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) {
|
||||||
applog(LOG_ERR, "json_rpc_call failed, terminating workio thread");
|
applog(LOG_ERR, "json_rpc_call failed, terminating workio thread");
|
||||||
free(ret_work);
|
aligned_free(ret_work);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -702,7 +701,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
|||||||
|
|
||||||
/* send work to requesting thread */
|
/* send work to requesting thread */
|
||||||
if (!tq_push(wc->thr->q, ret_work))
|
if (!tq_push(wc->thr->q, ret_work))
|
||||||
free(ret_work);
|
aligned_free(ret_work);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -822,7 +821,7 @@ static bool submit_work(struct thr_info *thr, const struct work *work_in)
|
|||||||
if (!wc)
|
if (!wc)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
wc->u.work = (struct work *)malloc(sizeof(*work_in));
|
wc->u.work = (struct work *)aligned_calloc(sizeof(*work_in));
|
||||||
if (!wc->u.work)
|
if (!wc->u.work)
|
||||||
goto err_out;
|
goto err_out;
|
||||||
|
|
||||||
@ -946,7 +945,6 @@ static void *miner_thread(void *userdata)
|
|||||||
struct work work;
|
struct work work;
|
||||||
uint32_t max_nonce;
|
uint32_t max_nonce;
|
||||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
||||||
unsigned char *scratchbuf = NULL;
|
|
||||||
bool work_done = false;
|
bool work_done = false;
|
||||||
bool extrajob = false;
|
bool extrajob = false;
|
||||||
char s[16];
|
char s[16];
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(__INTELLISENSE__)
|
||||||
/* reduce warnings */
|
/* reduce warnings */
|
||||||
#include <device_functions.h>
|
#include <device_functions.h>
|
||||||
#include <device_launch_parameters.h>
|
#include <device_launch_parameters.h>
|
||||||
|
@ -42,13 +42,8 @@ void sha256func(unsigned char *hash, const unsigned char *data, int len)
|
|||||||
|
|
||||||
extern "C" void groestlhash(void *state, const void *input)
|
extern "C" void groestlhash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
// Tryout GPU-groestl
|
// CPU-groestl
|
||||||
|
sph_groestl512_context ctx_groestl[2];
|
||||||
sph_groestl512_context ctx_groestl[2];
|
|
||||||
static unsigned char pblank[1];
|
|
||||||
uint32_t mask = 8;
|
|
||||||
uint32_t zero = 0;
|
|
||||||
|
|
||||||
|
|
||||||
//these uint512 in the c++ source of the client are backed by an array of uint32
|
//these uint512 in the c++ source of the client are backed by an array of uint32
|
||||||
uint32_t hashA[16], hashB[16];
|
uint32_t hashA[16], hashB[16];
|
||||||
|
@ -219,10 +219,9 @@ extern "C" void hashlog_purge_all(void)
|
|||||||
extern "C" void hashlog_dump_job(char* jobid)
|
extern "C" void hashlog_dump_job(char* jobid)
|
||||||
{
|
{
|
||||||
if (opt_debug) {
|
if (opt_debug) {
|
||||||
int deleted = 0;
|
|
||||||
uint64_t njobid = hextouint(jobid);
|
uint64_t njobid = hextouint(jobid);
|
||||||
uint64_t keypfx = (njobid << 32);
|
uint64_t keypfx = (njobid << 32);
|
||||||
uint32_t sz = tlastshares.size();
|
// uint32_t sz = tlastshares.size();
|
||||||
std::map<uint64_t, hashlog_data>::iterator i = tlastshares.begin();
|
std::map<uint64_t, hashlog_data>::iterator i = tlastshares.begin();
|
||||||
while (i != tlastshares.end()) {
|
while (i != tlastshares.end()) {
|
||||||
if ((keypfx & i->first) == keypfx) {
|
if ((keypfx & i->first) == keypfx) {
|
||||||
|
12
miner.h
12
miner.h
@ -51,6 +51,14 @@ void *alloca (size_t);
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
|
||||||
|
# define _ALIGN(x) __align__(x)
|
||||||
|
#elif _MSC_VER
|
||||||
|
# define _ALIGN(x) __declspec(align(x))
|
||||||
|
#else
|
||||||
|
# define _ALIGN(x) __attribute__ ((aligned(x)))
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
#define LOG_BLUE 0x10 /* unique value */
|
#define LOG_BLUE 0x10 /* unique value */
|
||||||
@ -200,6 +208,10 @@ static inline void le16enc(void *pp, uint16_t x)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* used for struct work */
|
||||||
|
void *aligned_calloc(int size);
|
||||||
|
void aligned_free(void *ptr);
|
||||||
|
|
||||||
#if JANSSON_MAJOR_VERSION >= 2
|
#if JANSSON_MAJOR_VERSION >= 2
|
||||||
#define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr))
|
#define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr))
|
||||||
#else
|
#else
|
||||||
|
@ -149,7 +149,7 @@ static void
|
|||||||
SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
|
SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
|
||||||
unsigned ub, unsigned n, void *dst)
|
unsigned ub, unsigned n, void *dst)
|
||||||
{
|
{
|
||||||
unsigned current,j;
|
unsigned current;
|
||||||
DSTATE;
|
DSTATE;
|
||||||
|
|
||||||
#if SPH_64
|
#if SPH_64
|
||||||
|
29
util.c
29
util.c
@ -382,7 +382,7 @@ json_t *json_rpc_call(CURL *curl, const char *url,
|
|||||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||||
|
|
||||||
if (opt_protocol)
|
if (opt_protocol)
|
||||||
applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req);
|
applog(LOG_DEBUG, "JSON protocol request:\n%s", rpc_req);
|
||||||
|
|
||||||
upload_data.buf = rpc_req;
|
upload_data.buf = rpc_req;
|
||||||
upload_data.len = strlen(rpc_req);
|
upload_data.len = strlen(rpc_req);
|
||||||
@ -481,6 +481,33 @@ err_out:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Allocate a zero-filled block of `size` bytes aligned on a 64-byte boundary.
 *
 * Unlike malloc, calloc sets the memory to zero; this helper keeps that
 * guarantee on both code paths.
 *
 * @param size number of usable bytes requested
 * @return the aligned block, or NULL if size is negative or the underlying
 *         allocation fails. Release it with aligned_free(), not free().
 */
void *aligned_calloc(int size)
{
	const int ALIGN = 64; // cache line
#ifdef _MSC_VER
	void *res = (size < 0) ? NULL : _aligned_malloc((size_t)size, ALIGN);
	/* _aligned_malloc does not clear memory and may fail */
	if (res)
		memset(res, 0, (size_t)size);
	return res;
#else
	unsigned char *mem;
	void **ptr;

	if (size < 0)
		return NULL;

	/* over-allocate: alignment slack plus one slot for the raw pointer */
	mem = (unsigned char *)calloc(1, (size_t)size + ALIGN + sizeof(void *));
	if (!mem)
		return NULL;

	/* round down from (mem + ALIGN + slot) so at least one slot precedes ptr */
	ptr = (void **)(((size_t)mem + ALIGN + sizeof(void *)) & ~(size_t)(ALIGN - 1));
	/* remember the pointer calloc() returned, just below the aligned block */
	ptr[-1] = mem;
	return ptr;
#endif
}
|
||||||
|
|
||||||
|
/**
 * Release a block previously returned by aligned_calloc().
 *
 * @param ptr aligned block to release; NULL is accepted and ignored,
 *            matching the behaviour of free().
 */
void aligned_free(void *ptr)
{
#ifdef _MSC_VER
	_aligned_free(ptr);
#else
	/* aligned_calloc() stores the raw calloc() pointer in the slot
	 * immediately below the aligned block; that is what must be freed */
	if (ptr)
		free(((void **)ptr)[-1]);
#endif
}
|
||||||
|
|
||||||
void cbin2hex(char *out, const char *in, size_t len)
|
void cbin2hex(char *out, const char *in, size_t len)
|
||||||
{
|
{
|
||||||
if (out) {
|
if (out) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user