mirror of
https://github.com/GOSTSec/ccminer
synced 2025-02-02 01:44:32 +00:00
Some work on data alignment
linux: add -march=native (we build it ourselves) and some other flags + remove unused vars (seen with -Wall)
This commit is contained in:
parent
93bb428bdf
commit
5bc969fa57
@ -49,10 +49,13 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
|
||||
x11/s3.cu
|
||||
|
||||
if HAVE_WINDOWS
|
||||
ccminer_SOURCES += compat/winansi.c
|
||||
endif
|
||||
|
||||
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
||||
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
|
||||
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
|
||||
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
|
||||
|
||||
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
||||
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
|
||||
|
@ -386,7 +386,7 @@ static void blake256mid(uint32_t *output, const uint32_t *input, int8_t rounds =
|
||||
__host__
|
||||
void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget)
|
||||
{
|
||||
uint32_t data[11];
|
||||
uint32_t _ALIGN(64) data[11];
|
||||
memcpy(data, midstate, 32);
|
||||
data[8] = penddata[0];
|
||||
data[9] = penddata[1];
|
||||
@ -402,9 +402,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
uint64_t targetHigh = ((uint64_t*)ptarget)[3]; // 0x00000000.0fffffff
|
||||
uint32_t endiandata[20];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
#if PRECALC64
|
||||
uint32_t midstate[8];
|
||||
uint32_t _ALIGN(64) midstate[8];
|
||||
#else
|
||||
uint32_t crcsum;
|
||||
#endif
|
||||
|
@ -5,5 +5,7 @@
|
||||
|
||||
#--ptxas-options=\"-v -dlcm=cg\""
|
||||
|
||||
CUDA_CFLAGS="-O3" ./configure "CFLAGS=-O3" "CXXFLAGS=-O3" --with-cuda=/usr/local/cuda
|
||||
extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
CUDA_CFLAGS="-O3 -Xcompiler -Wall" ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda
|
||||
|
||||
|
20
cpu-miner.c
20
cpu-miner.c
@ -383,7 +383,7 @@ struct work {
|
||||
uint32_t scanned_to;
|
||||
};
|
||||
|
||||
static struct work g_work;
|
||||
static struct work _ALIGN(64) g_work;
|
||||
static time_t g_work_time;
|
||||
static pthread_mutex_t g_work_lock;
|
||||
|
||||
@ -484,11 +484,10 @@ static int share_result(int result, const char *reason)
|
||||
{
|
||||
char s[345];
|
||||
double hashrate;
|
||||
int i, ret = 0;
|
||||
|
||||
hashrate = 0.;
|
||||
pthread_mutex_lock(&stats_lock);
|
||||
for (i = 0; i < opt_n_threads; i++)
|
||||
for (int i = 0; i < opt_n_threads; i++)
|
||||
hashrate += thr_hashrates[i];
|
||||
result ? accepted_count++ : rejected_count++;
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
@ -651,8 +650,8 @@ static bool get_upstream_work(CURL *curl, struct work *work)
|
||||
if (opt_protocol && rc) {
|
||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||
/* show time because curl can be slower against versions/config */
|
||||
applog(LOG_DEBUG, "got new work in %u µs",
|
||||
diff.tv_sec * 1000000 + diff.tv_usec);
|
||||
applog(LOG_DEBUG, "got new work in %.2f ms",
|
||||
(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
|
||||
}
|
||||
|
||||
json_decref(val);
|
||||
@ -667,7 +666,7 @@ static void workio_cmd_free(struct workio_cmd *wc)
|
||||
|
||||
switch (wc->cmd) {
|
||||
case WC_SUBMIT_WORK:
|
||||
free(wc->u.work);
|
||||
aligned_free(wc->u.work);
|
||||
break;
|
||||
default: /* do nothing */
|
||||
break;
|
||||
@ -682,7 +681,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
||||
struct work *ret_work;
|
||||
int failures = 0;
|
||||
|
||||
ret_work = (struct work*)calloc(1, sizeof(*ret_work));
|
||||
ret_work = (struct work*)aligned_calloc(sizeof(*ret_work));
|
||||
if (!ret_work)
|
||||
return false;
|
||||
|
||||
@ -690,7 +689,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
||||
while (!get_upstream_work(curl, ret_work)) {
|
||||
if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) {
|
||||
applog(LOG_ERR, "json_rpc_call failed, terminating workio thread");
|
||||
free(ret_work);
|
||||
aligned_free(ret_work);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -702,7 +701,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
|
||||
|
||||
/* send work to requesting thread */
|
||||
if (!tq_push(wc->thr->q, ret_work))
|
||||
free(ret_work);
|
||||
aligned_free(ret_work);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -822,7 +821,7 @@ static bool submit_work(struct thr_info *thr, const struct work *work_in)
|
||||
if (!wc)
|
||||
return false;
|
||||
|
||||
wc->u.work = (struct work *)malloc(sizeof(*work_in));
|
||||
wc->u.work = (struct work *)aligned_calloc(sizeof(*work_in));
|
||||
if (!wc->u.work)
|
||||
goto err_out;
|
||||
|
||||
@ -946,7 +945,6 @@ static void *miner_thread(void *userdata)
|
||||
struct work work;
|
||||
uint32_t max_nonce;
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
||||
unsigned char *scratchbuf = NULL;
|
||||
bool work_done = false;
|
||||
bool extrajob = false;
|
||||
char s[16];
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(__INTELLISENSE__)
|
||||
/* reduce warnings */
|
||||
#include <device_functions.h>
|
||||
#include <device_launch_parameters.h>
|
||||
|
@ -42,13 +42,8 @@ void sha256func(unsigned char *hash, const unsigned char *data, int len)
|
||||
|
||||
extern "C" void groestlhash(void *state, const void *input)
|
||||
{
|
||||
// Tryout GPU-groestl
|
||||
|
||||
sph_groestl512_context ctx_groestl[2];
|
||||
static unsigned char pblank[1];
|
||||
uint32_t mask = 8;
|
||||
uint32_t zero = 0;
|
||||
|
||||
// CPU-groestl
|
||||
sph_groestl512_context ctx_groestl[2];
|
||||
|
||||
//these uint512 in the c++ source of the client are backed by an array of uint32
|
||||
uint32_t hashA[16], hashB[16];
|
||||
|
@ -219,10 +219,9 @@ extern "C" void hashlog_purge_all(void)
|
||||
extern "C" void hashlog_dump_job(char* jobid)
|
||||
{
|
||||
if (opt_debug) {
|
||||
int deleted = 0;
|
||||
uint64_t njobid = hextouint(jobid);
|
||||
uint64_t keypfx = (njobid << 32);
|
||||
uint32_t sz = tlastshares.size();
|
||||
// uint32_t sz = tlastshares.size();
|
||||
std::map<uint64_t, hashlog_data>::iterator i = tlastshares.begin();
|
||||
while (i != tlastshares.end()) {
|
||||
if ((keypfx & i->first) == keypfx) {
|
||||
@ -235,4 +234,4 @@ extern "C" void hashlog_dump_job(char* jobid)
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
12
miner.h
12
miner.h
@ -51,6 +51,14 @@ void *alloca (size_t);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
 * _ALIGN(x): request x-byte alignment for a variable declaration,
 * e.g. `uint32_t _ALIGN(64) data[20];`.
 * Expands to the alignment syntax of the compiler in use:
 * CUDA device code, MSVC, or the GCC/Clang attribute otherwise.
 */
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
# define _ALIGN(x) __align__(x)
#elif defined(_MSC_VER)
/* defined() avoids evaluating an undefined identifier (-Wundef) on other compilers */
# define _ALIGN(x) __declspec(align(x))
#else
# define _ALIGN(x) __attribute__ ((aligned(x)))
#endif
|
||||
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
#include <syslog.h>
|
||||
#define LOG_BLUE 0x10 /* unique value */
|
||||
@ -200,6 +208,10 @@ static inline void le16enc(void *pp, uint16_t x)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* used for struct work */
|
||||
void *aligned_calloc(int size);
|
||||
void aligned_free(void *ptr);
|
||||
|
||||
#if JANSSON_MAJOR_VERSION >= 2
|
||||
#define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr))
|
||||
#else
|
||||
|
@ -149,7 +149,7 @@ static void
|
||||
SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
|
||||
unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
unsigned current,j;
|
||||
unsigned current;
|
||||
DSTATE;
|
||||
|
||||
#if SPH_64
|
||||
|
29
util.c
29
util.c
@ -382,7 +382,7 @@ json_t *json_rpc_call(CURL *curl, const char *url,
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
|
||||
if (opt_protocol)
|
||||
applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req);
|
||||
applog(LOG_DEBUG, "JSON protocol request:\n%s", rpc_req);
|
||||
|
||||
upload_data.buf = rpc_req;
|
||||
upload_data.len = strlen(rpc_req);
|
||||
@ -481,6 +481,33 @@ err_out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Allocate `size` zero-filled bytes aligned on a 64-byte (cache line) boundary.
 *
 * Unlike malloc, the returned memory is set to zero (calloc semantics).
 * The block must be released with aligned_free(), never with plain free().
 *
 * @param size number of usable bytes requested
 * @return the aligned pointer, or NULL if size is negative or the
 *         underlying allocation fails
 */
void *aligned_calloc(int size)
{
	const size_t ALIGN = 64; // cache line
#ifdef _MSC_VER
	void *res = (size < 0) ? NULL : _aligned_malloc((size_t)size, ALIGN);
	if (res)
		memset(res, 0, (size_t)size);
	return res;
#else
	/* over-allocate: alignment slack plus one slot to remember the raw pointer */
	void *mem = (size < 0) ? NULL : calloc(1, (size_t)size + ALIGN + sizeof(void *));
	void **ptr;

	if (!mem)
		return NULL;

	/* round down from (mem + ALIGN + slot) so at least one slot precedes ptr */
	ptr = (void **)(((size_t)mem + ALIGN + sizeof(void *)) & ~(ALIGN - 1));
	ptr[-1] = mem; /* read back by aligned_free() */
	return ptr;
#endif
}
|
||||
|
||||
/**
 * Release a block obtained from aligned_calloc().
 *
 * @param ptr pointer returned by aligned_calloc(); NULL is accepted and
 *            ignored, mirroring free()
 */
void aligned_free(void *ptr)
{
#ifdef _MSC_VER
	_aligned_free(ptr);
#else
	/* the raw calloc() pointer is stored in the slot just before ptr */
	if (ptr)
		free(((void **)ptr)[-1]);
#endif
}
|
||||
|
||||
void cbin2hex(char *out, const char *in, size_t len)
|
||||
{
|
||||
if (out) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user