mirror of
https://github.com/GOSTSec/ccminer
synced 2025-03-09 20:11:11 +00:00
wildkeccak, basic stratum port of rpc 2.0
scratchpad delete fix and redownload, reduce rejects (work in progress)
This commit is contained in:
parent
099389f64f
commit
2bbccc5ff4
@ -39,6 +39,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
Algo256/blake256.cu Algo256/decred.cu Algo256/vanilla.cu Algo256/keccak256.cu \
|
||||
Algo256/blake2s.cu sph/blake2s.c \
|
||||
Algo256/bmw.cu Algo256/cuda_bmw.cu \
|
||||
crypto/xmr-rpc.cpp crypto/wildkeccak-cpu.cpp crypto/wildkeccak.cu \
|
||||
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
|
||||
JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
|
||||
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
|
||||
|
12
README.txt
12
README.txt
@ -1,5 +1,5 @@
|
||||
|
||||
ccminer 1.8.4 (Dec. 2016) "Winter boosts: sib, veltor, blake2s"
|
||||
ccminer 2.0 (December 2016) "Boolberry's WildKeccak RPC 2.0"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
@ -51,6 +51,7 @@ Woodcoin (Double Skein)
|
||||
Vanilla (Blake256 8-rounds - double sha256)
|
||||
Vertcoin Lyra2RE
|
||||
Ziftrcoin (ZR5)
|
||||
Boolberry (Wild Keccak)
|
||||
|
||||
where some of these coins have a VERY NOTABLE nVidia advantage
|
||||
over competing AMD (OpenCL Only) implementations.
|
||||
@ -110,6 +111,7 @@ its command line interface and options.
|
||||
vanilla use to mine Vanilla (Blake256)
|
||||
veltor use to mine VeltorCoin
|
||||
whirlpool use to mine Joincoin
|
||||
wildkeccak use to mine Boolberry (Stratum only)
|
||||
zr5 use to mine ZiftrCoin
|
||||
|
||||
-d, --devices gives a comma separated list of CUDA device IDs
|
||||
@ -184,6 +186,12 @@ Scrypt specific options:
|
||||
--no-autotune disable auto-tuning of kernel launch parameters
|
||||
|
||||
|
||||
Boolberry specific:
|
||||
-l, --launch-config gives the launch configuration for each kernel
|
||||
in a comma separated list, one per device.
|
||||
-k, --scratchpad url Url used to download the scratchpad cache.
|
||||
|
||||
|
||||
>>> Examples <<<
|
||||
|
||||
|
||||
@ -206,6 +214,8 @@ Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs
|
||||
Example for Groestlcoin solo mining
|
||||
ccminer -q -s 1 -a groestl -o http://127.0.0.1:1441/ -u USERNAME -p PASSWORD
|
||||
|
||||
Example for Boolberry
|
||||
ccminer -a wildkeccak -o stratum+tcp://bbr.suprnova.cc:7777 -u tpruvot.donate -p x -k http://bbr.suprnova.cc/scratchpad.bin -l 64x360
|
||||
|
||||
Example for Scrypt-N (2048) on Nicehash
|
||||
ccminer -a scrypt:10 -o stratum+tcp://stratum.nicehash.com:3335 -u 3EujYFcoBzWvpUEvbe3obEG95mBuU88QBD -p x
|
||||
|
2
algos.h
2
algos.h
@ -48,6 +48,7 @@ enum sha_algos {
|
||||
ALGO_WHIRLCOIN,
|
||||
ALGO_WHIRLPOOL,
|
||||
ALGO_WHIRLPOOLX,
|
||||
ALGO_WILDKECCAK,
|
||||
ALGO_ZR5,
|
||||
ALGO_AUTO,
|
||||
ALGO_COUNT
|
||||
@ -99,6 +100,7 @@ static const char *algo_names[] = {
|
||||
"whirlcoin",
|
||||
"whirlpool",
|
||||
"whirlpoolx",
|
||||
"wildkeccak",
|
||||
"zr5",
|
||||
"auto", /* reserved for multi algo */
|
||||
""
|
||||
|
@ -75,6 +75,7 @@ void algo_free_all(int thr_id)
|
||||
free_veltor(thr_id);
|
||||
free_whirl(thr_id);
|
||||
//free_whirlx(thr_id);
|
||||
free_wildkeccak(thr_id);
|
||||
free_x11evo(thr_id);
|
||||
free_x11(thr_id);
|
||||
free_x13(thr_id);
|
||||
|
107
ccminer.cpp
107
ccminer.cpp
@ -43,6 +43,8 @@
|
||||
#include "miner.h"
|
||||
#include "algos.h"
|
||||
#include "sia/sia-rpc.h"
|
||||
#include "crypto/xmr-rpc.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#ifdef WIN32
|
||||
@ -95,7 +97,7 @@ bool allow_mininginfo = true;
|
||||
bool check_dups = false;
|
||||
bool check_stratum_jobs = false;
|
||||
|
||||
static bool submit_old = false;
|
||||
bool submit_old = false;
|
||||
bool use_syslog = false;
|
||||
bool use_colors = true;
|
||||
int use_pok = 0;
|
||||
@ -160,6 +162,8 @@ volatile bool pool_is_switching = false;
|
||||
volatile int pool_switch_count = 0;
|
||||
bool conditional_pool_rotate = false;
|
||||
|
||||
extern char* opt_scratchpad_url;
|
||||
|
||||
// current connection
|
||||
char *rpc_user = NULL;
|
||||
char *rpc_pass;
|
||||
@ -257,6 +261,7 @@ Options:\n\
|
||||
x14 X14\n\
|
||||
x15 X15\n\
|
||||
x17 X17\n\
|
||||
wildkeccak Boolberry\n\
|
||||
zr5 ZR5 (ZiftrCoin)\n\
|
||||
-d, --devices Comma separated list of CUDA devices to use.\n\
|
||||
Device IDs start counting from 0! Alternatively takes\n\
|
||||
@ -335,7 +340,7 @@ static char const short_options[] =
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
"S"
|
||||
#endif
|
||||
"a:Bc:i:Dhp:Px:f:m:nqr:R:s:t:T:o:u:O:Vd:N:b:l:L:";
|
||||
"a:Bc:k:i:Dhp:Px:f:m:nqr:R:s:t:T:o:u:O:Vd:N:b:l:L:";
|
||||
|
||||
struct option options[] = {
|
||||
{ "algo", 1, NULL, 'a' },
|
||||
@ -360,9 +365,10 @@ struct option options[] = {
|
||||
{ "no-stratum", 0, NULL, 1007 },
|
||||
{ "no-autotune", 0, NULL, 1004 }, // scrypt
|
||||
{ "interactive", 1, NULL, 1050 }, // scrypt
|
||||
{ "launch-config", 1, NULL, 'l' }, // scrypt
|
||||
{ "lookup-gap", 1, NULL, 'L' }, // scrypt
|
||||
{ "texture-cache", 1, NULL, 1051 },// scrypt
|
||||
{ "launch-config", 1, NULL, 'l' }, // scrypt & bbr
|
||||
{ "scratchpad", 1, NULL, 'k' }, // bbr
|
||||
{ "max-temp", 1, NULL, 1060 },
|
||||
{ "max-diff", 1, NULL, 1061 },
|
||||
{ "max-rate", 1, NULL, 1062 },
|
||||
@ -431,6 +437,13 @@ Scrypt specific options:\n\
|
||||
--no-autotune disable auto-tuning of kernel launch parameters\n\
|
||||
";
|
||||
|
||||
static char const xmr_usage[] = "\n\
|
||||
CryptoNote specific options:\n\
|
||||
-l, --launch-config gives the launch configuration for each kernel\n\
|
||||
in a comma separated list, one per device.\n\
|
||||
-k, --scratchpad url Url used to download the scratchpad cache.\n\
|
||||
";
|
||||
|
||||
struct work _ALIGN(64) g_work;
|
||||
volatile time_t g_work_time;
|
||||
pthread_mutex_t g_work_lock;
|
||||
@ -544,8 +557,7 @@ void proper_exit(int reason)
|
||||
exit(reason);
|
||||
}
|
||||
|
||||
static bool jobj_binary(const json_t *obj, const char *key,
|
||||
void *buf, size_t buflen)
|
||||
bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen)
|
||||
{
|
||||
const char *hexstr;
|
||||
json_t *tmp;
|
||||
@ -608,6 +620,8 @@ static bool work_decode(const json_t *val, struct work *work)
|
||||
data_size = 80;
|
||||
adata_sz = data_size / 4;
|
||||
break;
|
||||
case ALGO_WILDKECCAK:
|
||||
return rpc2_job_decode(val, work);
|
||||
default:
|
||||
data_size = 128;
|
||||
adata_sz = data_size / 4;
|
||||
@ -780,6 +794,18 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
||||
bool stale_work = false;
|
||||
int idnonce = 0;
|
||||
|
||||
if (pool->type & POOL_STRATUM && stratum.rpc2) {
|
||||
struct work submit_work;
|
||||
memcpy(&submit_work, work, sizeof(struct work));
|
||||
bool sent = hashlog_already_submittted(submit_work.job_id, submit_work.nonces[0]);
|
||||
if (sent) {
|
||||
return true;
|
||||
}
|
||||
bool ret = rpc2_stratum_submit(pool, &submit_work);
|
||||
hashlog_remember_submit(&submit_work, submit_work.nonces[0]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* discard if a newer block was received */
|
||||
stale_work = work->height && work->height < g_work.height;
|
||||
if (have_stratum && !stale_work && opt_algo != ALGO_ZR5 && opt_algo != ALGO_SCRYPT_JANE) {
|
||||
@ -1405,6 +1431,9 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
uchar merkle_root[64] = { 0 };
|
||||
int i;
|
||||
|
||||
if (sctx->rpc2)
|
||||
return rpc2_stratum_gen_work(sctx, work);
|
||||
|
||||
if (!sctx->job.job_id) {
|
||||
// applog(LOG_WARNING, "stratum_gen_work: job not yet retrieved");
|
||||
return false;
|
||||
@ -1733,10 +1762,16 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
|
||||
if (opt_algo == ALGO_WILDKECCAK) {
|
||||
nonceptr = (uint32_t*) (((char*)work.data) + 1);
|
||||
wcmpoft = 2;
|
||||
wcmplen = 32;
|
||||
}
|
||||
|
||||
if (have_stratum) {
|
||||
uint32_t sleeptime = 0;
|
||||
|
||||
if (opt_algo == ALGO_DECRED)
|
||||
if (opt_algo == ALGO_DECRED || stratum.rpc2)
|
||||
work_done = true; // force "regen" hash
|
||||
while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) {
|
||||
usleep(100*1000);
|
||||
@ -1748,7 +1783,7 @@ static void *miner_thread(void *userdata)
|
||||
}
|
||||
if (sleeptime && opt_debug && !opt_quiet)
|
||||
applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100);
|
||||
nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
//nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
extrajob |= work_done;
|
||||
|
||||
@ -1835,6 +1870,8 @@ static void *miner_thread(void *userdata)
|
||||
nonceptr[1] += 1;
|
||||
nonceptr[2] |= thr_id;
|
||||
|
||||
} else if (opt_algo == ALGO_WILDKECCAK) {
|
||||
//nonceptr[1] += 1;
|
||||
} else if (opt_algo == ALGO_SIA) {
|
||||
// suprnova job_id check without data/target/height change...
|
||||
if (have_stratum && strcmp(work.job_id, g_work.job_id)) {
|
||||
@ -1879,6 +1916,11 @@ static void *miner_thread(void *userdata)
|
||||
gpulog(LOG_DEBUG, thr_id, "no data");
|
||||
continue;
|
||||
}
|
||||
if (stratum.rpc2 && !scratchpad_size) {
|
||||
sleep(1);
|
||||
if (!thr_id) pools[cur_pooln].wait_time += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* conditional mining */
|
||||
if (!wanna_mine(thr_id)) {
|
||||
@ -2196,6 +2238,9 @@ static void *miner_thread(void *userdata)
|
||||
//case ALGO_WHIRLPOOLX:
|
||||
// rc = scanhash_whirlx(thr_id, &work, max_nonce, &hashes_done);
|
||||
// break;
|
||||
case ALGO_WILDKECCAK:
|
||||
rc = scanhash_wildkeccak(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
case ALGO_X11EVO:
|
||||
rc = scanhash_x11evo(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
@ -2242,6 +2287,7 @@ static void *miner_thread(void *userdata)
|
||||
case ALGO_LBRY:
|
||||
case ALGO_SIA:
|
||||
case ALGO_VELTOR:
|
||||
case ALGO_WILDKECCAK:
|
||||
// migrated algos
|
||||
break;
|
||||
case ALGO_ZR5:
|
||||
@ -2254,6 +2300,13 @@ static void *miner_thread(void *userdata)
|
||||
work.nonces[1] = nonceptr[2];
|
||||
}
|
||||
|
||||
if (stratum.rpc2 && rc == -EBUSY || work_restart[thr_id].restart) {
|
||||
// bbr scratchpad download or stale result
|
||||
sleep(1);
|
||||
if (!thr_id) pools[cur_pooln].wait_time += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rc > 0 && opt_debug)
|
||||
applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", work.nonces[0], swab32(work.nonces[0]));
|
||||
if (rc > 1 && opt_debug)
|
||||
@ -2548,7 +2601,7 @@ static bool stratum_handle_response(char *buf)
|
||||
err_val = json_object_get(val, "error");
|
||||
id_val = json_object_get(val, "id");
|
||||
|
||||
if (!id_val || json_is_null(id_val) || !res_val)
|
||||
if (!id_val || json_is_null(id_val))
|
||||
goto out;
|
||||
|
||||
// ignore late login answers
|
||||
@ -2564,8 +2617,24 @@ static bool stratum_handle_response(char *buf)
|
||||
// store time required to the pool to answer to a submit
|
||||
stratum.answer_msec = (1000 * diff.tv_sec) + (uint32_t) (0.001 * diff.tv_usec);
|
||||
|
||||
share_result(json_is_true(res_val), stratum.pooln, stratum.sharediff,
|
||||
err_val ? json_string_value(json_array_get(err_val, 1)) : NULL);
|
||||
if (stratum.rpc2) {
|
||||
|
||||
const char* reject_reason = err_val ? json_string_value(json_object_get(err_val, "message")) : NULL;
|
||||
// {"id":4,"jsonrpc":"2.0","error":null,"result":{"status":"OK"}}
|
||||
share_result(json_is_null(err_val), stratum.pooln, stratum.sharediff, reject_reason);
|
||||
if (reject_reason) {
|
||||
g_work_time = 0;
|
||||
restart_threads();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
if (!res_val)
|
||||
goto out;
|
||||
|
||||
share_result(json_is_true(res_val), stratum.pooln, stratum.sharediff,
|
||||
err_val ? json_string_value(json_array_get(err_val, 1)) : NULL);
|
||||
}
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
@ -2641,6 +2710,10 @@ wait_stratum_url:
|
||||
}
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_WILDKECCAK) {
|
||||
rpc2_stratum_thread_stuff(pool);
|
||||
}
|
||||
|
||||
if (switchn != pool_switch_count) goto pool_switched;
|
||||
|
||||
if (stratum.job.job_id &&
|
||||
@ -2733,6 +2806,9 @@ static void show_usage_and_exit(int status)
|
||||
if (opt_algo == ALGO_SCRYPT || opt_algo == ALGO_SCRYPT_JANE) {
|
||||
printf(scrypt_usage);
|
||||
}
|
||||
if (opt_algo == ALGO_WILDKECCAK) {
|
||||
printf(xmr_usage);
|
||||
}
|
||||
proper_exit(status);
|
||||
}
|
||||
|
||||
@ -2814,6 +2890,9 @@ void parse_arg(int key, char *arg)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'k':
|
||||
opt_scratchpad_url = strdup(arg);
|
||||
break;
|
||||
case 'i':
|
||||
d = atof(arg);
|
||||
v = (uint32_t) d;
|
||||
@ -3011,7 +3090,7 @@ void parse_arg(int key, char *arg)
|
||||
case 1004:
|
||||
opt_autotune = false;
|
||||
break;
|
||||
case 'l': /* scrypt --launch-config */
|
||||
case 'l': /* --launch-config */
|
||||
{
|
||||
char *last = NULL, *pch = strtok(arg,",");
|
||||
int n = 0;
|
||||
@ -3582,6 +3661,12 @@ int main(int argc, char *argv[])
|
||||
allow_mininginfo = false;
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_WILDKECCAK) {
|
||||
rpc2_init();
|
||||
applog(LOG_INFO, "Using CryptoNote JSON-RPC 2.0");
|
||||
GetScratchpad();
|
||||
}
|
||||
|
||||
flags = !opt_benchmark && strncmp(rpc_url, "https:", 6)
|
||||
? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL)
|
||||
: CURL_GLOBAL_ALL;
|
||||
|
@ -39,7 +39,7 @@
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='Win32'">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='x64'">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.props" />
|
||||
@ -230,6 +230,9 @@
|
||||
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
|
||||
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crypto\mman.c" />
|
||||
<ClCompile Include="crypto\wildkeccak-cpu.cpp" />
|
||||
<ClCompile Include="crypto\xmr-rpc.cpp" />
|
||||
<ClCompile Include="nvapi.cpp" />
|
||||
<ClCompile Include="pools.cpp" />
|
||||
<ClCompile Include="util.cpp" />
|
||||
@ -248,6 +251,9 @@
|
||||
<ClCompile Include="myriadgroestl.cpp" />
|
||||
<ClCompile Include="lyra2\Lyra2.c" />
|
||||
<ClCompile Include="lyra2\Sponge.c" />
|
||||
<ClInclude Include="crypto\mman.h" />
|
||||
<ClInclude Include="crypto\wildkeccak.h" />
|
||||
<ClInclude Include="crypto\xmr-rpc.h" />
|
||||
<ClInclude Include="lyra2\cuda_lyra2_sm2.cuh" />
|
||||
<ClInclude Include="neoscrypt\neoscrypt.h" />
|
||||
<ClCompile Include="neoscrypt\neoscrypt.cpp" />
|
||||
@ -258,6 +264,9 @@
|
||||
<CudaCompile Include="Algo256\cuda_bmw.cu">
|
||||
<MaxRegCount>76</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\wildkeccak.cu">
|
||||
<MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">128</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="neoscrypt\cuda_neoscrypt.cu">
|
||||
<MaxRegCount>160</MaxRegCount>
|
||||
</CudaCompile>
|
||||
@ -531,7 +540,7 @@
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='Win32'">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='x64'">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.targets" />
|
||||
|
@ -88,6 +88,9 @@
|
||||
<Filter Include="Source Files\sia">
|
||||
<UniqueIdentifier>{86a896c0-1688-4854-98e0-285d166069a3}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\crypto">
|
||||
<UniqueIdentifier>{fea0fce3-c0fe-42f7-aa37-0cbba10b008a}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="compat\jansson\dump.c">
|
||||
@ -279,6 +282,15 @@
|
||||
<ClCompile Include="sia\sia-rpc.cpp">
|
||||
<Filter>Source Files\sia</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crypto\xmr-rpc.cpp">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crypto\mman.c">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crypto\wildkeccak-cpu.cpp">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="algos.h">
|
||||
@ -482,6 +494,15 @@
|
||||
<ClInclude Include="sia\sia-rpc.h">
|
||||
<Filter>Source Files\sia</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\mman.h">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\wildkeccak.h">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\xmr-rpc.h">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="cuda.cpp">
|
||||
@ -760,6 +781,9 @@
|
||||
<CudaCompile Include="sia\sia.cu">
|
||||
<Filter>Source Files\sia</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\wildkeccak.cu">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Image Include="res\ccminer.ico">
|
||||
|
@ -164,7 +164,7 @@
|
||||
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.8.4"
|
||||
#define PACKAGE_VERSION "2.0"
|
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be
|
||||
|
@ -1,4 +1,4 @@
|
||||
AC_INIT([ccminer], [1.8.4], [], [ccminer], [http://github.com/tpruvot/ccminer])
|
||||
AC_INIT([ccminer], [2.0], [], [ccminer], [http://github.com/tpruvot/ccminer])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@ -3,5 +3,5 @@
|
||||
extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16"
|
||||
|
||||
CUDA_CFLAGS="-O3 -lineno -Xcompiler -Wall -D_FORCE_INLINES" \
|
||||
./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so
|
||||
./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda-7.5 --with-nvml=libnvidia-ml.so
|
||||
|
||||
|
573
crypto/int128_c.h
Normal file
573
crypto/int128_c.h
Normal file
@ -0,0 +1,573 @@
|
||||
#ifndef UINT128_C_H
|
||||
#define UINT128_C_H
|
||||
|
||||
struct __uint128 {
|
||||
uint64_t Hi;
|
||||
uint64_t Lo;
|
||||
};
|
||||
typedef struct __uint128 uint128;
|
||||
|
||||
void Increment(uint128 * N)
|
||||
{
|
||||
uint64_t T = (N->Lo + 1);
|
||||
N->Hi += ((N->Lo ^T) & N->Lo) >> 63;
|
||||
N->Lo = T;
|
||||
}
|
||||
|
||||
void Decrement(uint128 * N)
|
||||
{
|
||||
uint64_t T = (N->Lo - 1);
|
||||
N->Hi -= ((T ^ N->Lo) & T) >> 63;
|
||||
N->Lo = T;
|
||||
}
|
||||
|
||||
void Add(uint128 * Ans, uint128 N, uint128 M)
|
||||
{
|
||||
uint64_t C = (((N.Lo & M.Lo) & 1) + (N.Lo >> 1) + (M.Lo >> 1)) >> 63;
|
||||
Ans->Hi = N.Hi + M.Hi + C;
|
||||
Ans->Lo = N.Lo + M.Lo;
|
||||
}
|
||||
|
||||
void Subtract(uint128 * Ans, uint128 N, uint128 M)
|
||||
{
|
||||
Ans->Lo = N.Lo - M.Lo;
|
||||
uint64_t C = (((Ans->Lo & M.Lo) & 1) + (M.Lo >> 1) + (Ans->Lo >> 1)) >> 63;
|
||||
Ans->Hi = N.Hi - (M.Hi + C);
|
||||
}
|
||||
|
||||
void inc128(uint128 N, uint128* A)
|
||||
{
|
||||
A->Lo = (N.Lo + 1);
|
||||
A->Hi = N.Hi + (((N.Lo ^ A->Lo) & N.Lo) >> 63);
|
||||
}
|
||||
|
||||
void dec128(uint128 N, uint128* A)
|
||||
{
|
||||
A->Lo = N.Lo - 1;
|
||||
A->Hi = N.Hi - (((A->Lo ^ N.Lo) & A->Lo) >> 63);
|
||||
}
|
||||
|
||||
void add128(uint128 N, uint128 M, uint128* A)
|
||||
{
|
||||
uint64_t C = (((N.Lo & M.Lo) & 1) + (N.Lo >> 1) + (M.Lo >> 1)) >> 63;
|
||||
A->Hi = N.Hi + M.Hi + C;
|
||||
A->Lo = N.Lo + M.Lo;
|
||||
}
|
||||
|
||||
void sub128(uint128 N, uint128 M, uint128* A)
|
||||
{
|
||||
A->Lo = N.Lo - M.Lo;
|
||||
uint64_t C = (((A->Lo & M.Lo) & 1) + (M.Lo >> 1) + (A->Lo >> 1)) >> 63;
|
||||
A->Hi = N.Hi - (M.Hi + C);
|
||||
}
|
||||
|
||||
void mult64to128(uint64_t u, uint64_t v, uint64_t * h, uint64_t *l)
|
||||
{
|
||||
uint64_t u1 = (u & 0xffffffff);
|
||||
uint64_t v1 = (v & 0xffffffff);
|
||||
uint64_t t = (u1 * v1);
|
||||
uint64_t w3 = (t & 0xffffffff);
|
||||
uint64_t k = (t >> 32);
|
||||
|
||||
u >>= 32;
|
||||
t = (u * v1) + k;
|
||||
k = (t & 0xffffffff);
|
||||
uint64_t w1 = (t >> 32);
|
||||
|
||||
v >>= 32;
|
||||
t = (u1 * v) + k;
|
||||
k = (t >> 32);
|
||||
|
||||
*h = (u * v) + w1 + k;
|
||||
*l = (t << 32) + w3;
|
||||
}
|
||||
|
||||
void mult128(uint128 N, uint128 M, uint128 * Ans)
|
||||
{
|
||||
mult64to128(N.Lo, M.Lo, &Ans->Hi, &Ans->Lo);
|
||||
Ans->Hi += (N.Hi * M.Lo) + (N.Lo * M.Hi);
|
||||
}
|
||||
|
||||
void mult128to256(uint128 N, uint128 M, uint128 * H, uint128 * L)
|
||||
{
|
||||
mult64to128(N.Hi, M.Hi, &H->Hi, &H->Lo);
|
||||
mult64to128(N.Lo, M.Lo, &L->Hi, &L->Lo);
|
||||
|
||||
uint128 T;
|
||||
mult64to128(N.Hi, M.Lo, &T.Hi, &T.Lo);
|
||||
L->Hi += T.Lo;
|
||||
if(L->Hi < T.Lo) // if L->Hi overflowed
|
||||
{
|
||||
Increment(H);
|
||||
}
|
||||
H->Lo += T.Hi;
|
||||
if(H->Lo < T.Hi) // if H->Lo overflowed
|
||||
{
|
||||
++H->Hi;
|
||||
}
|
||||
|
||||
mult64to128(N.Lo, M.Hi, &T.Hi, &T.Lo);
|
||||
L->Hi += T.Lo;
|
||||
if(L->Hi < T.Lo) // if L->Hi overflowed
|
||||
{
|
||||
Increment(H);
|
||||
}
|
||||
H->Lo += T.Hi;
|
||||
if(H->Lo < T.Hi) // if H->Lo overflowed
|
||||
{
|
||||
++H->Hi;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void sqr64to128(uint64_t r, uint64_t * h, uint64_t *l)
|
||||
{
|
||||
uint64_t r1 = (r & 0xffffffff);
|
||||
uint64_t t = (r1 * r1);
|
||||
uint64_t w3 = (t & 0xffffffff);
|
||||
uint64_t k = (t >> 32);
|
||||
|
||||
r >>= 32;
|
||||
uint64_t m = (r * r1);
|
||||
t = m + k;
|
||||
uint64_t w2 = (t & 0xffffffff);
|
||||
uint64_t w1 = (t >> 32);
|
||||
|
||||
t = m + w2;
|
||||
k = (t >> 32);
|
||||
*h = (r * r) + w1 + k;
|
||||
*l = (t << 32) + w3;
|
||||
}
|
||||
|
||||
void sqr128(uint128 R, uint128 * Ans)
|
||||
{
|
||||
sqr64to128(R.Lo, &Ans->Hi, &Ans->Lo);
|
||||
Ans->Hi += (R.Hi * R.Lo) << 1;
|
||||
}
|
||||
|
||||
void sqr128to256(uint128 R, uint128 * H, uint128 * L)
|
||||
{
|
||||
sqr64to128(R.Hi, &H->Hi, &H->Lo);
|
||||
sqr64to128(R.Lo, &L->Hi, &L->Lo);
|
||||
|
||||
uint128 T;
|
||||
mult64to128(R.Hi, R.Lo, &T.Hi, &T.Lo);
|
||||
|
||||
H->Hi += (T.Hi >> 63);
|
||||
T.Hi = (T.Hi << 1) | (T.Lo >> 63); // Shift Left 1 bit
|
||||
T.Lo <<= 1;
|
||||
|
||||
L->Hi += T.Lo;
|
||||
if(L->Hi < T.Lo) // if L->Hi overflowed
|
||||
{
|
||||
Increment(H);
|
||||
}
|
||||
|
||||
H->Lo += T.Hi;
|
||||
if(H->Lo < T.Hi) // if H->Lo overflowed
|
||||
{
|
||||
++H->Hi;
|
||||
}
|
||||
}
|
||||
|
||||
void shiftleft128(uint128 N, size_t S, uint128 * A)
|
||||
{
|
||||
uint64_t M1, M2;
|
||||
S &= 127;
|
||||
|
||||
M1 = ((((S + 127) | S) & 64) >> 6) - 1llu;
|
||||
M2 = (S >> 6) - 1llu;
|
||||
S &= 63;
|
||||
A->Hi = (N.Lo << S) & (~M2);
|
||||
A->Lo = (N.Lo << S) & M2;
|
||||
A->Hi |= ((N.Hi << S) | ((N.Lo >> (64 - S)) & M1)) & M2;
|
||||
|
||||
/*
|
||||
S &= 127;
|
||||
|
||||
if(S != 0)
|
||||
{
|
||||
if(S > 64)
|
||||
{
|
||||
A.Hi = N.Lo << (S - 64);
|
||||
A.Lo = 0;
|
||||
}
|
||||
else if(S < 64)
|
||||
{
|
||||
A.Hi = (N.Hi << S) | (N.Lo >> (64 - S));
|
||||
A.Lo = N.Lo << S;
|
||||
}
|
||||
else
|
||||
{
|
||||
A.Hi = N.Lo;
|
||||
A.Lo = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
A.Hi = N.Hi;
|
||||
A.Lo = N.Lo;
|
||||
}
|
||||
//*/
|
||||
}
|
||||
|
||||
void shiftright128(uint128 N, size_t S, uint128 * A)
|
||||
{
|
||||
uint64_t M1, M2;
|
||||
S &= 127;
|
||||
|
||||
M1 = ((((S + 127) | S) & 64) >> 6) - 1llu;
|
||||
M2 = (S >> 6) - 1llu;
|
||||
S &= 63;
|
||||
A->Lo = (N.Hi >> S) & (~M2);
|
||||
A->Hi = (N.Hi >> S) & M2;
|
||||
A->Lo |= ((N.Lo >> S) | ((N.Hi << (64 - S)) & M1)) & M2;
|
||||
|
||||
/*
|
||||
S &= 127;
|
||||
|
||||
if(S != 0)
|
||||
{
|
||||
if(S > 64)
|
||||
{
|
||||
A.Hi = N.Hi >> (S - 64);
|
||||
A.Lo = 0;
|
||||
}
|
||||
else if(S < 64)
|
||||
{
|
||||
A.Lo = (N.Lo >> S) | (N.Hi << (64 - S));
|
||||
A.Hi = N.Hi >> S;
|
||||
}
|
||||
else
|
||||
{
|
||||
A.Lo = N.Hi;
|
||||
A.Hi = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
A.Hi = N.Hi;
|
||||
A.Lo = N.Lo;
|
||||
}
|
||||
//*/
|
||||
}
|
||||
|
||||
|
||||
void not128(uint128 N, uint128 * A)
|
||||
{
|
||||
A->Hi = ~N.Hi;
|
||||
A->Lo = ~N.Lo;
|
||||
}
|
||||
|
||||
void or128(uint128 N1, uint128 N2, uint128 * A)
|
||||
{
|
||||
A->Hi = N1.Hi | N2.Hi;
|
||||
A->Lo = N1.Lo | N2.Lo;
|
||||
}
|
||||
|
||||
void and128(uint128 N1, uint128 N2, uint128 * A)
|
||||
{
|
||||
A->Hi = N1.Hi & N2.Hi;
|
||||
A->Lo = N1.Lo & N2.Lo;
|
||||
}
|
||||
|
||||
void xor128(uint128 N1, uint128 N2, uint128 * A)
|
||||
{
|
||||
A->Hi = N1.Hi ^ N2.Hi;
|
||||
A->Lo = N1.Lo ^ N2.Lo;
|
||||
}
|
||||
|
||||
size_t nlz64(uint64_t N)
|
||||
{
|
||||
uint64_t I;
|
||||
size_t C;
|
||||
|
||||
I = ~N;
|
||||
C = ((I ^ (I + 1)) & I) >> 63;
|
||||
|
||||
I = (N >> 32) + 0xffffffff;
|
||||
I = ((I & 0x100000000) ^ 0x100000000) >> 27;
|
||||
C += I; N <<= I;
|
||||
|
||||
I = (N >> 48) + 0xffff;
|
||||
I = ((I & 0x10000) ^ 0x10000) >> 12;
|
||||
C += I; N <<= I;
|
||||
|
||||
I = (N >> 56) + 0xff;
|
||||
I = ((I & 0x100) ^ 0x100) >> 5;
|
||||
C += I; N <<= I;
|
||||
|
||||
I = (N >> 60) + 0xf;
|
||||
I = ((I & 0x10) ^ 0x10) >> 2;
|
||||
C += I; N <<= I;
|
||||
|
||||
I = (N >> 62) + 3;
|
||||
I = ((I & 4) ^ 4) >> 1;
|
||||
C += I; N <<= I;
|
||||
|
||||
C += (N >> 63) ^ 1;
|
||||
|
||||
return C;
|
||||
}
|
||||
|
||||
size_t ntz64(uint64_t N)
|
||||
{
|
||||
uint64_t I = ~N;
|
||||
size_t C = ((I ^ (I + 1)) & I) >> 63;
|
||||
|
||||
I = (N & 0xffffffff) + 0xffffffff;
|
||||
I = ((I & 0x100000000) ^ 0x100000000) >> 27;
|
||||
C += I; N >>= I;
|
||||
|
||||
I = (N & 0xffff) + 0xffff;
|
||||
I = ((I & 0x10000) ^ 0x10000) >> 12;
|
||||
C += I; N >>= I;
|
||||
|
||||
I = (N & 0xff) + 0xff;
|
||||
I = ((I & 0x100) ^ 0x100) >> 5;
|
||||
C += I; N >>= I;
|
||||
|
||||
I = (N & 0xf) + 0xf;
|
||||
I = ((I & 0x10) ^ 0x10) >> 2;
|
||||
C += I; N >>= I;
|
||||
|
||||
I = (N & 3) + 3;
|
||||
I = ((I & 4) ^ 4) >> 1;
|
||||
C += I; N >>= I;
|
||||
|
||||
C += ((N & 1) ^ 1);
|
||||
|
||||
return C;
|
||||
}
|
||||
|
||||
size_t popcnt64(uint64_t V)
|
||||
{
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
V -= ((V >> 1) & 0x5555555555555555);
|
||||
V = (V & 0x3333333333333333) + ((V >> 2) & 0x3333333333333333);
|
||||
return ((V + (V >> 4) & 0xF0F0F0F0F0F0F0F) * 0x101010101010101) >> 56;
|
||||
}
|
||||
|
||||
size_t popcnt128(uint128 N)
|
||||
{
|
||||
return popcnt64(N.Hi) + popcnt64(N.Lo);
|
||||
}
|
||||
|
||||
|
||||
size_t nlz128(uint128 N)
|
||||
{
|
||||
return (N.Hi == 0) ? nlz64(N.Lo) + 64 : nlz64(N.Hi);
|
||||
}
|
||||
|
||||
size_t ntz128(uint128 N)
|
||||
{
|
||||
return (N.Lo == 0) ? ntz64(N.Hi) + 64 : ntz64(N.Lo);
|
||||
}
|
||||
int compare128(uint128 N1, uint128 N2)
|
||||
{
|
||||
return (((N1.Hi > N2.Hi) || ((N1.Hi == N2.Hi) && (N1.Lo > N2.Lo))) ? 1 : 0)
|
||||
- (((N1.Hi < N2.Hi) || ((N1.Hi == N2.Hi) && (N1.Lo < N2.Lo))) ? 1 : 0);
|
||||
}
|
||||
|
||||
void bindivmod128(uint128 M, uint128 N, uint128 * Q, uint128 *R)
|
||||
{
|
||||
Q->Hi = Q->Lo = 0;
|
||||
size_t Shift = nlz128(N) - nlz128(M);
|
||||
shiftleft128(N, Shift, &N);
|
||||
|
||||
do
|
||||
{
|
||||
shiftleft128(*Q, (size_t)1, Q);
|
||||
if(compare128(M, N) >= 0)
|
||||
{
|
||||
sub128(M, N, &M);
|
||||
Q->Lo |= 1;
|
||||
}
|
||||
|
||||
shiftright128(N, 1, &N);
|
||||
}while(Shift-- != 0);
|
||||
|
||||
R->Hi = M.Hi;
|
||||
R->Lo = M.Lo;
|
||||
}
|
||||
|
||||
void divmod128by64(const uint64_t u1, const uint64_t u0, uint64_t v, uint64_t * q, uint64_t * r)
|
||||
{
|
||||
const uint64_t b = 1ll << 32;
|
||||
uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right;
|
||||
size_t s;
|
||||
|
||||
s = nlz64(v);
|
||||
v <<= s;
|
||||
vn1 = v >> 32;
|
||||
vn0 = v & 0xffffffff;
|
||||
|
||||
if (s > 0)
|
||||
{
|
||||
un32 = (u1 << s) | (u0 >> (64 - s));
|
||||
un10 = u0 << s;
|
||||
}
|
||||
else
|
||||
{
|
||||
un32 = u1;
|
||||
un10 = u0;
|
||||
}
|
||||
|
||||
un1 = un10 >> 32;
|
||||
un0 = un10 & 0xffffffff;
|
||||
|
||||
q1 = un32 / vn1;
|
||||
rhat = un32 % vn1;
|
||||
|
||||
left = q1 * vn0;
|
||||
right = (rhat << 32) + un1;
|
||||
again1:
|
||||
if ((q1 >= b) || (left > right))
|
||||
{
|
||||
--q1;
|
||||
rhat += vn1;
|
||||
if (rhat < b)
|
||||
{
|
||||
left -= vn0;
|
||||
right = (rhat << 32) | un1;
|
||||
goto again1;
|
||||
}
|
||||
}
|
||||
|
||||
un21 = (un32 << 32) + (un1 - (q1 * v));
|
||||
|
||||
q0 = un21 / vn1;
|
||||
rhat = un21 % vn1;
|
||||
|
||||
left = q0 * vn0;
|
||||
right = (rhat << 32) | un0;
|
||||
again2:
|
||||
if ((q0 >= b) || (left > right))
|
||||
{
|
||||
--q0;
|
||||
rhat += vn1;
|
||||
if (rhat < b)
|
||||
{
|
||||
left -= vn0;
|
||||
right = (rhat << 32) | un0;
|
||||
goto again2;
|
||||
}
|
||||
}
|
||||
|
||||
*r = ((un21 << 32) + (un0 - (q0 * v))) >> s;
|
||||
*q = (q1 << 32) | q0;
|
||||
}
|
||||
|
||||
static void divmod128by128(uint128 M, uint128 N, uint128 * Q, uint128 * R)
|
||||
{
|
||||
if (N.Hi == 0)
|
||||
{
|
||||
if (M.Hi < N.Lo)
|
||||
{
|
||||
divmod128by64(M.Hi, M.Lo, N.Lo, &Q->Lo, &R->Lo);
|
||||
Q->Hi = 0;
|
||||
R->Hi = 0;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
Q->Hi = M.Hi / N.Lo;
|
||||
R->Hi = M.Hi % N.Lo;
|
||||
divmod128by64(R->Hi, M.Lo, N.Lo, &Q->Lo, &R->Lo);
|
||||
R->Hi = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t n = nlz64(N.Hi);
|
||||
|
||||
uint128 v1;
|
||||
shiftleft128(N, n, &v1);
|
||||
|
||||
uint128 u1;
|
||||
shiftright128(M, 1, &u1);
|
||||
|
||||
uint128 q1;
|
||||
divmod128by64(u1.Hi, u1.Lo, v1.Hi, &q1.Hi, &q1.Lo);
|
||||
q1.Hi = 0;
|
||||
shiftright128(q1, 63 - n, &q1);
|
||||
|
||||
if ((q1.Hi | q1.Lo) != 0)
|
||||
{
|
||||
dec128(q1, &q1);
|
||||
}
|
||||
|
||||
Q->Hi = q1.Hi;
|
||||
Q->Lo = q1.Lo;
|
||||
mult128(q1, N, &q1);
|
||||
sub128(M, q1, R);
|
||||
|
||||
if (compare128(*R, N) >= 0)
|
||||
{
|
||||
inc128(*Q, Q);
|
||||
sub128(*R, N, R);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void divmod128(uint128 M, uint128 N, uint128 * Q, uint128 * R)
|
||||
{
|
||||
size_t Nlz, Mlz, Ntz;
|
||||
int C;
|
||||
|
||||
Nlz = nlz128(N);
|
||||
Mlz = nlz128(M);
|
||||
Ntz = ntz128(N);
|
||||
|
||||
if(Nlz == 128)
|
||||
{
|
||||
return;
|
||||
}
|
||||
else if((M.Hi | N.Hi) == 0)
|
||||
{
|
||||
Q->Hi = R->Hi = 0;
|
||||
Q->Lo = M.Lo / N.Lo;
|
||||
R->Lo = M.Lo % N.Lo;
|
||||
return;
|
||||
}
|
||||
else if(Nlz == 127)
|
||||
{
|
||||
*Q = M;
|
||||
R->Hi = R->Lo = 0;
|
||||
return;
|
||||
}
|
||||
else if((Ntz + Nlz) == 127)
|
||||
{
|
||||
shiftright128(M, Ntz, Q);
|
||||
dec128(N, &N);
|
||||
and128(N, M, R);
|
||||
return;
|
||||
}
|
||||
|
||||
C = compare128(M, N);
|
||||
if(C < 0)
|
||||
{
|
||||
Q->Hi = Q->Lo = 0;
|
||||
*R = M;
|
||||
return;
|
||||
}
|
||||
else if(C == 0)
|
||||
{
|
||||
Q->Hi = R->Hi = R->Lo = 0;
|
||||
Q->Lo = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
if((Nlz - Mlz) > 5)
|
||||
{
|
||||
divmod128by128(M, N, Q, R);
|
||||
}
|
||||
else
|
||||
{
|
||||
bindivmod128(M, N, Q, R);
|
||||
}
|
||||
}
|
||||
#endif
|
180
crypto/mman.c
Normal file
180
crypto/mman.c
Normal file
@ -0,0 +1,180 @@
|
||||
|
||||
#include <windows.h>
|
||||
#include <errno.h>
|
||||
#include <io.h>
|
||||
|
||||
#include "mman.h"
|
||||
|
||||
#ifndef FILE_MAP_EXECUTE
|
||||
#define FILE_MAP_EXECUTE 0x0020
|
||||
#endif /* FILE_MAP_EXECUTE */
|
||||
|
||||
static int __map_mman_error(const DWORD err, const int deferr)
|
||||
{
|
||||
if (err == 0)
|
||||
return 0;
|
||||
//TODO: implement
|
||||
return err;
|
||||
}
|
||||
|
||||
static DWORD __map_mmap_prot_page(const int prot)
|
||||
{
|
||||
DWORD protect = 0;
|
||||
|
||||
if (prot == PROT_NONE)
|
||||
return protect;
|
||||
|
||||
if ((prot & PROT_EXEC) != 0)
|
||||
{
|
||||
protect = ((prot & PROT_WRITE) != 0) ?
|
||||
PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
|
||||
}
|
||||
else
|
||||
{
|
||||
protect = ((prot & PROT_WRITE) != 0) ?
|
||||
PAGE_READWRITE : PAGE_READONLY;
|
||||
}
|
||||
|
||||
return protect;
|
||||
}
|
||||
|
||||
static DWORD __map_mmap_prot_file(const int prot)
|
||||
{
|
||||
DWORD desiredAccess = 0;
|
||||
|
||||
if (prot == PROT_NONE)
|
||||
return desiredAccess;
|
||||
|
||||
if ((prot & PROT_READ) != 0)
|
||||
desiredAccess |= FILE_MAP_READ;
|
||||
if ((prot & PROT_WRITE) != 0)
|
||||
desiredAccess |= FILE_MAP_WRITE;
|
||||
if ((prot & PROT_EXEC) != 0)
|
||||
desiredAccess |= FILE_MAP_EXECUTE;
|
||||
|
||||
return desiredAccess;
|
||||
}
|
||||
|
||||
void* mmap(void *addr, size_t len, int prot, int flags, int fildes, OffsetType off)
|
||||
{
|
||||
HANDLE fm, h;
|
||||
|
||||
void * map = MAP_FAILED;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4293)
|
||||
#endif
|
||||
|
||||
const DWORD dwFileOffsetLow = (sizeof(OffsetType) <= sizeof(DWORD)) ?
|
||||
(DWORD)off : (DWORD)(off & 0xFFFFFFFFL);
|
||||
const DWORD dwFileOffsetHigh = (sizeof(OffsetType) <= sizeof(DWORD)) ?
|
||||
(DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL);
|
||||
const DWORD protect = __map_mmap_prot_page(prot);
|
||||
const DWORD desiredAccess = __map_mmap_prot_file(prot);
|
||||
|
||||
const OffsetType maxSize = off + (OffsetType)len;
|
||||
|
||||
const DWORD dwMaxSizeLow = (sizeof(OffsetType) <= sizeof(DWORD)) ?
|
||||
(DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL);
|
||||
const DWORD dwMaxSizeHigh = (sizeof(OffsetType) <= sizeof(DWORD)) ?
|
||||
(DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
errno = 0;
|
||||
|
||||
if (len == 0
|
||||
/* Unsupported flag combinations */
|
||||
|| (flags & MAP_FIXED) != 0
|
||||
/* Usupported protection combinations */
|
||||
|| prot == PROT_EXEC)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
h = ((flags & MAP_ANONYMOUS) == 0) ?
|
||||
(HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
|
||||
|
||||
if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE)
|
||||
{
|
||||
errno = EBADF;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
|
||||
|
||||
if (fm == NULL)
|
||||
{
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
|
||||
|
||||
CloseHandle(fm);
|
||||
|
||||
if (map == NULL)
|
||||
{
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
if (UnmapViewOfFile(addr))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int _mprotect(void *addr, size_t len, int prot)
|
||||
{
|
||||
DWORD newProtect = __map_mmap_prot_page(prot);
|
||||
DWORD oldProtect = 0;
|
||||
|
||||
if (VirtualProtect(addr, len, newProtect, &oldProtect))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int msync(void *addr, size_t len, int flags)
|
||||
{
|
||||
if (FlushViewOfFile(addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int mlock(const void *addr, size_t len)
|
||||
{
|
||||
if (VirtualLock((LPVOID)addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int munlock(const void *addr, size_t len)
|
||||
{
|
||||
if (VirtualUnlock((LPVOID)addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
63
crypto/mman.h
Normal file
63
crypto/mman.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* sys/mman.h
|
||||
* mman-win32
|
||||
*/
|
||||
|
||||
#ifndef _SYS_MMAN_H_
|
||||
#define _SYS_MMAN_H_
|
||||
|
||||
#ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later.
|
||||
#define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows.
|
||||
#endif
|
||||
|
||||
/* All the headers include this file. */
|
||||
#ifndef _MSC_VER
|
||||
#include <_mingw.h>
|
||||
#endif
|
||||
|
||||
/* Determine offset type */
|
||||
#include <stdint.h>
|
||||
#if defined(_WIN64)
|
||||
typedef int64_t OffsetType;
|
||||
#else
|
||||
typedef uint32_t OffsetType;
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define PROT_NONE 0
|
||||
#define PROT_READ 1
|
||||
#define PROT_WRITE 2
|
||||
#define PROT_EXEC 4
|
||||
|
||||
#define MAP_FILE 0
|
||||
#define MAP_SHARED 1
|
||||
#define MAP_PRIVATE 2
|
||||
#define MAP_TYPE 0xf
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_ANON MAP_ANONYMOUS
|
||||
|
||||
#define MAP_FAILED ((void *)-1)
|
||||
|
||||
/* Flags for msync. */
|
||||
#define MS_ASYNC 1
|
||||
#define MS_SYNC 2
|
||||
#define MS_INVALIDATE 4
|
||||
|
||||
void* mmap(void *addr, size_t len, int prot, int flags, int fildes, OffsetType off);
|
||||
int munmap(void *addr, size_t len);
|
||||
int _mprotect(void *addr, size_t len, int prot);
|
||||
int msync(void *addr, size_t len, int flags);
|
||||
int mlock(const void *addr, size_t len);
|
||||
int munlock(const void *addr, size_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_MMAN_H_ */
|
371
crypto/wildkeccak-cpu.cpp
Normal file
371
crypto/wildkeccak-cpu.cpp
Normal file
@ -0,0 +1,371 @@
|
||||
// Memory-hard extension of keccak for PoW
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Copyright (c) 2014 The Boolberry developers
|
||||
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
// Adapted for ccminer by Tanguy Pruvot - 2016
|
||||
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <emmintrin.h>
|
||||
#include <bignum.hpp>
|
||||
#include "int128_c.h"
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include <miner.h>
|
||||
|
||||
#include "xmr-rpc.h"
|
||||
|
||||
extern uint64_t* pscratchpad_buff;
|
||||
|
||||
struct reciprocal_value64 {
|
||||
uint64_t m;
|
||||
uint8_t sh1, sh2;
|
||||
};
|
||||
|
||||
static inline int fls64(uint64_t x)
|
||||
{
|
||||
#if defined(_WIN64)
|
||||
unsigned long bitpos = 0;
|
||||
_BitScanReverse64(&bitpos, x);
|
||||
return (int) (bitpos + 1);
|
||||
#elif defined(WIN32)
|
||||
unsigned long hipos = 0, bitpos = 0;
|
||||
uint32_t hi = x >> 32;
|
||||
_BitScanReverse(&hipos, hi);
|
||||
if (!hipos) {
|
||||
_BitScanReverse(&bitpos, (uint32_t) x);
|
||||
}
|
||||
return (int) hipos ? hipos + 33 : bitpos + 1;
|
||||
#else
|
||||
/*
|
||||
* AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
|
||||
* dest reg is undefined if x==0, but their CPU architect says its
|
||||
* value is written to set it to the same as before.
|
||||
*/
|
||||
register long bitpos = -1;
|
||||
asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x));
|
||||
return bitpos + 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct reciprocal_value64 reciprocal_val64(uint64_t d)
|
||||
{
|
||||
struct reciprocal_value64 R;
|
||||
int l;
|
||||
|
||||
l = fls64(d - 1);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
uint128 v1;
|
||||
v1.Lo = (1ULL << l) - d;v1.Hi=0;
|
||||
uint128 v2;
|
||||
v2.Hi = 1; v2.Lo = 0;
|
||||
|
||||
uint128 v;
|
||||
mult128(v1,v2,&v);
|
||||
divmod128by64(v.Hi,v.Lo,d,&v.Hi,&v.Lo);
|
||||
Increment(&v);
|
||||
R.m = (uint64_t)v.Hi;
|
||||
#else
|
||||
__uint128_t m;
|
||||
m = (((__uint128_t)1 << 64) * ((1ULL << l) - d));
|
||||
m /= d;
|
||||
++m;
|
||||
R.m = (uint64_t)m;
|
||||
#endif
|
||||
|
||||
R.sh1 = min(l, 1);
|
||||
R.sh2 = max(l - 1, 0);
|
||||
|
||||
return R;
|
||||
}
|
||||
|
||||
static inline uint64_t reciprocal_divide64(uint64_t a, struct reciprocal_value64 R)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
uint128 v;
|
||||
mult64to128(a,R.m,&v.Hi,&v.Lo);
|
||||
uint64_t t = v.Hi;
|
||||
#else
|
||||
uint64_t t = (uint64_t)(((__uint128_t)a * R.m) >> 64);
|
||||
#endif
|
||||
return (t + ((a - t) >> R.sh1)) >> R.sh2;
|
||||
}
|
||||
|
||||
static inline uint64_t reciprocal_remainder64(uint64_t A, uint64_t B, struct reciprocal_value64 R)
|
||||
{
|
||||
uint64_t div, mod;
|
||||
|
||||
div = reciprocal_divide64(A, R);
|
||||
mod = A - (uint64_t) (div * B);
|
||||
if (mod >= B) mod -= B;
|
||||
return mod;
|
||||
}
|
||||
|
||||
//#define UNROLL_SCR_MIX
|
||||
|
||||
static inline uint64_t rotl641(uint64_t x) { return((x << 1) | (x >> 63)); }
|
||||
static inline uint64_t rotl64_1(uint64_t x, uint64_t y) { return((x << y) | (x >> (64 - y))); }
|
||||
static inline uint64_t rotl64_2(uint64_t x, uint64_t y) { return(rotl64_1((x >> 32) | (x << 32), y)); }
|
||||
static inline uint64_t bitselect(uint64_t a, uint64_t b, uint64_t c) { return(a ^ (c & (b ^ a))); }
|
||||
|
||||
static inline void keccakf_mul(uint64_t *s)
|
||||
{
|
||||
uint64_t bc[5], t[5];
|
||||
uint64_t tmp1, tmp2;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < 5; i++)
|
||||
t[i] = s[i + 0] ^ s[i + 5] ^ s[i + 10] * s[i + 15] * s[i + 20];
|
||||
|
||||
bc[0] = t[0] ^ rotl641(t[2]);
|
||||
bc[1] = t[1] ^ rotl641(t[3]);
|
||||
bc[2] = t[2] ^ rotl641(t[4]);
|
||||
bc[3] = t[3] ^ rotl641(t[0]);
|
||||
bc[4] = t[4] ^ rotl641(t[1]);
|
||||
|
||||
tmp1 = s[1] ^ bc[0];
|
||||
|
||||
s[ 0] ^= bc[4];
|
||||
s[ 1] = rotl64_1(s[ 6] ^ bc[0], 44);
|
||||
s[ 6] = rotl64_1(s[ 9] ^ bc[3], 20);
|
||||
s[ 9] = rotl64_1(s[22] ^ bc[1], 61);
|
||||
s[22] = rotl64_1(s[14] ^ bc[3], 39);
|
||||
s[14] = rotl64_1(s[20] ^ bc[4], 18);
|
||||
s[20] = rotl64_1(s[ 2] ^ bc[1], 62);
|
||||
s[ 2] = rotl64_1(s[12] ^ bc[1], 43);
|
||||
s[12] = rotl64_1(s[13] ^ bc[2], 25);
|
||||
s[13] = rotl64_1(s[19] ^ bc[3], 8);
|
||||
s[19] = rotl64_1(s[23] ^ bc[2], 56);
|
||||
s[23] = rotl64_1(s[15] ^ bc[4], 41);
|
||||
s[15] = rotl64_1(s[ 4] ^ bc[3], 27);
|
||||
s[ 4] = rotl64_1(s[24] ^ bc[3], 14);
|
||||
s[24] = rotl64_1(s[21] ^ bc[0], 2);
|
||||
s[21] = rotl64_1(s[ 8] ^ bc[2], 55);
|
||||
s[ 8] = rotl64_1(s[16] ^ bc[0], 45);
|
||||
s[16] = rotl64_1(s[ 5] ^ bc[4], 36);
|
||||
s[ 5] = rotl64_1(s[ 3] ^ bc[2], 28);
|
||||
s[ 3] = rotl64_1(s[18] ^ bc[2], 21);
|
||||
s[18] = rotl64_1(s[17] ^ bc[1], 15);
|
||||
s[17] = rotl64_1(s[11] ^ bc[0], 10);
|
||||
s[11] = rotl64_1(s[ 7] ^ bc[1], 6);
|
||||
s[ 7] = rotl64_1(s[10] ^ bc[4], 3);
|
||||
s[10] = rotl64_1(tmp1, 1);
|
||||
|
||||
tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
|
||||
tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
|
||||
tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
|
||||
tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1);
|
||||
tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1);
|
||||
|
||||
s[0] ^= 0x0000000000000001ULL;
|
||||
}
|
||||
|
||||
static inline void keccakf_mul_last(uint64_t *s)
|
||||
{
|
||||
uint64_t bc[5], xormul[5];
|
||||
uint64_t tmp1, tmp2;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < 5; i++)
|
||||
xormul[i] = s[i + 0] ^ s[i + 5] ^ s[i + 10] * s[i + 15] * s[i + 20];
|
||||
|
||||
bc[0] = xormul[0] ^ rotl641(xormul[2]);
|
||||
bc[1] = xormul[1] ^ rotl641(xormul[3]);
|
||||
bc[2] = xormul[2] ^ rotl641(xormul[4]);
|
||||
bc[3] = xormul[3] ^ rotl641(xormul[0]);
|
||||
bc[4] = xormul[4] ^ rotl641(xormul[1]);
|
||||
|
||||
s[0] ^= bc[4];
|
||||
s[1] = rotl64_2(s[6] ^ bc[0], 12);
|
||||
s[2] = rotl64_2(s[12] ^ bc[1], 11);
|
||||
s[4] = rotl64_1(s[24] ^ bc[3], 14);
|
||||
s[3] = rotl64_1(s[18] ^ bc[2], 21);
|
||||
|
||||
tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]);
|
||||
s[0] ^= 0x0000000000000001ULL;
|
||||
}
|
||||
|
||||
struct reciprocal_value64 cached_recip;
|
||||
static uint64_t cached_scr_size = 0;
|
||||
|
||||
static inline void scr_mix(uint64_t *st, uint64_t scr_size, struct reciprocal_value64 recip)
|
||||
{
|
||||
#define KK_MIXIN_SIZE 24
|
||||
uint64_t _ALIGN(128) idx[KK_MIXIN_SIZE];
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define pscr pscratchpad_buff
|
||||
int x;
|
||||
|
||||
// non-optimized 64bit operations
|
||||
for (x = 0; x < KK_MIXIN_SIZE; x++) {
|
||||
idx[x] = reciprocal_remainder64(st[x], scr_size, recip) << 2;
|
||||
}
|
||||
if (idx[7] > scr_size*4) {
|
||||
applog(LOG_WARNING, "Wrong remainder64 returned by the cpu hash %016llx > %016llx",
|
||||
(unsigned long long) idx[7], (unsigned long long) scr_size*4);
|
||||
return;
|
||||
}
|
||||
for(x = 0; x < KK_MIXIN_SIZE; x += 4) {
|
||||
st[x + 0] ^= pscr[idx[x] + 0] ^ pscr[idx[x + 1] + 0] ^ pscr[idx[x + 2] + 0] ^ pscr[idx[x + 3] + 0];
|
||||
st[x + 1] ^= pscr[idx[x] + 1] ^ pscr[idx[x + 1] + 1] ^ pscr[idx[x + 2] + 1] ^ pscr[idx[x + 3] + 1];
|
||||
st[x + 2] ^= pscr[idx[x] + 2] ^ pscr[idx[x + 1] + 2] ^ pscr[idx[x + 2] + 2] ^ pscr[idx[x + 3] + 2];
|
||||
st[x + 3] ^= pscr[idx[x] + 3] ^ pscr[idx[x + 1] + 3] ^ pscr[idx[x + 2] + 3] ^ pscr[idx[x + 3] + 3];
|
||||
}
|
||||
return;
|
||||
|
||||
#elif !defined(UNROLL_SCR_MIX)
|
||||
|
||||
#pragma GCC ivdep
|
||||
for(int x = 0; x < 3; ++x)
|
||||
{
|
||||
__m128i *st0, *st1, *st2, *st3;
|
||||
|
||||
idx[0] = reciprocal_remainder64(st[(x << 3) + 0], scr_size, recip) << 2;
|
||||
idx[1] = reciprocal_remainder64(st[(x << 3) + 1], scr_size, recip) << 2;
|
||||
idx[2] = reciprocal_remainder64(st[(x << 3) + 2], scr_size, recip) << 2;
|
||||
idx[3] = reciprocal_remainder64(st[(x << 3) + 3], scr_size, recip) << 2;
|
||||
idx[4] = reciprocal_remainder64(st[(x << 3) + 4], scr_size, recip) << 2;
|
||||
idx[5] = reciprocal_remainder64(st[(x << 3) + 5], scr_size, recip) << 2;
|
||||
idx[6] = reciprocal_remainder64(st[(x << 3) + 6], scr_size, recip) << 2;
|
||||
idx[7] = reciprocal_remainder64(st[(x << 3) + 7], scr_size, recip) << 2;
|
||||
|
||||
for(int y = 0; y < 8; y++) _mm_prefetch((const char*) (&pscratchpad_buff[idx[y]]), _MM_HINT_T1);
|
||||
|
||||
st0 = (__m128i *)&st[(x << 3) + 0];
|
||||
st1 = (__m128i *)&st[(x << 3) + 2];
|
||||
st2 = (__m128i *)&st[(x << 3) + 4];
|
||||
st3 = (__m128i *)&st[(x << 3) + 6];
|
||||
|
||||
*st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[0]]));
|
||||
*st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[1]]));
|
||||
*st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[2]]));
|
||||
*st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[3]]));
|
||||
|
||||
*st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[0] + 2]));
|
||||
*st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[1] + 2]));
|
||||
*st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[2] + 2]));
|
||||
*st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[3] + 2]));
|
||||
|
||||
*st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[4]]));
|
||||
*st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[5]]));
|
||||
*st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[6]]));
|
||||
*st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[7]]));
|
||||
|
||||
*st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[4] + 2]));
|
||||
*st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[5] + 2]));
|
||||
*st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[6] + 2]));
|
||||
*st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[7] + 2]));
|
||||
}
|
||||
|
||||
#else
|
||||
#warning using AVX2 optimizations
|
||||
|
||||
idx[ 0] = reciprocal_remainder64(st[0], scr_size, recip) << 2;
|
||||
idx[ 1] = reciprocal_remainder64(st[1], scr_size, recip) << 2;
|
||||
idx[ 2] = reciprocal_remainder64(st[2], scr_size, recip) << 2;
|
||||
idx[ 3] = reciprocal_remainder64(st[3], scr_size, recip) << 2;
|
||||
idx[ 4] = reciprocal_remainder64(st[4], scr_size, recip) << 2;
|
||||
idx[ 5] = reciprocal_remainder64(st[5], scr_size, recip) << 2;
|
||||
idx[ 6] = reciprocal_remainder64(st[6], scr_size, recip) << 2;
|
||||
idx[ 7] = reciprocal_remainder64(st[7], scr_size, recip) << 2;
|
||||
|
||||
for(int y = 0; y < 8; y++) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1);
|
||||
|
||||
idx[ 8] = reciprocal_remainder64(st[8], scr_size, recip) << 2;
|
||||
idx[ 9] = reciprocal_remainder64(st[9], scr_size, recip) << 2;
|
||||
idx[10] = reciprocal_remainder64(st[10], scr_size, recip) << 2;
|
||||
idx[11] = reciprocal_remainder64(st[11], scr_size, recip) << 2;
|
||||
idx[12] = reciprocal_remainder64(st[12], scr_size, recip) << 2;
|
||||
idx[13] = reciprocal_remainder64(st[13], scr_size, recip) << 2;
|
||||
idx[14] = reciprocal_remainder64(st[14], scr_size, recip) << 2;
|
||||
idx[15] = reciprocal_remainder64(st[15], scr_size, recip) << 2;
|
||||
|
||||
for(int y = 8; y < 16; ++y) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1);
|
||||
|
||||
idx[16] = reciprocal_remainder64(st[16], scr_size, recip) << 2;
|
||||
idx[17] = reciprocal_remainder64(st[17], scr_size, recip) << 2;
|
||||
idx[18] = reciprocal_remainder64(st[18], scr_size, recip) << 2;
|
||||
idx[19] = reciprocal_remainder64(st[19], scr_size, recip) << 2;
|
||||
idx[20] = reciprocal_remainder64(st[20], scr_size, recip) << 2;
|
||||
idx[21] = reciprocal_remainder64(st[21], scr_size, recip) << 2;
|
||||
idx[22] = reciprocal_remainder64(st[22], scr_size, recip) << 2;
|
||||
idx[23] = reciprocal_remainder64(st[23], scr_size, recip) << 2;
|
||||
|
||||
for(int y = 16; y < 24; ++y) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1);
|
||||
|
||||
__m256i *st0 = (__m256i *)&st[0];
|
||||
|
||||
for(int x = 0; x < 6; ++x)
|
||||
{
|
||||
*st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 0]]));
|
||||
*st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 1]]));
|
||||
*st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 2]]));
|
||||
*st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 3]]));
|
||||
++st0;
|
||||
}
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
static void wild_keccak_hash_dbl(uint8_t * __restrict md, const uint8_t * __restrict in)
|
||||
{
|
||||
uint64_t _ALIGN(32) st[25];
|
||||
uint64_t scr_size, i;
|
||||
struct reciprocal_value64 recip;
|
||||
|
||||
scr_size = scratchpad_size >> 2;
|
||||
if (scr_size == cached_scr_size)
|
||||
recip = cached_recip;
|
||||
else {
|
||||
cached_recip = recip = reciprocal_val64(scr_size);
|
||||
cached_scr_size = scr_size;
|
||||
}
|
||||
|
||||
// Wild Keccak #1
|
||||
memcpy(st, in, 88);
|
||||
st[10] = (st[10] & 0x00000000000000FFULL) | 0x0000000000000100ULL;
|
||||
memset(&st[11], 0, 112);
|
||||
st[16] |= 0x8000000000000000ULL;
|
||||
|
||||
for(i = 0; i < 23; i++) {
|
||||
keccakf_mul(st);
|
||||
scr_mix(st, scr_size, recip);
|
||||
}
|
||||
|
||||
keccakf_mul_last(st);
|
||||
|
||||
// Wild Keccak #2
|
||||
memset(&st[4], 0x00, 168);
|
||||
st[ 4] = 0x0000000000000001ULL;
|
||||
st[16] = 0x8000000000000000ULL;
|
||||
|
||||
for(i = 0; i < 23; i++) {
|
||||
keccakf_mul(st);
|
||||
scr_mix(st, scr_size, recip);
|
||||
}
|
||||
|
||||
keccakf_mul_last(st);
|
||||
|
||||
memcpy(md, st, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
void wildkeccak_hash(void* output, const void* input, uint64_t* scratchpad, uint64_t ssize)
|
||||
{
|
||||
if (scratchpad) pscratchpad_buff = scratchpad;
|
||||
if (!scratchpad_size) scratchpad_size = ssize;
|
||||
wild_keccak_hash_dbl((uint8_t*)output, (uint8_t*)input);
|
||||
}
|
369
crypto/wildkeccak.cu
Normal file
369
crypto/wildkeccak.cu
Normal file
@ -0,0 +1,369 @@
|
||||
extern "C" {
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
}
|
||||
|
||||
#include <miner.h>
|
||||
#include <cuda_helper.h>
|
||||
#include <cuda_vector_uint2x4.h> // todo
|
||||
|
||||
#include "wildkeccak.h"
|
||||
|
||||
extern char *device_config[MAX_GPUS]; // -l
|
||||
extern uint64_t* pscratchpad_buff;
|
||||
|
||||
static uint64_t* d_input[MAX_GPUS];
|
||||
static uint32_t* d_retnonce[MAX_GPUS];
|
||||
static ulonglong4* d_scratchpad[MAX_GPUS];
|
||||
|
||||
static uint64_t* h_scratchpad[MAX_GPUS] = { 0 };
|
||||
static cudaStream_t bufpad_stream[MAX_GPUS] = { 0 };
|
||||
static cudaStream_t kernel_stream[MAX_GPUS] = { 0 };
|
||||
|
||||
uint64_t scratchpad_size = 0;
|
||||
|
||||
uint32_t WK_CUDABlocks = 64;
|
||||
uint32_t WK_CUDAThreads = 256;
|
||||
|
||||
#define st0 vst0.x
|
||||
#define st1 vst0.y
|
||||
#define st2 vst0.z
|
||||
#define st3 vst0.w
|
||||
|
||||
#define st4 vst4.x
|
||||
#define st5 vst4.y
|
||||
#define st6 vst4.z
|
||||
#define st7 vst4.w
|
||||
|
||||
#define st8 vst8.x
|
||||
#define st9 vst8.y
|
||||
#define st10 vst8.z
|
||||
#define st11 vst8.w
|
||||
|
||||
#define st12 vst12.x
|
||||
#define st13 vst12.y
|
||||
#define st14 vst12.z
|
||||
#define st15 vst12.w
|
||||
|
||||
#define st16 vst16.x
|
||||
#define st17 vst16.y
|
||||
#define st18 vst16.z
|
||||
#define st19 vst16.w
|
||||
|
||||
#define st20 vst20.x
|
||||
#define st21 vst20.y
|
||||
#define st22 vst20.z
|
||||
#define st23 vst20.w
|
||||
|
||||
#if __CUDA_ARCH__ >= 320
|
||||
|
||||
__device__ __forceinline__ uint64_t cuda_rotl641(const uint64_t value)
|
||||
{
|
||||
uint2 result;
|
||||
asm("shf.l.wrap.b32 %0, %1, %2, 1U;" : "=r"(result.x)
|
||||
: "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))));
|
||||
asm("shf.l.wrap.b32 %0, %1, %2, 1U;" : "=r"(result.y)
|
||||
: "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))));
|
||||
return __double_as_longlong(__hiloint2double(result.y, result.x));
|
||||
}
|
||||
|
||||
#else
|
||||
__noinline__ __device__ uint64_t cuda_rotl641(const uint64_t x) { return((x << 1) | (x >> 63)); }
|
||||
#endif
|
||||
|
||||
__noinline__ __device__ uint64_t bitselect(const uint64_t a, const uint64_t b, const uint64_t c) { return(a ^ (c & (b ^ a))); }
|
||||
|
||||
#define ROTL641(x) (cuda_rotl641(x))
|
||||
|
||||
#define RND() \
|
||||
bc[0] = st0 ^ st5 ^ st10 * st15 * st20 ^ ROTL641(st2 ^ st7 ^ st12 * st17 * st22); \
|
||||
bc[1] = st1 ^ st6 ^ st11 * st16 * st21 ^ ROTL641(st3 ^ st8 ^ st13 * st18 * st23); \
|
||||
bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL641(st4 ^ st9 ^ st14 * st19 * st24); \
|
||||
bc[3] = st3 ^ st8 ^ st13 * st18 * st23 ^ ROTL641(st0 ^ st5 ^ st10 * st15 * st20); \
|
||||
bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL641(st1 ^ st6 ^ st11 * st16 * st21); \
|
||||
tmp1 = st1 ^ bc[0]; \
|
||||
\
|
||||
st0 ^= bc[4]; \
|
||||
st1 = ROTL64(st6 ^ bc[0], 44); \
|
||||
st6 = ROTL64(st9 ^ bc[3], 20); \
|
||||
st9 = ROTL64(st22 ^ bc[1], 61); \
|
||||
st22 = ROTL64(st14 ^ bc[3], 39); \
|
||||
st14 = ROTL64(st20 ^ bc[4], 18); \
|
||||
st20 = ROTL64(st2 ^ bc[1], 62); \
|
||||
st2 = ROTL64(st12 ^ bc[1], 43); \
|
||||
st12 = ROTL64(st13 ^ bc[2], 25); \
|
||||
st13 = ROTL64(st19 ^ bc[3], 8); \
|
||||
st19 = ROTL64(st23 ^ bc[2], 56); \
|
||||
st23 = ROTL64(st15 ^ bc[4], 41); \
|
||||
st15 = ROTL64(st4 ^ bc[3], 27); \
|
||||
st4 = ROTL64(st24 ^ bc[3], 14); \
|
||||
st24 = ROTL64(st21 ^ bc[0], 2); \
|
||||
st21 = ROTL64(st8 ^ bc[2], 55); \
|
||||
st8 = ROTL64(st16 ^ bc[0], 45); \
|
||||
st16 = ROTL64(st5 ^ bc[4], 36); \
|
||||
st5 = ROTL64(st3 ^ bc[2], 28); \
|
||||
st3 = ROTL64(st18 ^ bc[2], 21); \
|
||||
st18 = ROTL64(st17 ^ bc[1], 15); \
|
||||
st17 = ROTL64(st11 ^ bc[0], 10); \
|
||||
st11 = ROTL64(st7 ^ bc[1], 6); \
|
||||
st7 = ROTL64(st10 ^ bc[4], 3); \
|
||||
st10 = ROTL641(tmp1); \
|
||||
\
|
||||
tmp1 = st0; tmp2 = st1; st0 = bitselect(st0 ^ st2, st0, st1); st1 = bitselect(st1 ^ st3, st1, st2); st2 = bitselect(st2 ^ st4, st2, st3); st3 = bitselect(st3 ^ tmp1, st3, st4); st4 = bitselect(st4 ^ tmp2, st4, tmp1); \
|
||||
tmp1 = st5; tmp2 = st6; st5 = bitselect(st5 ^ st7, st5, st6); st6 = bitselect(st6 ^ st8, st6, st7); st7 = bitselect(st7 ^ st9, st7, st8); st8 = bitselect(st8 ^ tmp1, st8, st9); st9 = bitselect(st9 ^ tmp2, st9, tmp1); \
|
||||
tmp1 = st10; tmp2 = st11; st10 = bitselect(st10 ^ st12, st10, st11); st11 = bitselect(st11 ^ st13, st11, st12); st12 = bitselect(st12 ^ st14, st12, st13); st13 = bitselect(st13 ^ tmp1, st13, st14); st14 = bitselect(st14 ^ tmp2, st14, tmp1); \
|
||||
tmp1 = st15; tmp2 = st16; st15 = bitselect(st15 ^ st17, st15, st16); st16 = bitselect(st16 ^ st18, st16, st17); st17 = bitselect(st17 ^ st19, st17, st18); st18 = bitselect(st18 ^ tmp1, st18, st19); st19 = bitselect(st19 ^ tmp2, st19, tmp1); \
|
||||
tmp1 = st20; tmp2 = st21; st20 = bitselect(st20 ^ st22, st20, st21); st21 = bitselect(st21 ^ st23, st21, st22); st22 = bitselect(st22 ^ st24, st22, st23); st23 = bitselect(st23 ^ tmp1, st23, st24); st24 = bitselect(st24 ^ tmp2, st24, tmp1); \
|
||||
st0 ^= 1;
|
||||
|
||||
#define LASTRND1() \
|
||||
bc[0] = st0 ^ st5 ^ st10 * st15 * st20 ^ ROTL64(st2 ^ st7 ^ st12 * st17 * st22, 1); \
|
||||
bc[1] = st1 ^ st6 ^ st11 * st16 * st21 ^ ROTL64(st3 ^ st8 ^ st13 * st18 * st23, 1); \
|
||||
bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL64(st4 ^ st9 ^ st14 * st19 * st24, 1); \
|
||||
bc[3] = st3 ^ st8 ^ st13 * st18 * st23 ^ ROTL64(st0 ^ st5 ^ st10 * st15 * st20, 1); \
|
||||
bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL64(st1 ^ st6 ^ st11 * st16 * st21, 1); \
|
||||
\
|
||||
st0 ^= bc[4]; \
|
||||
st1 = ROTL64(st6 ^ bc[0], 44); \
|
||||
st2 = ROTL64(st12 ^ bc[1], 43); \
|
||||
st4 = ROTL64(st24 ^ bc[3], 14); \
|
||||
st3 = ROTL64(st18 ^ bc[2], 21); \
|
||||
\
|
||||
tmp1 = st0; st0 = bitselect(st0 ^ st2, st0, st1); st1 = bitselect(st1 ^ st3, st1, st2); st2 = bitselect(st2 ^ st4, st2, st3); st3 = bitselect(st3 ^ tmp1, st3, st4); \
|
||||
st0 ^= 1;
|
||||
|
||||
#define LASTRND2() \
|
||||
bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL64(st4 ^ st9 ^ st14 * st19 * st24, 1); \
|
||||
bc[3] = st3 ^ st8 ^ st13 * st18 * st23 ^ ROTL64(st0 ^ st5 ^ st10 * st15 * st20, 1); \
|
||||
bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL64(st1 ^ st6 ^ st11 * st16 * st21, 1); \
|
||||
\
|
||||
st0 ^= bc[4]; \
|
||||
st4 = ROTL64(st24 ^ bc[3], 14); \
|
||||
st3 = ROTL64(st18 ^ bc[2], 21); \
|
||||
st3 = bitselect(st3 ^ st0, st3, st4);
|
||||
|
||||
__device__ ulonglong4 operator^(const ulonglong4 &a, const ulonglong4 &b)
|
||||
{
|
||||
return(make_ulonglong4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w));
|
||||
}
|
||||
|
||||
#define MIX(vst) vst = vst ^ scratchpad[vst.x % scr_size] ^ scratchpad[vst.y % scr_size] ^ scratchpad[vst.z % scr_size] ^ scratchpad[vst.w % scr_size];
|
||||
|
||||
#define MIX_ALL MIX(vst0); MIX(vst4); MIX(vst8); MIX(vst12); MIX(vst16); MIX(vst20);
|
||||
|
||||
__global__
|
||||
void wk(uint32_t* __restrict__ retnonce, const uint64_t* __restrict__ input, const ulonglong4* __restrict__ scratchpad,
|
||||
const uint32_t scr_size, const uint32_t target, uint64_t startNonce)
|
||||
{
|
||||
ulonglong4 vst0, vst4, vst8, vst12, vst16, vst20;
|
||||
uint64_t bc[5];
|
||||
uint64_t st24, tmp1, tmp2;
|
||||
|
||||
const uint64_t nonce = startNonce + (blockDim.x * blockIdx.x) + threadIdx.x;
|
||||
vst0 = make_ulonglong4((nonce << 8) + (input[0] & 0xFF), input[1] & 0xFFFFFFFFFFFFFF00ULL, input[2], input[3]);
|
||||
vst4 = make_ulonglong4(input[4], input[5], input[6], input[7]);
|
||||
vst8 = make_ulonglong4(input[8], input[9], (input[10] & 0xFF) | 0x100, 0);
|
||||
vst12 = make_ulonglong4(0, 0, 0, 0);
|
||||
vst16 = make_ulonglong4(0x8000000000000000ULL, 0, 0, 0);
|
||||
vst20 = make_ulonglong4(0, 0, 0, 0);
|
||||
st24 = 0;
|
||||
|
||||
RND();
|
||||
MIX_ALL;
|
||||
|
||||
for(int i = 0; i < 22; i++) {
|
||||
RND();
|
||||
MIX_ALL;
|
||||
}
|
||||
|
||||
LASTRND1();
|
||||
|
||||
vst4 = make_ulonglong4(1, 0, 0, 0);
|
||||
vst8 = make_ulonglong4(0, 0, 0, 0);
|
||||
vst12 = make_ulonglong4(0, 0, 0, 0);
|
||||
vst16 = make_ulonglong4(0x8000000000000000ULL, 0, 0, 0);
|
||||
vst20 = make_ulonglong4(0, 0, 0, 0);
|
||||
st24 = 0;
|
||||
|
||||
RND();
|
||||
MIX_ALL;
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 0; i < 22; i++) {
|
||||
RND();
|
||||
MIX_ALL;
|
||||
}
|
||||
|
||||
LASTRND2();
|
||||
|
||||
if((st3 >> 32) <= target) {
|
||||
retnonce[0] = (uint32_t) nonce;
|
||||
retnonce[1] = retnonce[0];
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void wildkeccak_kernel(const int thr_id, const uint32_t threads, const uint32_t startNounce, const uint2 target, uint32_t *resNonces)
|
||||
{
|
||||
CUDA_SAFE_CALL(cudaMemsetAsync(d_retnonce[thr_id], 0xff, 2 * sizeof(uint32_t), kernel_stream[thr_id]));
|
||||
|
||||
const uint32_t threadsperblock = WK_CUDAThreads;
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
wk <<<grid, block, 0, kernel_stream[thr_id]>>> (d_retnonce[thr_id], d_input[thr_id], d_scratchpad[thr_id],
|
||||
(uint32_t)(scratchpad_size >> 2), target.y, startNounce);
|
||||
|
||||
cudaMemcpyAsync(resNonces, d_retnonce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost, kernel_stream[thr_id]);
|
||||
}
|
||||
|
||||
static bool init[MAX_GPUS] = { 0 };
|
||||
|
||||
extern "C" int scanhash_wildkeccak(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
//uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t throughput = 0;
|
||||
uint64_t n, nonce, first;
|
||||
uint8_t *pdata = (uint8_t*) work->data;
|
||||
memcpy(&first, &pdata[1], 8);
|
||||
//memcpy(&n, &pdata[1], 4);;
|
||||
n = nonce = first;
|
||||
// pdata[5] = thr_id;
|
||||
// memcpy(&nonce, &pdata[1], 8);
|
||||
|
||||
if (!scratchpad_size || !h_scratchpad[thr_id]) {
|
||||
if (h_scratchpad[thr_id])
|
||||
applog(LOG_ERR, "Scratchpad size is not set!");
|
||||
work->data[0] = 0; // invalidate
|
||||
sleep(1);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (!init[thr_id]) {
|
||||
|
||||
if (device_config[thr_id]) {
|
||||
sscanf(device_config[thr_id], "%ux%u", &WK_CUDABlocks, &WK_CUDAThreads);
|
||||
gpulog(LOG_INFO, thr_id, "Using %u x %u threads kernel launch config", WK_CUDABlocks, WK_CUDAThreads);
|
||||
} else {
|
||||
throughput = cuda_default_throughput(thr_id, WK_CUDABlocks*WK_CUDAThreads);
|
||||
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
|
||||
}
|
||||
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
if (opt_cudaschedule == -1 && gpu_threads == 1) {
|
||||
cudaDeviceReset();
|
||||
// reduce cpu usage (linux)
|
||||
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
||||
CUDA_LOG_ERROR();
|
||||
}
|
||||
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_input[thr_id], 88));
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_retnonce[thr_id], 2*sizeof(uint32_t)));
|
||||
|
||||
int status = (int) cudaMalloc(&d_scratchpad[thr_id], WILD_KECCAK_SCRATCHPAD_BUFFSIZE);
|
||||
if (status != cudaSuccess) {
|
||||
gpulog(LOG_ERR, thr_id, "Unable to allocate device memory, %u MB, err %d",
|
||||
(uint32_t) (WILD_KECCAK_SCRATCHPAD_BUFFSIZE/(1024*1024)), status);
|
||||
exit(-ENOMEM);
|
||||
}
|
||||
|
||||
cudaStreamCreate(&bufpad_stream[thr_id]);
|
||||
cudaStreamCreate(&kernel_stream[thr_id]);
|
||||
|
||||
CUDA_SAFE_CALL(cudaMemcpyAsync(d_scratchpad[thr_id], h_scratchpad[thr_id], scratchpad_size << 3, cudaMemcpyHostToDevice, bufpad_stream[thr_id]));
|
||||
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
throughput = cuda_default_throughput(thr_id, WK_CUDABlocks*WK_CUDAThreads);
|
||||
|
||||
cudaMemcpy(d_input[thr_id], pdata, 88, cudaMemcpyHostToDevice);
|
||||
// cudaMemset(d_retnonce[thr_id], 0xFF, 2*sizeof(uint32_t));
|
||||
|
||||
if (h_scratchpad[thr_id]) {
|
||||
cudaStreamSynchronize(bufpad_stream[thr_id]);
|
||||
}
|
||||
|
||||
do {
|
||||
// const uint32_t blocks = WK_CUDABlocks, threads = WK_CUDAThreads;
|
||||
// const dim3 block(blocks);
|
||||
// const dim3 thread(threads);
|
||||
uint32_t h_retnonce[2] = { UINT32_MAX, UINT32_MAX };
|
||||
uint2 target = make_uint2(ptarget[6], ptarget[7]);
|
||||
|
||||
wildkeccak_kernel(thr_id, throughput, nonce, target, h_retnonce);
|
||||
/*
|
||||
wk <<<block, thread, 0, kernel_stream[thr_id]>>> (d_retnonce[thr_id], d_input[thr_id], d_scratchpad[thr_id],
|
||||
(uint32_t)(scratchpad_size >> 2), nonce, ptarget[7]);
|
||||
*/
|
||||
|
||||
*hashes_done = (unsigned long) (n - first + throughput);
|
||||
|
||||
cudaStreamSynchronize(kernel_stream[thr_id]);
|
||||
if(h_retnonce[0] != UINT32_MAX) {
|
||||
uint8_t _ALIGN(64) cpuhash[32];
|
||||
uint32_t* vhash = (uint32_t*) cpuhash;
|
||||
uint64_t nonce64;
|
||||
memcpy(&pdata[1], &h_retnonce[0], sizeof(uint32_t));
|
||||
memcpy(&nonce64, &pdata[1], 8);
|
||||
wildkeccak_hash(cpuhash, pdata, pscratchpad_buff, scratchpad_size);
|
||||
if (!cpuhash[31] && vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
||||
work_set_target_ratio(work, vhash);
|
||||
//applog_hex(pdata, 84);
|
||||
//applog_hex(cpuhash, 32);
|
||||
//applog_hex(ptarget, 32);
|
||||
memcpy(work->nonces, &nonce64, 8);
|
||||
if (n + throughput > max_nonce) {
|
||||
*hashes_done = (unsigned long) (max_nonce - first);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (n + throughput >= max_nonce) {
|
||||
n = max_nonce;
|
||||
break;
|
||||
}
|
||||
|
||||
n += throughput;
|
||||
nonce += throughput;
|
||||
|
||||
} while(!work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = (unsigned long) (n - first + 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void wildkeccak_scratchpad_need_update(uint64_t* pscratchpad_buff)
|
||||
{
|
||||
for(int i = 0; i < opt_n_threads; i++) {
|
||||
h_scratchpad[i] = pscratchpad_buff;
|
||||
if (init[i]) {
|
||||
gpulog(LOG_DEBUG, i, "Starting scratchpad update...");
|
||||
cudaMemcpyAsync(d_scratchpad[i], h_scratchpad[i], scratchpad_size << 3, cudaMemcpyHostToDevice, bufpad_stream[i]);
|
||||
work_restart[i].restart = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void free_wildkeccak(int thr_id)
|
||||
{
|
||||
if (!init[thr_id])
|
||||
return;
|
||||
|
||||
cudaThreadSynchronize();
|
||||
|
||||
cudaFree(d_scratchpad[thr_id]);
|
||||
cudaFree(d_input[thr_id]);
|
||||
cudaFree(d_retnonce[thr_id]);
|
||||
|
||||
cudaStreamDestroy(bufpad_stream[thr_id]);
|
||||
cudaStreamDestroy(kernel_stream[thr_id]);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
init[thr_id] = false;
|
||||
}
|
10
crypto/wildkeccak.h
Normal file
10
crypto/wildkeccak.h
Normal file
@ -0,0 +1,10 @@
|
||||
|
||||
#define WILD_KECCAK_SCRATCHPAD_BUFFSIZE 1ULL << 28
|
||||
#define WILD_KECCAK_ADDENDUMS_ARRAY_SIZE 10
|
||||
|
||||
extern uint64_t scratchpad_size;
|
||||
|
||||
extern uint32_t WK_CUDABlocks, WK_CUDAThreads;
|
||||
|
||||
void wildkeccak_scratchpad_need_update(uint64_t* pscratchpad_buff);
|
||||
|
1257
crypto/xmr-rpc.cpp
Normal file
1257
crypto/xmr-rpc.cpp
Normal file
File diff suppressed because it is too large
Load Diff
41
crypto/xmr-rpc.h
Normal file
41
crypto/xmr-rpc.h
Normal file
@ -0,0 +1,41 @@
|
||||
|
||||
#include <jansson.h>
|
||||
|
||||
#include "wildkeccak.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#define _PACKED _ALIGN(4)
|
||||
#else
|
||||
#define _PACKED __attribute__((__packed__))
|
||||
#endif
|
||||
|
||||
struct _PACKED scratchpad_hi {
|
||||
unsigned char prevhash[32];
|
||||
uint64_t height;
|
||||
};
|
||||
|
||||
struct _PACKED addendums_array_entry {
|
||||
struct scratchpad_hi prev_hi;
|
||||
uint64_t add_size;
|
||||
};
|
||||
|
||||
|
||||
struct _PACKED scratchpad_file_header {
|
||||
struct scratchpad_hi current_hi;
|
||||
struct addendums_array_entry add_arr[WILD_KECCAK_ADDENDUMS_ARRAY_SIZE];
|
||||
uint64_t scratchpad_size;
|
||||
};
|
||||
|
||||
|
||||
bool rpc2_job_decode(const json_t *job, struct work *work);
|
||||
bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *id, json_t *params);
|
||||
bool rpc2_stratum_gen_work(struct stratum_ctx *sctx, struct work *work);
|
||||
bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work);
|
||||
|
||||
int rpc2_stratum_thread_stuff(struct pool_infos* pool);
|
||||
|
||||
bool rpc2_login_decode(const json_t *val);
|
||||
|
||||
void rpc2_init();
|
||||
|
||||
void GetScratchpad();
|
6
miner.h
6
miner.h
@ -293,6 +293,7 @@ extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
||||
extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
|
||||
extern int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_wildkeccak(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
@ -340,6 +341,7 @@ extern void free_s3(int thr_id);
|
||||
extern void free_vanilla(int thr_id);
|
||||
extern void free_veltor(int thr_id);
|
||||
extern void free_whirl(int thr_id);
|
||||
extern void free_wildkeccak(int thr_id);
|
||||
extern void free_x11evo(int thr_id);
|
||||
extern void free_x11(int thr_id);
|
||||
extern void free_x13(int thr_id);
|
||||
@ -622,6 +624,7 @@ struct stratum_ctx {
|
||||
int pooln;
|
||||
time_t tm_connected;
|
||||
|
||||
int rpc2;
|
||||
int srvtime_diff;
|
||||
};
|
||||
|
||||
@ -743,6 +746,8 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
||||
void stratum_free_job(struct stratum_ctx *sctx);
|
||||
|
||||
bool rpc2_stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
|
||||
|
||||
void hashlog_remember_submit(struct work* work, uint32_t nonce);
|
||||
void hashlog_remember_scan_range(struct work* work);
|
||||
uint32_t hashlog_already_submittted(char* jobid, uint32_t nounce);
|
||||
@ -831,6 +836,7 @@ void x13hash(void *output, const void *input);
|
||||
void x14hash(void *output, const void *input);
|
||||
void x15hash(void *output, const void *input);
|
||||
void x17hash(void *output, const void *input);
|
||||
void wildkeccak_hash(void *output, const void *input, uint64_t* scratchpad, uint64_t ssize);
|
||||
void zr5hash(void *output, const void *input);
|
||||
void zr5hash_pok(void *output, uint32_t *pdata);
|
||||
|
||||
|
@ -254,6 +254,7 @@ bool pool_switch(int thr_id, int pooln)
|
||||
// temporary... until stratum code cleanup
|
||||
stratum = p->stratum;
|
||||
stratum.pooln = cur_pooln;
|
||||
stratum.rpc2 = (p->algo == ALGO_WILDKECCAK);
|
||||
|
||||
// unlock the stratum thread
|
||||
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
|
||||
@ -275,6 +276,9 @@ bool pool_switch(int thr_id, int pooln)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
stratum.rpc2 = (p->algo == ALGO_WILDKECCAK);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico"
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 1,8,4,0
|
||||
PRODUCTVERSION 1,8,4,0
|
||||
FILEVERSION 2,0,0,0
|
||||
PRODUCTVERSION 2,0,0,0
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x21L
|
||||
@ -76,10 +76,10 @@ BEGIN
|
||||
BEGIN
|
||||
BLOCK "040904e4"
|
||||
BEGIN
|
||||
VALUE "FileVersion", "1.8.4"
|
||||
VALUE "FileVersion", "2.0"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2016"
|
||||
VALUE "ProductName", "ccminer"
|
||||
VALUE "ProductVersion", "1.8.4"
|
||||
VALUE "ProductVersion", "2.0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
11
util.cpp
11
util.cpp
@ -36,6 +36,8 @@
|
||||
#include "miner.h"
|
||||
#include "elist.h"
|
||||
|
||||
#include "crypto/xmr-rpc.h"
|
||||
|
||||
extern pthread_mutex_t stratum_sock_lock;
|
||||
extern pthread_mutex_t stratum_work_lock;
|
||||
extern bool opt_debug_diff;
|
||||
@ -1216,6 +1218,8 @@ bool stratum_subscribe(struct stratum_ctx *sctx)
|
||||
json_error_t err;
|
||||
bool ret = false, retry = false;
|
||||
|
||||
if (sctx->rpc2) return true;
|
||||
|
||||
start:
|
||||
s = (char*)malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0));
|
||||
if (retry)
|
||||
@ -1307,6 +1311,9 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
|
||||
json_error_t err;
|
||||
bool ret = false;
|
||||
|
||||
if (sctx->rpc2)
|
||||
return rpc2_stratum_authorize(sctx, user, pass);
|
||||
|
||||
s = (char*)malloc(80 + strlen(user) + strlen(pass));
|
||||
sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}",
|
||||
user, pass);
|
||||
@ -1849,6 +1856,10 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
|
||||
ret = stratum_show_message(sctx, id, params);
|
||||
goto out;
|
||||
}
|
||||
if (sctx->rpc2 && !strcasecmp(method, "job")) { // cryptonote
|
||||
ret = rpc2_stratum_job(sctx, id, params);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
// don't fail = disconnect stratum on unknown (and optional?) methods
|
||||
|
Loading…
x
Reference in New Issue
Block a user