mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-11 15:27:56 +00:00
scrypt: final cleanup for 1.6.2 release
This commit is contained in:
parent
ca9ec1b803
commit
e74d5884b4
@ -1,8 +1,10 @@
|
|||||||
/*
|
/*
|
||||||
scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane
|
* scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane
|
||||||
|
*
|
||||||
Public Domain or MIT License, whichever is easier
|
* Public Domain or MIT License, whichever is easier
|
||||||
*/
|
*
|
||||||
|
* Adapted to ccminer by tpruvot@github (2015)
|
||||||
|
*/
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
@ -50,8 +52,8 @@ static const uint64_t keccak_round_constants[24] = {
|
|||||||
0x0000000080000001ull, 0x8000000080008008ull
|
0x0000000080000001ull, 0x8000000080008008ull
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void keccak_block(scrypt_hash_state *S, const uint8_t *in)
|
||||||
keccak_block(scrypt_hash_state *S, const uint8_t *in) {
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
uint64_t *s = S->state, t[5], u[5], v, w;
|
uint64_t *s = S->state, t[5], u[5], v, w;
|
||||||
|
|
||||||
@ -120,13 +122,12 @@ keccak_block(scrypt_hash_state *S, const uint8_t *in) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_hash_init(scrypt_hash_state *S) {
|
||||||
scrypt_hash_init(scrypt_hash_state *S) {
|
|
||||||
memset(S, 0, sizeof(*S));
|
memset(S, 0, sizeof(*S));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen)
|
||||||
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
{
|
||||||
size_t want;
|
size_t want;
|
||||||
|
|
||||||
/* handle the previous data */
|
/* handle the previous data */
|
||||||
@ -155,8 +156,8 @@ scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
|
|||||||
memcpy(S->buffer, in, S->leftover);
|
memcpy(S->buffer, in, S->leftover);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash)
|
||||||
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
S->buffer[S->leftover] = 0x01;
|
S->buffer[S->leftover] = 0x01;
|
||||||
@ -178,17 +179,18 @@ typedef struct scrypt_hmac_state_t {
|
|||||||
} scrypt_hmac_state;
|
} scrypt_hmac_state;
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen)
|
||||||
scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
|
{
|
||||||
scrypt_hash_state st;
|
scrypt_hash_state st;
|
||||||
|
|
||||||
scrypt_hash_init(&st);
|
scrypt_hash_init(&st);
|
||||||
scrypt_hash_update(&st, m, mlen);
|
scrypt_hash_update(&st, m, mlen);
|
||||||
scrypt_hash_finish(&st, hash);
|
scrypt_hash_finish(&st, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* hmac */
|
/* hmac */
|
||||||
static void
|
static void scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen)
|
||||||
scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
|
{
|
||||||
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
@ -216,14 +218,14 @@ scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
|
|||||||
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
|
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen)
|
||||||
scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
|
{
|
||||||
/* h(inner || m...) */
|
/* h(inner || m...) */
|
||||||
scrypt_hash_update(&st->inner, m, mlen);
|
scrypt_hash_update(&st->inner, m, mlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac)
|
||||||
scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
|
{
|
||||||
/* h(inner || m) */
|
/* h(inner || m) */
|
||||||
scrypt_hash_digest innerhash;
|
scrypt_hash_digest innerhash;
|
||||||
scrypt_hash_finish(&st->inner, innerhash);
|
scrypt_hash_finish(&st->inner, innerhash);
|
||||||
@ -237,8 +239,9 @@ scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
|
|||||||
* Special version where N = 1
|
* Special version where N = 1
|
||||||
* - mikaelh
|
* - mikaelh
|
||||||
*/
|
*/
|
||||||
static void
|
static void scrypt_pbkdf2_1(const uint8_t *password, size_t password_len,
|
||||||
scrypt_pbkdf2_1(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out, size_t bytes) {
|
const uint8_t *salt, size_t salt_len, uint8_t *out, size_t bytes)
|
||||||
|
{
|
||||||
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
|
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
|
||||||
scrypt_hash_digest ti, u;
|
scrypt_hash_digest ti, u;
|
||||||
uint8_t be[4];
|
uint8_t be[4];
|
||||||
@ -271,16 +274,14 @@ scrypt_pbkdf2_1(const uint8_t *password, size_t password_len, const uint8_t *sal
|
|||||||
|
|
||||||
// ---------------------------- END PBKDF2 functions ------------------------------------
|
// ---------------------------- END PBKDF2 functions ------------------------------------
|
||||||
|
|
||||||
static void
|
static void scrypt_fatal_error_default(const char *msg) {
|
||||||
scrypt_fatal_error_default(const char *msg) {
|
|
||||||
fprintf(stderr, "%s\n", msg);
|
fprintf(stderr, "%s\n", msg);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;
|
static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;
|
||||||
|
|
||||||
void
|
void scrypt_set_fatal_error_default(scrypt_fatal_errorfn fn) {
|
||||||
scrypt_set_fatal_error_default(scrypt_fatal_errorfn fn) {
|
|
||||||
scrypt_fatal_error = fn;
|
scrypt_fatal_error = fn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -293,8 +294,8 @@ static uint8_t *mem_base = (uint8_t *)0;
|
|||||||
static size_t mem_bump = 0;
|
static size_t mem_bump = 0;
|
||||||
|
|
||||||
/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
|
/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
|
||||||
static scrypt_aligned_alloc
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
|
||||||
scrypt_alloc(uint64_t size) {
|
{
|
||||||
scrypt_aligned_alloc aa;
|
scrypt_aligned_alloc aa;
|
||||||
if (!mem_base) {
|
if (!mem_base) {
|
||||||
mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
|
mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
|
||||||
@ -308,13 +309,13 @@ scrypt_alloc(uint64_t size) {
|
|||||||
return aa;
|
return aa;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_free(scrypt_aligned_alloc *aa)
|
||||||
scrypt_free(scrypt_aligned_alloc *aa) {
|
{
|
||||||
mem_bump = 0;
|
mem_bump = 0;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static scrypt_aligned_alloc
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
|
||||||
scrypt_alloc(uint64_t size) {
|
{
|
||||||
static const size_t max_alloc = (size_t)-1;
|
static const size_t max_alloc = (size_t)-1;
|
||||||
scrypt_aligned_alloc aa;
|
scrypt_aligned_alloc aa;
|
||||||
size += (SCRYPT_BLOCK_BYTES - 1);
|
size += (SCRYPT_BLOCK_BYTES - 1);
|
||||||
@ -327,15 +328,16 @@ scrypt_alloc(uint64_t size) {
|
|||||||
return aa;
|
return aa;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void scrypt_free(scrypt_aligned_alloc *aa)
|
||||||
scrypt_free(scrypt_aligned_alloc *aa) {
|
{
|
||||||
free(aa->mem);
|
free(aa->mem);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// yacoin: increasing Nfactor gradually
|
// yacoin: increasing Nfactor gradually
|
||||||
unsigned char GetNfactor(unsigned int nTimestamp) {
|
unsigned char GetNfactor(unsigned int nTimestamp)
|
||||||
|
{
|
||||||
int l = 0;
|
int l = 0;
|
||||||
|
|
||||||
unsigned int Nfactor = 0;
|
unsigned int Nfactor = 0;
|
||||||
@ -427,14 +429,13 @@ unsigned char GetNfactor(unsigned int nTimestamp) {
|
|||||||
|
|
||||||
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
|
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
|
||||||
| (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
|
| (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
|
||||||
|
|
||||||
static int s_Nfactor = 0;
|
static int s_Nfactor = 0;
|
||||||
|
|
||||||
int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf,
|
int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf,
|
||||||
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
||||||
{
|
{
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint64_t N;
|
uint32_t N;
|
||||||
|
|
||||||
if (s_Nfactor == 0 && strlen(jane_params) > 0)
|
if (s_Nfactor == 0 && strlen(jane_params) > 0)
|
||||||
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
|
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
|
||||||
|
@ -414,11 +414,12 @@ __global__ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
|
||||||
static std::map<int, uint32_t *> context_good[2];
|
static std::map<int, uint32_t *> context_good[2];
|
||||||
|
|
||||||
bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
// allocate pinned host memory for good hashes
|
// allocate pinned host memory for good hashes
|
||||||
|
@ -459,10 +459,10 @@ void cuda_post_keccak512(uint32_t *g_odata, uint32_t *g_hash, uint32_t nonce)
|
|||||||
// callable host code to initialize constants and to call kernels
|
// callable host code to initialize constants and to call kernels
|
||||||
//
|
//
|
||||||
|
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
|
||||||
|
|
||||||
extern "C" void prepare_keccak512(int thr_id, const uint32_t host_pdata[20])
|
extern "C" void prepare_keccak512(int thr_id, const uint32_t host_pdata[20])
|
||||||
{
|
{
|
||||||
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(c_keccak_round_constants, host_keccak_round_constants, sizeof(host_keccak_round_constants), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(c_keccak_round_constants, host_keccak_round_constants, sizeof(host_keccak_round_constants), 0, cudaMemcpyHostToDevice));
|
||||||
@ -796,10 +796,10 @@ void crypto_hash(uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool validat
|
|||||||
|
|
||||||
static std::map<int, uint32_t *> context_good[2];
|
static std::map<int, uint32_t *> context_good[2];
|
||||||
|
|
||||||
// ... keccak???
|
|
||||||
bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_DEVICES] = {false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
||||||
|
@ -661,43 +661,43 @@ KeplerKernel::KeplerKernel() : KernelInterface()
|
|||||||
|
|
||||||
bool KeplerKernel::bindtexture_1D(uint32_t *d_V, size_t size)
|
bool KeplerKernel::bindtexture_1D(uint32_t *d_V, size_t size)
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc channelDesc4 = cudaCreateChannelDesc<uint4>();
|
cudaChannelFormatDesc channelDesc4 = cudaCreateChannelDesc<uint4>();
|
||||||
texRef1D_4_V.normalized = 0;
|
texRef1D_4_V.normalized = 0;
|
||||||
texRef1D_4_V.filterMode = cudaFilterModePoint;
|
texRef1D_4_V.filterMode = cudaFilterModePoint;
|
||||||
texRef1D_4_V.addressMode[0] = cudaAddressModeClamp;
|
texRef1D_4_V.addressMode[0] = cudaAddressModeClamp;
|
||||||
checkCudaErrors(cudaBindTexture(NULL, &texRef1D_4_V, d_V, &channelDesc4, size));
|
checkCudaErrors(cudaBindTexture(NULL, &texRef1D_4_V, d_V, &channelDesc4, size));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeplerKernel::bindtexture_2D(uint32_t *d_V, int width, int height, size_t pitch)
|
bool KeplerKernel::bindtexture_2D(uint32_t *d_V, int width, int height, size_t pitch)
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc channelDesc4 = cudaCreateChannelDesc<uint4>();
|
cudaChannelFormatDesc channelDesc4 = cudaCreateChannelDesc<uint4>();
|
||||||
texRef2D_4_V.normalized = 0;
|
texRef2D_4_V.normalized = 0;
|
||||||
texRef2D_4_V.filterMode = cudaFilterModePoint;
|
texRef2D_4_V.filterMode = cudaFilterModePoint;
|
||||||
texRef2D_4_V.addressMode[0] = cudaAddressModeClamp;
|
texRef2D_4_V.addressMode[0] = cudaAddressModeClamp;
|
||||||
texRef2D_4_V.addressMode[1] = cudaAddressModeClamp;
|
texRef2D_4_V.addressMode[1] = cudaAddressModeClamp;
|
||||||
// maintain texture width of TEXWIDTH (max. limit is 65000)
|
// maintain texture width of TEXWIDTH (max. limit is 65000)
|
||||||
while (width > TEXWIDTH) { width /= 2; height *= 2; pitch /= 2; }
|
while (width > TEXWIDTH) { width /= 2; height *= 2; pitch /= 2; }
|
||||||
while (width < TEXWIDTH) { width *= 2; height = (height+1)/2; pitch *= 2; }
|
while (width < TEXWIDTH) { width *= 2; height = (height+1)/2; pitch *= 2; }
|
||||||
checkCudaErrors(cudaBindTexture2D(NULL, &texRef2D_4_V, d_V, &channelDesc4, width, height, pitch));
|
checkCudaErrors(cudaBindTexture2D(NULL, &texRef2D_4_V, d_V, &channelDesc4, width, height, pitch));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeplerKernel::unbindtexture_1D()
|
bool KeplerKernel::unbindtexture_1D()
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaUnbindTexture(texRef1D_4_V));
|
checkCudaErrors(cudaUnbindTexture(texRef1D_4_V));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeplerKernel::unbindtexture_2D()
|
bool KeplerKernel::unbindtexture_2D()
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaUnbindTexture(texRef2D_4_V));
|
checkCudaErrors(cudaUnbindTexture(texRef2D_4_V));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void KeplerKernel::set_scratchbuf_constants(int MAXWARPS, uint32_t** h_V)
|
void KeplerKernel::set_scratchbuf_constants(int MAXWARPS, uint32_t** h_V)
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(c_V, h_V, MAXWARPS*sizeof(uint32_t*), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(c_V, h_V, MAXWARPS*sizeof(uint32_t*), 0, cudaMemcpyHostToDevice));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeplerKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int thr_id, cudaStream_t stream,
|
bool KeplerKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int thr_id, cudaStream_t stream,
|
||||||
@ -706,21 +706,22 @@ bool KeplerKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int
|
|||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
// make some constants available to kernel, update only initially and when changing
|
// make some constants available to kernel, update only initially and when changing
|
||||||
static int prev_N[MAX_DEVICES] = {0};
|
static uint32_t prev_N[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (N != prev_N[thr_id]) {
|
if (N != prev_N[thr_id]) {
|
||||||
uint32_t h_N = N;
|
uint32_t h_N = N;
|
||||||
uint32_t h_N_1 = N-1;
|
uint32_t h_N_1 = N-1;
|
||||||
uint32_t h_SCRATCH = SCRATCH;
|
uint32_t h_SCRATCH = SCRATCH;
|
||||||
uint32_t h_SCRATCH_WU_PER_WARP = (SCRATCH * WU_PER_WARP);
|
uint32_t h_SCRATCH_WU_PER_WARP = (SCRATCH * WU_PER_WARP);
|
||||||
uint32_t h_SCRATCH_WU_PER_WARP_1 = (SCRATCH * WU_PER_WARP) - 1;
|
uint32_t h_SCRATCH_WU_PER_WARP_1 = (SCRATCH * WU_PER_WARP) - 1;
|
||||||
|
|
||||||
cudaMemcpyToSymbolAsync(c_N, &h_N, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
cudaMemcpyToSymbolAsync(c_N, &h_N, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemcpyToSymbolAsync(c_N_1, &h_N_1, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
cudaMemcpyToSymbolAsync(c_N_1, &h_N_1, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemcpyToSymbolAsync(c_SCRATCH, &h_SCRATCH, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
cudaMemcpyToSymbolAsync(c_SCRATCH, &h_SCRATCH, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemcpyToSymbolAsync(c_SCRATCH_WU_PER_WARP, &h_SCRATCH_WU_PER_WARP, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
cudaMemcpyToSymbolAsync(c_SCRATCH_WU_PER_WARP, &h_SCRATCH_WU_PER_WARP, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
||||||
cudaMemcpyToSymbolAsync(c_SCRATCH_WU_PER_WARP_1, &h_SCRATCH_WU_PER_WARP_1, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
cudaMemcpyToSymbolAsync(c_SCRATCH_WU_PER_WARP_1, &h_SCRATCH_WU_PER_WARP_1, sizeof(uint32_t), 0, cudaMemcpyHostToDevice, stream);
|
||||||
|
|
||||||
prev_N[thr_id] = N;
|
prev_N[thr_id] = N;
|
||||||
}
|
}
|
||||||
|
|
||||||
// First phase: Sequential writes to scratchpad.
|
// First phase: Sequential writes to scratchpad.
|
||||||
@ -732,14 +733,14 @@ bool KeplerKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int
|
|||||||
unsigned int pos = 0;
|
unsigned int pos = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (LOOKUP_GAP == 1) {
|
if (LOOKUP_GAP == 1) {
|
||||||
if (IS_SCRYPT()) kepler_scrypt_core_kernelA<A_SCRYPT, ANDERSEN> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N));
|
if (IS_SCRYPT()) kepler_scrypt_core_kernelA<A_SCRYPT, ANDERSEN> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N));
|
||||||
if (IS_SCRYPT_JANE()) kepler_scrypt_core_kernelA<A_SCRYPT_JANE, SIMPLE> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N));
|
if (IS_SCRYPT_JANE()) kepler_scrypt_core_kernelA<A_SCRYPT_JANE, SIMPLE> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N));
|
||||||
} else {
|
} else {
|
||||||
if (IS_SCRYPT()) kepler_scrypt_core_kernelA_LG<A_SCRYPT, ANDERSEN> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N), LOOKUP_GAP);
|
if (IS_SCRYPT()) kepler_scrypt_core_kernelA_LG<A_SCRYPT, ANDERSEN> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N), LOOKUP_GAP);
|
||||||
if (IS_SCRYPT_JANE()) kepler_scrypt_core_kernelA_LG<A_SCRYPT_JANE, SIMPLE> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N), LOOKUP_GAP);
|
if (IS_SCRYPT_JANE()) kepler_scrypt_core_kernelA_LG<A_SCRYPT_JANE, SIMPLE> <<< grid, threads, 0, stream >>>(d_idata, pos, min(pos+batch, N), LOOKUP_GAP);
|
||||||
}
|
}
|
||||||
pos += batch;
|
pos += batch;
|
||||||
} while (pos < N);
|
} while (pos < N);
|
||||||
|
|
||||||
// Second phase: Random read access from scratchpad.
|
// Second phase: Random read access from scratchpad.
|
||||||
|
@ -97,7 +97,8 @@ bool NVKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int thr_
|
|||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
// make some constants available to kernel, update only initially and when changing
|
// make some constants available to kernel, update only initially and when changing
|
||||||
static int prev_N[MAX_DEVICES] = {0};
|
static uint32_t prev_N[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (N != prev_N[thr_id]) {
|
if (N != prev_N[thr_id]) {
|
||||||
uint32_t h_N = N;
|
uint32_t h_N = N;
|
||||||
uint32_t h_N_1 = N-1;
|
uint32_t h_N_1 = N-1;
|
||||||
@ -1025,7 +1026,8 @@ static std::map<int, uint32_t *> context_good[2];
|
|||||||
|
|
||||||
bool NVKernel::prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool NVKernel::prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_DEVICES] = {false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
||||||
@ -1452,7 +1454,8 @@ void kepler_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bo
|
|||||||
|
|
||||||
bool NVKernel::prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool NVKernel::prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_DEVICES] = {false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
// allocate pinned host memory for good hashes
|
// allocate pinned host memory for good hashes
|
||||||
|
@ -56,7 +56,8 @@ bool NV2Kernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int thr
|
|||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
// make some constants available to kernel, update only initially and when changing
|
// make some constants available to kernel, update only initially and when changing
|
||||||
static int prev_N[MAX_DEVICES] = {0};
|
static uint32_t prev_N[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (N != prev_N[thr_id]) {
|
if (N != prev_N[thr_id]) {
|
||||||
uint32_t h_N = N;
|
uint32_t h_N = N;
|
||||||
uint32_t h_N_1 = N-1;
|
uint32_t h_N_1 = N-1;
|
||||||
@ -1264,7 +1265,8 @@ static std::map<int, uint32_t *> context_good[2];
|
|||||||
|
|
||||||
bool NV2Kernel::prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool NV2Kernel::prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_DEVICES] = {false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(KeccakF_RoundConstants, host_KeccakF_RoundConstants, sizeof(host_KeccakF_RoundConstants), 0, cudaMemcpyHostToDevice));
|
||||||
@ -1687,7 +1689,8 @@ __global__ void titan_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *
|
|||||||
|
|
||||||
bool NV2Kernel::prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool NV2Kernel::prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_DEVICES] = {false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
// allocate pinned host memory for good hashes
|
// allocate pinned host memory for good hashes
|
||||||
|
@ -9,8 +9,6 @@
|
|||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
#define MAX_DEVICES MAX_GPUS
|
|
||||||
|
|
||||||
// from ccminer.cpp
|
// from ccminer.cpp
|
||||||
extern short device_map[MAX_GPUS];
|
extern short device_map[MAX_GPUS];
|
||||||
extern int device_batchsize[MAX_GPUS]; // cudaminer -b
|
extern int device_batchsize[MAX_GPUS]; // cudaminer -b
|
||||||
|
@ -409,7 +409,8 @@ __global__ void cuda_post_sha256(uint32_t g_output[8], uint32_t g_tstate_ext[8],
|
|||||||
|
|
||||||
void prepare_sha256(int thr_id, uint32_t host_pdata[20], uint32_t host_midstate[8])
|
void prepare_sha256(int thr_id, uint32_t host_pdata[20], uint32_t host_midstate[8])
|
||||||
{
|
{
|
||||||
static bool init[8] = {false, false, false, false, false, false, false, false};
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaMemcpyToSymbol(sha256_h, host_sha256_h, sizeof(host_sha256_h), 0, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpyToSymbol(sha256_h, host_sha256_h, sizeof(host_sha256_h), 0, cudaMemcpyHostToDevice));
|
||||||
|
@ -710,7 +710,8 @@ bool TestKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int th
|
|||||||
size_t shared = ((threads.x + 31) / 32) * (32+1) * sizeof(uint32_t);
|
size_t shared = ((threads.x + 31) / 32) * (32+1) * sizeof(uint32_t);
|
||||||
|
|
||||||
// make some constants available to kernel, update only initially and when changing
|
// make some constants available to kernel, update only initially and when changing
|
||||||
static int prev_N[MAX_DEVICES] = {0};
|
static uint32_t prev_N[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (N != prev_N[thr_id]) {
|
if (N != prev_N[thr_id]) {
|
||||||
uint32_t h_N = N;
|
uint32_t h_N = N;
|
||||||
uint32_t h_N_1 = N-1;
|
uint32_t h_N_1 = N-1;
|
||||||
|
@ -678,7 +678,8 @@ bool TitanKernel::run_kernel(dim3 grid, dim3 threads, int WARPS_PER_BLOCK, int t
|
|||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
// make some constants available to kernel, update only initially and when changing
|
// make some constants available to kernel, update only initially and when changing
|
||||||
static int prev_N[MAX_DEVICES] = {0};
|
static uint32_t prev_N[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
if (N != prev_N[thr_id]) {
|
if (N != prev_N[thr_id]) {
|
||||||
uint32_t h_N = N;
|
uint32_t h_N = N;
|
||||||
uint32_t h_N_1 = N-1;
|
uint32_t h_N_1 = N-1;
|
||||||
|
Loading…
Reference in New Issue
Block a user