mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 00:44:15 +00:00
quark/x11: set signed int hashPosition vars to off_t
groestl (and keccak?) seem faster with 64-bit vars (off_t or int64_t)...
This commit is contained in:
parent
555504bd98
commit
ed4927fcd0
@ -428,7 +428,7 @@ void quark_bmw512_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t *
|
|||||||
Compression512(h, message);
|
Compression512(h, message);
|
||||||
|
|
||||||
// fertig
|
// fertig
|
||||||
uint64_t *outpHash = &g_hash[8 * thread];
|
uint64_t *outpHash = &g_hash[thread * 8];
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for(int i=0;i<8;i++)
|
for(int i=0;i<8;i++)
|
||||||
|
@ -151,8 +151,8 @@ void quark_bmw512_gpu_hash_64_30(uint32_t threads, uint32_t startNounce, uint64_
|
|||||||
{
|
{
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
uint32_t hashPosition = nounce - startNounce;
|
||||||
uint64_t *inpHash = &g_hash[8 * hashPosition];
|
uint64_t *inpHash = &g_hash[hashPosition * 8];
|
||||||
|
|
||||||
// Init
|
// Init
|
||||||
uint64_t h[16];
|
uint64_t h[16];
|
||||||
@ -205,7 +205,7 @@ void quark_bmw512_gpu_hash_64_30(uint32_t threads, uint32_t startNounce, uint64_
|
|||||||
|
|
||||||
Compression512_30(h, message);
|
Compression512_30(h, message);
|
||||||
|
|
||||||
uint64_t *outpHash = &g_hash[8 * hashPosition];
|
uint64_t *outpHash = &g_hash[hashPosition * 8];
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for(int i=0;i<8;i++)
|
for(int i=0;i<8;i++)
|
||||||
|
@ -229,7 +229,6 @@ static __device__ __forceinline__ void E8(uint32_t x[8][4])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Die Hash-Funktion
|
|
||||||
__global__ __launch_bounds__(256, 4)
|
__global__ __launch_bounds__(256, 4)
|
||||||
void quark_jh512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *const __restrict__ g_hash, const uint32_t *const __restrict__ g_nonceVector)
|
void quark_jh512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *const __restrict__ g_hash, const uint32_t *const __restrict__ g_nonceVector)
|
||||||
{
|
{
|
||||||
@ -237,8 +236,8 @@ void quark_jh512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *c
|
|||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
const uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
const uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
const uint32_t hashPosition = nounce - startNounce;
|
uint32_t hashPosition = nounce - startNounce;
|
||||||
uint32_t *const Hash = &g_hash[hashPosition * 16U];
|
uint32_t *Hash = &g_hash[hashPosition * 16U];
|
||||||
uint32_t x[8][4] = {
|
uint32_t x[8][4] = {
|
||||||
{ 0x964bd16f, 0x17aa003e, 0x052e6a63, 0x43d5157a },
|
{ 0x964bd16f, 0x17aa003e, 0x052e6a63, 0x43d5157a },
|
||||||
{ 0x8d5e228a, 0x0bef970c, 0x591234e9, 0x61c3b3f2 },
|
{ 0x8d5e228a, 0x0bef970c, 0x591234e9, 0x61c3b3f2 },
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
@ -119,12 +119,8 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
|
|
||||||
#if USE_SHUFFLE
|
#if USE_SHUFFLE
|
||||||
const int warpID = threadIdx.x & 0x0F; // 16 warps
|
const uint32_t warpBlockID = (thread + 15)>>4; // aufrunden auf volle Warp-Blöcke
|
||||||
const int warpBlockID = (thread + 15)>>4; // aufrunden auf volle Warp-Blöcke
|
|
||||||
const int maxHashPosition = thread<<3;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if USE_SHUFFLE
|
|
||||||
if (warpBlockID < ( (threads+15)>>4 ))
|
if (warpBlockID < ( (threads+15)>>4 ))
|
||||||
#else
|
#else
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -132,7 +128,7 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
{
|
{
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
|
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
|
||||||
|
|
||||||
// 128 Bytes
|
// 128 Bytes
|
||||||
|
@ -28,19 +28,19 @@ void quark_groestl512_gpu_hash_64_quad(uint32_t threads, uint32_t startNounce, u
|
|||||||
uint32_t state[8];
|
uint32_t state[8];
|
||||||
|
|
||||||
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint32_t *inpHash = &g_hash[hashPosition << 4];
|
uint32_t *pHash = &g_hash[hashPosition << 4];
|
||||||
|
|
||||||
const uint16_t thr = threadIdx.x % THF;
|
const uint32_t thr = threadIdx.x % THF;
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for(int k=0;k<4;k++) message[k] = inpHash[(k * THF) + thr];
|
for(int k=0;k<4;k++) message[k] = pHash[thr + (k * THF)];
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for(int k=4;k<8;k++) message[k] = 0;
|
for(int k=4;k<8;k++) message[k] = 0;
|
||||||
|
|
||||||
if (thr == 0) message[4] = 0x80;
|
if (thr == 0) message[4] = 0x80U;
|
||||||
if (thr == 3) message[7] = 0x01000000;
|
if (thr == 3) message[7] = 0x01000000U;
|
||||||
|
|
||||||
uint32_t msgBitsliced[8];
|
uint32_t msgBitsliced[8];
|
||||||
to_bitslice_quad(message, msgBitsliced);
|
to_bitslice_quad(message, msgBitsliced);
|
||||||
@ -48,15 +48,13 @@ void quark_groestl512_gpu_hash_64_quad(uint32_t threads, uint32_t startNounce, u
|
|||||||
groestl512_progressMessage_quad(state, msgBitsliced);
|
groestl512_progressMessage_quad(state, msgBitsliced);
|
||||||
|
|
||||||
// Nur der erste von jeweils 4 Threads bekommt das Ergebns-Hash
|
// Nur der erste von jeweils 4 Threads bekommt das Ergebns-Hash
|
||||||
uint32_t *outpHash = inpHash;
|
|
||||||
uint32_t hash[16];
|
uint32_t hash[16];
|
||||||
from_bitslice_quad(state, hash);
|
from_bitslice_quad(state, hash);
|
||||||
|
|
||||||
|
|
||||||
// uint4 = 4x4 uint32_t = 16 bytes
|
// uint4 = 4x4 uint32_t = 16 bytes
|
||||||
if (thr == 0) {
|
if (thr == 0) {
|
||||||
uint4 *phash = (uint4*) hash;
|
uint4 *phash = (uint4*) hash;
|
||||||
uint4 *outpt = (uint4*) outpHash; /* var kept for hash align */
|
uint4 *outpt = (uint4*) pHash;
|
||||||
outpt[0] = phash[0];
|
outpt[0] = phash[0];
|
||||||
outpt[1] = phash[1];
|
outpt[1] = phash[1];
|
||||||
outpt[2] = phash[2];
|
outpt[2] = phash[2];
|
||||||
@ -85,7 +83,7 @@ __global__ void __launch_bounds__(TPB, THF)
|
|||||||
|
|
||||||
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint32_t * inpHash = &g_hash[hashPosition<<4];
|
uint32_t * inpHash = &g_hash[hashPosition<<4];
|
||||||
const uint16_t thr = threadIdx.x % THF;
|
const uint16_t thr = threadIdx.x % THF;
|
||||||
|
|
||||||
|
@ -230,8 +230,8 @@ void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32
|
|||||||
|
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint32_t *inpHash = &g_hash[16 * hashPosition];
|
uint32_t *inpHash = &g_hash[hashPosition * 16];
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int k=0; k<16; k++)
|
for(int k=0; k<16; k++)
|
||||||
@ -266,7 +266,7 @@ void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32
|
|||||||
for(int u=0;u<32;u++) state[u] ^= message[u];
|
for(int u=0;u<32;u++) state[u] ^= message[u];
|
||||||
|
|
||||||
// Erzeugten Hash rausschreiben
|
// Erzeugten Hash rausschreiben
|
||||||
uint32_t *outpHash = &g_hash[16 * hashPosition];
|
uint32_t *outpHash = &g_hash[hashPosition * 16];
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int k=0;k<16;k++) outpHash[k] = state[k+16];
|
for(int k=0;k<16;k++) outpHash[k] = state[k+16];
|
||||||
|
@ -101,8 +101,8 @@ void quark_keccak512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_
|
|||||||
{
|
{
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint64_t *inpHash = &g_hash[8 * hashPosition];
|
uint64_t *inpHash = &g_hash[hashPosition * 8];
|
||||||
uint2 keccak_gpu_state[25];
|
uint2 keccak_gpu_state[25];
|
||||||
|
|
||||||
for (int i = 0; i<8; i++) {
|
for (int i = 0; i<8; i++) {
|
||||||
@ -200,8 +200,8 @@ void quark_keccak512_gpu_hash_64_v30(uint32_t threads, uint32_t startNounce, uin
|
|||||||
{
|
{
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
off_t hashPosition = nounce - startNounce;
|
||||||
uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
|
uint32_t *inpHash = (uint32_t*)&g_hash[hashPosition * 8];
|
||||||
|
|
||||||
uint32_t message[18];
|
uint32_t message[18];
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
@ -224,7 +224,7 @@ void quark_keccak512_gpu_hash_64_v30(uint32_t threads, uint32_t startNounce, uin
|
|||||||
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
|
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t *outpHash = (uint32_t*)&g_hash[8 * hashPosition];
|
uint32_t *outpHash = (uint32_t*)&g_hash[hashPosition * 8];
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int i=0; i<16; i++)
|
for(int i=0; i<16; i++)
|
||||||
outpHash[i] = hash[i];
|
outpHash[i] = hash[i];
|
||||||
|
@ -512,7 +512,7 @@ void quark_skein512_gpu_hash_64_sm3(uint32_t threads, uint32_t startNounce, uint
|
|||||||
|
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
uint32_t hashPosition = nounce - startNounce;
|
||||||
uint64_t *inpHash = &g_hash[hashPosition * 8];
|
uint64_t *inpHash = &g_hash[hashPosition * 8];
|
||||||
|
|
||||||
// Init
|
// Init
|
||||||
|
Loading…
x
Reference in New Issue
Block a user