@@ -1,49 +1,21 @@
 /*
- * Quick and dirty addition of Hamsi-512 for X13
- *
- * Built on cbuchner1's implementation, actual hashing code
- * heavily based on phm's sgminer
- *
+ * Quick Hamsi-512 for X13
+ * by tsiv - 2014
  */
-
-/*
- * X13 kernel implementation.
- *
- * ==========================(LICENSE BEGIN)============================
- *
- * Copyright (c) 2014 phm
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * ===========================(LICENSE END)=============================
- *
- * @author phm <phm@inbox.com>
- */

 #include <stdio.h>
 #include <stdint.h>
 #include <memory.h>

 #include "cuda_helper.h"

 typedef unsigned char BitSequence;

-// from heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

-__device__ __constant__
-static const uint32_t d_alpha_n[] = {
+static __constant__ uint32_t d_alpha_n[32];
+static __constant__ uint32_t d_alpha_f[32];
+static __constant__ uint32_t d_T512[64][16];
+
+static const uint32_t alpha_n[] = {
     SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
     SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
     SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
@@ -57,8 +29,7 @@ static const uint32_t d_alpha_n[] = {
     SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
 };

-__device__ __constant__
-static const uint32_t d_alpha_f[] = {
+static const uint32_t alpha_f[] = {
     SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
     SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
     SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
@@ -132,47 +103,47 @@ static const uint32_t d_alpha_f[] = {
     (a) = ROTL32(a, 13); \
     (c) = ROTL32(c, 3); \
     (b) ^= (a) ^ (c); \
-    (d) ^= (c) ^ SPH_T32((a) << 3); \
+    (d) ^= (c) ^ ((a) << 3); \
     (b) = ROTL32(b, 1); \
     (d) = ROTL32(d, 7); \
     (a) ^= (b) ^ (d); \
-    (c) ^= (d) ^ SPH_T32((b) << 7); \
+    (c) ^= (d) ^ ((b) << 7); \
     (a) = ROTL32(a, 5); \
     (c) = ROTL32(c, 22); \
 }

 #define ROUND_BIG(rc, alpha) { \
     hamsi_s00 ^= alpha[0x00]; \
-    hamsi_s01 ^= alpha[0x01] ^ (uint32_t)(rc); \
-    hamsi_s02 ^= alpha[0x02]; \
-    hamsi_s03 ^= alpha[0x03]; \
-    hamsi_s04 ^= alpha[0x04]; \
-    hamsi_s05 ^= alpha[0x05]; \
-    hamsi_s06 ^= alpha[0x06]; \
-    hamsi_s07 ^= alpha[0x07]; \
     hamsi_s08 ^= alpha[0x08]; \
-    hamsi_s09 ^= alpha[0x09]; \
-    hamsi_s0A ^= alpha[0x0A]; \
-    hamsi_s0B ^= alpha[0x0B]; \
-    hamsi_s0C ^= alpha[0x0C]; \
-    hamsi_s0D ^= alpha[0x0D]; \
-    hamsi_s0E ^= alpha[0x0E]; \
-    hamsi_s0F ^= alpha[0x0F]; \
     hamsi_s10 ^= alpha[0x10]; \
-    hamsi_s11 ^= alpha[0x11]; \
-    hamsi_s12 ^= alpha[0x12]; \
-    hamsi_s13 ^= alpha[0x13]; \
-    hamsi_s14 ^= alpha[0x14]; \
-    hamsi_s15 ^= alpha[0x15]; \
-    hamsi_s16 ^= alpha[0x16]; \
-    hamsi_s17 ^= alpha[0x17]; \
     hamsi_s18 ^= alpha[0x18]; \
+    hamsi_s01 ^= alpha[0x01] ^ (uint32_t)(rc); \
+    hamsi_s09 ^= alpha[0x09]; \
+    hamsi_s11 ^= alpha[0x11]; \
     hamsi_s19 ^= alpha[0x19]; \
+    hamsi_s02 ^= alpha[0x02]; \
+    hamsi_s0A ^= alpha[0x0A]; \
+    hamsi_s12 ^= alpha[0x12]; \
     hamsi_s1A ^= alpha[0x1A]; \
+    hamsi_s03 ^= alpha[0x03]; \
+    hamsi_s0B ^= alpha[0x0B]; \
+    hamsi_s13 ^= alpha[0x13]; \
     hamsi_s1B ^= alpha[0x1B]; \
+    hamsi_s04 ^= alpha[0x04]; \
+    hamsi_s0C ^= alpha[0x0C]; \
+    hamsi_s14 ^= alpha[0x14]; \
     hamsi_s1C ^= alpha[0x1C]; \
+    hamsi_s05 ^= alpha[0x05]; \
+    hamsi_s0D ^= alpha[0x0D]; \
+    hamsi_s15 ^= alpha[0x15]; \
     hamsi_s1D ^= alpha[0x1D]; \
+    hamsi_s06 ^= alpha[0x06]; \
+    hamsi_s0E ^= alpha[0x0E]; \
+    hamsi_s16 ^= alpha[0x16]; \
     hamsi_s1E ^= alpha[0x1E]; \
+    hamsi_s07 ^= alpha[0x07]; \
+    hamsi_s0F ^= alpha[0x0F]; \
+    hamsi_s17 ^= alpha[0x17]; \
     hamsi_s1F ^= alpha[0x1F]; \
     SBOX(hamsi_s00, hamsi_s08, hamsi_s10, hamsi_s18); \
     SBOX(hamsi_s01, hamsi_s09, hamsi_s11, hamsi_s19); \
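
Note on the SBOX change above: SPH_T32() in the sph reference headers is a truncation to 32 bits (commonly defined as ((x) & SPH_C32(0xFFFFFFFF))), so dropping it around a shift of a uint32_t operand does not change the result; the CUDA port only loses a portability guard that matters when the integer type can be wider than 32 bits. A minimal standalone check, assuming that usual sph_types.h definition:

    /* Sketch only: SPH_T32 is a no-op on 32-bit operands
     * (SPH_C32/SPH_T32 defined here under the assumed sph_types.h semantics). */
    #include <assert.h>
    #include <stdint.h>

    #define SPH_C32(x) ((uint32_t)(x))
    #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))

    int main(void)
    {
        uint32_t a = 0x9b0f3c06u;
        /* identical with or without the 32-bit mask */
        assert(SPH_T32(a << 3) == (uint32_t)(a << 3));
        return 0;
    }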
@@ -198,27 +169,13 @@ static const uint32_t d_alpha_f[] = {

 #define P_BIG { \
-    ROUND_BIG(0, d_alpha_n); \
-    ROUND_BIG(1, d_alpha_n); \
-    ROUND_BIG(2, d_alpha_n); \
-    ROUND_BIG(3, d_alpha_n); \
-    ROUND_BIG(4, d_alpha_n); \
-    ROUND_BIG(5, d_alpha_n); \
+    for( int r = 0; r < 6; r++ ) \
+        ROUND_BIG(r, d_alpha_n); \
 }

 #define PF_BIG { \
-    ROUND_BIG(0, d_alpha_f); \
-    ROUND_BIG(1, d_alpha_f); \
-    ROUND_BIG(2, d_alpha_f); \
-    ROUND_BIG(3, d_alpha_f); \
-    ROUND_BIG(4, d_alpha_f); \
-    ROUND_BIG(5, d_alpha_f); \
-    ROUND_BIG(6, d_alpha_f); \
-    ROUND_BIG(7, d_alpha_f); \
-    ROUND_BIG(8, d_alpha_f); \
-    ROUND_BIG(9, d_alpha_f); \
-    ROUND_BIG(10, d_alpha_f); \
-    ROUND_BIG(11, d_alpha_f); \
+    for( int r = 0; r < 12; r++ ) \
+        ROUND_BIG(r, d_alpha_f); \
 }

 #define T_BIG { \
@@ -241,8 +198,8 @@ static const uint32_t d_alpha_f[] = {
     c0 = (h[0x0] ^= hamsi_s00); \
 }

-__device__ __constant__
-static const uint32_t d_T512[64][16] = {
+static const uint32_t T512[64][16] = {
     { SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
       SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
       SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
@@ -629,53 +586,8 @@ static const uint32_t d_T512[64][16] = {
       SPH_C32(0xe7e00a94) }
 };

-#define INPUT_BIG { \
-    const uint32_t *tp = &d_T512[0][0]; \
-    unsigned u, v; \
-    m0 = 0; \
-    m1 = 0; \
-    m2 = 0; \
-    m3 = 0; \
-    m4 = 0; \
-    m5 = 0; \
-    m6 = 0; \
-    m7 = 0; \
-    m8 = 0; \
-    m9 = 0; \
-    mA = 0; \
-    mB = 0; \
-    mC = 0; \
-    mD = 0; \
-    mE = 0; \
-    mF = 0; \
-    for (u = 0; u < 8; u ++) { \
-        unsigned db = buf(u); \
-        for (v = 0; v < 8; v ++, db >>= 1) { \
-            uint32_t dm = SPH_T32(-(uint32_t)(db & 1)); \
-            m0 ^= dm & *tp ++; \
-            m1 ^= dm & *tp ++; \
-            m2 ^= dm & *tp ++; \
-            m3 ^= dm & *tp ++; \
-            m4 ^= dm & *tp ++; \
-            m5 ^= dm & *tp ++; \
-            m6 ^= dm & *tp ++; \
-            m7 ^= dm & *tp ++; \
-            m8 ^= dm & *tp ++; \
-            m9 ^= dm & *tp ++; \
-            mA ^= dm & *tp ++; \
-            mB ^= dm & *tp ++; \
-            mC ^= dm & *tp ++; \
-            mD ^= dm & *tp ++; \
-            mE ^= dm & *tp ++; \
-            mF ^= dm & *tp ++; \
-        } \
-    } \
-}
-
-/***************************************************/
-
-// The hash function
-__global__ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+__global__
+void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
 {
     int thread = (blockDim.x * blockIdx.x + threadIdx.x);
     if (thread < threads)
@@ -690,28 +602,73 @@ __global__ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
     uint32_t c4 = SPH_C32(0x72672031), c5 = SPH_C32(0x302c2062), c6 = SPH_C32(0x75732032), c7 = SPH_C32(0x3434362c);
     uint32_t c8 = SPH_C32(0x20422d33), c9 = SPH_C32(0x30303120), cA = SPH_C32(0x4c657576), cB = SPH_C32(0x656e2d48);
     uint32_t cC = SPH_C32(0x65766572), cD = SPH_C32(0x6c65652c), cE = SPH_C32(0x2042656c), cF = SPH_C32(0x6769756d);
-    uint32_t m0, m1, m2, m3, m4, m5, m6, m7;
-    uint32_t m8, m9, mA, mB, mC, mD, mE, mF;
+    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, mA, mB, mC, mD, mE, mF;
     uint32_t h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
+    uint32_t *tp, db, dm;

-#define buf(u) (h1[i+u])
-    #pragma unroll 8
     for(int i = 0; i < 64; i += 8) {
-        INPUT_BIG;
-        P_BIG;
+        m0 = 0; m1 = 0; m2 = 0; m3 = 0; m4 = 0; m5 = 0; m6 = 0; m7 = 0;
+        m8 = 0; m9 = 0; mA = 0; mB = 0; mC = 0; mD = 0; mE = 0; mF = 0;
+        tp = &d_T512[0][0];
+
+        #pragma unroll 2
+        for (int u = 0; u < 8; u ++) {
+            db = h1[i+u];
+            #pragma unroll 2
+            for (int v = 0; v < 8; v ++, db >>= 1) {
+                dm = -(uint32_t)(db & 1);
+                m0 ^= dm & *(tp+ 0); m1 ^= dm & *(tp+ 1);
+                m2 ^= dm & *(tp+ 2); m3 ^= dm & *(tp+ 3);
+                m4 ^= dm & *(tp+ 4); m5 ^= dm & *(tp+ 5);
+                m6 ^= dm & *(tp+ 6); m7 ^= dm & *(tp+ 7);
+                m8 ^= dm & *(tp+ 8); m9 ^= dm & *(tp+ 9);
+                mA ^= dm & *(tp+10); mB ^= dm & *(tp+11);
+                mC ^= dm & *(tp+12); mD ^= dm & *(tp+13);
+                mE ^= dm & *(tp+14); mF ^= dm & *(tp+15);
+                tp += 16;
+            }
+        }
+
+        for( int r = 0; r < 6; r += 2 ) {
+            ROUND_BIG(r, d_alpha_n);
+            ROUND_BIG(r+1, d_alpha_n);
+        }
         T_BIG;
     }

-#undef buf
-#define buf(u) (u == 0 ? 0x80 : 0)
-    INPUT_BIG;
-    P_BIG;
+    tp = &d_T512[0][0] + 112;
+    m0 = *(tp+ 0); m1 = *(tp+ 1);
+    m2 = *(tp+ 2); m3 = *(tp+ 3);
+    m4 = *(tp+ 4); m5 = *(tp+ 5);
+    m6 = *(tp+ 6); m7 = *(tp+ 7);
+    m8 = *(tp+ 8); m9 = *(tp+ 9);
+    mA = *(tp+10); mB = *(tp+11);
+    mC = *(tp+12); mD = *(tp+13);
+    mE = *(tp+14); mF = *(tp+15);
+
+    for( int r = 0; r < 6; r += 2 ) {
+        ROUND_BIG(r, d_alpha_n);
+        ROUND_BIG(r+1, d_alpha_n);
+    }
     T_BIG;

-#undef buf
-#define buf(u) (u == 6 ? 2 : 0)
-    INPUT_BIG;
-    PF_BIG;
+    tp = &d_T512[0][0] + 784;
+    m0 = *(tp+ 0); m1 = *(tp+ 1);
+    m2 = *(tp+ 2); m3 = *(tp+ 3);
+    m4 = *(tp+ 4); m5 = *(tp+ 5);
+    m6 = *(tp+ 6); m7 = *(tp+ 7);
+    m8 = *(tp+ 8); m9 = *(tp+ 9);
+    mA = *(tp+10); mB = *(tp+11);
+    mC = *(tp+12); mD = *(tp+13);
+    mE = *(tp+14); mF = *(tp+15);
+
+    for( int r = 0; r < 12; r += 2 ) {
+        ROUND_BIG(r, d_alpha_f);
+        ROUND_BIG(r+1, d_alpha_f);
+    }
     T_BIG;

     #pragma unroll 16
@@ -720,24 +677,22 @@ __global__ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
     }
 }

-__host__ void x13_hamsi512_cpu_init(int thr_id, int threads)
+__host__
+void x13_hamsi512_cpu_init(int thr_id, int threads)
 {
+    cudaMemcpyToSymbol(d_alpha_n, alpha_n, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
+    cudaMemcpyToSymbol(d_alpha_f, alpha_f, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
+    CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_T512, T512, sizeof(uint32_t)*64*16, 0, cudaMemcpyHostToDevice));
 }

-__host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+__host__
+void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
 {
     const int threadsperblock = 256;

-    // compute how many thread blocks we need
     dim3 grid((threads + threadsperblock-1)/threadsperblock);
     dim3 block(threadsperblock);

-    // size of the dynamic shared memory area
-    size_t shared_size = 0;
-
-    // fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
-
-    x13_hamsi512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
+    x13_hamsi512_gpu_hash_64<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);

     MyStreamSynchronize(NULL, order, thr_id);
 }
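
Note on the precomputed table offsets used above (112 and 784): the removed INPUT_BIG macro builds the expanded message words by walking d_T512 one 16-word row per message bit, row index u*8 + v for bit v of byte u. The two constant blocks each have a single bit set, so each selects exactly one row: the padding block has buf(0) == 0x80 (bit 7 of byte 0) and the length block has buf(6) == 2 (bit 1 of byte 6). The sketch below, with a hypothetical helper t512_offset(), just reproduces that arithmetic to show where the magic numbers come from:

    /* Sketch only: derive the single-row word offsets from &T512[0][0]
     * used for the padding and length blocks (16 uint32_t per row). */
    #include <stdio.h>

    static int t512_offset(int u, int v)
    {
        return (u * 8 + v) * 16;
    }

    int main(void)
    {
        printf("padding block (buf(0) == 0x80): %d\n", t512_offset(0, 7)); /* 112 */
        printf("length block  (buf(6) == 2):    %d\n", t512_offset(6, 1)); /* 784 */
        return 0;
    }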