You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
492 lines
15 KiB
492 lines
15 KiB
/* |
|
* Shabal-512 for X14/X15 (STUB) |
|
*/ |
|
#include <stdint.h> |
|
#include <cuda_runtime.h> |
|
|
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
|
|
|
|
|
#define SPH_C64(x) ((uint64_t)(x ## ULL)) |
|
#define SPH_C32(x) ((uint32_t)(x ## U)) |
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) |
|
|
|
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) ) |
|
|
|
#if __CUDA_ARCH__ < 350 |
|
// Kepler (Compute 3.0) |
|
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) |
|
#else |
|
// Kepler (Compute 3.5) |
|
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) |
|
#endif |
|
|
|
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ |
|
/* |
|
* Shabal implementation. |
|
* |
|
* ==========================(LICENSE BEGIN)============================ |
|
* |
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
|
* |
|
* Permission is hereby granted, free of charge, to any person obtaining |
|
* a copy of this software and associated documentation files (the |
|
* "Software"), to deal in the Software without restriction, including |
|
* without limitation the rights to use, copy, modify, merge, publish, |
|
* distribute, sublicense, and/or sell copies of the Software, and to |
|
* permit persons to whom the Software is furnished to do so, subject to |
|
* the following conditions: |
|
* |
|
* The above copyright notice and this permission notice shall be |
|
* included in all copies or substantial portions of the Software. |
|
* |
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
* |
|
* ===========================(LICENSE END)============================= |
|
* |
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
|
*/ |
|
|
|
/* |
|
* Part of this code was automatically generated (the part between |
|
* the "BEGIN" and "END" markers). |
|
*/ |
|
|
|
#define sM 16 |
|
|
|
#define C32 SPH_C32 |
|
#define T32 SPH_T32 |
|
|
|
#define O1 13 |
|
#define O2 9 |
|
#define O3 6 |
|
|
|
/* |
|
* We copy the state into local variables, so that the compiler knows |
|
* that it can optimize them at will. |
|
*/ |
|
|
|
/* BEGIN -- automatically generated code. */ |
|
|
|
#define INPUT_BLOCK_ADD do { \ |
|
B0 = T32(B0 + M0); \ |
|
B1 = T32(B1 + M1); \ |
|
B2 = T32(B2 + M2); \ |
|
B3 = T32(B3 + M3); \ |
|
B4 = T32(B4 + M4); \ |
|
B5 = T32(B5 + M5); \ |
|
B6 = T32(B6 + M6); \ |
|
B7 = T32(B7 + M7); \ |
|
B8 = T32(B8 + M8); \ |
|
B9 = T32(B9 + M9); \ |
|
BA = T32(BA + MA); \ |
|
BB = T32(BB + MB); \ |
|
BC = T32(BC + MC); \ |
|
BD = T32(BD + MD); \ |
|
BE = T32(BE + ME); \ |
|
BF = T32(BF + MF); \ |
|
} while (0) |
|
|
|
#define INPUT_BLOCK_SUB do { \ |
|
C0 = T32(C0 - M0); \ |
|
C1 = T32(C1 - M1); \ |
|
C2 = T32(C2 - M2); \ |
|
C3 = T32(C3 - M3); \ |
|
C4 = T32(C4 - M4); \ |
|
C5 = T32(C5 - M5); \ |
|
C6 = T32(C6 - M6); \ |
|
C7 = T32(C7 - M7); \ |
|
C8 = T32(C8 - M8); \ |
|
C9 = T32(C9 - M9); \ |
|
CA = T32(CA - MA); \ |
|
CB = T32(CB - MB); \ |
|
CC = T32(CC - MC); \ |
|
CD = T32(CD - MD); \ |
|
CE = T32(CE - ME); \ |
|
CF = T32(CF - MF); \ |
|
} while (0) |
|
|
|
#define XOR_W do { \ |
|
A00 ^= Wlow; \ |
|
A01 ^= Whigh; \ |
|
} while (0) |
|
|
|
#define SWAP(v1, v2) do { \ |
|
uint32_t tmp = (v1); \ |
|
(v1) = (v2); \ |
|
(v2) = tmp; \ |
|
} while (0) |
|
|
|
#define SWAP_BC do { \ |
|
SWAP(B0, C0); \ |
|
SWAP(B1, C1); \ |
|
SWAP(B2, C2); \ |
|
SWAP(B3, C3); \ |
|
SWAP(B4, C4); \ |
|
SWAP(B5, C5); \ |
|
SWAP(B6, C6); \ |
|
SWAP(B7, C7); \ |
|
SWAP(B8, C8); \ |
|
SWAP(B9, C9); \ |
|
SWAP(BA, CA); \ |
|
SWAP(BB, CB); \ |
|
SWAP(BC, CC); \ |
|
SWAP(BD, CD); \ |
|
SWAP(BE, CE); \ |
|
SWAP(BF, CF); \ |
|
} while (0) |
|
|
|
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \ |
|
xa0 = T32((xa0 \ |
|
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ |
|
^ xc) * 3U) \ |
|
^ xb1 ^ (xb2 & ~xb3) ^ xm; \ |
|
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ |
|
} while (0) |
|
|
|
#define PERM_STEP_0 do { \ |
|
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ |
|
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ |
|
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ |
|
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ |
|
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ |
|
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ |
|
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ |
|
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ |
|
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ |
|
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ |
|
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ |
|
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ |
|
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ |
|
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \ |
|
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ |
|
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ |
|
} while (0) |
|
|
|
#define PERM_STEP_1 do { \ |
|
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ |
|
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ |
|
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ |
|
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ |
|
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ |
|
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ |
|
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ |
|
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ |
|
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ |
|
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ |
|
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ |
|
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ |
|
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ |
|
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ |
|
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ |
|
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ |
|
} while (0) |
|
|
|
#define PERM_STEP_2 do { \ |
|
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ |
|
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ |
|
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ |
|
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ |
|
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ |
|
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ |
|
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ |
|
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ |
|
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ |
|
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ |
|
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ |
|
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ |
|
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ |
|
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ |
|
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ |
|
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ |
|
} while (0) |
|
|
|
#define APPLY_P do { \ |
|
B0 = T32(B0 << 17) | (B0 >> 15); \ |
|
B1 = T32(B1 << 17) | (B1 >> 15); \ |
|
B2 = T32(B2 << 17) | (B2 >> 15); \ |
|
B3 = T32(B3 << 17) | (B3 >> 15); \ |
|
B4 = T32(B4 << 17) | (B4 >> 15); \ |
|
B5 = T32(B5 << 17) | (B5 >> 15); \ |
|
B6 = T32(B6 << 17) | (B6 >> 15); \ |
|
B7 = T32(B7 << 17) | (B7 >> 15); \ |
|
B8 = T32(B8 << 17) | (B8 >> 15); \ |
|
B9 = T32(B9 << 17) | (B9 >> 15); \ |
|
BA = T32(BA << 17) | (BA >> 15); \ |
|
BB = T32(BB << 17) | (BB >> 15); \ |
|
BC = T32(BC << 17) | (BC >> 15); \ |
|
BD = T32(BD << 17) | (BD >> 15); \ |
|
BE = T32(BE << 17) | (BE >> 15); \ |
|
BF = T32(BF << 17) | (BF >> 15); \ |
|
PERM_STEP_0; \ |
|
PERM_STEP_1; \ |
|
PERM_STEP_2; \ |
|
A0B = T32(A0B + C6); \ |
|
A0A = T32(A0A + C5); \ |
|
A09 = T32(A09 + C4); \ |
|
A08 = T32(A08 + C3); \ |
|
A07 = T32(A07 + C2); \ |
|
A06 = T32(A06 + C1); \ |
|
A05 = T32(A05 + C0); \ |
|
A04 = T32(A04 + CF); \ |
|
A03 = T32(A03 + CE); \ |
|
A02 = T32(A02 + CD); \ |
|
A01 = T32(A01 + CC); \ |
|
A00 = T32(A00 + CB); \ |
|
A0B = T32(A0B + CA); \ |
|
A0A = T32(A0A + C9); \ |
|
A09 = T32(A09 + C8); \ |
|
A08 = T32(A08 + C7); \ |
|
A07 = T32(A07 + C6); \ |
|
A06 = T32(A06 + C5); \ |
|
A05 = T32(A05 + C4); \ |
|
A04 = T32(A04 + C3); \ |
|
A03 = T32(A03 + C2); \ |
|
A02 = T32(A02 + C1); \ |
|
A01 = T32(A01 + C0); \ |
|
A00 = T32(A00 + CF); \ |
|
A0B = T32(A0B + CE); \ |
|
A0A = T32(A0A + CD); \ |
|
A09 = T32(A09 + CC); \ |
|
A08 = T32(A08 + CB); \ |
|
A07 = T32(A07 + CA); \ |
|
A06 = T32(A06 + C9); \ |
|
A05 = T32(A05 + C8); \ |
|
A04 = T32(A04 + C7); \ |
|
A03 = T32(A03 + C6); \ |
|
A02 = T32(A02 + C5); \ |
|
A01 = T32(A01 + C4); \ |
|
A00 = T32(A00 + C3); \ |
|
} while (0) |
|
|
|
#define INCR_W do { \ |
|
if ((Wlow = T32(Wlow + 1)) == 0) \ |
|
Whigh = T32(Whigh + 1); \ |
|
} while (0) |
|
|
|
|
|
#if 0 /* other hash sizes init */ |
|
|
|
static const uint32_t A_init_192[] = { |
|
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E), |
|
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465), |
|
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9) |
|
}; |
|
|
|
static const uint32_t B_init_192[] = { |
|
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824), |
|
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7), |
|
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319), |
|
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C) |
|
}; |
|
|
|
static const uint32_t C_init_192[] = { |
|
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B), |
|
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640), |
|
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3), |
|
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669) |
|
}; |
|
|
|
static const uint32_t A_init_224[] = { |
|
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B), |
|
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F), |
|
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061) |
|
}; |
|
|
|
static const uint32_t B_init_224[] = { |
|
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498), |
|
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5), |
|
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0), |
|
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C) |
|
}; |
|
|
|
static const uint32_t C_init_224[] = { |
|
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD), |
|
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18), |
|
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2), |
|
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83) |
|
}; |
|
|
|
static const uint32_t A_init_256[] = { |
|
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191), |
|
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C), |
|
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A) |
|
}; |
|
|
|
static const uint32_t B_init_256[] = { |
|
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F), |
|
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002), |
|
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890), |
|
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5) |
|
}; |
|
|
|
static const uint32_t C_init_256[] = { |
|
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55), |
|
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433), |
|
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F), |
|
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60) |
|
}; |
|
|
|
static const uint32_t A_init_384[] = { |
|
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83), |
|
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF), |
|
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D) |
|
}; |
|
|
|
static const uint32_t B_init_384[] = { |
|
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F), |
|
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641), |
|
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8), |
|
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36) |
|
}; |
|
|
|
static const uint32_t C_init_384[] = { |
|
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399), |
|
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261), |
|
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C), |
|
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70) |
|
}; |
|
#endif |
|
|
|
__device__ |
|
static const uint32_t d_A512[] = { |
|
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), |
|
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), |
|
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) |
|
}; |
|
|
|
__device__ |
|
static const uint32_t d_B512[] = { |
|
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), |
|
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), |
|
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), |
|
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) |
|
}; |
|
|
|
__device__ |
|
static const uint32_t d_C512[] = { |
|
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), |
|
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), |
|
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), |
|
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) |
|
}; |
|
|
|
/***************************************************/ |
|
// GPU Hash Function |
|
__global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) |
|
{ |
|
__syncthreads(); |
|
|
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x); |
|
|
|
if (thread < threads) |
|
{ |
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); |
|
int hashPosition = nounce - startNounce; |
|
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3]; // [hashPosition * 8] |
|
|
|
uint32_t A00 = d_A512[0], A01 = d_A512[1], A02 = d_A512[2], A03 = d_A512[3], |
|
A04 = d_A512[4], A05 = d_A512[5], A06 = d_A512[6], A07 = d_A512[7], |
|
A08 = d_A512[8], A09 = d_A512[9], A0A = d_A512[10], A0B = d_A512[11]; |
|
uint32_t B0 = d_B512[0], B1 = d_B512[1], B2 = d_B512[2], B3 = d_B512[3], |
|
B4 = d_B512[4], B5 = d_B512[5], B6 = d_B512[6], B7 = d_B512[7], |
|
B8 = d_B512[8], B9 = d_B512[9], BA = d_B512[10], BB = d_B512[11], |
|
BC = d_B512[12], BD = d_B512[13], BE = d_B512[14], BF = d_B512[15]; |
|
uint32_t C0 = d_C512[0], C1 = d_C512[1], C2 = d_C512[2], C3 = d_C512[3], |
|
C4 = d_C512[4], C5 = d_C512[5], C6 = d_C512[6], C7 = d_C512[7], |
|
C8 = d_C512[8], C9 = d_C512[9], CA = d_C512[10], CB = d_C512[11], |
|
CC = d_C512[12], CD = d_C512[13], CE = d_C512[14], CF = d_C512[15]; |
|
uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; |
|
uint32_t Wlow = 1, Whigh = 0; |
|
|
|
M0 = Hash[0]; |
|
M1 = Hash[1]; |
|
M2 = Hash[2]; |
|
M3 = Hash[3]; |
|
M4 = Hash[4]; |
|
M5 = Hash[5]; |
|
M6 = Hash[6]; |
|
M7 = Hash[7]; |
|
|
|
M8 = Hash[8]; |
|
M9 = Hash[9]; |
|
MA = Hash[10]; |
|
MB = Hash[11]; |
|
MC = Hash[12]; |
|
MD = Hash[13]; |
|
ME = Hash[14]; |
|
MF = Hash[15]; |
|
|
|
INPUT_BLOCK_ADD; |
|
XOR_W; |
|
APPLY_P; |
|
INPUT_BLOCK_SUB; |
|
SWAP_BC; |
|
INCR_W; |
|
|
|
M0 = 0x80; |
|
M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; |
|
|
|
INPUT_BLOCK_ADD; |
|
XOR_W; |
|
APPLY_P; |
|
|
|
for (uint8_t i = 0; i < 3; i ++) |
|
{ |
|
SWAP_BC; |
|
XOR_W; |
|
APPLY_P; |
|
} |
|
|
|
Hash[0] = B0; |
|
Hash[1] = B1; |
|
Hash[2] = B2; |
|
Hash[3] = B3; |
|
Hash[4] = B4; |
|
Hash[5] = B5; |
|
Hash[6] = B6; |
|
Hash[7] = B7; |
|
|
|
Hash[8] = B8; |
|
Hash[9] = B9; |
|
Hash[10] = BA; |
|
Hash[11] = BB; |
|
Hash[12] = BC; |
|
Hash[13] = BD; |
|
Hash[14] = BE; |
|
Hash[15] = BF; |
|
|
|
//result = (Hash[3] <= target); |
|
|
|
uint32_t *outpHash = (uint32_t*)&g_hash[hashPosition << 3]; // [8 * hashPosition]; |
|
|
|
for (int i = 0; i < 16; i++) |
|
outpHash[i] = Hash[i]; |
|
} |
|
} |
|
|
|
__host__ void x14_shabal512_cpu_init(int thr_id, int threads) |
|
{ |
|
} |
|
#include <stdio.h> |
|
__host__ void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) |
|
{ |
|
const int threadsperblock = 192; |
|
|
|
// berechne wie viele Thread Blocks wir brauchen |
|
dim3 grid((threads + threadsperblock-1)/threadsperblock); |
|
dim3 block(threadsperblock); |
|
|
|
size_t shared_size = 0; |
|
|
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); |
|
|
|
x14_shabal512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); |
|
MyStreamSynchronize(NULL, order, thr_id); |
|
}
|
|
|