diff --git a/src/cn_utils/crypto/rx-slow-hash.c b/src/cn_utils/crypto/rx-slow-hash.c index 0d7985db9..2854c8e78 100644 --- a/src/cn_utils/crypto/rx-slow-hash.c +++ b/src/cn_utils/crypto/rx-slow-hash.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "randomx.h" #include "c_threads.h" @@ -50,7 +51,7 @@ typedef struct rx_state { CTHR_MUTEX_TYPE rs_mutex; - char rs_hash[32]; + char rs_hash[HASH_SIZE]; uint64_t rs_height; randomx_cache *rs_cache; } rx_state; @@ -63,7 +64,6 @@ static rx_state rx_s[2] = {{CTHR_MUTEX_INIT,{0},0,0},{CTHR_MUTEX_INIT,{0},0,0}}; static randomx_dataset *rx_dataset; static uint64_t rx_dataset_height; static THREADV randomx_vm *rx_vm = NULL; -static THREADV int rx_toggle; static void local_abort(const char *msg) { @@ -75,84 +75,41 @@ static void local_abort(const char *msg) #endif } -/** - * @brief uses cpuid to determine if the CPU supports the AES instructions - * @return true if the CPU supports AES, false otherwise - */ +static inline int disabled_flags(void) { + static int flags = -1; -static inline int force_software_aes(void) -{ - static int use = -1; - - if (use != -1) - return use; + if (flags != -1) { + return flags; + } - const char *env = getenv("MONERO_USE_SOFTWARE_AES"); + const char *env = getenv("MONERO_RANDOMX_UMASK"); if (!env) { - use = 0; - } - else if (!strcmp(env, "0") || !strcmp(env, "no")) { - use = 0; + flags = 0; } else { - use = 1; + char* endptr; + long int value = strtol(env, &endptr, 0); + if (endptr != env && value >= 0 && value < INT_MAX) { + flags = value; + } + else { + flags = 0; + } } - return use; -} -static void cpuid(int CPUInfo[4], int InfoType) -{ -#if defined(__x86_64__) - __asm __volatile__ - ( - "cpuid": - "=a" (CPUInfo[0]), - "=b" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "a" (InfoType), "c" (0) - ); -#endif -} -static inline int check_aes_hw(void) -{ -#if defined(__x86_64__) - int cpuid_results[4]; - static int supported = -1; - - if(supported >= 0) - return supported; - - cpuid(cpuid_results,1); - return supported = cpuid_results[2] & (1 << 25); -#else - return 0; -#endif + return flags; } -static volatile int use_rx_jit_flag = -1; +static inline int enabled_flags(void) { + static int flags = -1; -static inline int use_rx_jit(void) -{ -#if defined(__x86_64__) + if (flags != -1) { + return flags; + } - if (use_rx_jit_flag != -1) - return use_rx_jit_flag; + flags = randomx_get_flags(); - const char *env = getenv("MONERO_USE_RX_JIT"); - if (!env) { - use_rx_jit_flag = 1; - } - else if (!strcmp(env, "0") || !strcmp(env, "no")) { - use_rx_jit_flag = 0; - } - else { - use_rx_jit_flag = 1; - } - return use_rx_jit_flag; -#else - return 0; -#endif + return flags; } #define SEEDHASH_EPOCH_BLOCKS 2048 /* Must be same as BLOCKS_SYNCHRONIZING_MAX_COUNT in cryptonote_config.h */ @@ -162,8 +119,11 @@ void rx_reorg(const uint64_t split_height) { int i; CTHR_MUTEX_LOCK(rx_mutex); for (i=0; i<2; i++) { - if (split_height < rx_s[i].rs_height) + if (split_height <= rx_s[i].rs_height) { + if (rx_s[i].rs_height == rx_dataset_height) + rx_dataset_height = 1; rx_s[i].rs_height = 1; /* set to an invalid seed height */ + } } CTHR_MUTEX_UNLOCK(rx_mutex); } @@ -233,27 +193,27 @@ static void rx_initdata(randomx_cache *rs_cache, const int miners, const uint64_ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const char *seedhash, const void *data, size_t length, char *hash, int miners, int is_alt) { uint64_t s_height = rx_seedheight(mainheight); - int changed = 0; - int toggle = is_alt ? s_height : seedheight; - randomx_flags flags = RANDOMX_FLAG_DEFAULT; + int toggle = (s_height & SEEDHASH_EPOCH_BLOCKS) != 0; + randomx_flags flags = enabled_flags() & ~disabled_flags(); rx_state *rx_sp; randomx_cache *cache; - toggle = (toggle & SEEDHASH_EPOCH_BLOCKS) != 0; CTHR_MUTEX_LOCK(rx_mutex); /* if alt block but with same seed as mainchain, no need for alt cache */ - if (is_alt && s_height == seedheight && !memcmp(rx_s[toggle].rs_hash, seedhash, sizeof(rx_s[toggle].rs_hash))) - is_alt = 0; - + if (is_alt) { + if (s_height == seedheight && !memcmp(rx_s[toggle].rs_hash, seedhash, HASH_SIZE)) + is_alt = 0; + } else { /* RPC could request an earlier block on mainchain */ - if (!is_alt && s_height > seedheight) - is_alt = 1; + if (s_height > seedheight) + is_alt = 1; + /* miner can be ahead of mainchain */ + else if (s_height < seedheight) + toggle ^= 1; + } toggle ^= (is_alt != 0); - if (toggle != rx_toggle) - changed = 1; - rx_toggle = toggle; rx_sp = &rx_s[toggle]; CTHR_MUTEX_LOCK(rx_sp->rs_mutex); @@ -261,40 +221,35 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch cache = rx_sp->rs_cache; if (cache == NULL) { - if (use_rx_jit()) - flags |= RANDOMX_FLAG_JIT; if (cache == NULL) { cache = randomx_alloc_cache(flags | RANDOMX_FLAG_LARGE_PAGES); if (cache == NULL) { - printf("Couldn't use largePages for RandomX cache\n"); + //printf("Couldn't use largePages for RandomX cache\n"); cache = randomx_alloc_cache(flags); } if (cache == NULL) - local_abort("Couldn't allocate RandomX cache\n"); + local_abort("Couldn't allocate RandomX cache"); } } - if (rx_sp->rs_height != seedheight || rx_sp->rs_cache == NULL || memcmp(seedhash, rx_sp->rs_hash, sizeof(rx_sp->rs_hash))) { - randomx_init_cache(cache, seedhash, 32); + if (rx_sp->rs_height != seedheight || rx_sp->rs_cache == NULL || memcmp(seedhash, rx_sp->rs_hash, HASH_SIZE)) { + randomx_init_cache(cache, seedhash, HASH_SIZE); rx_sp->rs_cache = cache; rx_sp->rs_height = seedheight; - memcpy(rx_sp->rs_hash, seedhash, sizeof(rx_sp->rs_hash)); - changed = 1; + memcpy(rx_sp->rs_hash, seedhash, HASH_SIZE); } if (rx_vm == NULL) { - randomx_flags flags = RANDOMX_FLAG_DEFAULT; - if (use_rx_jit()) { - flags |= RANDOMX_FLAG_JIT; - if (!miners) - flags |= RANDOMX_FLAG_SECURE; + if ((flags & RANDOMX_FLAG_JIT) && !miners) { + flags |= RANDOMX_FLAG_SECURE & ~disabled_flags(); + } + if (miners && (disabled_flags() & RANDOMX_FLAG_FULL_MEM)) { + miners = 0; } - if(!force_software_aes() && check_aes_hw()) - flags |= RANDOMX_FLAG_HARD_AES; if (miners) { CTHR_MUTEX_LOCK(rx_dataset_mutex); if (rx_dataset == NULL) { rx_dataset = randomx_alloc_dataset(RANDOMX_FLAG_LARGE_PAGES); if (rx_dataset == NULL) { - printf("Couldn't use largePages for RandomX dataset\n"); + //printf("Couldn't use largePages for RandomX dataset\n"); rx_dataset = randomx_alloc_dataset(RANDOMX_FLAG_DEFAULT); } if (rx_dataset != NULL) @@ -304,13 +259,13 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch flags |= RANDOMX_FLAG_FULL_MEM; else { miners = 0; - printf("Couldn't allocate RandomX dataset for miner\n"); + //printf("Couldn't allocate RandomX dataset for miner\n"); } CTHR_MUTEX_UNLOCK(rx_dataset_mutex); } rx_vm = randomx_create_vm(flags | RANDOMX_FLAG_LARGE_PAGES, rx_sp->rs_cache, rx_dataset); if(rx_vm == NULL) { //large pages failed - printf("Couldn't use largePages for RandomX VM\n"); + //printf("Couldn't use largePages for RandomX VM\n"); rx_vm = randomx_create_vm(flags, rx_sp->rs_cache, rx_dataset); } if(rx_vm == NULL) {//fallback if everything fails @@ -324,7 +279,8 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch if (rx_dataset != NULL && rx_dataset_height != seedheight) rx_initdata(cache, miners, seedheight); CTHR_MUTEX_UNLOCK(rx_dataset_mutex); - } else if (changed) { + } else { + /* this is a no-op if the cache hasn't changed */ randomx_vm_set_cache(rx_vm, rx_sp->rs_cache); } /* mainchain users can run in parallel */ diff --git a/src/cn_utils/randomx/aes_hash.cpp b/src/cn_utils/randomx/aes_hash.cpp index a3b7395bc..1aff37fbc 100644 --- a/src/cn_utils/randomx/aes_hash.cpp +++ b/src/cn_utils/randomx/aes_hash.cpp @@ -239,84 +239,3 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); - -template -void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { - uint8_t* scratchpadPtr = (uint8_t*)scratchpad; - const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize; - - // initial state - rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); - rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); - rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); - rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); - - const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); - const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); - const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); - const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); - - rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0); - rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1); - rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2); - rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3); - - constexpr int PREFETCH_DISTANCE = 4096; - const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE; - scratchpadEnd -= PREFETCH_DISTANCE; - - for (int i = 0; i < 2; ++i) { - //process 64 bytes at a time in 4 lanes - while (scratchpadPtr < scratchpadEnd) { - hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); - hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1)); - hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2)); - hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3)); - - fill_state0 = aesdec(fill_state0, key0); - fill_state1 = aesenc(fill_state1, key1); - fill_state2 = aesdec(fill_state2, key2); - fill_state3 = aesenc(fill_state3, key3); - - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3); - - rx_prefetch_t0(prefetchPtr); - - scratchpadPtr += 64; - prefetchPtr += 64; - } - prefetchPtr = (const char*) scratchpad; - scratchpadEnd += PREFETCH_DISTANCE; - } - - rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0); - rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1); - rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2); - rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3); - - //two extra rounds to achieve full diffusion - rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); - rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); - - hash_state0 = aesenc(hash_state0, xkey0); - hash_state1 = aesdec(hash_state1, xkey0); - hash_state2 = aesenc(hash_state2, xkey0); - hash_state3 = aesdec(hash_state3, xkey0); - - hash_state0 = aesenc(hash_state0, xkey1); - hash_state1 = aesdec(hash_state1, xkey1); - hash_state2 = aesenc(hash_state2, xkey1); - hash_state3 = aesdec(hash_state3, xkey1); - - //output hash - rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0); - rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1); - rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2); - rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); -} - -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/src/cn_utils/randomx/aes_hash.hpp b/src/cn_utils/randomx/aes_hash.hpp index 9f75f73ae..b4d0e9405 100644 --- a/src/cn_utils/randomx/aes_hash.hpp +++ b/src/cn_utils/randomx/aes_hash.hpp @@ -38,6 +38,3 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); - -template -void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/src/cn_utils/randomx/allocator.cpp b/src/cn_utils/randomx/allocator.cpp index 4c6d86e05..2ddbed98b 100644 --- a/src/cn_utils/randomx/allocator.cpp +++ b/src/cn_utils/randomx/allocator.cpp @@ -47,7 +47,7 @@ namespace randomx { rx_aligned_free(ptr); } - template struct AlignedAllocator; + template class AlignedAllocator; void* LargePageAllocator::allocMemory(size_t count) { return allocLargePagesMemory(count); diff --git a/src/cn_utils/randomx/dataset.hpp b/src/cn_utils/randomx/dataset.hpp index d01911f9a..083a5d751 100644 --- a/src/cn_utils/randomx/dataset.hpp +++ b/src/cn_utils/randomx/dataset.hpp @@ -61,17 +61,8 @@ struct randomx_cache { //A pointer to a standard-layout struct object points to its initial member static_assert(std::is_standard_layout(), "randomx_dataset must be a standard-layout struct"); - //the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug) -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG) -#define TO_STR(x) #x -#define STR(x) TO_STR(x) -#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout() is disabled for Debug configuration. JIT mode will crash." ) -#undef STR -#undef TO_STR -#else static_assert(std::is_standard_layout(), "randomx_cache must be a standard-layout struct"); -#endif namespace randomx { diff --git a/src/cn_utils/randomx/intrin_portable.h b/src/cn_utils/randomx/intrin_portable.h index c9d4475ae..b5ad91a84 100644 --- a/src/cn_utils/randomx/intrin_portable.h +++ b/src/cn_utils/randomx/intrin_portable.h @@ -102,7 +102,6 @@ typedef __m128d rx_vec_f128; #define rx_aligned_alloc(a, b) _mm_malloc(a,b) #define rx_aligned_free(a) _mm_free(a) #define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA) -#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0) #define rx_load_vec_f128 _mm_load_pd #define rx_store_vec_f128 _mm_store_pd @@ -202,7 +201,6 @@ typedef union{ #define rx_aligned_alloc(a, b) malloc(a) #define rx_aligned_free(a) free(a) #define rx_prefetch_nta(x) -#define rx_prefetch_t0(x) /* Splat 64-bit long long to 2 64-bit long longs */ FORCE_INLINE __m128i vec_splat2sd (int64_t scalar) @@ -401,10 +399,6 @@ inline void rx_prefetch_nta(void* ptr) { asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); } -inline void rx_prefetch_t0(const void* ptr) { - asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); -} - FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { return vld1q_f64((const float64_t*)pd); } @@ -538,7 +532,6 @@ typedef union { #define rx_aligned_alloc(a, b) malloc(a) #define rx_aligned_free(a) free(a) #define rx_prefetch_nta(x) -#define rx_prefetch_t0(x) FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { rx_vec_f128 x; diff --git a/src/cn_utils/randomx/jit_compiler_a64.hpp b/src/cn_utils/randomx/jit_compiler_a64.hpp index a02824ffb..a4adb80f4 100644 --- a/src/cn_utils/randomx/jit_compiler_a64.hpp +++ b/src/cn_utils/randomx/jit_compiler_a64.hpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { class Program; - struct ProgramConfiguration; + class ProgramConfiguration; class SuperscalarProgram; class Instruction; diff --git a/src/cn_utils/randomx/jit_compiler_a64_static.S b/src/cn_utils/randomx/jit_compiler_a64_static.S index 598eca21b..0a4d006f3 100644 --- a/src/cn_utils/randomx/jit_compiler_a64_static.S +++ b/src/cn_utils/randomx/jit_compiler_a64_static.S @@ -25,32 +25,26 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#if defined(__APPLE__) -#define DECL(x) _##x -#else -#define DECL(x) x -#endif - .arch armv8-a .text - .global DECL(randomx_program_aarch64) - .global DECL(randomx_program_aarch64_main_loop) - .global DECL(randomx_program_aarch64_vm_instructions) - .global DECL(randomx_program_aarch64_imul_rcp_literals_end) - .global DECL(randomx_program_aarch64_vm_instructions_end) - .global DECL(randomx_program_aarch64_cacheline_align_mask1) - .global DECL(randomx_program_aarch64_cacheline_align_mask2) - .global DECL(randomx_program_aarch64_update_spMix1) - .global DECL(randomx_program_aarch64_vm_instructions_end_light) - .global DECL(randomx_program_aarch64_light_cacheline_align_mask) - .global DECL(randomx_program_aarch64_light_dataset_offset) - .global DECL(randomx_init_dataset_aarch64) - .global DECL(randomx_init_dataset_aarch64_end) - .global DECL(randomx_calc_dataset_item_aarch64) - .global DECL(randomx_calc_dataset_item_aarch64_prefetch) - .global DECL(randomx_calc_dataset_item_aarch64_mix) - .global DECL(randomx_calc_dataset_item_aarch64_store_result) - .global DECL(randomx_calc_dataset_item_aarch64_end) + .global randomx_program_aarch64 + .global randomx_program_aarch64_main_loop + .global randomx_program_aarch64_vm_instructions + .global randomx_program_aarch64_imul_rcp_literals_end + .global randomx_program_aarch64_vm_instructions_end + .global randomx_program_aarch64_cacheline_align_mask1 + .global randomx_program_aarch64_cacheline_align_mask2 + .global randomx_program_aarch64_update_spMix1 + .global randomx_program_aarch64_vm_instructions_end_light + .global randomx_program_aarch64_light_cacheline_align_mask + .global randomx_program_aarch64_light_dataset_offset + .global randomx_init_dataset_aarch64 + .global randomx_init_dataset_aarch64_end + .global randomx_calc_dataset_item_aarch64 + .global randomx_calc_dataset_item_aarch64_prefetch + .global randomx_calc_dataset_item_aarch64_mix + .global randomx_calc_dataset_item_aarch64_store_result + .global randomx_calc_dataset_item_aarch64_end #include "configuration.h" @@ -107,7 +101,7 @@ # v31 -> scale mask = 0x81f000000000000081f0000000000000 .balign 4 -DECL(randomx_program_aarch64): +randomx_program_aarch64: # Save callee-saved registers sub sp, sp, 192 stp x16, x17, [sp] @@ -195,7 +189,7 @@ DECL(randomx_program_aarch64): ldr q14, literal_v14 ldr q15, literal_v15 -DECL(randomx_program_aarch64_main_loop): +randomx_program_aarch64_main_loop: # spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; lsr x18, x10, 32 @@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop): orr v23.16b, v23.16b, v30.16b # Execute VM instructions -DECL(randomx_program_aarch64_vm_instructions): +randomx_program_aarch64_vm_instructions: # buffer for generated instructions # FDIV_M is the largest instruction taking up to 12 ARMv8 instructions @@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0 literal_x28: .fill 1,8,0 literal_x29: .fill 1,8,0 literal_x30: .fill 1,8,0 -DECL(randomx_program_aarch64_imul_rcp_literals_end): +randomx_program_aarch64_imul_rcp_literals_end: literal_v0: .fill 2,8,0 literal_v1: .fill 2,8,0 @@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0 literal_v14: .fill 2,8,0 literal_v15: .fill 2,8,0 -DECL(randomx_program_aarch64_vm_instructions_end): +randomx_program_aarch64_vm_instructions_end: # mx ^= r[readReg2] ^ r[readReg3]; eor x9, x9, x18 # Calculate dataset pointer for dataset prefetch mov w18, w9 -DECL(randomx_program_aarch64_cacheline_align_mask1): +randomx_program_aarch64_cacheline_align_mask1: # Actual mask will be inserted by JIT compiler and x18, x18, 1 add x18, x18, x1 @@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1): # Calculate dataset pointer for dataset read mov w10, w9 -DECL(randomx_program_aarch64_cacheline_align_mask2): +randomx_program_aarch64_cacheline_align_mask2: # Actual mask will be inserted by JIT compiler and x10, x10, 1 add x10, x10, x1 -DECL(randomx_program_aarch64_xor_with_dataset_line): +randomx_program_aarch64_xor_with_dataset_line: # xor integer registers with dataset data ldp x18, x19, [x10] eor x4, x4, x18 @@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line): eor x14, x14, x18 eor x15, x15, x19 -DECL(randomx_program_aarch64_update_spMix1): +randomx_program_aarch64_update_spMix1: # JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1" eor x10, x0, x0 @@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1): stp q18, q19, [x16, 32] subs x3, x3, 1 - bne DECL(randomx_program_aarch64_main_loop) + bne randomx_program_aarch64_main_loop # Restore x0 ldr x0, [sp], 16 @@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1): ret -DECL(randomx_program_aarch64_vm_instructions_end_light): +randomx_program_aarch64_vm_instructions_end_light: sub sp, sp, 96 stp x0, x1, [sp, 64] stp x2, x30, [sp, 80] @@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light): # x1 -> pointer to output mov x1, sp -DECL(randomx_program_aarch64_light_cacheline_align_mask): +randomx_program_aarch64_light_cacheline_align_mask: # Actual mask will be inserted by JIT compiler and w2, w9, 1 # x2 -> item number lsr x2, x2, 6 -DECL(randomx_program_aarch64_light_dataset_offset): +randomx_program_aarch64_light_dataset_offset: # Apply dataset offset (filled in by JIT compiler) add x2, x2, 0 add x2, x2, 0 - bl DECL(randomx_calc_dataset_item_aarch64) + bl randomx_calc_dataset_item_aarch64 mov x10, sp ldp x0, x1, [sp, 64] ldp x2, x30, [sp, 80] add sp, sp, 96 - b DECL(randomx_program_aarch64_xor_with_dataset_line) + b randomx_program_aarch64_xor_with_dataset_line @@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset): # x2 -> start item # x3 -> end item -DECL(randomx_init_dataset_aarch64): +randomx_init_dataset_aarch64: # Save x30 (return address) str x30, [sp, -16]! # Load pointer to cache memory ldr x0, [x0] -DECL(randomx_init_dataset_aarch64_main_loop): - bl DECL(randomx_calc_dataset_item_aarch64) +randomx_init_dataset_aarch64_main_loop: + bl randomx_calc_dataset_item_aarch64 add x1, x1, 64 add x2, x2, 1 cmp x2, x3 - bne DECL(randomx_init_dataset_aarch64_main_loop) + bne randomx_init_dataset_aarch64_main_loop # Restore x30 (return address) ldr x30, [sp], 16 ret -DECL(randomx_init_dataset_aarch64_end): +randomx_init_dataset_aarch64_end: # Input parameters # @@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end): # x12 -> temporary # x13 -> temporary -DECL(randomx_calc_dataset_item_aarch64): +randomx_calc_dataset_item_aarch64: sub sp, sp, 112 stp x0, x1, [sp] stp x2, x3, [sp, 16] @@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64): ldr x12, superscalarAdd7 eor x7, x0, x12 - b DECL(randomx_calc_dataset_item_aarch64_prefetch) + b randomx_calc_dataset_item_aarch64_prefetch superscalarMul0: .quad 6364136223846793005 superscalarAdd1: .quad 9298411001130361340 @@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294 # Prefetch -> SuperScalar hash -> Mix will be repeated N times -DECL(randomx_calc_dataset_item_aarch64_prefetch): +randomx_calc_dataset_item_aarch64_prefetch: # Actual mask will be inserted by JIT compiler and x11, x10, 1 add x11, x8, x11, lsl 6 @@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch): # Generated SuperScalar hash program goes here -DECL(randomx_calc_dataset_item_aarch64_mix): +randomx_calc_dataset_item_aarch64_mix: ldp x12, x13, [x11] eor x0, x0, x12 eor x1, x1, x13 @@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix): eor x6, x6, x12 eor x7, x7, x13 -DECL(randomx_calc_dataset_item_aarch64_store_result): +randomx_calc_dataset_item_aarch64_store_result: stp x0, x1, [x9] stp x2, x3, [x9, 16] stp x4, x5, [x9, 32] @@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result): ret -DECL(randomx_calc_dataset_item_aarch64_end): +randomx_calc_dataset_item_aarch64_end: diff --git a/src/cn_utils/randomx/jit_compiler_fallback.hpp b/src/cn_utils/randomx/jit_compiler_fallback.hpp index 57a6dbf99..56ccb8cd7 100644 --- a/src/cn_utils/randomx/jit_compiler_fallback.hpp +++ b/src/cn_utils/randomx/jit_compiler_fallback.hpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { class Program; - struct ProgramConfiguration; + class ProgramConfiguration; class SuperscalarProgram; class JitCompilerFallback { diff --git a/src/cn_utils/randomx/jit_compiler_x86.hpp b/src/cn_utils/randomx/jit_compiler_x86.hpp index e95685f9c..7829fcad4 100644 --- a/src/cn_utils/randomx/jit_compiler_x86.hpp +++ b/src/cn_utils/randomx/jit_compiler_x86.hpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { class Program; - struct ProgramConfiguration; + class ProgramConfiguration; class SuperscalarProgram; class JitCompilerX86; class Instruction; diff --git a/src/cn_utils/randomx/randomx.cpp b/src/cn_utils/randomx/randomx.cpp index 31289ff58..90fc46a75 100644 --- a/src/cn_utils/randomx/randomx.cpp +++ b/src/cn_utils/randomx/randomx.cpp @@ -363,31 +363,4 @@ extern "C" { machine->getFinalResult(output, RANDOMX_HASH_SIZE); } - void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) { - blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0); - machine->initScratchpad(machine->tempHash); - } - - void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) { - machine->resetRoundingMode(); - for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - machine->run(machine->tempHash); - blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - } - machine->run(machine->tempHash); - - // Finish current hash and fill the scratchpad for the next hash at the same time - blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0); - machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash); - } - - void randomx_calculate_hash_last(randomx_vm* machine, void* output) { - machine->resetRoundingMode(); - for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { - machine->run(machine->tempHash); - blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); - } - machine->run(machine->tempHash); - machine->getFinalResult(output, RANDOMX_HASH_SIZE); - } } diff --git a/src/cn_utils/randomx/randomx.h b/src/cn_utils/randomx/randomx.h index 48aaf8eb6..c06002bb9 100644 --- a/src/cn_utils/randomx/randomx.h +++ b/src/cn_utils/randomx/randomx.h @@ -30,7 +30,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RANDOMX_H #include -#include #define RANDOMX_HASH_SIZE 32 #define RANDOMX_DATASET_ITEM_SIZE 64 @@ -239,25 +238,6 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine); */ RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); -/** - * Set of functions used to calculate multiple RandomX hashes more efficiently. - * randomx_calculate_hash_first will begin a hash calculation. - * randomx_calculate_hash_next will output the hash value of the previous input - * and begin the calculation of the next hash. - * randomx_calculate_hash_last will output the hash value of the previous input. - * - * @param machine is a pointer to a randomx_vm structure. Must not be NULL. - * @param input is a pointer to memory to be hashed. Must not be NULL. - * @param inputSize is the number of bytes to be hashed. - * @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL. - * @param nextInputSize is the number of bytes to be hashed for the next hash. - * @param output is a pointer to memory where the hash will be stored. Must not - * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. -*/ -RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize); -RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output); -RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output); - #if defined(__cplusplus) } #endif diff --git a/src/cn_utils/randomx/tests/affinity.cpp b/src/cn_utils/randomx/tests/affinity.cpp index 40f64581f..a2e37aab5 100644 --- a/src/cn_utils/randomx/tests/affinity.cpp +++ b/src/cn_utils/randomx/tests/affinity.cpp @@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread, (thread_policy_t)&policy, 1); #elif defined(_WIN32) || defined(__CYGWIN__) rc = SetThreadAffinityMask(reinterpret_cast(thread), 1ULL << cpuid) == 0 ? -2 : 0; -#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) +#elif !defined(__OpenBSD__) cpu_set_t cs; CPU_ZERO(&cs); CPU_SET(cpuid, &cs); diff --git a/src/cn_utils/randomx/tests/benchmark.cpp b/src/cn_utils/randomx/tests/benchmark.cpp index 47cd0264a..4a611dcb1 100644 --- a/src/cn_utils/randomx/tests/benchmark.cpp +++ b/src/cn_utils/randomx/tests/benchmark.cpp @@ -122,14 +122,11 @@ void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result void* noncePtr = blockTemplate + 39; auto nonce = atomicNonce.fetch_add(1); - store32(noncePtr, nonce); - randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate)); - while (nonce < noncesCount) { - nonce = atomicNonce.fetch_add(1); store32(noncePtr, nonce); - randomx_calculate_hash_next(vm, blockTemplate, sizeof(blockTemplate), &hash); + randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash); result.xorWith(hash); + nonce = atomicNonce.fetch_add(1); } } @@ -161,19 +158,13 @@ int main(int argc, char** argv) { store32(&seed, seedValue); - std::cout << "RandomX benchmark v1.1.7" << std::endl; + std::cout << "RandomX benchmark v1.1.5" << std::endl; - if (help) { + if (help || (!miningMode && !verificationMode)) { printUsage(argv[0]); return 0; } - if (!miningMode && !verificationMode) { - std::cout << "Please select either the fast mode (--mine) or the slow mode (--verify)" << std::endl; - std::cout << "Run '" << argv[0] << " --help' to see all supported options" << std::endl; - return 0; - } - std::atomic atomicNonce(0); AtomicHash result; std::vector vms; diff --git a/src/cn_utils/randomx/tests/tests.cpp b/src/cn_utils/randomx/tests/tests.cpp index b425f19f2..962a12044 100644 --- a/src/cn_utils/randomx/tests/tests.cpp +++ b/src/cn_utils/randomx/tests/tests.cpp @@ -1026,6 +1026,9 @@ int main() { runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e); + randomx_destroy_vm(vm); + vm = nullptr; + auto flags = randomx_get_flags(); randomx_release_cache(cache); @@ -1051,28 +1054,6 @@ int main() { assert(cacheMemory[33554431] == 0x1f47f056d05cd99b); }); - runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - char hash1[RANDOMX_HASH_SIZE]; - char hash2[RANDOMX_HASH_SIZE]; - char hash3[RANDOMX_HASH_SIZE]; - initCache("test key 000"); - char input1[] = "This is a test"; - char input2[] = "Lorem ipsum dolor sit amet"; - char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"; - - randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1); - randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1); - randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2); - randomx_calculate_hash_last(vm, &hash3); - - assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f")); - assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969")); - assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8")); - }); - - randomx_destroy_vm(vm); - vm = nullptr; - if (cache != nullptr) randomx_release_cache(cache); diff --git a/src/cn_utils/randomx/virtual_machine.cpp b/src/cn_utils/randomx/virtual_machine.cpp index 2d5d2bead..d73a02473 100644 --- a/src/cn_utils/randomx/virtual_machine.cpp +++ b/src/cn_utils/randomx/virtual_machine.cpp @@ -120,12 +120,6 @@ namespace randomx { blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0); } - template - void VmBase::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) { - hashAndFillAes1Rx4((void*) getScratchpad(), ScratchpadSize, ®.a, fill_state); - blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0); - } - template void VmBase::initScratchpad(void* seed) { fillAes1Rx4(seed, ScratchpadSize, scratchpad); diff --git a/src/cn_utils/randomx/virtual_machine.hpp b/src/cn_utils/randomx/virtual_machine.hpp index d72a9181c..d662c895f 100644 --- a/src/cn_utils/randomx/virtual_machine.hpp +++ b/src/cn_utils/randomx/virtual_machine.hpp @@ -38,7 +38,6 @@ public: virtual ~randomx_vm() = 0; virtual void allocate() = 0; virtual void getFinalResult(void* out, size_t outSize) = 0; - virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0; virtual void setDataset(randomx_dataset* dataset) { } virtual void setCache(randomx_cache* cache) { } virtual void initScratchpad(void* seed) = 0; @@ -68,7 +67,6 @@ protected: uint64_t datasetOffset; public: std::string cacheKey; - alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data }; namespace randomx { @@ -80,7 +78,6 @@ namespace randomx { void allocate() override; void initScratchpad(void* seed) override; void getFinalResult(void* out, size_t outSize) override; - void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override; protected: void generateProgram(void* seed); }; diff --git a/src/primitives/block.cpp b/src/primitives/block.cpp index 7eab07298..40f269007 100644 --- a/src/primitives/block.cpp +++ b/src/primitives/block.cpp @@ -40,6 +40,16 @@ unsigned get_max_concurrency() return max_concurrency; } +static uint256 cn_get_block_hash_by_height(uint64_t seed_height, char cnHash[32]) +{ + CBlockIndex* pblockindex = chainActive[seed_height]; + uint256 blockHash = pblockindex->GetBlockHash(); + const unsigned char* pHash = blockHash.begin(); + for (int j = 31; j >= 0; j--) { + cnHash[31 - j] = pHash[j]; + } +} + uint256 CBlockHeader::GetOriginalBlockHash() const { CHashWriter hashWriter(SER_GETHASH, PROTOCOL_VERSION); @@ -78,13 +88,13 @@ uint256 CBlockHeader::GetPoWHash() const uint32_t height = nNonce; if (cnHeader.major_version >= RX_BLOCK_VERSION) { uint64_t seed_height; + char cnHash[32]; seed_height = crypto::rx_seedheight(height); - CBlockIndex* pblockindex = chainActive[seed_height]; - crypto::rx_slow_hash(height, seed_height, (const char*)(pblockindex->GetBlockHash().begin()), - blob.data(), blob.size(), BEGIN(thash), get_max_concurrency(), 0); + cn_get_block_hash_by_height(seed_height, cnHash); + crypto::rx_slow_hash(height, seed_height, cnHash, blob.data(), blob.size(), BEGIN(thash), get_max_concurrency(), 0); } else { cn_slow_hash(blob.data(), blob.size(), BEGIN(thash), cnHeader.major_version - 6, 0, height); - } + } return thash; }