Browse Source

RX/0 now worked.

rx
Just Wonder 4 years ago
parent
commit
85c262741b
  1. 156
      src/cn_utils/crypto/rx-slow-hash.c
  2. 81
      src/cn_utils/randomx/aes_hash.cpp
  3. 3
      src/cn_utils/randomx/aes_hash.hpp
  4. 2
      src/cn_utils/randomx/allocator.cpp
  5. 9
      src/cn_utils/randomx/dataset.hpp
  6. 7
      src/cn_utils/randomx/intrin_portable.h
  7. 2
      src/cn_utils/randomx/jit_compiler_a64.hpp
  8. 94
      src/cn_utils/randomx/jit_compiler_a64_static.S
  9. 2
      src/cn_utils/randomx/jit_compiler_fallback.hpp
  10. 2
      src/cn_utils/randomx/jit_compiler_x86.hpp
  11. 27
      src/cn_utils/randomx/randomx.cpp
  12. 20
      src/cn_utils/randomx/randomx.h
  13. 2
      src/cn_utils/randomx/tests/affinity.cpp
  14. 17
      src/cn_utils/randomx/tests/benchmark.cpp
  15. 25
      src/cn_utils/randomx/tests/tests.cpp
  16. 6
      src/cn_utils/randomx/virtual_machine.cpp
  17. 3
      src/cn_utils/randomx/virtual_machine.hpp
  18. 18
      src/primitives/block.cpp

156
src/cn_utils/crypto/rx-slow-hash.c

@ -34,6 +34,7 @@ @@ -34,6 +34,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include "randomx.h"
#include "c_threads.h"
@ -50,7 +51,7 @@ @@ -50,7 +51,7 @@
typedef struct rx_state {
CTHR_MUTEX_TYPE rs_mutex;
char rs_hash[32];
char rs_hash[HASH_SIZE];
uint64_t rs_height;
randomx_cache *rs_cache;
} rx_state;
@ -63,7 +64,6 @@ static rx_state rx_s[2] = {{CTHR_MUTEX_INIT,{0},0,0},{CTHR_MUTEX_INIT,{0},0,0}}; @@ -63,7 +64,6 @@ static rx_state rx_s[2] = {{CTHR_MUTEX_INIT,{0},0,0},{CTHR_MUTEX_INIT,{0},0,0}};
static randomx_dataset *rx_dataset;
static uint64_t rx_dataset_height;
static THREADV randomx_vm *rx_vm = NULL;
static THREADV int rx_toggle;
static void local_abort(const char *msg)
{
@ -75,84 +75,41 @@ static void local_abort(const char *msg) @@ -75,84 +75,41 @@ static void local_abort(const char *msg)
#endif
}
/**
* @brief uses cpuid to determine if the CPU supports the AES instructions
* @return true if the CPU supports AES, false otherwise
*/
static inline int disabled_flags(void) {
static int flags = -1;
static inline int force_software_aes(void)
{
static int use = -1;
if (use != -1)
return use;
if (flags != -1) {
return flags;
}
const char *env = getenv("MONERO_USE_SOFTWARE_AES");
const char *env = getenv("MONERO_RANDOMX_UMASK");
if (!env) {
use = 0;
}
else if (!strcmp(env, "0") || !strcmp(env, "no")) {
use = 0;
flags = 0;
}
else {
use = 1;
char* endptr;
long int value = strtol(env, &endptr, 0);
if (endptr != env && value >= 0 && value < INT_MAX) {
flags = value;
}
else {
flags = 0;
}
}
return use;
}
static void cpuid(int CPUInfo[4], int InfoType)
{
#if defined(__x86_64__)
__asm __volatile__
(
"cpuid":
"=a" (CPUInfo[0]),
"=b" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"a" (InfoType), "c" (0)
);
#endif
}
static inline int check_aes_hw(void)
{
#if defined(__x86_64__)
int cpuid_results[4];
static int supported = -1;
if(supported >= 0)
return supported;
cpuid(cpuid_results,1);
return supported = cpuid_results[2] & (1 << 25);
#else
return 0;
#endif
return flags;
}
static volatile int use_rx_jit_flag = -1;
static inline int enabled_flags(void) {
static int flags = -1;
static inline int use_rx_jit(void)
{
#if defined(__x86_64__)
if (flags != -1) {
return flags;
}
if (use_rx_jit_flag != -1)
return use_rx_jit_flag;
flags = randomx_get_flags();
const char *env = getenv("MONERO_USE_RX_JIT");
if (!env) {
use_rx_jit_flag = 1;
}
else if (!strcmp(env, "0") || !strcmp(env, "no")) {
use_rx_jit_flag = 0;
}
else {
use_rx_jit_flag = 1;
}
return use_rx_jit_flag;
#else
return 0;
#endif
return flags;
}
#define SEEDHASH_EPOCH_BLOCKS 2048 /* Must be same as BLOCKS_SYNCHRONIZING_MAX_COUNT in cryptonote_config.h */
@ -162,8 +119,11 @@ void rx_reorg(const uint64_t split_height) { @@ -162,8 +119,11 @@ void rx_reorg(const uint64_t split_height) {
int i;
CTHR_MUTEX_LOCK(rx_mutex);
for (i=0; i<2; i++) {
if (split_height < rx_s[i].rs_height)
if (split_height <= rx_s[i].rs_height) {
if (rx_s[i].rs_height == rx_dataset_height)
rx_dataset_height = 1;
rx_s[i].rs_height = 1; /* set to an invalid seed height */
}
}
CTHR_MUTEX_UNLOCK(rx_mutex);
}
@ -233,27 +193,27 @@ static void rx_initdata(randomx_cache *rs_cache, const int miners, const uint64_ @@ -233,27 +193,27 @@ static void rx_initdata(randomx_cache *rs_cache, const int miners, const uint64_
void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const char *seedhash, const void *data, size_t length,
char *hash, int miners, int is_alt) {
uint64_t s_height = rx_seedheight(mainheight);
int changed = 0;
int toggle = is_alt ? s_height : seedheight;
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
int toggle = (s_height & SEEDHASH_EPOCH_BLOCKS) != 0;
randomx_flags flags = enabled_flags() & ~disabled_flags();
rx_state *rx_sp;
randomx_cache *cache;
toggle = (toggle & SEEDHASH_EPOCH_BLOCKS) != 0;
CTHR_MUTEX_LOCK(rx_mutex);
/* if alt block but with same seed as mainchain, no need for alt cache */
if (is_alt && s_height == seedheight && !memcmp(rx_s[toggle].rs_hash, seedhash, sizeof(rx_s[toggle].rs_hash)))
is_alt = 0;
if (is_alt) {
if (s_height == seedheight && !memcmp(rx_s[toggle].rs_hash, seedhash, HASH_SIZE))
is_alt = 0;
} else {
/* RPC could request an earlier block on mainchain */
if (!is_alt && s_height > seedheight)
is_alt = 1;
if (s_height > seedheight)
is_alt = 1;
/* miner can be ahead of mainchain */
else if (s_height < seedheight)
toggle ^= 1;
}
toggle ^= (is_alt != 0);
if (toggle != rx_toggle)
changed = 1;
rx_toggle = toggle;
rx_sp = &rx_s[toggle];
CTHR_MUTEX_LOCK(rx_sp->rs_mutex);
@ -261,40 +221,35 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch @@ -261,40 +221,35 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch
cache = rx_sp->rs_cache;
if (cache == NULL) {
if (use_rx_jit())
flags |= RANDOMX_FLAG_JIT;
if (cache == NULL) {
cache = randomx_alloc_cache(flags | RANDOMX_FLAG_LARGE_PAGES);
if (cache == NULL) {
printf("Couldn't use largePages for RandomX cache\n");
//printf("Couldn't use largePages for RandomX cache\n");
cache = randomx_alloc_cache(flags);
}
if (cache == NULL)
local_abort("Couldn't allocate RandomX cache\n");
local_abort("Couldn't allocate RandomX cache");
}
}
if (rx_sp->rs_height != seedheight || rx_sp->rs_cache == NULL || memcmp(seedhash, rx_sp->rs_hash, sizeof(rx_sp->rs_hash))) {
randomx_init_cache(cache, seedhash, 32);
if (rx_sp->rs_height != seedheight || rx_sp->rs_cache == NULL || memcmp(seedhash, rx_sp->rs_hash, HASH_SIZE)) {
randomx_init_cache(cache, seedhash, HASH_SIZE);
rx_sp->rs_cache = cache;
rx_sp->rs_height = seedheight;
memcpy(rx_sp->rs_hash, seedhash, sizeof(rx_sp->rs_hash));
changed = 1;
memcpy(rx_sp->rs_hash, seedhash, HASH_SIZE);
}
if (rx_vm == NULL) {
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
if (use_rx_jit()) {
flags |= RANDOMX_FLAG_JIT;
if (!miners)
flags |= RANDOMX_FLAG_SECURE;
if ((flags & RANDOMX_FLAG_JIT) && !miners) {
flags |= RANDOMX_FLAG_SECURE & ~disabled_flags();
}
if (miners && (disabled_flags() & RANDOMX_FLAG_FULL_MEM)) {
miners = 0;
}
if(!force_software_aes() && check_aes_hw())
flags |= RANDOMX_FLAG_HARD_AES;
if (miners) {
CTHR_MUTEX_LOCK(rx_dataset_mutex);
if (rx_dataset == NULL) {
rx_dataset = randomx_alloc_dataset(RANDOMX_FLAG_LARGE_PAGES);
if (rx_dataset == NULL) {
printf("Couldn't use largePages for RandomX dataset\n");
//printf("Couldn't use largePages for RandomX dataset\n");
rx_dataset = randomx_alloc_dataset(RANDOMX_FLAG_DEFAULT);
}
if (rx_dataset != NULL)
@ -304,13 +259,13 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch @@ -304,13 +259,13 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch
flags |= RANDOMX_FLAG_FULL_MEM;
else {
miners = 0;
printf("Couldn't allocate RandomX dataset for miner\n");
//printf("Couldn't allocate RandomX dataset for miner\n");
}
CTHR_MUTEX_UNLOCK(rx_dataset_mutex);
}
rx_vm = randomx_create_vm(flags | RANDOMX_FLAG_LARGE_PAGES, rx_sp->rs_cache, rx_dataset);
if(rx_vm == NULL) { //large pages failed
printf("Couldn't use largePages for RandomX VM\n");
//printf("Couldn't use largePages for RandomX VM\n");
rx_vm = randomx_create_vm(flags, rx_sp->rs_cache, rx_dataset);
}
if(rx_vm == NULL) {//fallback if everything fails
@ -324,7 +279,8 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch @@ -324,7 +279,8 @@ void rx_slow_hash(const uint64_t mainheight, const uint64_t seedheight, const ch
if (rx_dataset != NULL && rx_dataset_height != seedheight)
rx_initdata(cache, miners, seedheight);
CTHR_MUTEX_UNLOCK(rx_dataset_mutex);
} else if (changed) {
} else {
/* this is a no-op if the cache hasn't changed */
randomx_vm_set_cache(rx_vm, rx_sp->rs_cache);
}
/* mainchain users can run in parallel */

81
src/cn_utils/randomx/aes_hash.cpp

@ -239,84 +239,3 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { @@ -239,84 +239,3 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
// initial state
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
constexpr int PREFETCH_DISTANCE = 4096;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;
for (int i = 0; i < 2; ++i) {
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
rx_prefetch_t0(prefetchPtr);
scratchpadPtr += 64;
prefetchPtr += 64;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;
}
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
//two extra rounds to achieve full diffusion
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
//output hash
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
}
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

3
src/cn_utils/randomx/aes_hash.hpp

@ -38,6 +38,3 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer); @@ -38,6 +38,3 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

2
src/cn_utils/randomx/allocator.cpp

@ -47,7 +47,7 @@ namespace randomx { @@ -47,7 +47,7 @@ namespace randomx {
rx_aligned_free(ptr);
}
template struct AlignedAllocator<CacheLineSize>;
template class AlignedAllocator<CacheLineSize>;
void* LargePageAllocator::allocMemory(size_t count) {
return allocLargePagesMemory(count);

9
src/cn_utils/randomx/dataset.hpp

@ -61,17 +61,8 @@ struct randomx_cache { @@ -61,17 +61,8 @@ struct randomx_cache {
//A pointer to a standard-layout struct object points to its initial member
static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug)
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG)
#define TO_STR(x) #x
#define STR(x) TO_STR(x)
#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout<randomx_cache>() is disabled for Debug configuration. JIT mode will crash." )
#undef STR
#undef TO_STR
#else
static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
#endif
namespace randomx {

7
src/cn_utils/randomx/intrin_portable.h

@ -102,7 +102,6 @@ typedef __m128d rx_vec_f128; @@ -102,7 +102,6 @@ typedef __m128d rx_vec_f128;
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
#define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd
@ -202,7 +201,6 @@ typedef union{ @@ -202,7 +201,6 @@ typedef union{
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
@ -401,10 +399,6 @@ inline void rx_prefetch_nta(void* ptr) { @@ -401,10 +399,6 @@ inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
inline void rx_prefetch_t0(const void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
return vld1q_f64((const float64_t*)pd);
}
@ -538,7 +532,6 @@ typedef union { @@ -538,7 +532,6 @@ typedef union {
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
rx_vec_f128 x;

2
src/cn_utils/randomx/jit_compiler_a64.hpp

@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
class Program;
struct ProgramConfiguration;
class ProgramConfiguration;
class SuperscalarProgram;
class Instruction;

94
src/cn_utils/randomx/jit_compiler_a64_static.S

@ -25,32 +25,26 @@ @@ -25,32 +25,26 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__APPLE__)
#define DECL(x) _##x
#else
#define DECL(x) x
#endif
.arch armv8-a
.text
.global DECL(randomx_program_aarch64)
.global DECL(randomx_program_aarch64_main_loop)
.global DECL(randomx_program_aarch64_vm_instructions)
.global DECL(randomx_program_aarch64_imul_rcp_literals_end)
.global DECL(randomx_program_aarch64_vm_instructions_end)
.global DECL(randomx_program_aarch64_cacheline_align_mask1)
.global DECL(randomx_program_aarch64_cacheline_align_mask2)
.global DECL(randomx_program_aarch64_update_spMix1)
.global DECL(randomx_program_aarch64_vm_instructions_end_light)
.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
.global DECL(randomx_program_aarch64_light_dataset_offset)
.global DECL(randomx_init_dataset_aarch64)
.global DECL(randomx_init_dataset_aarch64_end)
.global DECL(randomx_calc_dataset_item_aarch64)
.global DECL(randomx_calc_dataset_item_aarch64_prefetch)
.global DECL(randomx_calc_dataset_item_aarch64_mix)
.global DECL(randomx_calc_dataset_item_aarch64_store_result)
.global DECL(randomx_calc_dataset_item_aarch64_end)
.global randomx_program_aarch64
.global randomx_program_aarch64_main_loop
.global randomx_program_aarch64_vm_instructions
.global randomx_program_aarch64_imul_rcp_literals_end
.global randomx_program_aarch64_vm_instructions_end
.global randomx_program_aarch64_cacheline_align_mask1
.global randomx_program_aarch64_cacheline_align_mask2
.global randomx_program_aarch64_update_spMix1
.global randomx_program_aarch64_vm_instructions_end_light
.global randomx_program_aarch64_light_cacheline_align_mask
.global randomx_program_aarch64_light_dataset_offset
.global randomx_init_dataset_aarch64
.global randomx_init_dataset_aarch64_end
.global randomx_calc_dataset_item_aarch64
.global randomx_calc_dataset_item_aarch64_prefetch
.global randomx_calc_dataset_item_aarch64_mix
.global randomx_calc_dataset_item_aarch64_store_result
.global randomx_calc_dataset_item_aarch64_end
#include "configuration.h"
@ -107,7 +101,7 @@ @@ -107,7 +101,7 @@
# v31 -> scale mask = 0x81f000000000000081f0000000000000
.balign 4
DECL(randomx_program_aarch64):
randomx_program_aarch64:
# Save callee-saved registers
sub sp, sp, 192
stp x16, x17, [sp]
@ -195,7 +189,7 @@ DECL(randomx_program_aarch64): @@ -195,7 +189,7 @@ DECL(randomx_program_aarch64):
ldr q14, literal_v14
ldr q15, literal_v15
DECL(randomx_program_aarch64_main_loop):
randomx_program_aarch64_main_loop:
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32
@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop): @@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop):
orr v23.16b, v23.16b, v30.16b
# Execute VM instructions
DECL(randomx_program_aarch64_vm_instructions):
randomx_program_aarch64_vm_instructions:
# buffer for generated instructions
# FDIV_M is the largest instruction taking up to 12 ARMv8 instructions
@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0 @@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0
literal_x28: .fill 1,8,0
literal_x29: .fill 1,8,0
literal_x30: .fill 1,8,0
DECL(randomx_program_aarch64_imul_rcp_literals_end):
randomx_program_aarch64_imul_rcp_literals_end:
literal_v0: .fill 2,8,0
literal_v1: .fill 2,8,0
@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0 @@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0
literal_v14: .fill 2,8,0
literal_v15: .fill 2,8,0
DECL(randomx_program_aarch64_vm_instructions_end):
randomx_program_aarch64_vm_instructions_end:
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
# Calculate dataset pointer for dataset prefetch
mov w18, w9
DECL(randomx_program_aarch64_cacheline_align_mask1):
randomx_program_aarch64_cacheline_align_mask1:
# Actual mask will be inserted by JIT compiler
and x18, x18, 1
add x18, x18, x1
@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1): @@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# Calculate dataset pointer for dataset read
mov w10, w9
DECL(randomx_program_aarch64_cacheline_align_mask2):
randomx_program_aarch64_cacheline_align_mask2:
# Actual mask will be inserted by JIT compiler
and x10, x10, 1
add x10, x10, x1
DECL(randomx_program_aarch64_xor_with_dataset_line):
randomx_program_aarch64_xor_with_dataset_line:
# xor integer registers with dataset data
ldp x18, x19, [x10]
eor x4, x4, x18
@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line): @@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line):
eor x14, x14, x18
eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1):
randomx_program_aarch64_update_spMix1:
# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1"
eor x10, x0, x0
@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1): @@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1):
stp q18, q19, [x16, 32]
subs x3, x3, 1
bne DECL(randomx_program_aarch64_main_loop)
bne randomx_program_aarch64_main_loop
# Restore x0
ldr x0, [sp], 16
@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1): @@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1):
ret
DECL(randomx_program_aarch64_vm_instructions_end_light):
randomx_program_aarch64_vm_instructions_end_light:
sub sp, sp, 96
stp x0, x1, [sp, 64]
stp x2, x30, [sp, 80]
@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light): @@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
# x1 -> pointer to output
mov x1, sp
DECL(randomx_program_aarch64_light_cacheline_align_mask):
randomx_program_aarch64_light_cacheline_align_mask:
# Actual mask will be inserted by JIT compiler
and w2, w9, 1
# x2 -> item number
lsr x2, x2, 6
DECL(randomx_program_aarch64_light_dataset_offset):
randomx_program_aarch64_light_dataset_offset:
# Apply dataset offset (filled in by JIT compiler)
add x2, x2, 0
add x2, x2, 0
bl DECL(randomx_calc_dataset_item_aarch64)
bl randomx_calc_dataset_item_aarch64
mov x10, sp
ldp x0, x1, [sp, 64]
ldp x2, x30, [sp, 80]
add sp, sp, 96
b DECL(randomx_program_aarch64_xor_with_dataset_line)
b randomx_program_aarch64_xor_with_dataset_line
@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset): @@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x2 -> start item
# x3 -> end item
DECL(randomx_init_dataset_aarch64):
randomx_init_dataset_aarch64:
# Save x30 (return address)
str x30, [sp, -16]!
# Load pointer to cache memory
ldr x0, [x0]
DECL(randomx_init_dataset_aarch64_main_loop):
bl DECL(randomx_calc_dataset_item_aarch64)
randomx_init_dataset_aarch64_main_loop:
bl randomx_calc_dataset_item_aarch64
add x1, x1, 64
add x2, x2, 1
cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop)
bne randomx_init_dataset_aarch64_main_loop
# Restore x30 (return address)
ldr x30, [sp], 16
ret
DECL(randomx_init_dataset_aarch64_end):
randomx_init_dataset_aarch64_end:
# Input parameters
#
@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end): @@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end):
# x12 -> temporary
# x13 -> temporary
DECL(randomx_calc_dataset_item_aarch64):
randomx_calc_dataset_item_aarch64:
sub sp, sp, 112
stp x0, x1, [sp]
stp x2, x3, [sp, 16]
@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64): @@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64):
ldr x12, superscalarAdd7
eor x7, x0, x12
b DECL(randomx_calc_dataset_item_aarch64_prefetch)
b randomx_calc_dataset_item_aarch64_prefetch
superscalarMul0: .quad 6364136223846793005
superscalarAdd1: .quad 9298411001130361340
@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294 @@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294
# Prefetch -> SuperScalar hash -> Mix will be repeated N times
DECL(randomx_calc_dataset_item_aarch64_prefetch):
randomx_calc_dataset_item_aarch64_prefetch:
# Actual mask will be inserted by JIT compiler
and x11, x10, 1
add x11, x8, x11, lsl 6
@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch): @@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch):
# Generated SuperScalar hash program goes here
DECL(randomx_calc_dataset_item_aarch64_mix):
randomx_calc_dataset_item_aarch64_mix:
ldp x12, x13, [x11]
eor x0, x0, x12
eor x1, x1, x13
@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix): @@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix):
eor x6, x6, x12
eor x7, x7, x13
DECL(randomx_calc_dataset_item_aarch64_store_result):
randomx_calc_dataset_item_aarch64_store_result:
stp x0, x1, [x9]
stp x2, x3, [x9, 16]
stp x4, x5, [x9, 32]
@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result): @@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result):
ret
DECL(randomx_calc_dataset_item_aarch64_end):
randomx_calc_dataset_item_aarch64_end:

2
src/cn_utils/randomx/jit_compiler_fallback.hpp

@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
class Program;
struct ProgramConfiguration;
class ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerFallback {

2
src/cn_utils/randomx/jit_compiler_x86.hpp

@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
class Program;
struct ProgramConfiguration;
class ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
class Instruction;

27
src/cn_utils/randomx/randomx.cpp

@ -363,31 +363,4 @@ extern "C" { @@ -363,31 +363,4 @@ extern "C" {
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(machine->tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
machine->resetRoundingMode();
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
}

20
src/cn_utils/randomx/randomx.h

@ -30,7 +30,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -30,7 +30,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_H
#include <stddef.h>
#include <stdint.h>
#define RANDOMX_HASH_SIZE 32
#define RANDOMX_DATASET_ITEM_SIZE 64
@ -239,25 +238,6 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine); @@ -239,25 +238,6 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
*/
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
/**
* Set of functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first will begin a hash calculation.
* randomx_calculate_hash_next will output the hash value of the previous input
* and begin the calculation of the next hash.
* randomx_calculate_hash_last will output the hash value of the previous input.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param input is a pointer to memory to be hashed. Must not be NULL.
* @param inputSize is the number of bytes to be hashed.
* @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
* @param nextInputSize is the number of bytes to be hashed for the next hash.
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
#if defined(__cplusplus)
}
#endif

2
src/cn_utils/randomx/tests/affinity.cpp

@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread, @@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
(thread_policy_t)&policy, 1);
#elif defined(_WIN32) || defined(__CYGWIN__)
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__)
#elif !defined(__OpenBSD__)
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(cpuid, &cs);

17
src/cn_utils/randomx/tests/benchmark.cpp

@ -122,14 +122,11 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result @@ -122,14 +122,11 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
while (nonce < noncesCount) {
nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
randomx_calculate_hash_next(vm, blockTemplate, sizeof(blockTemplate), &hash);
randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
nonce = atomicNonce.fetch_add(1);
}
}
@ -161,19 +158,13 @@ int main(int argc, char** argv) { @@ -161,19 +158,13 @@ int main(int argc, char** argv) {
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.7" << std::endl;
std::cout << "RandomX benchmark v1.1.5" << std::endl;
if (help) {
if (help || (!miningMode && !verificationMode)) {
printUsage(argv[0]);
return 0;
}
if (!miningMode && !verificationMode) {
std::cout << "Please select either the fast mode (--mine) or the slow mode (--verify)" << std::endl;
std::cout << "Run '" << argv[0] << " --help' to see all supported options" << std::endl;
return 0;
}
std::atomic<uint32_t> atomicNonce(0);
AtomicHash result;
std::vector<randomx_vm*> vms;

25
src/cn_utils/randomx/tests/tests.cpp

@ -1026,6 +1026,9 @@ int main() { @@ -1026,6 +1026,9 @@ int main() {
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
randomx_destroy_vm(vm);
vm = nullptr;
auto flags = randomx_get_flags();
randomx_release_cache(cache);
@ -1051,28 +1054,6 @@ int main() { @@ -1051,28 +1054,6 @@ int main() {
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
});
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash1[RANDOMX_HASH_SIZE];
char hash2[RANDOMX_HASH_SIZE];
char hash3[RANDOMX_HASH_SIZE];
initCache("test key 000");
char input1[] = "This is a test";
char input2[] = "Lorem ipsum dolor sit amet";
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
randomx_calculate_hash_last(vm, &hash3);
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
});
randomx_destroy_vm(vm);
vm = nullptr;
if (cache != nullptr)
randomx_release_cache(cache);

6
src/cn_utils/randomx/virtual_machine.cpp

@ -120,12 +120,6 @@ namespace randomx { @@ -120,12 +120,6 @@ namespace randomx {
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, &reg.a, fill_state);
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);

3
src/cn_utils/randomx/virtual_machine.hpp

@ -38,7 +38,6 @@ public: @@ -38,7 +38,6 @@ public:
virtual ~randomx_vm() = 0;
virtual void allocate() = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0;
virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0;
@ -68,7 +67,6 @@ protected: @@ -68,7 +67,6 @@ protected:
uint64_t datasetOffset;
public:
std::string cacheKey;
alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
};
namespace randomx {
@ -80,7 +78,6 @@ namespace randomx { @@ -80,7 +78,6 @@ namespace randomx {
void allocate() override;
void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override;
void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override;
protected:
void generateProgram(void* seed);
};

18
src/primitives/block.cpp

@ -40,6 +40,16 @@ unsigned get_max_concurrency() @@ -40,6 +40,16 @@ unsigned get_max_concurrency()
return max_concurrency;
}
static uint256 cn_get_block_hash_by_height(uint64_t seed_height, char cnHash[32])
{
CBlockIndex* pblockindex = chainActive[seed_height];
uint256 blockHash = pblockindex->GetBlockHash();
const unsigned char* pHash = blockHash.begin();
for (int j = 31; j >= 0; j--) {
cnHash[31 - j] = pHash[j];
}
}
uint256 CBlockHeader::GetOriginalBlockHash() const
{
CHashWriter hashWriter(SER_GETHASH, PROTOCOL_VERSION);
@ -78,13 +88,13 @@ uint256 CBlockHeader::GetPoWHash() const @@ -78,13 +88,13 @@ uint256 CBlockHeader::GetPoWHash() const
uint32_t height = nNonce;
if (cnHeader.major_version >= RX_BLOCK_VERSION) {
uint64_t seed_height;
char cnHash[32];
seed_height = crypto::rx_seedheight(height);
CBlockIndex* pblockindex = chainActive[seed_height];
crypto::rx_slow_hash(height, seed_height, (const char*)(pblockindex->GetBlockHash().begin()),
blob.data(), blob.size(), BEGIN(thash), get_max_concurrency(), 0);
cn_get_block_hash_by_height(seed_height, cnHash);
crypto::rx_slow_hash(height, seed_height, cnHash, blob.data(), blob.size(), BEGIN(thash), get_max_concurrency(), 0);
} else {
cn_slow_hash(blob.data(), blob.size(), BEGIN(thash), cnHeader.major_version - 6, 0, height);
}
}
return thash;
}

Loading…
Cancel
Save