|
|
@ -25,32 +25,26 @@ |
|
|
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
|
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
|
|
|
|
|
|
|
|
#if defined(__APPLE__) |
|
|
|
|
|
|
|
#define DECL(x) _##x |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
#define DECL(x) x |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.arch armv8-a |
|
|
|
.arch armv8-a |
|
|
|
.text |
|
|
|
.text |
|
|
|
.global DECL(randomx_program_aarch64) |
|
|
|
.global randomx_program_aarch64
|
|
|
|
.global DECL(randomx_program_aarch64_main_loop) |
|
|
|
.global randomx_program_aarch64_main_loop
|
|
|
|
.global DECL(randomx_program_aarch64_vm_instructions) |
|
|
|
.global randomx_program_aarch64_vm_instructions
|
|
|
|
.global DECL(randomx_program_aarch64_imul_rcp_literals_end) |
|
|
|
.global randomx_program_aarch64_imul_rcp_literals_end
|
|
|
|
.global DECL(randomx_program_aarch64_vm_instructions_end) |
|
|
|
.global randomx_program_aarch64_vm_instructions_end
|
|
|
|
.global DECL(randomx_program_aarch64_cacheline_align_mask1) |
|
|
|
.global randomx_program_aarch64_cacheline_align_mask1
|
|
|
|
.global DECL(randomx_program_aarch64_cacheline_align_mask2) |
|
|
|
.global randomx_program_aarch64_cacheline_align_mask2
|
|
|
|
.global DECL(randomx_program_aarch64_update_spMix1) |
|
|
|
.global randomx_program_aarch64_update_spMix1
|
|
|
|
.global DECL(randomx_program_aarch64_vm_instructions_end_light) |
|
|
|
.global randomx_program_aarch64_vm_instructions_end_light
|
|
|
|
.global DECL(randomx_program_aarch64_light_cacheline_align_mask) |
|
|
|
.global randomx_program_aarch64_light_cacheline_align_mask
|
|
|
|
.global DECL(randomx_program_aarch64_light_dataset_offset) |
|
|
|
.global randomx_program_aarch64_light_dataset_offset
|
|
|
|
.global DECL(randomx_init_dataset_aarch64) |
|
|
|
.global randomx_init_dataset_aarch64
|
|
|
|
.global DECL(randomx_init_dataset_aarch64_end) |
|
|
|
.global randomx_init_dataset_aarch64_end
|
|
|
|
.global DECL(randomx_calc_dataset_item_aarch64) |
|
|
|
.global randomx_calc_dataset_item_aarch64
|
|
|
|
.global DECL(randomx_calc_dataset_item_aarch64_prefetch) |
|
|
|
.global randomx_calc_dataset_item_aarch64_prefetch
|
|
|
|
.global DECL(randomx_calc_dataset_item_aarch64_mix) |
|
|
|
.global randomx_calc_dataset_item_aarch64_mix
|
|
|
|
.global DECL(randomx_calc_dataset_item_aarch64_store_result) |
|
|
|
.global randomx_calc_dataset_item_aarch64_store_result
|
|
|
|
.global DECL(randomx_calc_dataset_item_aarch64_end) |
|
|
|
.global randomx_calc_dataset_item_aarch64_end
|
|
|
|
|
|
|
|
|
|
|
|
#include "configuration.h" |
|
|
|
#include "configuration.h" |
|
|
|
|
|
|
|
|
|
|
@ -107,7 +101,7 @@ |
|
|
|
# v31 -> scale mask = 0x81f000000000000081f0000000000000 |
|
|
|
# v31 -> scale mask = 0x81f000000000000081f0000000000000 |
|
|
|
|
|
|
|
|
|
|
|
.balign 4
|
|
|
|
.balign 4
|
|
|
|
DECL(randomx_program_aarch64): |
|
|
|
randomx_program_aarch64: |
|
|
|
# Save callee-saved registers |
|
|
|
# Save callee-saved registers |
|
|
|
sub sp, sp, 192 |
|
|
|
sub sp, sp, 192 |
|
|
|
stp x16, x17, [sp] |
|
|
|
stp x16, x17, [sp] |
|
|
@ -195,7 +189,7 @@ DECL(randomx_program_aarch64): |
|
|
|
ldr q14, literal_v14 |
|
|
|
ldr q14, literal_v14 |
|
|
|
ldr q15, literal_v15 |
|
|
|
ldr q15, literal_v15 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_main_loop): |
|
|
|
randomx_program_aarch64_main_loop: |
|
|
|
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
|
|
|
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
|
|
|
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
|
|
|
|
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
|
|
|
|
lsr x18, x10, 32 |
|
|
|
lsr x18, x10, 32 |
|
|
@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop): |
|
|
|
orr v23.16b, v23.16b, v30.16b |
|
|
|
orr v23.16b, v23.16b, v30.16b |
|
|
|
|
|
|
|
|
|
|
|
# Execute VM instructions |
|
|
|
# Execute VM instructions |
|
|
|
DECL(randomx_program_aarch64_vm_instructions): |
|
|
|
randomx_program_aarch64_vm_instructions: |
|
|
|
|
|
|
|
|
|
|
|
# buffer for generated instructions |
|
|
|
# buffer for generated instructions |
|
|
|
# FDIV_M is the largest instruction taking up to 12 ARMv8 instructions |
|
|
|
# FDIV_M is the largest instruction taking up to 12 ARMv8 instructions |
|
|
@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0 |
|
|
|
literal_x28: .fill 1,8,0 |
|
|
|
literal_x28: .fill 1,8,0 |
|
|
|
literal_x29: .fill 1,8,0 |
|
|
|
literal_x29: .fill 1,8,0 |
|
|
|
literal_x30: .fill 1,8,0 |
|
|
|
literal_x30: .fill 1,8,0 |
|
|
|
DECL(randomx_program_aarch64_imul_rcp_literals_end): |
|
|
|
randomx_program_aarch64_imul_rcp_literals_end: |
|
|
|
|
|
|
|
|
|
|
|
literal_v0: .fill 2,8,0 |
|
|
|
literal_v0: .fill 2,8,0 |
|
|
|
literal_v1: .fill 2,8,0 |
|
|
|
literal_v1: .fill 2,8,0 |
|
|
@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0 |
|
|
|
literal_v14: .fill 2,8,0 |
|
|
|
literal_v14: .fill 2,8,0 |
|
|
|
literal_v15: .fill 2,8,0 |
|
|
|
literal_v15: .fill 2,8,0 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_vm_instructions_end): |
|
|
|
randomx_program_aarch64_vm_instructions_end: |
|
|
|
|
|
|
|
|
|
|
|
# mx ^= r[readReg2] ^ r[readReg3];
|
|
|
|
# mx ^= r[readReg2] ^ r[readReg3];
|
|
|
|
eor x9, x9, x18 |
|
|
|
eor x9, x9, x18 |
|
|
|
|
|
|
|
|
|
|
|
# Calculate dataset pointer for dataset prefetch |
|
|
|
# Calculate dataset pointer for dataset prefetch |
|
|
|
mov w18, w9 |
|
|
|
mov w18, w9 |
|
|
|
DECL(randomx_program_aarch64_cacheline_align_mask1): |
|
|
|
randomx_program_aarch64_cacheline_align_mask1: |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
and x18, x18, 1 |
|
|
|
and x18, x18, 1 |
|
|
|
add x18, x18, x1 |
|
|
|
add x18, x18, x1 |
|
|
@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1): |
|
|
|
|
|
|
|
|
|
|
|
# Calculate dataset pointer for dataset read |
|
|
|
# Calculate dataset pointer for dataset read |
|
|
|
mov w10, w9 |
|
|
|
mov w10, w9 |
|
|
|
DECL(randomx_program_aarch64_cacheline_align_mask2): |
|
|
|
randomx_program_aarch64_cacheline_align_mask2: |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
and x10, x10, 1 |
|
|
|
and x10, x10, 1 |
|
|
|
add x10, x10, x1 |
|
|
|
add x10, x10, x1 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_xor_with_dataset_line): |
|
|
|
randomx_program_aarch64_xor_with_dataset_line: |
|
|
|
# xor integer registers with dataset data |
|
|
|
# xor integer registers with dataset data |
|
|
|
ldp x18, x19, [x10] |
|
|
|
ldp x18, x19, [x10] |
|
|
|
eor x4, x4, x18 |
|
|
|
eor x4, x4, x18 |
|
|
@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line): |
|
|
|
eor x14, x14, x18 |
|
|
|
eor x14, x14, x18 |
|
|
|
eor x15, x15, x19 |
|
|
|
eor x15, x15, x19 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_update_spMix1): |
|
|
|
randomx_program_aarch64_update_spMix1: |
|
|
|
# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1" |
|
|
|
# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1" |
|
|
|
eor x10, x0, x0 |
|
|
|
eor x10, x0, x0 |
|
|
|
|
|
|
|
|
|
|
@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1): |
|
|
|
stp q18, q19, [x16, 32] |
|
|
|
stp q18, q19, [x16, 32] |
|
|
|
|
|
|
|
|
|
|
|
subs x3, x3, 1 |
|
|
|
subs x3, x3, 1 |
|
|
|
bne DECL(randomx_program_aarch64_main_loop) |
|
|
|
bne randomx_program_aarch64_main_loop |
|
|
|
|
|
|
|
|
|
|
|
# Restore x0 |
|
|
|
# Restore x0 |
|
|
|
ldr x0, [sp], 16 |
|
|
|
ldr x0, [sp], 16 |
|
|
@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1): |
|
|
|
|
|
|
|
|
|
|
|
ret |
|
|
|
ret |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_vm_instructions_end_light): |
|
|
|
randomx_program_aarch64_vm_instructions_end_light: |
|
|
|
sub sp, sp, 96 |
|
|
|
sub sp, sp, 96 |
|
|
|
stp x0, x1, [sp, 64] |
|
|
|
stp x0, x1, [sp, 64] |
|
|
|
stp x2, x30, [sp, 80] |
|
|
|
stp x2, x30, [sp, 80] |
|
|
@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light): |
|
|
|
# x1 -> pointer to output |
|
|
|
# x1 -> pointer to output |
|
|
|
mov x1, sp |
|
|
|
mov x1, sp |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_light_cacheline_align_mask): |
|
|
|
randomx_program_aarch64_light_cacheline_align_mask: |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
and w2, w9, 1 |
|
|
|
and w2, w9, 1 |
|
|
|
|
|
|
|
|
|
|
|
# x2 -> item number |
|
|
|
# x2 -> item number |
|
|
|
lsr x2, x2, 6 |
|
|
|
lsr x2, x2, 6 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_program_aarch64_light_dataset_offset): |
|
|
|
randomx_program_aarch64_light_dataset_offset: |
|
|
|
# Apply dataset offset (filled in by JIT compiler) |
|
|
|
# Apply dataset offset (filled in by JIT compiler) |
|
|
|
add x2, x2, 0 |
|
|
|
add x2, x2, 0 |
|
|
|
add x2, x2, 0 |
|
|
|
add x2, x2, 0 |
|
|
|
|
|
|
|
|
|
|
|
bl DECL(randomx_calc_dataset_item_aarch64) |
|
|
|
bl randomx_calc_dataset_item_aarch64 |
|
|
|
|
|
|
|
|
|
|
|
mov x10, sp |
|
|
|
mov x10, sp |
|
|
|
ldp x0, x1, [sp, 64] |
|
|
|
ldp x0, x1, [sp, 64] |
|
|
|
ldp x2, x30, [sp, 80] |
|
|
|
ldp x2, x30, [sp, 80] |
|
|
|
add sp, sp, 96 |
|
|
|
add sp, sp, 96 |
|
|
|
|
|
|
|
|
|
|
|
b DECL(randomx_program_aarch64_xor_with_dataset_line) |
|
|
|
b randomx_program_aarch64_xor_with_dataset_line |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset): |
|
|
|
# x2 -> start item |
|
|
|
# x2 -> start item |
|
|
|
# x3 -> end item |
|
|
|
# x3 -> end item |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_init_dataset_aarch64): |
|
|
|
randomx_init_dataset_aarch64: |
|
|
|
# Save x30 (return address) |
|
|
|
# Save x30 (return address) |
|
|
|
str x30, [sp, -16]! |
|
|
|
str x30, [sp, -16]! |
|
|
|
|
|
|
|
|
|
|
|
# Load pointer to cache memory |
|
|
|
# Load pointer to cache memory |
|
|
|
ldr x0, [x0] |
|
|
|
ldr x0, [x0] |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_init_dataset_aarch64_main_loop): |
|
|
|
randomx_init_dataset_aarch64_main_loop: |
|
|
|
bl DECL(randomx_calc_dataset_item_aarch64) |
|
|
|
bl randomx_calc_dataset_item_aarch64 |
|
|
|
add x1, x1, 64 |
|
|
|
add x1, x1, 64 |
|
|
|
add x2, x2, 1 |
|
|
|
add x2, x2, 1 |
|
|
|
cmp x2, x3 |
|
|
|
cmp x2, x3 |
|
|
|
bne DECL(randomx_init_dataset_aarch64_main_loop) |
|
|
|
bne randomx_init_dataset_aarch64_main_loop |
|
|
|
|
|
|
|
|
|
|
|
# Restore x30 (return address) |
|
|
|
# Restore x30 (return address) |
|
|
|
ldr x30, [sp], 16 |
|
|
|
ldr x30, [sp], 16 |
|
|
|
|
|
|
|
|
|
|
|
ret |
|
|
|
ret |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_init_dataset_aarch64_end): |
|
|
|
randomx_init_dataset_aarch64_end: |
|
|
|
|
|
|
|
|
|
|
|
# Input parameters |
|
|
|
# Input parameters |
|
|
|
# |
|
|
|
# |
|
|
@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end): |
|
|
|
# x12 -> temporary |
|
|
|
# x12 -> temporary |
|
|
|
# x13 -> temporary |
|
|
|
# x13 -> temporary |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_calc_dataset_item_aarch64): |
|
|
|
randomx_calc_dataset_item_aarch64: |
|
|
|
sub sp, sp, 112 |
|
|
|
sub sp, sp, 112 |
|
|
|
stp x0, x1, [sp] |
|
|
|
stp x0, x1, [sp] |
|
|
|
stp x2, x3, [sp, 16] |
|
|
|
stp x2, x3, [sp, 16] |
|
|
@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64): |
|
|
|
ldr x12, superscalarAdd7 |
|
|
|
ldr x12, superscalarAdd7 |
|
|
|
eor x7, x0, x12 |
|
|
|
eor x7, x0, x12 |
|
|
|
|
|
|
|
|
|
|
|
b DECL(randomx_calc_dataset_item_aarch64_prefetch) |
|
|
|
b randomx_calc_dataset_item_aarch64_prefetch |
|
|
|
|
|
|
|
|
|
|
|
superscalarMul0: .quad 6364136223846793005 |
|
|
|
superscalarMul0: .quad 6364136223846793005 |
|
|
|
superscalarAdd1: .quad 9298411001130361340 |
|
|
|
superscalarAdd1: .quad 9298411001130361340 |
|
|
@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294 |
|
|
|
|
|
|
|
|
|
|
|
# Prefetch -> SuperScalar hash -> Mix will be repeated N times |
|
|
|
# Prefetch -> SuperScalar hash -> Mix will be repeated N times |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_calc_dataset_item_aarch64_prefetch): |
|
|
|
randomx_calc_dataset_item_aarch64_prefetch: |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
# Actual mask will be inserted by JIT compiler |
|
|
|
and x11, x10, 1 |
|
|
|
and x11, x10, 1 |
|
|
|
add x11, x8, x11, lsl 6 |
|
|
|
add x11, x8, x11, lsl 6 |
|
|
@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch): |
|
|
|
|
|
|
|
|
|
|
|
# Generated SuperScalar hash program goes here |
|
|
|
# Generated SuperScalar hash program goes here |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_calc_dataset_item_aarch64_mix): |
|
|
|
randomx_calc_dataset_item_aarch64_mix: |
|
|
|
ldp x12, x13, [x11] |
|
|
|
ldp x12, x13, [x11] |
|
|
|
eor x0, x0, x12 |
|
|
|
eor x0, x0, x12 |
|
|
|
eor x1, x1, x13 |
|
|
|
eor x1, x1, x13 |
|
|
@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix): |
|
|
|
eor x6, x6, x12 |
|
|
|
eor x6, x6, x12 |
|
|
|
eor x7, x7, x13 |
|
|
|
eor x7, x7, x13 |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_calc_dataset_item_aarch64_store_result): |
|
|
|
randomx_calc_dataset_item_aarch64_store_result: |
|
|
|
stp x0, x1, [x9] |
|
|
|
stp x0, x1, [x9] |
|
|
|
stp x2, x3, [x9, 16] |
|
|
|
stp x2, x3, [x9, 16] |
|
|
|
stp x4, x5, [x9, 32] |
|
|
|
stp x4, x5, [x9, 32] |
|
|
@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result): |
|
|
|
|
|
|
|
|
|
|
|
ret |
|
|
|
ret |
|
|
|
|
|
|
|
|
|
|
|
DECL(randomx_calc_dataset_item_aarch64_end): |
|
|
|
randomx_calc_dataset_item_aarch64_end: |
|
|
|