@ -25,32 +25,26 @@
@@ -25,32 +25,26 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Symbol decoration for labels that are referenced from C code.
# Mach-O (Apple) targets expect a leading underscore on such names;
# other targets use the name unchanged.
#if defined(__APPLE__)
#define DECL(x) _##x
#else
#define DECL(x) x
#endif

	.arch armv8-a
	.text

# Exported labels. Each one is declared exactly once, through the
# decoration macro, so the same list serves every target.
	.global DECL(randomx_program_aarch64)
	.global DECL(randomx_program_aarch64_main_loop)
	.global DECL(randomx_program_aarch64_vm_instructions)
	.global DECL(randomx_program_aarch64_imul_rcp_literals_end)
	.global DECL(randomx_program_aarch64_vm_instructions_end)
	.global DECL(randomx_program_aarch64_cacheline_align_mask1)
	.global DECL(randomx_program_aarch64_cacheline_align_mask2)
	.global DECL(randomx_program_aarch64_update_spMix1)
	.global DECL(randomx_program_aarch64_vm_instructions_end_light)
	.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
	.global DECL(randomx_program_aarch64_light_dataset_offset)
	.global DECL(randomx_init_dataset_aarch64)
	.global DECL(randomx_init_dataset_aarch64_end)
	.global DECL(randomx_calc_dataset_item_aarch64)
	.global DECL(randomx_calc_dataset_item_aarch64_prefetch)
	.global DECL(randomx_calc_dataset_item_aarch64_mix)
	.global DECL(randomx_calc_dataset_item_aarch64_store_result)
	.global DECL(randomx_calc_dataset_item_aarch64_end)

#include "configuration.h"
@ -107,7 +101,7 @@
@@ -107,7 +101,7 @@
# v31 -> scale mask = 0x81f000000000000081f0000000000000
.balign 4
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 ) :
ran domx_pr ogra m_aarc h64 :
# Save c a l l e e - s a v e d r e g i s t e r s
sub s p , s p , 1 9 2
stp x16 , x17 , [ s p ]
@ -195,7 +189,7 @@ DECL(randomx_program_aarch64):
@@ -195,7 +189,7 @@ DECL(randomx_program_aarch64):
ldr q14 , l i t e r a l _ v14
ldr q15 , l i t e r a l _ v15
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ m a i n _ l o o p ) :
ran domx_pr ogra m_aarc h64_ma in _l oop :
# spAddr0 = s p M i x1 & S c r a t c h p a d L 3 M a s k 6 4 ;
# spAddr1 = ( s p M i x1 > > 3 2 ) & S c r a t c h p a d L 3 M a s k 6 4 ;
lsr x18 , x10 , 3 2
@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop):
@@ -268,7 +262,7 @@ DECL(randomx_program_aarch64_main_loop):
orr v23 . 1 6 b , v23 . 1 6 b , v30 . 1 6 b
# Execute V M i n s t r u c t i o n s
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ v m _ i n s t r u c t i o n s ) :
ran domx_pr ogra m_aarc h64_vm_ins tr uc tions :
# buffer f o r g e n e r a t e d i n s t r u c t i o n s
# FDIV_ M i s t h e l a r g e s t i n s t r u c t i o n t a k i n g u p t o 1 2 A R M v8 i n s t r u c t i o n s
@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0
@@ -287,7 +281,7 @@ literal_x27: .fill 1,8,0
literal_x28 : .fill 1 , 8 , 0
literal_x29 : .fill 1 , 8 , 0
literal_x30 : .fill 1 , 8 , 0
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ i m u l _ r c p _ l i t e r a l s _ e n d ) :
ran domx_pr ogra m_aarc h64_imul _rcp _l iterals _en d :
literal_v0 : .fill 2 , 8 , 0
literal_v1 : .fill 2 , 8 , 0
@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0
@@ -306,14 +300,14 @@ literal_v13: .fill 2,8,0
literal_v14 : .fill 2 , 8 , 0
literal_v15 : .fill 2 , 8 , 0
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ v m _ i n s t r u c t i o n s _ e n d ) :
ran domx_pr ogra m_aarc h64_vm_ins tr uc tions _en d :
# mx ^ = r [ r e a d R e g 2 ] ^ r [ r e a d R e g 3 ] ;
eor x9 , x9 , x18
# Calculate d a t a s e t p o i n t e r f o r d a t a s e t p r e f e t c h
mov w18 , w9
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ c a c h e l i n e _ a l i g n _ m a s k 1 ) :
ran domx_pr ogra m_aarc h64_ca chel ine _al ign _mas k1 :
# Actual m a s k w i l l b e i n s e r t e d b y J I T c o m p i l e r
and x18 , x18 , 1
add x18 , x18 , x1
@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
@@ -326,12 +320,12 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
# Calculate d a t a s e t p o i n t e r f o r d a t a s e t r e a d
mov w10 , w9
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ c a c h e l i n e _ a l i g n _ m a s k 2 ) :
ran domx_pr ogra m_aarc h64_ca chel ine _al ign _mas k2 :
# Actual m a s k w i l l b e i n s e r t e d b y J I T c o m p i l e r
and x10 , x10 , 1
add x10 , x10 , x1
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ x o r _ w i t h _ d a t a s e t _ l i n e ) :
ran domx_pr ogra m_aarc h64_xor _with_da tase t_line :
# xor i n t e g e r r e g i s t e r s w i t h d a t a s e t d a t a
ldp x18 , x19 , [ x10 ]
eor x4 , x4 , x18
@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line):
@@ -346,7 +340,7 @@ DECL(randomx_program_aarch64_xor_with_dataset_line):
eor x14 , x14 , x18
eor x15 , x15 , x19
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ u p d a t e _ s p M i x1 ) :
ran domx_pr ogra m_aarc h64_up da te _sp Mix1 :
# JIT c o m p i l e r w i l l r e p l a c e i t w i t h " e o r x10 , c o n f i g . r e a d R e g 0 , c o n f i g . r e a d R e g 1 "
eor x10 , x0 , x0
@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1):
@@ -367,7 +361,7 @@ DECL(randomx_program_aarch64_update_spMix1):
stp q18 , q19 , [ x16 , 3 2 ]
subs x3 , x3 , 1
bne D E C L ( r a n d o m x _ p r o g r a m _ a a r c h64 _ m a i n _ l o o p )
bne r a n d o m x _ p r o g r a m _ a a r c h64 _ m a i n _ l o o p
# Restore x0
ldr x0 , [ s p ] , 1 6
@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1):
@@ -401,7 +395,7 @@ DECL(randomx_program_aarch64_update_spMix1):
ret
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ v m _ i n s t r u c t i o n s _ e n d _ l i g h t ) :
ran domx_pr ogra m_aarc h64_vm_ins truc tions _en d_l ight :
sub s p , s p , 9 6
stp x0 , x1 , [ s p , 6 4 ]
stp x2 , x30 , [ s p , 8 0 ]
@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
@@ -418,26 +412,26 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
# x1 - > p o i n t e r t o o u t p u t
mov x1 , s p
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ l i g h t _ c a c h e l i n e _ a l i g n _ m a s k ) :
ran domx_pr ogra m_aarc h64_l ight_ca chel ine _al ign _mas k :
# Actual m a s k w i l l b e i n s e r t e d b y J I T c o m p i l e r
and w2 , w9 , 1
# x2 - > i t e m n u m b e r
lsr x2 , x2 , 6
DECL( r a n d o m x _ p r o g r a m _ a a r c h64 _ l i g h t _ d a t a s e t _ o f f s e t ) :
ran domx_pr ogra m_aarc h64_l ight_da tase t_offse t :
# Apply d a t a s e t o f f s e t ( f i l l e d i n b y J I T c o m p i l e r )
add x2 , x2 , 0
add x2 , x2 , 0
bl D E C L ( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 )
bl r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64
mov x10 , s p
ldp x0 , x1 , [ s p , 6 4 ]
ldp x2 , x30 , [ s p , 8 0 ]
add s p , s p , 9 6
b D E C L ( r a n d o m x _ p r o g r a m _ a a r c h64 _ x o r _ w i t h _ d a t a s e t _ l i n e )
b r a n d o m x _ p r o g r a m _ a a r c h64 _ x o r _ w i t h _ d a t a s e t _ l i n e
@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset):
@@ -448,26 +442,26 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x2 -> start item
# x3 -> end item
# Entry label of the dataset initialisation routine. It calls
# randomx_calc_dataset_item_aarch64 once per loop iteration and returns
# to the caller when the item counter in x2 equals x3.
DECL(randomx_init_dataset_aarch64):
	# Save x30 (return address); the bl in the loop overwrites it.
	# The 16-byte pre-decrement keeps sp 16-byte aligned.
	str	x30, [sp, -16]!

	# Load pointer to cache memory
	ldr	x0, [x0]

# One iteration per item: call the item routine, advance x1 by 64 and
# x2 by one, then repeat until x2 equals x3.
# NOTE(review): the call happens before the first comparison, so the
# caller presumably guarantees x2 != x3 on entry - confirm at the call site.
DECL(randomx_init_dataset_aarch64_main_loop):
	bl	DECL(randomx_calc_dataset_item_aarch64)
	add	x1, x1, 64
	add	x2, x2, 1
	cmp	x2, x3
	bne	DECL(randomx_init_dataset_aarch64_main_loop)

	# Restore x30 (return address)
	ldr	x30, [sp], 16

	ret

# Marks the first byte after the routine.
DECL(randomx_init_dataset_aarch64_end):
# Input parameters
#
@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end):
@@ -485,7 +479,7 @@ DECL(randomx_init_dataset_aarch64_end):
# x12 -> temporary
# x13 -> temporary
DECL( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 ) :
ran domx_calc _da tase t_item_aarc h64 :
sub s p , s p , 1 1 2
stp x0 , x1 , [ s p ]
stp x2 , x3 , [ s p , 1 6 ]
@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64):
@@ -532,7 +526,7 @@ DECL(randomx_calc_dataset_item_aarch64):
ldr x12 , s u p e r s c a l a r A d d7
eor x7 , x0 , x12
b D E C L ( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ p r e f e t c h )
b r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ p r e f e t c h
superscalarMul0 : .quad 6364136223846793005
superscalarAdd1 : .quad 9298411001130361340
@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294
@@ -545,7 +539,7 @@ superscalarAdd7: .quad 9549104520008361294
# Prefetch - > S u p e r S c a l a r h a s h - > M i x w i l l b e r e p e a t e d N t i m e s
DECL( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ p r e f e t c h ) :
ran domx_calc _da tase t_ite m_aarc h64_pre fe tc h :
# Actual m a s k w i l l b e i n s e r t e d b y J I T c o m p i l e r
and x11 , x10 , 1
add x11 , x8 , x11 , l s l 6
@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch):
@@ -553,7 +547,7 @@ DECL(randomx_calc_dataset_item_aarch64_prefetch):
# Generated S u p e r S c a l a r h a s h p r o g r a m g o e s h e r e
DECL( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ m i x ) :
ran domx_calc _da tase t_ite m_aarc h64_mix :
ldp x12 , x13 , [ x11 ]
eor x0 , x0 , x12
eor x1 , x1 , x13
@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix):
@@ -567,7 +561,7 @@ DECL(randomx_calc_dataset_item_aarch64_mix):
eor x6 , x6 , x12
eor x7 , x7 , x13
DECL( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ s t o r e _ r e s u l t ) :
ran domx_calc _da tase t_ite m_aarc h64_s tore _res ul t :
stp x0 , x1 , [ x9 ]
stp x2 , x3 , [ x9 , 1 6 ]
stp x4 , x5 , [ x9 , 3 2 ]
@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result):
@@ -584,4 +578,4 @@ DECL(randomx_calc_dataset_item_aarch64_store_result):
ret
DECL( r a n d o m x _ c a l c _ d a t a s e t _ i t e m _ a a r c h64 _ e n d ) :
ran domx_calc _da tase t_ite m_aarc h64_en d :