@ -34,6 +34,16 @@
@@ -34,6 +34,16 @@
# include <string.h>
# include <openssl/sha.h>
# if defined(USE_SSE2) && !defined(USE_SSE2_ALWAYS)
# ifdef _MSC_VER
// MSVC 64bit is unable to use inline asm
# include <intrin.h>
# else
// GCC Linux or i686-w64-mingw32
# include <cpuid.h>
# endif
# endif
static inline uint32_t be32dec ( const void * pp )
{
const uint8_t * p = ( uint8_t const * ) pp ;
@ -202,26 +212,26 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
@@ -202,26 +212,26 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
/* Operate on columns. */
x04 ^ = ROTL ( x00 + x12 , 7 ) ; x09 ^ = ROTL ( x05 + x01 , 7 ) ;
x14 ^ = ROTL ( x10 + x06 , 7 ) ; x03 ^ = ROTL ( x15 + x11 , 7 ) ;
x08 ^ = ROTL ( x04 + x00 , 9 ) ; x13 ^ = ROTL ( x09 + x05 , 9 ) ;
x02 ^ = ROTL ( x14 + x10 , 9 ) ; x07 ^ = ROTL ( x03 + x15 , 9 ) ;
x12 ^ = ROTL ( x08 + x04 , 13 ) ; x01 ^ = ROTL ( x13 + x09 , 13 ) ;
x06 ^ = ROTL ( x02 + x14 , 13 ) ; x11 ^ = ROTL ( x07 + x03 , 13 ) ;
x00 ^ = ROTL ( x12 + x08 , 18 ) ; x05 ^ = ROTL ( x01 + x13 , 18 ) ;
x10 ^ = ROTL ( x06 + x02 , 18 ) ; x15 ^ = ROTL ( x11 + x07 , 18 ) ;
/* Operate on rows. */
x01 ^ = ROTL ( x00 + x03 , 7 ) ; x06 ^ = ROTL ( x05 + x04 , 7 ) ;
x11 ^ = ROTL ( x10 + x09 , 7 ) ; x12 ^ = ROTL ( x15 + x14 , 7 ) ;
x02 ^ = ROTL ( x01 + x00 , 9 ) ; x07 ^ = ROTL ( x06 + x05 , 9 ) ;
x08 ^ = ROTL ( x11 + x10 , 9 ) ; x13 ^ = ROTL ( x12 + x15 , 9 ) ;
x03 ^ = ROTL ( x02 + x01 , 13 ) ; x04 ^ = ROTL ( x07 + x06 , 13 ) ;
x09 ^ = ROTL ( x08 + x11 , 13 ) ; x14 ^ = ROTL ( x13 + x12 , 13 ) ;
x00 ^ = ROTL ( x03 + x02 , 18 ) ; x05 ^ = ROTL ( x04 + x07 , 18 ) ;
x10 ^ = ROTL ( x09 + x08 , 18 ) ; x15 ^ = ROTL ( x14 + x13 , 18 ) ;
}
@ -251,7 +261,7 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat
@@ -251,7 +261,7 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat
uint32_t i , j , k ;
V = ( uint32_t * ) ( ( ( uintptr_t ) ( scratchpad ) + 63 ) & ~ ( uintptr_t ) ( 63 ) ) ;
PBKDF2_SHA256 ( ( const uint8_t * ) input , 80 , ( const uint8_t * ) input , 80 , 1 , B , 128 ) ;
for ( k = 0 ; k < 32 ; k + + )
@ -277,47 +287,43 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat
@@ -277,47 +287,43 @@ void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scrat
}
#if defined(USE_SSE2)
/*
 * Scrypt implementation used when SSE2 support is decided at run time.
 *
 * By default, set to the generic scrypt function. This prevents a crash
 * (call through a null pointer) when scrypt_detect_sse2() was never called.
 */
void (*scrypt_1024_1_1_256_sp_detected)(const char *input, char *output, char *scratchpad) = &scrypt_1024_1_1_256_sp_generic;

/*
 * Report which scrypt implementation is in use and, unless SSE2 was fixed
 * at build time (USE_SSE2_ALWAYS), probe the CPU and point
 * scrypt_1024_1_1_256_sp_detected at the matching implementation.
 *
 * Prints one status line to stdout. Takes no arguments: the CPUID query is
 * performed here rather than by the caller.
 */
void scrypt_detect_sse2(void)
{
#if defined(USE_SSE2_ALWAYS)
    printf(" scrypt: using scrypt-sse2 as built. \n ");
#else // USE_SSE2_ALWAYS
    // 32bit x86 Linux or Windows, detect cpuid features.
    // Stays 0 (=> generic implementation) if the query cannot be performed.
    unsigned int cpuid_edx = 0;
#if defined(_MSC_VER)
    // MSVC: __cpuid fills the array with EAX, EBX, ECX, EDX in that order.
    int x86cpuid[4] = { 0 };
    __cpuid(x86cpuid, 1);
    cpuid_edx = (unsigned int)x86cpuid[3];
#else // _MSC_VER
    // Linux or i686-w64-mingw32 (gcc-4.6.3).
    // __get_cpuid returns 0 when the requested leaf is unsupported; only
    // trust the output registers on success.
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    {
        cpuid_edx = edx;
    }
#endif // _MSC_VER

    // CPUID leaf 1, EDX bit 26 is the SSE2 feature flag.
    if (cpuid_edx & (1u << 26))
    {
        scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_sse2;
        printf(" scrypt: using scrypt-sse2 as detected. \n ");
    }
    else
    {
        scrypt_1024_1_1_256_sp_detected = &scrypt_1024_1_1_256_sp_generic;
        printf(" scrypt: using scrypt-generic, SSE2 unavailable. \n ");
    }
#endif // USE_SSE2_ALWAYS
}
#endif // USE_SSE2

/*
 * Compute the scrypt hash of `input` into `output` using a scratchpad
 * allocated on the stack (SCRYPT_SCRATCHPAD_SIZE bytes).
 *
 * Exactly one implementation is invoked per call:
 *   - USE_SSE2 + USE_SSE2_ALWAYS: the SSE2 routine, called directly
 *     (no function-pointer indirection when SSE2 is known at build time);
 *   - USE_SSE2 only: whatever scrypt_detect_sse2() selected, or the generic
 *     routine if detection has not run yet;
 *   - otherwise: the generic routine.
 */
void scrypt_1024_1_1_256(const char *input, char *output)
{
    char scratchpad[SCRYPT_SCRATCHPAD_SIZE];

#if defined(USE_SSE2)
#if defined(USE_SSE2_ALWAYS)
    // Always SSE2
    scrypt_1024_1_1_256_sp_sse2(input, output, scratchpad);
#else
    // Detected at run time
    scrypt_1024_1_1_256_sp_detected(input, output, scratchpad);
#endif
#else
    // Generic scrypt
    scrypt_1024_1_1_256_sp_generic(input, output, scratchpad);
#endif
}