diff --git a/src/key.cpp b/src/key.cpp index acf62360a..36613342b 100644 --- a/src/key.cpp +++ b/src/key.cpp @@ -82,7 +82,7 @@ bool CKey::Sign(const uint256 &hash, std::vector& vchSig, uint32_ prng.Generate((unsigned char*)&nonce, 32); nonce += test_case; int nSigLen = 72; - int ret = secp256k1_ecdsa_sign((const unsigned char*)&hash, 32, (unsigned char*)&vchSig[0], &nSigLen, begin(), (unsigned char*)&nonce); + int ret = secp256k1_ecdsa_sign((const unsigned char*)&hash, (unsigned char*)&vchSig[0], &nSigLen, begin(), (unsigned char*)&nonce); nonce = 0; if (ret) { vchSig.resize(nSigLen); @@ -114,7 +114,7 @@ bool CKey::SignCompact(const uint256 &hash, std::vector& vchSig) do { uint256 nonce; prng.Generate((unsigned char*)&nonce, 32); - int ret = secp256k1_ecdsa_sign_compact((const unsigned char*)&hash, 32, &vchSig[1], begin(), (unsigned char*)&nonce, &rec); + int ret = secp256k1_ecdsa_sign_compact((const unsigned char*)&hash, &vchSig[1], begin(), (unsigned char*)&nonce, &rec); nonce = 0; if (ret) break; diff --git a/src/pubkey.cpp b/src/pubkey.cpp index 91979ff4d..80bbac920 100644 --- a/src/pubkey.cpp +++ b/src/pubkey.cpp @@ -16,7 +16,7 @@ bool CPubKey::Verify(const uint256 &hash, const std::vector& vchS if (!IsValid()) return false; #ifdef USE_SECP256K1 - if (secp256k1_ecdsa_verify((const unsigned char*)&hash, 32, &vchSig[0], vchSig.size(), begin(), size()) != 1) + if (secp256k1_ecdsa_verify((const unsigned char*)&hash, &vchSig[0], vchSig.size(), begin(), size()) != 1) return false; #else CECKey key; @@ -35,7 +35,7 @@ bool CPubKey::RecoverCompact(const uint256 &hash, const std::vector]],[[ + uint64_t a = 11, tmp; + __asm__ __volatile__("movq $0x100000000,%1; mulq %%rsi" : "+a"(a) : "S"(tmp) : "cc", "%rdx"); + ]])],[has_64bit_asm=yes],[has_64bit_asm=no]) +AC_MSG_RESULT([$has_64bit_asm]) +if test x"$set_field" == x"64bit_asm"; then + if test x"$has_64bit_asm" == x"no"; then + AC_MSG_ERROR([$set_field field support explicitly requested but no x86_64 assembly available]) 
fi fi ]) @@ -52,8 +30,13 @@ AC_DEFUN([SECP_OPENSSL_CHECK],[ if test x"$use_pkgconfig" = x"yes"; then : #NOP m4_ifdef([PKG_CHECK_MODULES],[ - PKG_CHECK_MODULES([CRYPTO], [libcrypto], [has_libcrypto=yes; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])],[has_libcrypto=no]) - : #NOP + PKG_CHECK_MODULES([CRYPTO], [libcrypto], [has_libcrypto=yes],[has_libcrypto=no]) + if test x"$has_libcrypto" = x"yes"; then + TEMP_LIBS="$LIBS" + LIBS="$LIBS $CRYPTO_LIBS" + AC_CHECK_LIB(crypto, main,[AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])],[has_libcrypto=no]) + LIBS="$TEMP_LIBS" + fi ]) else AC_CHECK_HEADER(openssl/crypto.h,[AC_CHECK_LIB(crypto, main,[has_libcrypto=yes; CRYPTO_LIBS=-lcrypto; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])] diff --git a/src/secp256k1/configure.ac b/src/secp256k1/configure.ac index 6e6fccd7f..40e121e80 100644 --- a/src/secp256k1/configure.ac +++ b/src/secp256k1/configure.ac @@ -18,6 +18,10 @@ AC_PATH_TOOL(AR, ar) AC_PATH_TOOL(RANLIB, ranlib) AC_PATH_TOOL(STRIP, strip) +if test "x$CFLAGS" = "x"; then + CFLAGS="-O3 -g" +fi + AC_PROG_CC_C99 if test x"$ac_cv_prog_cc_c99" == x"no"; then AC_MSG_ERROR([c99 compiler support required]) @@ -103,7 +107,11 @@ AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto], AC_CHECK_TYPES([__int128]) -AC_CHECK_DECL(__builtin_expect,AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]),,) +AC_MSG_CHECKING([for __builtin_expect]) +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[void myfunc() {__builtin_expect(0,0);}]])], + [ AC_MSG_RESULT([yes]);AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]) ], + [ AC_MSG_RESULT([no]) + ]) if test x"$req_field" = x"auto"; then SECP_64BIT_ASM_CHECK @@ -283,7 +291,6 @@ AC_SUBST(SECP_INCLUDES) AC_SUBST(SECP_LIBS) AC_SUBST(SECP_TEST_LIBS) AC_SUBST(SECP_TEST_INCLUDES) -AC_SUBST(YASM_BINFMT) AM_CONDITIONAL([USE_ASM], [test x"$set_field" 
== x"64bit_asm"]) AM_CONDITIONAL([USE_TESTS], [test x"$use_tests" != x"no"]) AM_CONDITIONAL([USE_BENCHMARK], [test x"$use_benchmark" != x"no"]) diff --git a/src/secp256k1/include/secp256k1.h b/src/secp256k1/include/secp256k1.h index 94a6ef483..dca7ca00e 100644 --- a/src/secp256k1/include/secp256k1.h +++ b/src/secp256k1/include/secp256k1.h @@ -62,8 +62,7 @@ void secp256k1_stop(void); * 0: incorrect signature * -1: invalid public key * -2: invalid signature - * In: msg: the message being verified (cannot be NULL) - * msglen: the length of the message (at most 32) + * In: msg32: the 32-byte message hash being verified (cannot be NULL) * sig: the signature being verified (cannot be NULL) * siglen: the length of the signature * pubkey: the public key to verify with (cannot be NULL) @@ -71,19 +70,17 @@ void secp256k1_stop(void); * Requires starting using SECP256K1_START_VERIFY. */ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify( - const unsigned char *msg, - int msglen, + const unsigned char *msg32, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen -) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(5); +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4); /** Create an ECDSA signature. * Returns: 1: signature created * 0: nonce invalid, try another one - * In: msg: the message being signed (cannot be NULL) - * msglen: the length of the message being signed (at most 32) + * In: msg32: the 32-byte message hash being signed (cannot be NULL) * seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid) * nonce: pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG) * Out: sig: pointer to an array where the signature will be placed (cannot be NULL) @@ -92,19 +89,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify( * Requires starting using SECP256K1_START_SIGN. 
*/ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign( - const unsigned char *msg, - int msglen, + const unsigned char *msg32, unsigned char *sig, int *siglen, const unsigned char *seckey, const unsigned char *nonce -) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5) SECP256K1_ARG_NONNULL(6); +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5); /** Create a compact ECDSA signature (64 byte + recovery id). * Returns: 1: signature created * 0: nonce invalid, try another one - * In: msg: the message being signed (cannot be NULL) - * msglen: the length of the message being signed (at most 32) + * In: msg32: the 32-byte message hash being signed (cannot be NULL) * seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid) * nonce: pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG) * Out: sig: pointer to a 64-byte array where the signature will be placed (cannot be NULL) @@ -112,19 +107,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign( * Requires starting using SECP256K1_START_SIGN. */ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact( - const unsigned char *msg, - int msglen, + const unsigned char *msg32, unsigned char *sig64, const unsigned char *seckey, const unsigned char *nonce, int *recid -) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5); +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); /** Recover an ECDSA public key from a compact signature. * Returns: 1: public key successfully recovered (which guarantees a correct signature). * 0: otherwise. 
- * In: msg: the message assumed to be signed (cannot be NULL) - * msglen: the length of the message (at most 32) + * In: msg32: the 32-byte message hash assumed to be signed (cannot be NULL) * sig64: signature as 64 byte array (cannot be NULL) * compressed: whether to recover a compressed or uncompressed pubkey * recid: the recovery id (0-3, as returned by ecdsa_sign_compact) @@ -133,14 +126,13 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact( * Requires starting using SECP256K1_START_VERIFY. */ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact( - const unsigned char *msg, - int msglen, + const unsigned char *msg32, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid -) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5); +) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4); /** Verify an ECDSA secret key. * Returns: 1: secret key is valid diff --git a/src/secp256k1/nasm_lt.sh b/src/secp256k1/nasm_lt.sh deleted file mode 100755 index 6cd73294c..000000000 --- a/src/secp256k1/nasm_lt.sh +++ /dev/null @@ -1,57 +0,0 @@ -#! /bin/sh -command="" -infile="" -o_opt=no -pic=no -while [ $# -gt 0 ]; do - case "$1" in - -DPIC|-fPIC|-fpic|-Kpic|-KPIC) - if [ "$pic" != "yes" ] ; then - command="$command -DPIC" - pic=yes - fi - ;; - -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \ - -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64) - # it's a file format specifier for nasm. - command="$command $1" - ;; - -f*) - # maybe a code-generation flag for gcc. - ;; - -[Ii]*) - incdir=`echo "$1" | sed 's/^-[Ii]//'` - if [ "x$incdir" = x -a "x$2" != x ] ; then - case "$2" in - -*) ;; - *) incdir="$2"; shift;; - esac - fi - if [ "x$incdir" != x ] ; then - # In the case of NASM, the trailing slash is necessary. 
- incdir=`echo "$incdir" | sed 's%/*$%/%'` - command="$command -I$incdir" - fi - ;; - -o*) - o_opt=yes - command="$command $1" - ;; - *.asm) - infile=$1 - command="$command $1" - ;; - *) - command="$command $1" - ;; - esac - shift -done -if [ "$o_opt" != yes ] ; then - # By default, NASM creates an output file - # in the same directory as the input file. - outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o" - command="$command $outfile" -fi -echo $command -exec $command diff --git a/src/secp256k1/src/bench.h b/src/secp256k1/src/bench.h new file mode 100644 index 000000000..668ec39f7 --- /dev/null +++ b/src/secp256k1/src/bench.h @@ -0,0 +1,37 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_BENCH_H_ +#define _SECP256K1_BENCH_H_ + +#include +#include +#include "sys/time.h" + +static double gettimedouble(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec * 0.000001 + tv.tv_sec; +} + +void run_benchmark(void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) { + double min = HUGE_VAL; + double sum = 0.0; + double max = 0.0; + for (int i = 0; i < count; i++) { + if (setup) setup(data); + double begin = gettimedouble(); + benchmark(data); + double total = gettimedouble() - begin; + if (teardown) teardown(data); + if (total < min) min = total; + if (total > max) max = total; + sum += total; + } + printf("min %.3fus / avg %.3fus / max %.3fus\n", min * 1000000.0 / iter, (sum / count) * 1000000.0 / iter, max * 1000000.0 / iter); +} + +#endif diff --git a/src/secp256k1/src/bench_inv.c b/src/secp256k1/src/bench_inv.c index d6f664333..3bdedea30 100644 --- a/src/secp256k1/src/bench_inv.c 
+++ b/src/secp256k1/src/bench_inv.c @@ -12,30 +12,41 @@ #include "field_impl.h" #include "group_impl.h" #include "scalar_impl.h" +#include "bench.h" + +typedef struct { + secp256k1_scalar_t base, x; +} bench_inv_t; + +void bench_inv_setup(void* arg) { + bench_inv_t *data = (bench_inv_t*)arg; -int main(void) { static const unsigned char init[32] = { 0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13, 0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35, 0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59, 0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83 }; - static const unsigned char fini[32] = { - 0xba, 0x28, 0x58, 0xd8, 0xaa, 0x11, 0xd6, 0xf2, - 0xfa, 0xce, 0x50, 0xb1, 0x67, 0x19, 0xb1, 0xa6, - 0xe0, 0xaa, 0x84, 0x53, 0xf6, 0x80, 0xfc, 0x23, - 0x88, 0x3c, 0xd6, 0x74, 0x9f, 0x27, 0x09, 0x03 - }; - secp256k1_ge_start(); - secp256k1_scalar_t base, x; - secp256k1_scalar_set_b32(&base, init, NULL); - secp256k1_scalar_set_b32(&x, init, NULL); - for (int i=0; i<1000000; i++) { - secp256k1_scalar_inverse(&x, &x); - secp256k1_scalar_add(&x, &x, &base); + + secp256k1_scalar_set_b32(&data->base, init, NULL); + secp256k1_scalar_set_b32(&data->x, init, NULL); +} + +void bench_inv(void* arg) { + bench_inv_t *data = (bench_inv_t*)arg; + + for (int i=0; i<20000; i++) { + secp256k1_scalar_inverse(&data->x, &data->x); + secp256k1_scalar_add(&data->x, &data->x, &data->base); } - unsigned char res[32]; - secp256k1_scalar_get_b32(res, &x); - CHECK(memcmp(res, fini, 32) == 0); +} + +int main(void) { + secp256k1_ge_start(); + + bench_inv_t data; + run_benchmark(bench_inv, bench_inv_setup, NULL, &data, 10, 20000); + + secp256k1_ge_stop(); return 0; } diff --git a/src/secp256k1/src/bench_recover.c b/src/secp256k1/src/bench_recover.c new file mode 100644 index 000000000..b1e0f33ef --- /dev/null +++ b/src/secp256k1/src/bench_recover.c @@ -0,0 +1,46 @@ +/********************************************************************** + * Copyright (c) 2014 Pieter Wuille * + * Distributed under the MIT software 
license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#include "include/secp256k1.h" +#include "util.h" +#include "bench.h" + +typedef struct { + unsigned char msg[32]; + unsigned char sig[64]; +} bench_recover_t; + +void bench_recover(void* arg) { + bench_recover_t *data = (bench_recover_t*)arg; + + unsigned char pubkey[33]; + for (int i=0; i<20000; i++) { + int pubkeylen = 33; + CHECK(secp256k1_ecdsa_recover_compact(data->msg, data->sig, pubkey, &pubkeylen, 1, i % 2)); + for (int j = 0; j < 32; j++) { + data->sig[j + 32] = data->msg[j]; /* Move former message to S. */ + data->msg[j] = data->sig[j]; /* Move former R to message. */ + data->sig[j] = pubkey[j + 1]; /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */ + } + } +} + +void bench_recover_setup(void* arg) { + bench_recover_t *data = (bench_recover_t*)arg; + + for (int i = 0; i < 32; i++) data->msg[i] = 1 + i; + for (int i = 0; i < 64; i++) data->sig[i] = 65 + i; +} + +int main(void) { + secp256k1_start(SECP256K1_START_VERIFY); + + bench_recover_t data; + run_benchmark(bench_recover, bench_recover_setup, NULL, &data, 10, 20000); + + secp256k1_stop(); + return 0; +} diff --git a/src/secp256k1/src/bench_sign.c b/src/secp256k1/src/bench_sign.c index f01f11d68..66e71e1ac 100644 --- a/src/secp256k1/src/bench_sign.c +++ b/src/secp256k1/src/bench_sign.c @@ -3,46 +3,45 @@ * Distributed under the MIT software license, see the accompanying * * file COPYING or http://www.opensource.org/licenses/mit-license.php.* **********************************************************************/ -#include -#include #include "include/secp256k1.h" #include "util.h" +#include "bench.h" -int main(void) { - secp256k1_start(SECP256K1_START_SIGN); - +typedef struct { unsigned char msg[32]; unsigned char nonce[32]; unsigned char key[32]; +} bench_sign_t; - for (int i = 0; i < 32; 
i++) msg[i] = i + 1; - for (int i = 0; i < 32; i++) nonce[i] = i + 33; - for (int i = 0; i < 32; i++) key[i] = i + 65; +static void bench_sign_setup(void* arg) { + bench_sign_t *data = (bench_sign_t*)arg; - unsigned char sig[64]; + for (int i = 0; i < 32; i++) data->msg[i] = i + 1; + for (int i = 0; i < 32; i++) data->nonce[i] = i + 33; + for (int i = 0; i < 32; i++) data->key[i] = i + 65; +} + +static void bench_sign(void* arg) { + bench_sign_t *data = (bench_sign_t*)arg; - for (int i=0; i<1000000; i++) { + unsigned char sig[64]; + for (int i=0; i<20000; i++) { int recid = 0; - CHECK(secp256k1_ecdsa_sign_compact(msg, 32, sig, key, nonce, &recid)); + CHECK(secp256k1_ecdsa_sign_compact(data->msg, sig, data->key, data->nonce, &recid)); for (int j = 0; j < 32; j++) { - nonce[j] = key[j]; /* Move former key to nonce */ - msg[j] = sig[j]; /* Move former R to message. */ - key[j] = sig[j + 32]; /* Move former S to key. */ + data->nonce[j] = data->key[j]; /* Move former key to nonce */ + data->msg[j] = sig[j]; /* Move former R to message. */ + data->key[j] = sig[j + 32]; /* Move former S to key. 
*/ } } +} + +int main(void) { + secp256k1_start(SECP256K1_START_SIGN); - static const unsigned char fini[64] = { - 0x92, 0x03, 0xef, 0xf1, 0x58, 0x0b, 0x49, 0x8d, - 0x22, 0x3d, 0x49, 0x0e, 0xbf, 0x26, 0x50, 0x0e, - 0x2d, 0x62, 0x90, 0xd7, 0x82, 0xbd, 0x3d, 0x5c, - 0xa9, 0x10, 0xa5, 0x49, 0xb1, 0xd8, 0x8c, 0xc0, - 0x5b, 0x5e, 0x9e, 0x68, 0x51, 0x3d, 0xe8, 0xec, - 0x82, 0x30, 0x82, 0x88, 0x8c, 0xfd, 0xe7, 0x71, - 0x15, 0x92, 0xfc, 0x14, 0x59, 0x78, 0x31, 0xb3, - 0xf6, 0x07, 0x91, 0x18, 0x00, 0x8d, 0x4c, 0xb2 - }; - CHECK(memcmp(sig, fini, 64) == 0); + bench_sign_t data; + run_benchmark(bench_sign, bench_sign_setup, NULL, &data, 10, 20000); secp256k1_stop(); return 0; diff --git a/src/secp256k1/src/bench_verify.c b/src/secp256k1/src/bench_verify.c index 690595516..b123c4087 100644 --- a/src/secp256k1/src/bench_verify.c +++ b/src/secp256k1/src/bench_verify.c @@ -9,35 +9,46 @@ #include "include/secp256k1.h" #include "util.h" +#include "bench.h" -int main(void) { - secp256k1_start(SECP256K1_START_VERIFY); - +typedef struct { unsigned char msg[32]; - unsigned char sig[64]; - - for (int i = 0; i < 32; i++) msg[i] = 1 + i; - for (int i = 0; i < 64; i++) sig[i] = 65 + i; - + unsigned char key[32]; + unsigned char nonce[32]; + unsigned char sig[72]; + int siglen; unsigned char pubkey[33]; - for (int i=0; i<1000000; i++) { - int pubkeylen = 33; - CHECK(secp256k1_ecdsa_recover_compact(msg, 32, sig, pubkey, &pubkeylen, 1, i % 2)); - for (int j = 0; j < 32; j++) { - sig[j + 32] = msg[j]; /* Move former message to S. */ - msg[j] = sig[j]; /* Move former R to message. */ - sig[j] = pubkey[j + 1]; /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). 
*/ - } + int pubkeylen; +} benchmark_verify_t; + +static void benchmark_verify(void* arg) { + benchmark_verify_t* data = (benchmark_verify_t*)arg; + + for (int i=0; i<20000; i++) { + data->sig[data->siglen - 1] ^= (i & 0xFF); + data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF); + data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF); + CHECK(secp256k1_ecdsa_verify(data->msg, data->sig, data->siglen, data->pubkey, data->pubkeylen) == (i == 0)); + data->sig[data->siglen - 1] ^= (i & 0xFF); + data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF); + data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF); } +} + +int main(void) { + secp256k1_start(SECP256K1_START_VERIFY | SECP256K1_START_SIGN); + + benchmark_verify_t data; + + for (int i = 0; i < 32; i++) data.msg[i] = 1 + i; + for (int i = 0; i < 32; i++) data.key[i] = 33 + i; + for (int i = 0; i < 32; i++) data.nonce[i] = 65 + i; + data.siglen = 72; + CHECK(secp256k1_ecdsa_sign(data.msg, data.sig, &data.siglen, data.key, data.nonce)); + data.pubkeylen = 33; + CHECK(secp256k1_ec_pubkey_create(data.pubkey, &data.pubkeylen, data.key, 1)); - static const unsigned char fini[33] = { - 0x02, - 0x52, 0x63, 0xae, 0x9a, 0x9d, 0x47, 0x1f, 0x1a, - 0xb2, 0x36, 0x65, 0x89, 0x11, 0xe7, 0xcc, 0x86, - 0xa3, 0xab, 0x97, 0xb6, 0xf1, 0xaf, 0xfd, 0x8f, - 0x9b, 0x38, 0xb6, 0x18, 0x55, 0xe5, 0xc2, 0x43 - }; - CHECK(memcmp(fini, pubkey, 33) == 0); + run_benchmark(benchmark_verify, NULL, NULL, &data, 10, 20000); secp256k1_stop(); return 0; diff --git a/src/secp256k1/src/ecdsa_impl.h b/src/secp256k1/src/ecdsa_impl.h index a951d0b4a..8825d05fe 100644 --- a/src/secp256k1/src/ecdsa_impl.h +++ b/src/secp256k1/src/ecdsa_impl.h @@ -27,7 +27,7 @@ static void secp256k1_ecdsa_start(void) { return; /* Allocate. 
*/ - secp256k1_ecdsa_consts_t *ret = (secp256k1_ecdsa_consts_t*)malloc(sizeof(secp256k1_ecdsa_consts_t)); + secp256k1_ecdsa_consts_t *ret = (secp256k1_ecdsa_consts_t*)checked_malloc(sizeof(secp256k1_ecdsa_consts_t)); static const unsigned char order[] = { 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, @@ -38,7 +38,7 @@ static void secp256k1_ecdsa_start(void) { secp256k1_fe_set_b32(&ret->order_as_fe, order); secp256k1_fe_negate(&ret->p_minus_order, &ret->order_as_fe, 1); - secp256k1_fe_normalize(&ret->p_minus_order); + secp256k1_fe_normalize_var(&ret->p_minus_order); /* Set the global pointer. */ secp256k1_ecdsa_consts = ret; @@ -122,7 +122,7 @@ static int secp256k1_ecdsa_sig_recompute(secp256k1_scalar_t *r2, const secp256k1 secp256k1_gej_t pr; secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1); if (!secp256k1_gej_is_infinity(&pr)) { secp256k1_fe_t xr; secp256k1_gej_get_x_var(&xr, &pr); - secp256k1_fe_normalize(&xr); + secp256k1_fe_normalize_var(&xr); unsigned char xrb[32]; secp256k1_fe_get_b32(xrb, &xr); secp256k1_scalar_set_b32(r2, xrb, NULL); ret = 1; @@ -144,7 +144,7 @@ static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256 secp256k1_fe_add(&fx, &secp256k1_ecdsa_consts->order_as_fe); } secp256k1_ge_t x; - if (!secp256k1_ge_set_xo(&x, &fx, recid & 1)) + if (!secp256k1_ge_set_xo_var(&x, &fx, recid & 1)) return 0; secp256k1_gej_t xj; secp256k1_gej_set_ge(&xj, &x); diff --git a/src/secp256k1/src/eckey_impl.h b/src/secp256k1/src/eckey_impl.h index 0f218ced9..b3fa7d9bd 100644 --- a/src/secp256k1/src/eckey_impl.h +++ b/src/secp256k1/src/eckey_impl.h @@ -17,7 +17,7 @@ static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) { if (size == 33 && (pub[0] == 0x02 || pub[0] == 0x03)) { secp256k1_fe_t x; - return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo(elem, &x, pub[0] == 0x03); + return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == 0x03); } else if (size == 65 && (pub[0] == 
0x04 || pub[0] == 0x06 || pub[0] == 0x07)) { secp256k1_fe_t x, y; if (!secp256k1_fe_set_b32(&x, pub+1) || !secp256k1_fe_set_b32(&y, pub+33)) { @@ -26,7 +26,7 @@ static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned cha secp256k1_ge_set_xy(elem, &x, &y); if ((pub[0] == 0x06 || pub[0] == 0x07) && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07)) return 0; - return secp256k1_ge_is_valid(elem); + return secp256k1_ge_is_valid_var(elem); } else { return 0; } @@ -36,8 +36,8 @@ static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char if (secp256k1_ge_is_infinity(elem)) { return 0; } - secp256k1_fe_normalize(&elem->x); - secp256k1_fe_normalize(&elem->y); + secp256k1_fe_normalize_var(&elem->x); + secp256k1_fe_normalize_var(&elem->y); secp256k1_fe_get_b32(&pub[1], &elem->x); if (compressed) { *size = 33; diff --git a/src/secp256k1/src/ecmult_gen_impl.h b/src/secp256k1/src/ecmult_gen_impl.h index af0ead522..5a5b16ce1 100644 --- a/src/secp256k1/src/ecmult_gen_impl.h +++ b/src/secp256k1/src/ecmult_gen_impl.h @@ -34,7 +34,7 @@ static void secp256k1_ecmult_gen_start(void) { return; /* Allocate the precomputation table. */ - secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)malloc(sizeof(secp256k1_ecmult_gen_consts_t)); + secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_gen_consts_t)); /* get the generator */ const secp256k1_ge_t *g = &secp256k1_ge_consts->g; @@ -47,7 +47,7 @@ static void secp256k1_ecmult_gen_start(void) { secp256k1_fe_t nums_x; VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32)); secp256k1_ge_t nums_ge; - VERIFY_CHECK(secp256k1_ge_set_xo(&nums_ge, &nums_x, 0)); + VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0)); secp256k1_gej_set_ge(&nums_gej, &nums_ge); /* Add G to make the bits in x uniformly distributed. 
*/ secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, g); @@ -73,7 +73,7 @@ static void secp256k1_ecmult_gen_start(void) { secp256k1_gej_double_var(&numsbase, &numsbase); if (j == 62) { /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */ - secp256k1_gej_neg(&numsbase, &numsbase); + secp256k1_gej_neg_var(&numsbase, &numsbase); secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej); } } diff --git a/src/secp256k1/src/ecmult_impl.h b/src/secp256k1/src/ecmult_impl.h index 445b81593..653677104 100644 --- a/src/secp256k1/src/ecmult_impl.h +++ b/src/secp256k1/src/ecmult_impl.h @@ -15,11 +15,13 @@ #define WINDOW_A 5 /** larger numbers may result in slightly better performance, at the cost of - exponentially larger precomputed tables. WINDOW_G == 14 results in 640 KiB. */ + exponentially larger precomputed tables. */ #ifdef USE_ENDOMORPHISM -#define WINDOW_G 14 -#else +/** Two tables for window size 15: 1.375 MiB. */ #define WINDOW_G 15 +#else +/** One table for window size 16: 1.375 MiB. */ +#define WINDOW_G 16 #endif /** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table. @@ -43,13 +45,14 @@ static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const s static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const secp256k1_gej_t *a, int w) { const int table_size = 1 << (w-2); - secp256k1_gej_t prej[table_size]; + secp256k1_gej_t *prej = checked_malloc(sizeof(secp256k1_gej_t) * table_size); prej[0] = *a; secp256k1_gej_t d; secp256k1_gej_double_var(&d, a); for (int i=1; ig; diff --git a/src/secp256k1/src/field.h b/src/secp256k1/src/field.h index 0cdf0fb47..53aa29e13 100644 --- a/src/secp256k1/src/field.h +++ b/src/secp256k1/src/field.h @@ -50,6 +50,9 @@ static void secp256k1_fe_stop(void); /** Normalize a field element. */ static void secp256k1_fe_normalize(secp256k1_fe_t *r); +/** Normalize a field element, without constant-time guarantee. 
*/ +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r); + /** Set a field element equal to a small integer. Resulting field element is normalized. */ static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a); @@ -93,7 +96,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a); /** Sets a field element to be the (modular) square root (if any exist) of another. Requires the * input's magnitude to be at most 8. The output magnitude is 1 (but not guaranteed to be * normalized). Return value indicates whether a square root was found. */ -static int secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a); +static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a); /** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be * at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */ @@ -105,9 +108,6 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a); /** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be * at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and * outputs must not overlap in memory. */ -static void secp256k1_fe_inv_all(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]); - -/** Potentially faster version of secp256k1_fe_inv_all, without constant-time guarantee. */ static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]); /** Convert a field element to a hexadecimal string. 
*/ diff --git a/src/secp256k1/src/field_10x26_impl.h b/src/secp256k1/src/field_10x26_impl.h index c4403fba2..d20229cda 100644 --- a/src/secp256k1/src/field_10x26_impl.h +++ b/src/secp256k1/src/field_10x26_impl.h @@ -103,6 +103,62 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { #endif } +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + uint32_t m; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; + + /* ... except for a possible carry at bit 22 of t9 (i.e. 
bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) + & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); + + if (x) { + t0 += 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + + /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ + VERIFY_CHECK(t9 >> 22 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t9 &= 0x03FFFFFUL; + } + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; @@ -271,7 +327,7 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1 #define VERIFY_BITS(x, n) do { } while(0) #endif -SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b, uint32_t *r) { +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { VERIFY_BITS(a[0], 30); VERIFY_BITS(a[1], 30); VERIFY_BITS(a[2], 30); @@ -598,7 +654,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uin /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 
p4 p3 p2 p1 p0] */ } -SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t *r) { +SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { VERIFY_BITS(a[0], 30); VERIFY_BITS(a[1], 30); VERIFY_BITS(a[2], 30); @@ -879,7 +935,7 @@ static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s secp256k1_fe_verify(b); VERIFY_CHECK(r != b); #endif - secp256k1_fe_mul_inner(a->n, b->n, r->n); + secp256k1_fe_mul_inner(r->n, a->n, b->n); #ifdef VERIFY r->magnitude = 1; r->normalized = 0; @@ -892,7 +948,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { VERIFY_CHECK(a->magnitude <= 8); secp256k1_fe_verify(a); #endif - secp256k1_fe_sqr_inner(a->n, r->n); + secp256k1_fe_sqr_inner(r->n, a->n); #ifdef VERIFY r->magnitude = 1; r->normalized = 0; diff --git a/src/secp256k1/src/field_5x52_asm.asm b/src/secp256k1/src/field_5x52_asm.asm deleted file mode 100644 index 5e785f763..000000000 --- a/src/secp256k1/src/field_5x52_asm.asm +++ /dev/null @@ -1,469 +0,0 @@ - ;; Added by Diederik Huys, March 2013 - ;; - ;; Provided public procedures: - ;; secp256k1_fe_mul_inner - ;; secp256k1_fe_sqr_inner - ;; - ;; Needed tools: YASM (http://yasm.tortall.net) - ;; - ;; - - BITS 64 - -%ifidn __OUTPUT_FORMAT__,macho64 -%define SYM(x) _ %+ x -%else -%define SYM(x) x -%endif - - ;; Procedure ExSetMult - ;; Register Layout: - ;; INPUT: rdi = a->n - ;; rsi = b->n - ;; rdx = r->a - ;; - ;; INTERNAL: rdx:rax = multiplication accumulator - ;; r9:r8 = c - ;; r10-r13 = t0-t3 - ;; r14 = b.n[0] / t4 - ;; r15 = b.n[1] / t5 - ;; rbx = b.n[2] / t6 - ;; rcx = b.n[3] / t7 - ;; rbp = Constant 0FFFFFFFFFFFFFh / t8 - ;; rsi = b.n / b.n[4] / t9 - - GLOBAL SYM(secp256k1_fe_mul_inner) - ALIGN 32 -SYM(secp256k1_fe_mul_inner): - push rbp - push rbx - push r12 - push r13 - push r14 - push r15 - push rdx - mov r14,[rsi+8*0] ; preload b.n[0]. 
This will be the case until - ; b.n[0] is no longer needed, then we reassign - ; r14 to t4 - ;; c=a.n[0] * b.n[0] - mov rax,[rdi+0*8] ; load a.n[0] - mov rbp,0FFFFFFFFFFFFFh - mul r14 ; rdx:rax=a.n[0]*b.n[0] - mov r15,[rsi+1*8] - mov r10,rbp ; load modulus into target register for t0 - mov r8,rax - and r10,rax ; only need lower qword of c - shrd r8,rdx,52 - xor r9,r9 ; c < 2^64, so we ditch the HO part - - ;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0] - mov rax,[rdi+0*8] - mul r15 - add r8,rax - adc r9,rdx - - mov rax,[rdi+1*8] - mul r14 - mov r11,rbp - mov rbx,[rsi+2*8] - add r8,rax - adc r9,rdx - and r11,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[0 1 2] * b.n[2 1 0] - mov rax,[rdi+0*8] - mul rbx - add r8,rax - adc r9,rdx - - mov rax,[rdi+1*8] - mul r15 - add r8,rax - adc r9,rdx - - mov rax,[rdi+2*8] - mul r14 - mov r12,rbp - mov rcx,[rsi+3*8] - add r8,rax - adc r9,rdx - and r12,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[0 1 2 3] * b.n[3 2 1 0] - mov rax,[rdi+0*8] - mul rcx - add r8,rax - adc r9,rdx - - mov rax,[rdi+1*8] - mul rbx - add r8,rax - adc r9,rdx - - mov rax,[rdi+2*8] - mul r15 - add r8,rax - adc r9,rdx - - mov rax,[rdi+3*8] - mul r14 - mov r13,rbp - mov rsi,[rsi+4*8] ; load b.n[4] and destroy pointer - add r8,rax - adc r9,rdx - and r13,r8 - - shrd r8,r9,52 - xor r9,r9 - - - ;; c+=a.n[0 1 2 3 4] * b.n[4 3 2 1 0] - mov rax,[rdi+0*8] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,[rdi+1*8] - mul rcx - add r8,rax - adc r9,rdx - - mov rax,[rdi+2*8] - mul rbx - add r8,rax - adc r9,rdx - - mov rax,[rdi+3*8] - mul r15 - add r8,rax - adc r9,rdx - - mov rax,[rdi+4*8] - mul r14 - mov r14,rbp ; load modulus into t4 and destroy a.n[0] - add r8,rax - adc r9,rdx - and r14,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[1 2 3 4] * b.n[4 3 2 1] - mov rax,[rdi+1*8] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,[rdi+2*8] - mul rcx - add r8,rax - adc r9,rdx - - mov rax,[rdi+3*8] - mul rbx - add r8,rax - adc r9,rdx - - mov rax,[rdi+4*8] - mul r15 - mov r15,rbp - add r8,rax - 
adc r9,rdx - - and r15,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[2 3 4] * b.n[4 3 2] - mov rax,[rdi+2*8] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,[rdi+3*8] - mul rcx - add r8,rax - adc r9,rdx - - mov rax,[rdi+4*8] - mul rbx - mov rbx,rbp - add r8,rax - adc r9,rdx - - and rbx,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[3 4] * b.n[4 3] - mov rax,[rdi+3*8] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,[rdi+4*8] - mul rcx - mov rcx,rbp - add r8,rax - adc r9,rdx - and rcx,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[4] * b.n[4] - mov rax,[rdi+4*8] - mul rsi - ;; mov rbp,rbp ; modulus already there! - add r8,rax - adc r9,rdx - and rbp,r8 - shrd r8,r9,52 - xor r9,r9 - - mov rsi,r8 ; load c into t9 and destroy b.n[4] - - ;; ******************************************************* -common_exit_norm: - mov rdi,01000003D10h ; load constant - - mov rax,r15 ; get t5 - mul rdi - add rax,r10 ; +t0 - adc rdx,0 - mov r10,0FFFFFFFFFFFFFh ; modulus. Sadly, we ran out of registers! - mov r8,rax ; +c - and r10,rax - shrd r8,rdx,52 - xor r9,r9 - - mov rax,rbx ; get t6 - mul rdi - add rax,r11 ; +t1 - adc rdx,0 - mov r11,0FFFFFFFFFFFFFh ; modulus - add r8,rax ; +c - adc r9,rdx - and r11,r8 - shrd r8,r9,52 - xor r9,r9 - - mov rax,rcx ; get t7 - mul rdi - add rax,r12 ; +t2 - adc rdx,0 - pop rbx ; retrieve pointer to this.n - mov r12,0FFFFFFFFFFFFFh ; modulus - add r8,rax ; +c - adc r9,rdx - and r12,r8 - mov [rbx+2*8],r12 ; mov into this.n[2] - shrd r8,r9,52 - xor r9,r9 - - mov rax,rbp ; get t8 - mul rdi - add rax,r13 ; +t3 - adc rdx,0 - mov r13,0FFFFFFFFFFFFFh ; modulus - add r8,rax ; +c - adc r9,rdx - and r13,r8 - mov [rbx+3*8],r13 ; -> this.n[3] - shrd r8,r9,52 - xor r9,r9 - - mov rax,rsi ; get t9 - mul rdi - add rax,r14 ; +t4 - adc rdx,0 - mov r14,0FFFFFFFFFFFFh ; !!! - add r8,rax ; +c - adc r9,rdx - and r14,r8 - mov [rbx+4*8],r14 ; -> this.n[4] - shrd r8,r9,48 ; !!! 
- xor r9,r9 - - mov rax,01000003D1h - mul r8 - add rax,r10 - adc rdx,0 - mov r10,0FFFFFFFFFFFFFh ; modulus - mov r8,rax - and rax,r10 - shrd r8,rdx,52 - mov [rbx+0*8],rax ; -> this.n[0] - add r8,r11 - mov [rbx+1*8],r8 ; -> this.n[1] - - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp - ret - - - ;; PROC ExSetSquare - ;; Register Layout: - ;; INPUT: rdi = a.n - ;; rsi = this.a - ;; INTERNAL: rdx:rax = multiplication accumulator - ;; r9:r8 = c - ;; r10-r13 = t0-t3 - ;; r14 = a.n[0] / t4 - ;; r15 = a.n[1] / t5 - ;; rbx = a.n[2] / t6 - ;; rcx = a.n[3] / t7 - ;; rbp = 0FFFFFFFFFFFFFh / t8 - ;; rsi = a.n[4] / t9 - GLOBAL SYM(secp256k1_fe_sqr_inner) - ALIGN 32 -SYM(secp256k1_fe_sqr_inner): - push rbp - push rbx - push r12 - push r13 - push r14 - push r15 - push rsi - mov rbp,0FFFFFFFFFFFFFh - - ;; c=a.n[0] * a.n[0] - mov r14,[rdi+0*8] ; r14=a.n[0] - mov r10,rbp ; modulus - mov rax,r14 - mul rax - mov r15,[rdi+1*8] ; a.n[1] - add r14,r14 ; r14=2*a.n[0] - mov r8,rax - and r10,rax ; only need lower qword - shrd r8,rdx,52 - xor r9,r9 - - ;; c+=2*a.n[0] * a.n[1] - mov rax,r14 ; r14=2*a.n[0] - mul r15 - mov rbx,[rdi+2*8] ; rbx=a.n[2] - mov r11,rbp ; modulus - add r8,rax - adc r9,rdx - and r11,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1] - mov rax,r14 - mul rbx - add r8,rax - adc r9,rdx - - mov rax,r15 - mov r12,rbp ; modulus - mul rax - mov rcx,[rdi+3*8] ; rcx=a.n[3] - add r15,r15 ; r15=a.n[1]*2 - add r8,rax - adc r9,rdx - and r12,r8 ; only need lower dword - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2] - mov rax,r14 - mul rcx - add r8,rax - adc r9,rdx - - mov rax,r15 ; rax=2*a.n[1] - mov r13,rbp ; modulus - mul rbx - mov rsi,[rdi+4*8] ; rsi=a.n[4] - add r8,rax - adc r9,rdx - and r13,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[0]*a.n[4]+2*a.n[1]*a.n[3]+a.n[2]*a.n[2] - mov rax,r14 ; last time we need 2*a.n[0] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,r15 - mul rcx - mov r14,rbp ; modulus - add r8,rax - adc 
r9,rdx - - mov rax,rbx - mul rax - add rbx,rbx ; rcx=2*a.n[2] - add r8,rax - adc r9,rdx - and r14,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[1]*a.n[4]+2*a.n[2]*a.n[3] - mov rax,r15 ; last time we need 2*a.n[1] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,rbx - mul rcx - mov r15,rbp ; modulus - add r8,rax - adc r9,rdx - and r15,r8 - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[2]*a.n[4]+a.n[3]*a.n[3] - mov rax,rbx ; last time we need 2*a.n[2] - mul rsi - add r8,rax - adc r9,rdx - - mov rax,rcx ; a.n[3] - mul rax - mov rbx,rbp ; modulus - add r8,rax - adc r9,rdx - and rbx,r8 ; only need lower dword - lea rax,[2*rcx] - shrd r8,r9,52 - xor r9,r9 - - ;; c+=2*a.n[3]*a.n[4] - mul rsi - mov rcx,rbp ; modulus - add r8,rax - adc r9,rdx - and rcx,r8 ; only need lower dword - shrd r8,r9,52 - xor r9,r9 - - ;; c+=a.n[4]*a.n[4] - mov rax,rsi - mul rax - ;; mov rbp,rbp ; modulus is already there! - add r8,rax - adc r9,rdx - and rbp,r8 - shrd r8,r9,52 - xor r9,r9 - - mov rsi,r8 - - ;; ******************************************************* - jmp common_exit_norm - end - - diff --git a/src/secp256k1/src/field_5x52_asm_impl.h b/src/secp256k1/src/field_5x52_asm_impl.h index f29605b11..98cc004bf 100644 --- a/src/secp256k1/src/field_5x52_asm_impl.h +++ b/src/secp256k1/src/field_5x52_asm_impl.h @@ -1,13 +1,502 @@ /********************************************************************** - * Copyright (c) 2013 Pieter Wuille * + * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille * * Distributed under the MIT software license, see the accompanying * * file COPYING or http://www.opensource.org/licenses/mit-license.php.* **********************************************************************/ +/** + * Changelog: + * - March 2013, Diederik Huys: original version + * - November 2014, Pieter Wuille: updated to use Peter Dettman's parallel multiplication algorithm + * - December 2014, Pieter Wuille: converted from YASM to GCC inline assembly + */ + #ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_ 
#define _SECP256K1_FIELD_INNER5X52_IMPL_H_ -void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r); -void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r); +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { +/** + * Registers: rdx:rax = multiplication accumulator + * r9:r8 = c + * r15:rcx = d + * r10-r14 = a0-a4 + * rbx = b + * rdi = r + * rsi = a / t? + */ + uint64_t tmp1, tmp2, tmp3; +__asm__ __volatile__( + "movq 0(%%rsi),%%r10\n" + "movq 8(%%rsi),%%r11\n" + "movq 16(%%rsi),%%r12\n" + "movq 24(%%rsi),%%r13\n" + "movq 32(%%rsi),%%r14\n" + + /* d += a3 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r13\n" + "movq %%rax,%%rcx\n" + "movq %%rdx,%%r15\n" + /* d += a2 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a1 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d = a0 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c = a4 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r14\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += (c & M) * R */ + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* t3 (tmp1) = d & M */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + "movq %%rsi,%q1\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* d += a4 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq 
%%rdx,%%r15\n" + /* d += a1 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a0 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += c * R */ + "movq %%r8,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* t4 = d & M (%%rsi) */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* tx = t4 >> 48 (tmp3) */ + "movq %%rsi,%%rax\n" + "shrq $48,%%rax\n" + "movq %%rax,%q3\n" + /* t4 &= (M >> 4) (tmp2) */ + "movq $0xffffffffffff,%%rax\n" + "andq %%rax,%%rsi\n" + "movq %%rsi,%q2\n" + /* c = a0 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r10\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += a4 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a1 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* u0 = d & M (%%rsi) */ + "movq %%rcx,%%rsi\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* u0 = (u0 << 4) | tx (%%rsi) */ + "shlq $4,%%rsi\n" + "movq %q3,%%rax\n" + "orq %%rax,%%rsi\n" + /* c += u0 * (R >> 4) */ + "movq $0x1000003d1,%%rax\n" + "mulq %%rsi\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[0] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,0(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a1 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += 
a0 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a4 * b2 */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a2 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c += (d & M) * R */ + "movq %%rcx,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 */ + "shrdq $52,%%r15,%%rcx\n" + "xorq %%r15,%%r15\n" + /* r[1] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,8(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a2 * b0 */ + "movq 0(%%rbx),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += a1 * b1 */ + "movq 8(%%rbx),%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* c += a0 * b2 (last use of %%r10 = a0) */ + "movq 16(%%rbx),%%rax\n" + "mulq %%r10\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */ + "movq %q2,%%rsi\n" + "movq %q1,%%r10\n" + /* d += a4 * b3 */ + "movq 24(%%rbx),%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* d += a3 * b4 */ + "movq 32(%%rbx),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rcx\n" + "adcq %%rdx,%%r15\n" + /* c += (d & M) * R */ + "movq %%rcx,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 (%%rcx only) */ + "shrdq $52,%%r15,%%rcx\n" + /* r[2] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,16(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + 
/* c += t3 */ + "addq %%r10,%%r8\n" + /* c += d * R */ + "movq %%rcx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[3] = c & M */ + "movq %%r8,%%rax\n" + "movq $0xfffffffffffff,%%rdx\n" + "andq %%rdx,%%rax\n" + "movq %%rax,24(%%rdi)\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* c += t4 (%%r8 only) */ + "addq %%rsi,%%r8\n" + /* r[4] = c */ + "movq %%r8,32(%%rdi)\n" +: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) +: "b"(b), "D"(r) +: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" +); +} + +SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { +/** + * Registers: rdx:rax = multiplication accumulator + * r9:r8 = c + * rcx:rbx = d + * r10-r14 = a0-a4 + * r15 = M (0xfffffffffffff) + * rdi = r + * rsi = a / t? + */ + uint64_t tmp1, tmp2, tmp3; +__asm__ __volatile__( + "movq 0(%%rsi),%%r10\n" + "movq 8(%%rsi),%%r11\n" + "movq 16(%%rsi),%%r12\n" + "movq 24(%%rsi),%%r13\n" + "movq 32(%%rsi),%%r14\n" + "movq $0xfffffffffffff,%%r15\n" + + /* d = (a0*2) * a3 */ + "leaq (%%r10,%%r10,1),%%rax\n" + "mulq %%r13\n" + "movq %%rax,%%rbx\n" + "movq %%rdx,%%rcx\n" + /* d += (a1*2) * a2 */ + "leaq (%%r11,%%r11,1),%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c = a4 * a4 */ + "movq %%r14,%%rax\n" + "mulq %%r14\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += (c & M) * R */ + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* t3 (tmp1) = d & M */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + "movq %%rsi,%q1\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* a4 *= 2 */ + "addq %%r14,%%r14\n" + /* d += a0 * a4 */ + "movq %%r10,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d+= (a1*2) * a3 */ + "leaq 
(%%r11,%%r11,1),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += a2 * a2 */ + "movq %%r12,%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += c * R */ + "movq %%r8,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* t4 = d & M (%%rsi) */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* tx = t4 >> 48 (tmp3) */ + "movq %%rsi,%%rax\n" + "shrq $48,%%rax\n" + "movq %%rax,%q3\n" + /* t4 &= (M >> 4) (tmp2) */ + "movq $0xffffffffffff,%%rax\n" + "andq %%rax,%%rsi\n" + "movq %%rsi,%q2\n" + /* c = a0 * a0 */ + "movq %%r10,%%rax\n" + "mulq %%r10\n" + "movq %%rax,%%r8\n" + "movq %%rdx,%%r9\n" + /* d += a1 * a4 */ + "movq %%r11,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += (a2*2) * a3 */ + "leaq (%%r12,%%r12,1),%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* u0 = d & M (%%rsi) */ + "movq %%rbx,%%rsi\n" + "andq %%r15,%%rsi\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* u0 = (u0 << 4) | tx (%%rsi) */ + "shlq $4,%%rsi\n" + "movq %q3,%%rax\n" + "orq %%rax,%%rsi\n" + /* c += u0 * (R >> 4) */ + "movq $0x1000003d1,%%rax\n" + "mulq %%rsi\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[0] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,0(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* a0 *= 2 */ + "addq %%r10,%%r10\n" + /* c += a0 * a1 */ + "movq %%r10,%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a2 * a4 */ + "movq %%r12,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* d += a3 * a3 */ + "movq %%r13,%%rax\n" + "mulq %%r13\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c += (d & M) * R */ + "movq %%rbx,%%rax\n" + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + 
"addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 */ + "shrdq $52,%%rcx,%%rbx\n" + "xorq %%rcx,%%rcx\n" + /* r[1] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,8(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += a0 * a2 (last use of %%r10) */ + "movq %%r10,%%rax\n" + "mulq %%r12\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */ + "movq %q2,%%rsi\n" + "movq %q1,%%r10\n" + /* c += a1 * a1 */ + "movq %%r11,%%rax\n" + "mulq %%r11\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d += a3 * a4 */ + "movq %%r13,%%rax\n" + "mulq %%r14\n" + "addq %%rax,%%rbx\n" + "adcq %%rdx,%%rcx\n" + /* c += (d & M) * R */ + "movq %%rbx,%%rax\n" + "andq %%r15,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* d >>= 52 (%%rbx only) */ + "shrdq $52,%%rcx,%%rbx\n" + /* r[2] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,16(%%rdi)\n" + /* c >>= 52 */ + "shrdq $52,%%r9,%%r8\n" + "xorq %%r9,%%r9\n" + /* c += t3 */ + "addq %%r10,%%r8\n" + /* c += d * R */ + "movq %%rbx,%%rax\n" + "movq $0x1000003d10,%%rdx\n" + "mulq %%rdx\n" + "addq %%rax,%%r8\n" + "adcq %%rdx,%%r9\n" + /* r[3] = c & M */ + "movq %%r8,%%rax\n" + "andq %%r15,%%rax\n" + "movq %%rax,24(%%rdi)\n" + /* c >>= 52 (%%r8 only) */ + "shrdq $52,%%r9,%%r8\n" + /* c += t4 (%%r8 only) */ + "addq %%rsi,%%r8\n" + /* r[4] = c */ + "movq %%r8,32(%%rdi)\n" +: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) +: "D"(r) +: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" +); +} #endif diff --git a/src/secp256k1/src/field_5x52_impl.h b/src/secp256k1/src/field_5x52_impl.h index 75b210eaf..63176d6de 100644 --- a/src/secp256k1/src/field_5x52_impl.h +++ b/src/secp256k1/src/field_5x52_impl.h @@ -102,6 +102,50 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { #endif } +static void 
secp256k1_fe_normalize_var(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + uint64_t m; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + /* At most a single final reduction is needed; check if the value is >= the field characteristic */ + x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL) + & (t0 >= 0xFFFFEFFFFFC2FULL)); + + if (x) { + t0 += 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; + + /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */ + VERIFY_CHECK(t4 >> 48 == x); + + /* Mask off the possible multiple of 2^256 from the final reduction */ + t4 &= 0x0FFFFFFFFFFFFULL; + } + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; + secp256k1_fe_verify(r); +#endif +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; @@ -255,7 +299,7 @@ static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s secp256k1_fe_verify(b); VERIFY_CHECK(r != b); #endif - secp256k1_fe_mul_inner(a->n, b->n, r->n); + secp256k1_fe_mul_inner(r->n, a->n, b->n); #ifdef VERIFY r->magnitude = 1; r->normalized = 0; @@ -268,7 +312,7 @@ static 
void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { VERIFY_CHECK(a->magnitude <= 8); secp256k1_fe_verify(a); #endif - secp256k1_fe_sqr_inner(a->n, r->n); + secp256k1_fe_sqr_inner(r->n, a->n); #ifdef VERIFY r->magnitude = 1; r->normalized = 0; diff --git a/src/secp256k1/src/field_5x52_int128_impl.h b/src/secp256k1/src/field_5x52_int128_impl.h index e552fb431..ec631833c 100644 --- a/src/secp256k1/src/field_5x52_int128_impl.h +++ b/src/secp256k1/src/field_5x52_int128_impl.h @@ -15,7 +15,7 @@ #define VERIFY_BITS(x, n) do { } while(0) #endif -SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b, uint64_t *r) { +SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { VERIFY_BITS(a[0], 56); VERIFY_BITS(a[1], 56); VERIFY_BITS(a[2], 56); @@ -152,7 +152,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ } -SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) { +SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { VERIFY_BITS(a[0], 56); VERIFY_BITS(a[1], 56); VERIFY_BITS(a[2], 56); diff --git a/src/secp256k1/src/field_gmp_impl.h b/src/secp256k1/src/field_gmp_impl.h index 8af7dd68f..73a55c4f0 100644 --- a/src/secp256k1/src/field_gmp_impl.h +++ b/src/secp256k1/src/field_gmp_impl.h @@ -46,6 +46,10 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { mpn_sub(r->n, r->n, FIELD_LIMBS, secp256k1_field_p, FIELD_LIMBS); } +static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { + secp256k1_fe_normalize(r); +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; for (int i=1; i 0) { - int j = i--; - secp256k1_fe_mul(&r[j], &r[i], &u); - secp256k1_fe_mul(&u, &u, &a[j]); - } - - r[0] = u; -} - static void secp256k1_fe_inv_all_var(size_t len, 
secp256k1_fe_t r[len], const secp256k1_fe_t a[len]) { if (len < 1) return; @@ -277,7 +253,7 @@ static void secp256k1_fe_start(void) { #endif if (secp256k1_fe_consts == NULL) { secp256k1_fe_inner_start(); - secp256k1_fe_consts_t *ret = (secp256k1_fe_consts_t*)malloc(sizeof(secp256k1_fe_consts_t)); + secp256k1_fe_consts_t *ret = (secp256k1_fe_consts_t*)checked_malloc(sizeof(secp256k1_fe_consts_t)); #ifndef USE_NUM_NONE secp256k1_num_set_bin(&ret->p, secp256k1_fe_consts_p, sizeof(secp256k1_fe_consts_p)); #endif diff --git a/src/secp256k1/src/group.h b/src/secp256k1/src/group.h index 0f14bd25f..ecfebcdc0 100644 --- a/src/secp256k1/src/group.h +++ b/src/secp256k1/src/group.h @@ -51,15 +51,16 @@ static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, cons /** Set a group element (affine) equal to the point with the given X coordinate, and given oddness * for Y. Return value indicates whether the result is valid. */ -static int secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd); +static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd); /** Check whether a group element is the point at infinity. */ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a); /** Check whether a group element is valid (i.e., on the curve). */ -static int secp256k1_ge_is_valid(const secp256k1_ge_t *a); +static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a); static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a); +static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a); /** Get a hex representation of a point. *rlen will be overwritten with the real length. 
*/ static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a); @@ -84,7 +85,7 @@ static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a); static void secp256k1_gej_get_x_var(secp256k1_fe_t *r, const secp256k1_gej_t *a); /** Set r equal to the inverse of a (i.e., mirrored around the X axis) */ -static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a); +static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a); /** Check whether a group element is the point at infinity. */ static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a); diff --git a/src/secp256k1/src/group_impl.h b/src/secp256k1/src/group_impl.h index cbd0d8c4f..1ab5d5fe7 100644 --- a/src/secp256k1/src/group_impl.h +++ b/src/secp256k1/src/group_impl.h @@ -28,13 +28,17 @@ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) { } static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) { - r->infinity = a->infinity; - r->x = a->x; - r->y = a->y; + *r = *a; secp256k1_fe_normalize(&r->y); secp256k1_fe_negate(&r->y, &r->y, 1); } +static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a) { + *r = *a; + secp256k1_fe_normalize_var(&r->y); + secp256k1_fe_negate(&r->y, &r->y, 1); +} + static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a) { char cx[65]; int lx=65; char cy[65]; int ly=65; @@ -85,15 +89,16 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) { static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], const secp256k1_gej_t a[len]) { size_t count = 0; - secp256k1_fe_t az[len]; + secp256k1_fe_t *az = checked_malloc(sizeof(secp256k1_fe_t) * len); for (size_t i=0; iy); } -static int secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) { +static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) { r->x = *x; secp256k1_fe_t x2; secp256k1_fe_sqr(&x2, x); 
secp256k1_fe_t x3; secp256k1_fe_mul(&x3, x, &x2); r->infinity = 0; secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7); secp256k1_fe_add(&c, &x3); - if (!secp256k1_fe_sqrt(&r->y, &c)) + if (!secp256k1_fe_sqrt_var(&r->y, &c)) return 0; - secp256k1_fe_normalize(&r->y); + secp256k1_fe_normalize_var(&r->y); if (secp256k1_fe_is_odd(&r->y) != odd) secp256k1_fe_negate(&r->y, &r->y, 1); return 1; @@ -162,12 +168,12 @@ static void secp256k1_gej_get_x_var(secp256k1_fe_t *r, const secp256k1_gej_t *a) secp256k1_fe_mul(r, &a->x, &zi2); } -static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) { +static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) { r->infinity = a->infinity; r->x = a->x; r->y = a->y; r->z = a->z; - secp256k1_fe_normalize(&r->y); + secp256k1_fe_normalize_var(&r->y); secp256k1_fe_negate(&r->y, &r->y, 1); } @@ -175,7 +181,7 @@ static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a) { return a->infinity; } -static int secp256k1_gej_is_valid(const secp256k1_gej_t *a) { +static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) { if (a->infinity) return 0; /** y^2 = x^3 + 7 @@ -189,12 +195,12 @@ static int secp256k1_gej_is_valid(const secp256k1_gej_t *a) { secp256k1_fe_t z6; secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2); secp256k1_fe_mul_int(&z6, 7); secp256k1_fe_add(&x3, &z6); - secp256k1_fe_normalize(&y2); - secp256k1_fe_normalize(&x3); + secp256k1_fe_normalize_var(&y2); + secp256k1_fe_normalize_var(&x3); return secp256k1_fe_equal(&y2, &x3); } -static int secp256k1_ge_is_valid(const secp256k1_ge_t *a) { +static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) { if (a->infinity) return 0; /* y^2 = x^3 + 7 */ @@ -202,8 +208,8 @@ static int secp256k1_ge_is_valid(const secp256k1_ge_t *a) { secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7); secp256k1_fe_add(&x3, &c); - secp256k1_fe_normalize(&y2); - 
secp256k1_fe_normalize(&x3); + secp256k1_fe_normalize_var(&y2); + secp256k1_fe_normalize_var(&x3); return secp256k1_fe_equal(&y2, &x3); } @@ -255,11 +261,11 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - secp256k1_fe_normalize(&u1); - secp256k1_fe_normalize(&u2); + secp256k1_fe_normalize_var(&u1); + secp256k1_fe_normalize_var(&u2); if (secp256k1_fe_equal(&u1, &u2)) { - secp256k1_fe_normalize(&s1); - secp256k1_fe_normalize(&s2); + secp256k1_fe_normalize_var(&s1); + secp256k1_fe_normalize_var(&s2); if (secp256k1_fe_equal(&s1, &s2)) { secp256k1_gej_double_var(r, a); } else { @@ -294,15 +300,14 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t * } r->infinity = 0; secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z); - secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1); + secp256k1_fe_t u1 = a->x; secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); - secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1); + secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_var(&s1); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - secp256k1_fe_normalize(&u1); - secp256k1_fe_normalize(&u2); + secp256k1_fe_normalize_var(&u1); + secp256k1_fe_normalize_var(&u2); if (secp256k1_fe_equal(&u1, &u2)) { - secp256k1_fe_normalize(&s1); - secp256k1_fe_normalize(&s2); + secp256k1_fe_normalize_var(&s2); if (secp256k1_fe_equal(&s1, &s2)) { secp256k1_gej_double_var(r, a); } else { @@ -434,7 +439,7 @@ static void secp256k1_ge_start(void) { }; #endif if (secp256k1_ge_consts == NULL) { - secp256k1_ge_consts_t *ret = (secp256k1_ge_consts_t*)malloc(sizeof(secp256k1_ge_consts_t)); + secp256k1_ge_consts_t *ret = 
(secp256k1_ge_consts_t*)checked_malloc(sizeof(secp256k1_ge_consts_t)); #ifdef USE_ENDOMORPHISM VERIFY_CHECK(secp256k1_fe_set_b32(&ret->beta, secp256k1_ge_consts_beta)); #endif diff --git a/src/secp256k1/src/scalar_impl.h b/src/secp256k1/src/scalar_impl.h index 7fc159df7..4408cce2d 100644 --- a/src/secp256k1/src/scalar_impl.h +++ b/src/secp256k1/src/scalar_impl.h @@ -40,7 +40,7 @@ static void secp256k1_scalar_start(void) { return; /* Allocate. */ - secp256k1_scalar_consts_t *ret = (secp256k1_scalar_consts_t*)malloc(sizeof(secp256k1_scalar_consts_t)); + secp256k1_scalar_consts_t *ret = (secp256k1_scalar_consts_t*)checked_malloc(sizeof(secp256k1_scalar_consts_t)); #ifndef USE_NUM_NONE static const unsigned char secp256k1_scalar_consts_order[] = { diff --git a/src/secp256k1/src/secp256k1.c b/src/secp256k1/src/secp256k1.c index 20fc27df7..0328db88f 100644 --- a/src/secp256k1/src/secp256k1.c +++ b/src/secp256k1/src/secp256k1.c @@ -40,15 +40,12 @@ void secp256k1_stop(void) { secp256k1_fe_stop(); } -int secp256k1_ecdsa_verify(const unsigned char *msg, int msglen, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) { +int secp256k1_ecdsa_verify(const unsigned char *msg32, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) { DEBUG_CHECK(secp256k1_ecmult_consts != NULL); - DEBUG_CHECK(msg != NULL); - DEBUG_CHECK(msglen <= 32); + DEBUG_CHECK(msg32 != NULL); DEBUG_CHECK(sig != NULL); DEBUG_CHECK(pubkey != NULL); - unsigned char msg32[32] = {0}; - memcpy(msg32 + 32 - msglen, msg, msglen); int ret = -3; secp256k1_scalar_t m; secp256k1_ecdsa_sig_t s; @@ -72,10 +69,9 @@ end: return ret; } -int secp256k1_ecdsa_sign(const unsigned char *message, int messagelen, unsigned char *signature, int *signaturelen, const unsigned char *seckey, const unsigned char *nonce) { +int secp256k1_ecdsa_sign(const unsigned char *msg32, unsigned char *signature, int *signaturelen, const unsigned char *seckey, const unsigned char *nonce) { 
DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL); - DEBUG_CHECK(message != NULL); - DEBUG_CHECK(messagelen <= 32); + DEBUG_CHECK(msg32 != NULL); DEBUG_CHECK(signature != NULL); DEBUG_CHECK(signaturelen != NULL); DEBUG_CHECK(seckey != NULL); @@ -85,12 +81,7 @@ int secp256k1_ecdsa_sign(const unsigned char *message, int messagelen, unsigned secp256k1_scalar_set_b32(&sec, seckey, NULL); int overflow = 0; secp256k1_scalar_set_b32(&non, nonce, &overflow); - { - unsigned char c[32] = {0}; - memcpy(c + 32 - messagelen, message, messagelen); - secp256k1_scalar_set_b32(&msg, c, NULL); - memset(c, 0, 32); - } + secp256k1_scalar_set_b32(&msg, msg32, NULL); int ret = !secp256k1_scalar_is_zero(&non) && !overflow; secp256k1_ecdsa_sig_t sig; if (ret) { @@ -105,10 +96,9 @@ int secp256k1_ecdsa_sign(const unsigned char *message, int messagelen, unsigned return ret; } -int secp256k1_ecdsa_sign_compact(const unsigned char *message, int messagelen, unsigned char *sig64, const unsigned char *seckey, const unsigned char *nonce, int *recid) { +int secp256k1_ecdsa_sign_compact(const unsigned char *msg32, unsigned char *sig64, const unsigned char *seckey, const unsigned char *nonce, int *recid) { DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL); - DEBUG_CHECK(message != NULL); - DEBUG_CHECK(messagelen <= 32); + DEBUG_CHECK(msg32 != NULL); DEBUG_CHECK(sig64 != NULL); DEBUG_CHECK(seckey != NULL); DEBUG_CHECK(nonce != NULL); @@ -117,12 +107,7 @@ int secp256k1_ecdsa_sign_compact(const unsigned char *message, int messagelen, u secp256k1_scalar_set_b32(&sec, seckey, NULL); int overflow = 0; secp256k1_scalar_set_b32(&non, nonce, &overflow); - { - unsigned char c[32] = {0}; - memcpy(c + 32 - messagelen, message, messagelen); - secp256k1_scalar_set_b32(&msg, c, NULL); - memset(c, 0, 32); - } + secp256k1_scalar_set_b32(&msg, msg32, NULL); int ret = !secp256k1_scalar_is_zero(&non) && !overflow; secp256k1_ecdsa_sig_t sig; if (ret) { @@ -138,18 +123,15 @@ int secp256k1_ecdsa_sign_compact(const unsigned 
char *message, int messagelen, u return ret; } -int secp256k1_ecdsa_recover_compact(const unsigned char *msg, int msglen, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) { +int secp256k1_ecdsa_recover_compact(const unsigned char *msg32, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) { DEBUG_CHECK(secp256k1_ecmult_consts != NULL); - DEBUG_CHECK(msg != NULL); - DEBUG_CHECK(msglen <= 32); + DEBUG_CHECK(msg32 != NULL); DEBUG_CHECK(sig64 != NULL); DEBUG_CHECK(pubkey != NULL); DEBUG_CHECK(pubkeylen != NULL); DEBUG_CHECK(recid >= 0 && recid <= 3); int ret = 0; - unsigned char msg32[32] = {0}; - memcpy(msg32 + 32 - msglen, msg, msglen); secp256k1_scalar_t m; secp256k1_ecdsa_sig_t sig; int overflow = 0; diff --git a/src/secp256k1/src/tests.c b/src/secp256k1/src/tests.c index 78cdd67f2..7ebb19ff9 100644 --- a/src/secp256k1/src/tests.c +++ b/src/secp256k1/src/tests.c @@ -11,6 +11,8 @@ #include #include +#include + #include "secp256k1.c" #include "testrand_impl.h" @@ -46,7 +48,7 @@ void random_group_element_test(secp256k1_ge_t *ge) { secp256k1_fe_t fe; do { random_field_element_test(&fe); - if (secp256k1_ge_set_xo(ge, &fe, secp256k1_rand32() & 1)) + if (secp256k1_ge_set_xo_var(ge, &fe, secp256k1_rand32() & 1)) break; } while(1); } @@ -400,6 +402,30 @@ void scalar_test(void) { CHECK(secp256k1_scalar_eq(&r1, &r2)); } + { + /* Test multiplicative identity. */ + secp256k1_scalar_t r1, v1; + secp256k1_scalar_set_int(&v1,1); + secp256k1_scalar_mul(&r1, &s1, &v1); + CHECK(secp256k1_scalar_eq(&r1, &s1)); + } + + { + /* Test additive identity. */ + secp256k1_scalar_t r1, v0; + secp256k1_scalar_set_int(&v0,0); + secp256k1_scalar_add(&r1, &s1, &v0); + CHECK(secp256k1_scalar_eq(&r1, &s1)); + } + + { + /* Test zero product property. 
*/ + secp256k1_scalar_t r1, v0; + secp256k1_scalar_set_int(&v0,0); + secp256k1_scalar_mul(&r1, &s1, &v0); + CHECK(secp256k1_scalar_eq(&r1, &v0)); + } + } void run_scalar_tests(void) { @@ -411,9 +437,12 @@ void run_scalar_tests(void) { /* (-1)+1 should be zero. */ secp256k1_scalar_t s, o; secp256k1_scalar_set_int(&s, 1); + CHECK(secp256k1_scalar_is_one(&s)); secp256k1_scalar_negate(&o, &s); secp256k1_scalar_add(&o, &o, &s); CHECK(secp256k1_scalar_is_zero(&o)); + secp256k1_scalar_negate(&o, &o); + CHECK(secp256k1_scalar_is_zero(&o)); } #ifndef USE_NUM_NONE @@ -459,14 +488,14 @@ void random_fe_non_zero(secp256k1_fe_t *nz) { void random_fe_non_square(secp256k1_fe_t *ns) { random_fe_non_zero(ns); secp256k1_fe_t r; - if (secp256k1_fe_sqrt(&r, ns)) { + if (secp256k1_fe_sqrt_var(&r, ns)) { secp256k1_fe_negate(ns, ns, 1); } } int check_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { secp256k1_fe_t an = *a; secp256k1_fe_normalize(&an); - secp256k1_fe_t bn = *b; secp256k1_fe_normalize(&bn); + secp256k1_fe_t bn = *b; secp256k1_fe_normalize_var(&bn); return secp256k1_fe_equal(&an, &bn); } @@ -476,6 +505,55 @@ int check_fe_inverse(const secp256k1_fe_t *a, const secp256k1_fe_t *ai) { return check_fe_equal(&x, &one); } +void run_field_misc(void) { + const unsigned char f32_5[32] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + }; + secp256k1_fe_t x; + secp256k1_fe_t y; + secp256k1_fe_t z; + secp256k1_fe_t q; + secp256k1_fe_t fe5; + CHECK(secp256k1_fe_set_b32(&fe5, f32_5)); + for (int i=0; i<5*count; i++) { + random_fe(&x); + random_fe_non_zero(&y); + /* Test the fe equality and comparison operations. */ + CHECK(secp256k1_fe_cmp_var(&x, &x) == 0); + CHECK(secp256k1_fe_equal(&x, &x)); + z = x; + secp256k1_fe_add(&z,&y); + secp256k1_fe_normalize(&z); + /* Test the conditional move. 
*/ + secp256k1_fe_cmov(&z, &x, 0); + CHECK(secp256k1_fe_equal(&x, &z) == 0); + CHECK(secp256k1_fe_cmp_var(&x, &z) != 0); + secp256k1_fe_cmov(&y, &x, 1); + CHECK(secp256k1_fe_equal(&x, &y)); + /* Test that mul_int, mul, and add agree. */ + secp256k1_fe_add(&y, &x); + secp256k1_fe_add(&y, &x); + z = x; + secp256k1_fe_mul_int(&z, 3); + CHECK(check_fe_equal(&y, &z)); + secp256k1_fe_add(&y, &x); + secp256k1_fe_add(&z, &x); + CHECK(check_fe_equal(&z, &y)); + z = x; + secp256k1_fe_mul_int(&z, 5); + secp256k1_fe_mul(&q, &x, &fe5); + CHECK(check_fe_equal(&z, &q)); + secp256k1_fe_negate(&x, &x, 1); + secp256k1_fe_add(&z, &x); + secp256k1_fe_add(&q, &x); + CHECK(check_fe_equal(&y, &z)); + CHECK(check_fe_equal(&q, &y)); + } +} + void run_field_inv(void) { secp256k1_fe_t x, xi, xii; for (int i=0; i<10*count; i++) { @@ -498,23 +576,6 @@ void run_field_inv_var(void) { } } -void run_field_inv_all(void) { - secp256k1_fe_t x[16], xi[16], xii[16]; - /* Check it's safe to call for 0 elements */ - secp256k1_fe_inv_all(0, xi, x); - for (int i=0; i>=2; + if ((r & 3) == 0) len = (r & 252) >> 3; + r>>=8; + if (len == 65) { + in[0] = (r & 2) ? 4 : (r & 1? 6 : 7); + } else { + in[0] = (r & 1) ? 2 : 3; + } + r>>=2; + if ((r & 7) == 0) in[0] = (r & 2040) >> 3; + r>>=11; + if (len > 1) secp256k1_rand256(&in[1]); + if (len > 33) secp256k1_rand256(&in[33]); + secp256k1_ge_t elem; + secp256k1_ge_t elem2; + if (secp256k1_eckey_pubkey_parse(&elem, in, len)) { + unsigned char out[65]; + unsigned char firstb; + int res; + int size = len; + firstb = in[0]; + /* If the pubkey can be parsed, it should round-trip... */ + CHECK(secp256k1_eckey_pubkey_serialize(&elem, out, &size, len == 33)); + CHECK(size == len); + CHECK(memcmp(&in[1], &out[1], len-1) == 0); + /* ... except for the type of hybrid inputs. 
*/ + if ((in[0] != 6) && (in[0] != 7)) CHECK(in[0] == out[0]); + size = 65; + CHECK(secp256k1_eckey_pubkey_serialize(&elem, in, &size, 0)); + CHECK(size == 65); + CHECK(secp256k1_eckey_pubkey_parse(&elem2, in, size)); + CHECK(ge_equals_ge(&elem,&elem2)); + /* Check that the X9.62 hybrid type is checked. */ + in[0] = (r & 1) ? 6 : 7; + res = secp256k1_eckey_pubkey_parse(&elem2, in, size); + if (firstb == 2 || firstb == 3) { + if (in[0] == firstb + 4) CHECK(res); + else CHECK(!res); + } + if (res) { + CHECK(ge_equals_ge(&elem,&elem2)); + CHECK(secp256k1_eckey_pubkey_serialize(&elem, out, &size, 0)); + CHECK(memcmp(&in[1], &out[1], 64) == 0); + } + } +} + +void run_random_pubkeys(void) { + for (int i=0; i<10*count; i++) { + test_random_pubkeys(); + } +} + void run_ecdsa_end_to_end(void) { for (int i=0; i<64*count; i++) { test_ecdsa_end_to_end(); @@ -995,10 +1127,10 @@ void test_ecdsa_edge_cases(void) { }; unsigned char pubkey[65]; int pubkeylen = 65; - CHECK(!secp256k1_ecdsa_recover_compact(msg32, 32, sig64, pubkey, &pubkeylen, 0, 0)); - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sig64, pubkey, &pubkeylen, 0, 1)); - CHECK(!secp256k1_ecdsa_recover_compact(msg32, 32, sig64, pubkey, &pubkeylen, 0, 2)); - CHECK(!secp256k1_ecdsa_recover_compact(msg32, 32, sig64, pubkey, &pubkeylen, 0, 3)); + CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 0)); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 1)); + CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 2)); + CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 3)); /* signature (r,s) = (4,4), which can be recovered with all 4 recids. */ const unsigned char sigb64[64] = { @@ -1016,6 +1148,36 @@ void test_ecdsa_edge_cases(void) { for (int recid = 0; recid < 4; recid++) { /* (4,4) encoded in DER. 
*/ unsigned char sigbder[8] = {0x30, 0x06, 0x02, 0x01, 0x04, 0x02, 0x01, 0x04}; + unsigned char sigcder_zr[7] = {0x30, 0x05, 0x02, 0x00, 0x02, 0x01, 0x01}; + unsigned char sigcder_zs[7] = {0x30, 0x05, 0x02, 0x01, 0x01, 0x02, 0x00}; + unsigned char sigbderalt1[39] = { + 0x30, 0x25, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x02, 0x01, 0x04, + }; + unsigned char sigbderalt2[39] = { + 0x30, 0x25, 0x02, 0x01, 0x04, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }; + unsigned char sigbderalt3[40] = { + 0x30, 0x26, 0x02, 0x21, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x02, 0x01, 0x04, + }; + unsigned char sigbderalt4[40] = { + 0x30, 0x26, 0x02, 0x01, 0x04, 0x02, 0x21, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + }; /* (order + r,4) encoded in DER. 
*/ unsigned char sigbderlong[40] = { 0x30, 0x26, 0x02, 0x21, 0x00, 0xFF, 0xFF, 0xFF, @@ -1024,18 +1186,45 @@ void test_ecdsa_edge_cases(void) { 0xE6, 0xAF, 0x48, 0xA0, 0x3B, 0xBF, 0xD2, 0x5E, 0x8C, 0xD0, 0x36, 0x41, 0x45, 0x02, 0x01, 0x04 }; - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sigb64, pubkeyb, &pubkeyblen, 1, recid)); - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == 1); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sigb64, pubkeyb, &pubkeyblen, 1, recid)); + CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == 1); for (int recid2 = 0; recid2 < 4; recid2++) { unsigned char pubkey2b[33]; int pubkey2blen = 33; - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sigb64, pubkey2b, &pubkey2blen, 1, recid2)); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sigb64, pubkey2b, &pubkey2blen, 1, recid2)); /* Verifying with (order + r,4) should always fail. */ - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigbderlong, sizeof(sigbderlong), pubkey2b, pubkey2blen) != 1); + CHECK(secp256k1_ecdsa_verify(msg32, sigbderlong, sizeof(sigbderlong), pubkey2b, pubkey2blen) != 1); } + /* DER parsing tests. */ + /* Zero length r/s. */ + CHECK(secp256k1_ecdsa_verify(msg32, sigcder_zr, sizeof(sigcder_zr), pubkeyb, pubkeyblen) == -2); + CHECK(secp256k1_ecdsa_verify(msg32, sigcder_zs, sizeof(sigcder_zs), pubkeyb, pubkeyblen) == -2); + /* Leading zeros. 
*/ + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt1, sizeof(sigbderalt1), pubkeyb, pubkeyblen) == 1); + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt2, sizeof(sigbderalt2), pubkeyb, pubkeyblen) == 1); + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt3, sizeof(sigbderalt3), pubkeyb, pubkeyblen) == 1); + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt4, sizeof(sigbderalt4), pubkeyb, pubkeyblen) == 1); + sigbderalt3[4] = 1; + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt3, sizeof(sigbderalt3), pubkeyb, pubkeyblen) == -2); + sigbderalt4[7] = 1; + CHECK(secp256k1_ecdsa_verify(msg32, sigbderalt4, sizeof(sigbderalt4), pubkeyb, pubkeyblen) == -2); /* Damage signature. */ sigbder[7]++; - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == 0); + CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == 0); + sigbder[7]--; + CHECK(secp256k1_ecdsa_verify(msg32, sigbder, 6, pubkeyb, pubkeyblen) == -2); + CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder)-1, pubkeyb, pubkeyblen) == -2); + for(int i = 0; i<8; i++) { + unsigned char orig = sigbder[i]; + /* Try every single-byte change. Bytes 4 and 7 hold the r and s values of the (4,4) signature, so altering them is expected to give 0; altering any other byte is expected to give -2. The conditional must be parenthesized: '==' binds tighter than '?:'. */ + for (int c=0; c<256; c++) { + if (c == orig ) continue; + sigbder[i] = c; + CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == + ((i==4 || i==7) ? 0 : -2) ); + } + sigbder[i] = orig; + } } /* Test the case where ECDSA recomputes a point that is infinity. 
*/ @@ -1069,18 +1258,60 @@ void test_ecdsa_edge_cases(void) { }; unsigned char pubkeyc[65]; int pubkeyclen = 65; - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sigc64, pubkeyc, &pubkeyclen, 0, 0) == 1); - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 1); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sigc64, pubkeyc, &pubkeyclen, 0, 0) == 1); + CHECK(secp256k1_ecdsa_verify(msg32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 1); sigcder[4] = 0; sigc64[31] = 0; - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sigc64, pubkeyb, &pubkeyblen, 1, 0) == 0); - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 0); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sigc64, pubkeyb, &pubkeyblen, 1, 0) == 0); + CHECK(secp256k1_ecdsa_verify(msg32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 0); sigcder[4] = 1; sigcder[7] = 0; sigc64[31] = 1; sigc64[63] = 0; - CHECK(secp256k1_ecdsa_recover_compact(msg32, 32, sigc64, pubkeyb, &pubkeyblen, 1, 0) == 0); - CHECK(secp256k1_ecdsa_verify(msg32, 32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 0); + CHECK(secp256k1_ecdsa_recover_compact(msg32, sigc64, pubkeyb, &pubkeyblen, 1, 0) == 0); + CHECK(secp256k1_ecdsa_verify(msg32, sigcder, sizeof(sigcder), pubkeyc, pubkeyclen) == 0); + } + + /*Signature where s would be zero.*/ + { + const unsigned char nonce[32] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }; + const unsigned char key[32] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + }; + unsigned char msg[32] = { + 0x86, 0x41, 0x99, 0x81, 0x06, 0x23, 0x44, 0x53, + 0xaa, 0x5f, 0x9d, 0x6a, 0x31, 0x78, 0xf4, 0xf7, + 0xb8, 0x12, 
0xe0, 0x0b, 0x81, 0x7a, 0x77, 0x62, + 0x65, 0xdf, 0xdd, 0x31, 0xb9, 0x3e, 0x29, 0xa9, + }; + unsigned char sig[72]; + int siglen = 72; + CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce) == 0); + msg[31] = 0xaa; + siglen = 72; + CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce) == 1); + } + + /* Privkey export where pubkey is the point at infinity. */ + { + unsigned char privkey[300]; + unsigned char seckey[32] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, + 0xba, 0xae, 0xdc, 0xe6, 0xaf, 0x48, 0xa0, 0x3b, + 0xbf, 0xd2, 0x5e, 0x8c, 0xd0, 0x36, 0x41, 0x41, + }; + int outlen = 300; + CHECK(!secp256k1_ec_privkey_export(seckey, privkey, &outlen, 0)); + CHECK(!secp256k1_ec_privkey_export(seckey, privkey, &outlen, 1)); } } @@ -1185,8 +1416,8 @@ int main(int argc, char **argv) { /* field tests */ run_field_inv(); run_field_inv_var(); - run_field_inv_all(); run_field_inv_all_var(); + run_field_misc(); run_sqr(); run_sqrt(); @@ -1199,6 +1430,7 @@ int main(int argc, char **argv) { run_ecmult_chain(); /* ecdsa tests */ + run_random_pubkeys(); run_ecdsa_sign_verify(); run_ecdsa_end_to_end(); run_ecdsa_edge_cases(); diff --git a/src/secp256k1/src/util.h b/src/secp256k1/src/util.h index 08b23a9d3..c3a8f3a42 100644 --- a/src/secp256k1/src/util.h +++ b/src/secp256k1/src/util.h @@ -61,6 +61,12 @@ #define VERIFY_CHECK(cond) do { (void)(cond); } while(0) #endif +static inline void *checked_malloc(size_t size) { + void *ret = malloc(size); + CHECK(ret != NULL); + return ret; +} + /* Macro for restrict, when available and not in a VERIFY build. */ #if defined(SECP256K1_BUILD) && defined(VERIFY) # define SECP256K1_RESTRICT