/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ /** * Basic type definitions. * * This header file defines the generic integer types that will be used * for the implementation of hash functions; it also contains helper * functions which encode and decode multi-byte integer values, using * either little-endian or big-endian conventions. * * This file contains a compile-time test on the size of a byte * (the unsigned char C type). If bytes are not octets, * i.e. if they do not have a size of exactly 8 bits, then compilation * is aborted. Architectures where bytes are not octets are relatively * rare, even in the embedded devices market. We forbid non-octet bytes * because there is no clear convention on how octet streams are encoded * on such systems. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ===========================(LICENSE END)============================= * * @file sph_types.h * @author Thomas Pornin */ #ifndef SPH_TYPES_H__ #define SPH_TYPES_H__ #include /* * All our I/O functions are defined over octet streams. We do not know * how to handle input data if bytes are not octets. */ #if CHAR_BIT != 8 #error This code requires 8-bit bytes #endif /* ============= BEGIN documentation block for Doxygen ============ */ #ifdef DOXYGEN_IGNORE /** @mainpage sphlib C code documentation * * @section overview Overview * * sphlib is a library which contains implementations of * various cryptographic hash functions. These pages have been generated * with doxygen and * document the API for the C implementations. * * The API is described in appropriate header files, which are available * in the "Files" section. Each hash function family has its own header, * whose name begins with "sph_" and contains the family * name. For instance, the API for the RIPEMD hash functions is available * in the header file sph_ripemd.h. * * @section principles API structure and conventions * * @subsection io Input/output conventions * * In all generality, hash functions operate over strings of bits. * Individual bits are rarely encountered in C programming or actual * communication protocols; most protocols converge on the ubiquitous * "octet" which is a group of eight bits. Data is thus expressed as a * stream of octets. The C programming language contains the notion of a * "byte", which is a data unit managed under the type

"unsigned char". The C standard prescribes that a byte should hold at
 * least eight bits, but possibly more. Most modern architectures, even
 * in the embedded world, feature eight-bit bytes, i.e. map bytes to
 * octets.
 *
 * Nevertheless, for some of the implemented hash functions, an extra
 * API has been added, which allows the input of arbitrary sequences of
 * bits: when the computation is about to be closed, 1 to 7 extra bits
 * can be added. The functions for which this API is implemented include
 * the SHA-2 functions and all SHA-3 candidates.
 *
 * sphlib defines hash functions which may hash octet streams,
 * i.e. streams of bits where the number of bits is a multiple of eight.
 * The data input functions in the sphlib API expect data
 * as anonymous pointers ("const void *") with a length
 * (of type "size_t") which gives the input data chunk length
 * in bytes. A byte is assumed to be an octet; the sph_types.h
 * header contains a compile-time test which prevents compilation on
 * architectures where this property is not met.
 *
 * The hash function output is also converted into bytes. All currently
 * implemented hash functions have an output width which is a multiple of
 * eight, and this is likely to remain true for new designs.
 *
 * Most hash functions internally convert input data into 32-bit or 64-bit
 * words, using either little-endian or big-endian conversion. The hash
 * output also often consists of such words, which are encoded into output
 * bytes with a similar endianness convention. Some hash functions have
 * been only loosely specified on that subject; when necessary,
 * sphlib has been tested against published "reference"
 * implementations in order to use the same conventions.
 *
 * @subsection shortname Function short name
 *
 * Each implemented hash function has a "short name" which is used
 * internally to derive the identifiers for the functions and context
 * structures which the function uses. For instance, MD5 has the short
 * name "md5".
Short names are listed in the next section,
 * for the implemented hash functions. In subsequent sections, the
 * short name will be assumed to be "XXX": replace with the
 * actual hash function name to get the C identifier.
 *
 * Note: some functions within the same family share the same core
 * elements, such as update function or context structure. Correspondingly,
 * some of the defined types or functions may actually be macros which
 * transparently evaluate to another type or function name.
 *
 * @subsection context Context structure
 *
 * Each implemented hash function has its own context structure, available
 * under the type name "sph_XXX_context" for the hash function
 * with short name "XXX". This structure holds all needed
 * state for a running hash computation.
 *
 * The contents of these structures are meant to be opaque, and private
 * to the implementation. However, these contents are specified in the
 * header files so that application code which uses sphlib
 * may access the size of those structures.
 *
 * The caller is responsible for allocating the context structure,
 * whether by dynamic allocation (malloc() or equivalent),
 * static allocation (a global permanent variable), as an automatic
 * variable ("on the stack"), or by any other means which ensures proper
 * structure alignment. sphlib code performs no dynamic
 * allocation by itself.
 *
 * The context must be initialized before use, using the
 * sph_XXX_init() function. This function sets the context
 * state to proper initial values for hashing.
 *
 * Since all state data is contained within the context structure,
 * sphlib is thread-safe and reentrant: several hash
 * computations may be performed in parallel, provided that they do not
 * operate on the same context. Moreover, a running computation can be
 * cloned by copying the context (with a simple memcpy()):
 * the context and its clone are then independent and may be updated
 * with new data and/or closed without interfering with each other.
* Similarly, a context structure can be moved in memory at will:
 * context structures contain no pointer, in particular no pointer to
 * themselves.
 *
 * @subsection dataio Data input
 *
 * Hashed data is input with the sph_XXX() function, which
 * takes as parameters a pointer to the context, a pointer to the data
 * to hash, and the number of data bytes to hash. The context is updated
 * with the new data.
 *
 * Data can be input in one or several calls, with arbitrary input lengths.
 * However, it is best, performance-wise, to input data by relatively big
 * chunks (say a few kilobytes), because this allows sphlib to
 * optimize things and avoid internal copying.
 *
 * When all data has been input, the context can be closed with
 * sph_XXX_close(). The hash output is computed and written
 * into the provided buffer. The caller must take care to provide a
 * buffer of appropriate length; e.g., when using SHA-1, the output is
 * a 20-byte word, therefore the output buffer must be at least 20 bytes
 * long.
 *
 * For some hash functions, the sph_XXX_addbits_and_close()
 * function can be used instead of sph_XXX_close(). This
 * function can take a few extra bits to be added at
 * the end of the input message. This allows hashing messages with a
 * bit length which is not a multiple of 8. The extra bits are provided
 * as an unsigned integer value, and a bit count. The bit count must be
 * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
 * 0 (bits of numerical value 128, 64, 32... down to 1), in that order.
 * For instance, to add three bits of value 1, 1 and 0, the unsigned
 * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
 * will be 3.
 *
 * The SPH_SIZE_XXX macro is defined for each hash function;
 * it evaluates to the function output size, expressed in bits. For instance,
 * SPH_SIZE_sha1 evaluates to 160.
 *
 * When closed, the context is automatically reinitialized and can be
 * immediately used for another computation.
It is not necessary to call * sph_XXX_init() after a close. Note that * sph_XXX_init() can still be called to "reset" a context, * i.e. forget previously input data, and get back to the initial state. * * @subsection alignment Data alignment * * "Alignment" is a property of data, which is said to be "properly * aligned" when its emplacement in memory is such that the data can * be optimally read by full words. This depends on the type of access; * basically, some hash functions will read data by 32-bit or 64-bit * words. sphlib does not mandate such alignment for input * data, but using aligned data can substantially improve performance. * * As a rule, it is best to input data by chunks whose length (in bytes) * is a multiple of eight, and which begins at "generally aligned" * addresses, such as the base address returned by a call to * malloc(). * * @section functions Implemented functions * * We give here the list of implemented functions. They are grouped by * family; to each family corresponds a specific header file. Each * individual function has its associated "short name". Please refer to * the documentation for that header file to get details on the hash * function denomination and provenance. * * Note: the functions marked with a '(64)' in the list below are * available only if the C compiler provides an integer type of length * 64 bits or more. Such a type is mandatory in the latest C standard * (ISO 9899:1999, aka "C99") and is present in several older compilers * as well, so chances are that such a type is available. 
* * - HAVAL family: file sph_haval.h * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 * - MD2: file sph_md2.h, short name: md2 * - MD4: file sph_md4.h, short name: md4 * - MD5: file sph_md5.h, short name: md5 * - PANAMA: file sph_panama.h, short name: panama * - RadioGatun family: file sph_radiogatun.h * - RadioGatun[32]: short name: radiogatun32 * - RadioGatun[64]: short name: radiogatun64 (64) * - RIPEMD family: file sph_ripemd.h * - RIPEMD: short name: ripemd * - RIPEMD-128: short name: ripemd128 * - RIPEMD-160: short name: ripemd160 * - SHA-0: file sph_sha0.h, short name: sha0 * - SHA-1: file sph_sha1.h, short name: sha1 * - SHA-2 family, 32-bit hashes: file sph_sha2.h * - SHA-224: short name: sha224 * - SHA-256: short name: sha256 * - SHA-384: short name: sha384 (64) * - SHA-512: short name: sha512 (64) * - Tiger family: file sph_tiger.h * - Tiger: short name: tiger (64) * - Tiger2: short name: tiger2 (64) * - WHIRLPOOL family: file sph_whirlpool.h * - WHIRLPOOL-0: short name: whirlpool0 (64) * - WHIRLPOOL-1: short name: whirlpool1 (64) * - WHIRLPOOL: short name: whirlpool (64) * * The fourteen second-round SHA-3 candidates 
are also implemented; * when applicable, the implementations follow the "final" specifications * as published for the third round of the SHA-3 competition (BLAKE, * Groestl, JH, Keccak and Skein have been tweaked for third round). * * - BLAKE family: file sph_blake.h * - BLAKE-224: short name: blake224 * - BLAKE-256: short name: blake256 * - BLAKE-384: short name: blake384 * - BLAKE-512: short name: blake512 * - BMW (Blue Midnight Wish) family: file sph_bmw.h * - BMW-224: short name: bmw224 * - BMW-256: short name: bmw256 * - BMW-384: short name: bmw384 (64) * - BMW-512: short name: bmw512 (64) * - CubeHash family: file sph_cubehash.h (specified as * CubeHash16/32 in the CubeHash specification) * - CubeHash-224: short name: cubehash224 * - CubeHash-256: short name: cubehash256 * - CubeHash-384: short name: cubehash384 * - CubeHash-512: short name: cubehash512 * - ECHO family: file sph_echo.h * - ECHO-224: short name: echo224 * - ECHO-256: short name: echo256 * - ECHO-384: short name: echo384 * - ECHO-512: short name: echo512 * - Fugue family: file sph_fugue.h * - Fugue-224: short name: fugue224 * - Fugue-256: short name: fugue256 * - Fugue-384: short name: fugue384 * - Fugue-512: short name: fugue512 * - Groestl family: file sph_groestl.h * - Groestl-224: short name: groestl224 * - Groestl-256: short name: groestl256 * - Groestl-384: short name: groestl384 * - Groestl-512: short name: groestl512 * - Hamsi family: file sph_hamsi.h * - Hamsi-224: short name: hamsi224 * - Hamsi-256: short name: hamsi256 * - Hamsi-384: short name: hamsi384 * - Hamsi-512: short name: hamsi512 * - JH family: file sph_jh.h * - JH-224: short name: jh224 * - JH-256: short name: jh256 * - JH-384: short name: jh384 * - JH-512: short name: jh512 * - Keccak family: file sph_keccak.h * - Keccak-224: short name: keccak224 * - Keccak-256: short name: keccak256 * - Keccak-384: short name: keccak384 * - Keccak-512: short name: keccak512 * - Luffa family: file sph_luffa.h * - Luffa-224: short name: 
luffa224
 *   - Luffa-256: short name: luffa256
 *   - Luffa-384: short name: luffa384
 *   - Luffa-512: short name: luffa512
 * - Shabal family: file sph_shabal.h
 *   - Shabal-192: short name: shabal192
 *   - Shabal-224: short name: shabal224
 *   - Shabal-256: short name: shabal256
 *   - Shabal-384: short name: shabal384
 *   - Shabal-512: short name: shabal512
 * - SHAvite-3 family: file sph_shavite.h
 *   - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
 *     short name: shavite224
 *   - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
 *     short name: shavite256
 *   - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
 *     short name: shavite384
 *   - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
 *     short name: shavite512
 * - SIMD family: file sph_simd.h
 *   - SIMD-224: short name: simd224
 *   - SIMD-256: short name: simd256
 *   - SIMD-384: short name: simd384
 *   - SIMD-512: short name: simd512
 * - Skein family: file sph_skein.h
 *   - Skein-224 (nominally specified as Skein-512-224): short name:
 *     skein224 (64)
 *   - Skein-256 (nominally specified as Skein-512-256): short name:
 *     skein256 (64)
 *   - Skein-384 (nominally specified as Skein-512-384): short name:
 *     skein384 (64)
 *   - Skein-512 (nominally specified as Skein-512-512): short name:
 *     skein512 (64)
 *
 * For the second-round SHA-3 candidates, the functions are as specified
 * for round 2, i.e. with the "tweaks" that some candidates added
 * between round 1 and round 2. Also, some of the submitted packages for
 * round 2 contained errors, in the specification, reference code, or
 * both. sphlib implements the corrected versions.
 */

/** @hideinitializer
 * Unsigned integer type whose length is at least 32 bits; on most
 * architectures, it will have a width of exactly 32 bits. Unsigned C
 * types implement arithmetics modulo a power of 2; use the
 * SPH_T32() macro to ensure that the value is truncated
 * to exactly 32 bits.
Unless otherwise specified, all macros and
 * functions which accept sph_u32 values assume that these
 * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
 * where sph_u32 is larger than that.
 */
typedef __arch_dependant__ sph_u32;

/** @hideinitializer
 * Signed integer type corresponding to sph_u32; it has
 * width 32 bits or more.
 */
typedef __arch_dependant__ sph_s32;

/** @hideinitializer
 * Unsigned integer type whose length is at least 64 bits; on most
 * architectures which feature such a type, it will have a width of
 * exactly 64 bits. C99-compliant platforms will have this type; it
 * is also defined when the GNU compiler (gcc) is used, and on
 * platforms where unsigned long is large enough. If this
 * type is not available, then some hash functions which depend on
 * a 64-bit type will not be available (most notably SHA-384, SHA-512,
 * Tiger and WHIRLPOOL).
 */
typedef __arch_dependant__ sph_u64;

/** @hideinitializer
 * Signed integer type corresponding to sph_u64; it has
 * width 64 bits or more.
 */
typedef __arch_dependant__ sph_s64;

/**
 * This macro expands the token x into a suitable
 * constant expression of type sph_u32. Depending on
 * how this type is defined, a suffix such as UL may
 * be appended to the argument.
 *
 * @param x   the token to expand into a suitable constant expression
 */
#define SPH_C32(x)

/**
 * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
 * a no-op, recognized as such by the compiler.
 *
 * @param x   the value to truncate (of type sph_u32)
 */
#define SPH_T32(x)

/**
 * Rotate a 32-bit value by a number of bits to the left. The rotate
 * count must reside between 1 and 31. This macro assumes that its
 * first argument fits in 32 bits (no extra bit allowed on machines where
 * sph_u32 is wider); both arguments may be evaluated
 * several times.
*
 * @param x   the value to rotate (of type sph_u32)
 * @param n   the rotation count (between 1 and 31, inclusive)
 */
#define SPH_ROTL32(x, n)

/**
 * Rotate a 32-bit value by a number of bits to the right. The rotate
 * count must reside between 1 and 31. This macro assumes that its
 * first argument fits in 32 bits (no extra bit allowed on machines where
 * sph_u32 is wider); both arguments may be evaluated
 * several times.
 *
 * @param x   the value to rotate (of type sph_u32)
 * @param n   the rotation count (between 1 and 31, inclusive)
 */
#define SPH_ROTR32(x, n)

/**
 * This macro is defined on systems for which a 64-bit type has been
 * detected, and is used for sph_u64.
 */
#define SPH_64

/**
 * This macro is defined on systems for which the "native" integer size is
 * 64 bits (64-bit values fit in one register).
 */
#define SPH_64_TRUE

/**
 * This macro expands the token x into a suitable
 * constant expression of type sph_u64. Depending on
 * how this type is defined, a suffix such as ULL may
 * be appended to the argument. This macro is defined only if a
 * 64-bit type was detected and used for sph_u64.
 *
 * @param x   the token to expand into a suitable constant expression
 */
#define SPH_C64(x)

/**
 * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
 * a no-op, recognized as such by the compiler. This macro is defined only
 * if a 64-bit type was detected and used for sph_u64.
 *
 * @param x   the value to truncate (of type sph_u64)
 */
#define SPH_T64(x)

/**
 * Rotate a 64-bit value by a number of bits to the left. The rotate
 * count must reside between 1 and 63. This macro assumes that its
 * first argument fits in 64 bits (no extra bit allowed on machines where
 * sph_u64 is wider); both arguments may be evaluated
 * several times. This macro is defined only if a 64-bit type was detected
 * and used for sph_u64.
*
 * @param x   the value to rotate (of type sph_u64)
 * @param n   the rotation count (between 1 and 63, inclusive)
 */
#define SPH_ROTL64(x, n)

/**
 * Rotate a 64-bit value by a number of bits to the right. The rotate
 * count must reside between 1 and 63. This macro assumes that its
 * first argument fits in 64 bits (no extra bit allowed on machines where
 * sph_u64 is wider); both arguments may be evaluated
 * several times. This macro is defined only if a 64-bit type was detected
 * and used for sph_u64.
 *
 * @param x   the value to rotate (of type sph_u64)
 * @param n   the rotation count (between 1 and 63, inclusive)
 */
#define SPH_ROTR64(x, n)

/**
 * This macro evaluates to inline or an equivalent construction,
 * if available on the compilation platform, or to nothing otherwise. This
 * is used to declare inline functions, for which the compiler should
 * endeavour to include the code directly in the caller. Inline functions
 * are typically defined in header files as replacement for macros.
 */
#define SPH_INLINE

/**
 * This macro is defined if the platform has been detected as using
 * little-endian convention. This implies that the sph_u32
 * type (and the sph_u64 type also, if it is defined) has
 * an exact width (i.e. exactly 32-bit, respectively 64-bit).
 */
#define SPH_LITTLE_ENDIAN

/**
 * This macro is defined if the platform has been detected as using
 * big-endian convention. This implies that the sph_u32
 * type (and the sph_u64 type also, if it is defined) has
 * an exact width (i.e. exactly 32-bit, respectively 64-bit).
 */
#define SPH_BIG_ENDIAN

/**
 * This macro is defined if 32-bit words (and 64-bit words, if defined)
 * can be read from and written to memory efficiently in little-endian
 * convention. This is the case for little-endian platforms, and also
 * for the big-endian platforms which have special little-endian access
 * opcodes (e.g. Ultrasparc).
*/
#define SPH_LITTLE_FAST

/**
 * This macro is defined if 32-bit words (and 64-bit words, if defined)
 * can be read from and written to memory efficiently in big-endian
 * convention. This is the case for big-endian platforms, and also
 * for the little-endian platforms which have special big-endian access
 * opcodes.
 */
#define SPH_BIG_FAST

/**
 * On some platforms, this macro is defined to an unsigned integer type
 * into which pointer values may be cast. The resulting value can then
 * be tested for being a multiple of 2, 4 or 8, indicating an aligned
 * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
 */
#define SPH_UPTR

/**
 * When defined, this macro indicates that unaligned memory accesses
 * are possible with only a minor penalty, and thus should be preferred
 * over strategies which first copy data to an aligned buffer.
 */
#define SPH_UNALIGNED

/**
 * Byte-swap a 32-bit word (i.e. 0x12345678 becomes
 * 0x78563412). This is an inline function which resorts
 * to inline assembly on some platforms, for better performance.
 *
 * @param x   the 32-bit value to byte-swap
 * @return  the byte-swapped value
 */
static inline sph_u32 sph_bswap32(sph_u32 x);

/**
 * Byte-swap a 64-bit word. This is an inline function which resorts
 * to inline assembly on some platforms, for better performance. This
 * function is defined only if a suitable 64-bit type was found for
 * sph_u64.
 *
 * @param x   the 64-bit value to byte-swap
 * @return  the byte-swapped value
 */
static inline sph_u64 sph_bswap64(sph_u64 x);

/**
 * Decode a 16-bit unsigned value from memory, in little-endian convention
 * (least significant byte comes first).
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline unsigned sph_dec16le(const void *src);

/**
 * Encode a 16-bit unsigned value into memory, in little-endian convention
 * (least significant byte comes first).
*
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc16le(void *dst, unsigned val);

/**
 * Decode a 16-bit unsigned value from memory, in big-endian convention
 * (most significant byte comes first).
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline unsigned sph_dec16be(const void *src);

/**
 * Encode a 16-bit unsigned value into memory, in big-endian convention
 * (most significant byte comes first).
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc16be(void *dst, unsigned val);

/**
 * Decode a 32-bit unsigned value from memory, in little-endian convention
 * (least significant byte comes first).
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u32 sph_dec32le(const void *src);

/**
 * Decode a 32-bit unsigned value from memory, in little-endian convention
 * (least significant byte comes first). This function assumes that the
 * source address is suitably aligned for a direct access, if the platform
 * supports such things; it can thus be marginally faster than the generic
 * sph_dec32le() function.
 *
 * NOTE(review): what happens when the address is not suitably aligned is
 * not stated here (nor for the other *_aligned helpers) -- confirm against
 * the implementation before relying on it.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u32 sph_dec32le_aligned(const void *src);

/**
 * Encode a 32-bit unsigned value into memory, in little-endian convention
 * (least significant byte comes first).
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc32le(void *dst, sph_u32 val);

/**
 * Encode a 32-bit unsigned value into memory, in little-endian convention
 * (least significant byte comes first). This function assumes that the
 * destination address is suitably aligned for a direct access, if the
 * platform supports such things; it can thus be marginally faster than
 * the generic sph_enc32le() function.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc32le_aligned(void *dst, sph_u32 val);

/**
 * Decode a 32-bit unsigned value from memory, in big-endian convention
 * (most significant byte comes first).
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u32 sph_dec32be(const void *src);

/**
 * Decode a 32-bit unsigned value from memory, in big-endian convention
 * (most significant byte comes first). This function assumes that the
 * source address is suitably aligned for a direct access, if the platform
 * supports such things; it can thus be marginally faster than the generic
 * sph_dec32be() function.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u32 sph_dec32be_aligned(const void *src);

/**
 * Encode a 32-bit unsigned value into memory, in big-endian convention
 * (most significant byte comes first).
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc32be(void *dst, sph_u32 val);

/**
 * Encode a 32-bit unsigned value into memory, in big-endian convention
 * (most significant byte comes first). This function assumes that the
 * destination address is suitably aligned for a direct access, if the
 * platform supports such things; it can thus be marginally faster than
 * the generic sph_enc32be() function.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc32be_aligned(void *dst, sph_u32 val);

/**
 * Decode a 64-bit unsigned value from memory, in little-endian convention
 * (least significant byte comes first). This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u64 sph_dec64le(const void *src);

/**
 * Decode a 64-bit unsigned value from memory, in little-endian convention
 * (least significant byte comes first). This function assumes that the
 * source address is suitably aligned for a direct access, if the platform
 * supports such things; it can thus be marginally faster than the generic
 * sph_dec64le() function. This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u64 sph_dec64le_aligned(const void *src);

/**
 * Encode a 64-bit unsigned value into memory, in little-endian convention
 * (least significant byte comes first). This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc64le(void *dst, sph_u64 val);

/**
 * Encode a 64-bit unsigned value into memory, in little-endian convention
 * (least significant byte comes first). This function assumes that the
 * destination address is suitably aligned for a direct access, if the
 * platform supports such things; it can thus be marginally faster than
 * the generic sph_enc64le() function. This function is defined
 * only if a suitable 64-bit type was detected and used for
 * sph_u64.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc64le_aligned(void *dst, sph_u64 val);

/**
 * Decode a 64-bit unsigned value from memory, in big-endian convention
 * (most significant byte comes first). This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u64 sph_dec64be(const void *src);

/**
 * Decode a 64-bit unsigned value from memory, in big-endian convention
 * (most significant byte comes first). This function assumes that the
 * source address is suitably aligned for a direct access, if the platform
 * supports such things; it can thus be marginally faster than the generic
 * sph_dec64be() function.
This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param src   the source address
 * @return  the decoded value
 */
static inline sph_u64 sph_dec64be_aligned(const void *src);

/**
 * Encode a 64-bit unsigned value into memory, in big-endian convention
 * (most significant byte comes first). This function is defined only
 * if a suitable 64-bit type was detected and used for sph_u64.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc64be(void *dst, sph_u64 val);

/**
 * Encode a 64-bit unsigned value into memory, in big-endian convention
 * (most significant byte comes first). This function assumes that the
 * destination address is suitably aligned for a direct access, if the
 * platform supports such things; it can thus be marginally faster than
 * the generic sph_enc64be() function. This function is defined
 * only if a suitable 64-bit type was detected and used for
 * sph_u64.
 *
 * @param dst   the destination buffer
 * @param val   the value to encode
 */
static inline void sph_enc64be_aligned(void *dst, sph_u64 val);

#endif

/* ============== END documentation block for Doxygen ============= */

#ifndef DOXYGEN_IGNORE

/*
 * We want to define the types "sph_u32" and "sph_u64" which hold
 * unsigned values of at least, respectively, 32 and 64 bits. These
 * tests should select appropriate types for most platforms. The
 * macro "SPH_64" is defined if a 64-bit type is supported.
 */

/* Drop any earlier definition so the tests below decide both macros. */
#undef SPH_64
#undef SPH_64_TRUE

#if defined __STDC__ && __STDC_VERSION__ >= 199901L

/*
 * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
 * type, if any, or otherwise use a wider type (which must exist, for
 * C99 conformance).
*/ #include #ifdef UINT32_MAX typedef uint32_t sph_u32; typedef int32_t sph_s32; #else typedef uint_fast32_t sph_u32; typedef int_fast32_t sph_s32; #endif #if !SPH_NO_64 #ifdef UINT64_MAX typedef uint64_t sph_u64; typedef int64_t sph_s64; #else typedef uint_fast64_t sph_u64; typedef int_fast64_t sph_s64; #endif #endif #define SPH_C32(x) ((sph_u32)(x)) #if !SPH_NO_64 #define SPH_C64(x) ((sph_u64)(x)) #define SPH_64 1 #endif #else /* * On non-C99 systems, we use "unsigned int" if it is wide enough, * "unsigned long" otherwise. This supports all "reasonable" architectures. * We have to be cautious: pre-C99 preprocessors handle constants * differently in '#if' expressions. Hence the shifts to test UINT_MAX. */ #if ((UINT_MAX >> 11) >> 11) >= 0x3FF typedef unsigned int sph_u32; typedef int sph_s32; #define SPH_C32(x) ((sph_u32)(x ## U)) #else typedef unsigned long sph_u32; typedef long sph_s32; #define SPH_C32(x) ((sph_u32)(x ## UL)) #endif #if !SPH_NO_64 /* * We want a 64-bit type. We use "unsigned long" if it is wide enough (as * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), * "unsigned long long" otherwise, if available. We use ULLONG_MAX to * test whether "unsigned long long" is available; we also know that * gcc features this type, even if the libc header do not know it. */ #if ((ULONG_MAX >> 31) >> 31) >= 3 typedef unsigned long sph_u64; typedef long sph_s64; #define SPH_C64(x) ((sph_u64)(x ## UL)) #define SPH_64 1 #elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ typedef unsigned long long sph_u64; typedef long long sph_s64; #define SPH_C64(x) ((sph_u64)(x ## ULL)) #define SPH_64 1 #else /* * No 64-bit type... */ #endif #endif #endif /* * If the "unsigned long" type has length 64 bits or more, then this is * a "true" 64-bit architectures. This is also true with Visual C on * amd64, even though the "long" type is limited to 32 bits. 
*/ #if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) #define SPH_64_TRUE 1 #endif /* * Implementation note: some processors have specific opcodes to perform * a rotation. Recent versions of gcc recognize the expression above and * use the relevant opcodes, when appropriate. */ #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #if SPH_64 #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #endif #ifndef DOXYGEN_IGNORE /* * Define SPH_INLINE to be an "inline" qualifier, if available. We define * some small macro-like functions which benefit greatly from being inlined. */ #if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ #define SPH_INLINE inline #elif defined _MSC_VER #define SPH_INLINE __inline #else #define SPH_INLINE #endif #endif /* * We define some macros which qualify the architecture. These macros * may be explicit set externally (e.g. as compiler parameters). The * code below sets those macros if they are not already defined. * * Most macros are boolean, thus evaluate to either zero or non-zero. * The SPH_UPTR macro is special, in that it evaluates to a C type, * or is not defined. * * SPH_UPTR if defined: unsigned type to cast pointers into * * SPH_UNALIGNED non-zero if unaligned accesses are efficient * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian * SPH_LITTLE_FAST non-zero if little-endian decoding is fast * SPH_BIG_FAST non-zero if big-endian decoding is fast * * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN * _must_ be non-zero in those situations. 
The 32-bit and 64-bit types
 * _must_ also have an exact width.
 *
 * SPH_SPARCV9_GCC_32   UltraSPARC-compatible with gcc, 32-bit mode
 * SPH_SPARCV9_GCC_64   UltraSPARC-compatible with gcc, 64-bit mode
 * SPH_SPARCV9_GCC      UltraSPARC-compatible with gcc
 * SPH_I386_GCC         x86-compatible (32-bit) with gcc
 * SPH_I386_MSVC        x86-compatible (32-bit) with Microsoft Visual C
 * SPH_AMD64_GCC        x86-compatible (64-bit) with gcc
 * SPH_AMD64_MSVC       x86-compatible (64-bit) with Microsoft Visual C
 * SPH_PPC32_GCC        PowerPC, 32-bit, with gcc
 * SPH_PPC64_GCC        PowerPC, 64-bit, with gcc
 *
 * TODO: enhance automatic detection, for more architectures and compilers.
 * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
 * some very fast functions (e.g. MD4) when using unaligned input data.
 * The CPU-specific-with-GCC macros are useful only for inline assembly,
 * normally restrained to this header file.
 *
 * Detection results are first recorded in SPH_DETECT_* macros; they are
 * copied to the corresponding SPH_* macros further down, but only for
 * those which were not already defined externally.
 */

/*
 * 32-bit x86, aka "i386 compatible".
 *
 * Recorded as: efficient unaligned accesses, little-endian, pointers
 * cast through sph_u32. The compiler-specific macro enables the inline
 * assembly variants.
 */
#if defined __i386__ || defined _M_IX86

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u32
#ifdef __GNUC__
#define SPH_DETECT_I386_GCC          1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_I386_MSVC         1
#endif

/*
 * 64-bit x86, hereafter known as "amd64".
 *
 * Same properties as 32-bit x86, except that pointers are cast through
 * sph_u64.
 */
#elif defined __x86_64 || defined _M_X64

#define SPH_DETECT_UNALIGNED         1
#define SPH_DETECT_LITTLE_ENDIAN     1
#define SPH_DETECT_UPTR              sph_u64
#ifdef __GNUC__
#define SPH_DETECT_AMD64_GCC         1
#endif
#ifdef _MSC_VER
#define SPH_DETECT_AMD64_MSVC        1
#endif

/*
 * 64-bit Sparc architecture (implies v9).
 *
 * Big-endian, pointers cast through sph_u64. SPH_DETECT_UNALIGNED is
 * not set, so accesses through SPH_UPTR are guarded by an alignment
 * test. With gcc, little-endian decoding is also flagged as fast.
 */
#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
	|| defined __sparcv9

#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u64
#ifdef __GNUC__
#define SPH_DETECT_SPARCV9_GCC_64    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * 32-bit Sparc.
*/
#elif (defined __sparc__ || defined __sparc) \
	&& !(defined __sparcv9 || defined __arch64__)

/* Big-endian; pointers are cast through sph_u32. */
#define SPH_DETECT_BIG_ENDIAN        1
#define SPH_DETECT_UPTR              sph_u32
/*
 * gcc targeting a v9 CPU in 32-bit mode: little-endian decoding is
 * flagged as fast.
 */
#if defined __GNUC__ && defined __sparc_v9__
#define SPH_DETECT_SPARCV9_GCC_32    1
#define SPH_DETECT_LITTLE_FAST       1
#endif

/*
 * ARM, little-endian.
 *
 * Only the byte order is recorded: no SPH_DETECT_UPTR is set here.
 */
#elif defined __arm__ && __ARMEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, little-endian.
 *
 * The macros are tested by value, not with "defined". Only the byte
 * order is recorded.
 */
#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__

#define SPH_DETECT_LITTLE_ENDIAN     1

/*
 * MIPS, big-endian.
 */
#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__

#define SPH_DETECT_BIG_ENDIAN        1

/*
 * PowerPC.
 */
#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
	|| defined _ARCH_PPC

/*
 * Note: we do not declare cross-endian access to be "fast": even if
 * using inline assembly, implementation should still assume that
 * keeping the decoded word in a temporary is faster than decoding
 * it again.
 */
/* With gcc, SPH_64_TRUE selects between the 64-bit and 32-bit flavours. */
#if defined __GNUC__
#if SPH_64_TRUE
#define SPH_DETECT_PPC64_GCC         1
#else
#define SPH_DETECT_PPC32_GCC         1
#endif
#endif

/*
 * Byte order is taken from the compiler-provided macros; if none of
 * them is defined, no endianness is recorded.
 */
#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif

/*
 * Itanium, 64-bit.
*/
#elif defined __ia64 || defined __ia64__ \
	|| defined __itanium__ || defined _M_IA64

/* Little-endian unless the compiler announces big-endian mode. */
#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
#define SPH_DETECT_BIG_ENDIAN        1
#else
#define SPH_DETECT_LITTLE_ENDIAN     1
#endif
/* Pointer-sized integer type depends on the data model (LP64 or not). */
#if defined __LP64__ || defined _LP64
#define SPH_DETECT_UPTR              sph_u64
#else
#define SPH_DETECT_UPTR              sph_u32
#endif

#endif

/* SPH_DETECT_SPARCV9_GCC is set for either UltraSPARC/gcc mode. */
#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
#define SPH_DETECT_SPARCV9_GCC       1
#endif

/*
 * Copy each detected value into its public SPH_* macro, unless that
 * macro was already defined externally (external settings win).
 */
#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
#define SPH_UNALIGNED         SPH_DETECT_UNALIGNED
#endif
#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
#define SPH_UPTR              SPH_DETECT_UPTR
#endif
#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
#define SPH_LITTLE_ENDIAN     SPH_DETECT_LITTLE_ENDIAN
#endif
#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
#define SPH_BIG_ENDIAN        SPH_DETECT_BIG_ENDIAN
#endif
#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST       SPH_DETECT_LITTLE_FAST
#endif
#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
#define SPH_BIG_FAST    SPH_DETECT_BIG_FAST
#endif
#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
#define SPH_SPARCV9_GCC_32    SPH_DETECT_SPARCV9_GCC_32
#endif
#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
#define SPH_SPARCV9_GCC_64    SPH_DETECT_SPARCV9_GCC_64
#endif
#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
#define SPH_SPARCV9_GCC       SPH_DETECT_SPARCV9_GCC
#endif
#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
#define SPH_I386_GCC          SPH_DETECT_I386_GCC
#endif
#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
#define SPH_I386_MSVC         SPH_DETECT_I386_MSVC
#endif
#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
#define SPH_AMD64_GCC         SPH_DETECT_AMD64_GCC
#endif
#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
#define SPH_AMD64_MSVC        SPH_DETECT_AMD64_MSVC
#endif
#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
#define SPH_PPC32_GCC         SPH_DETECT_PPC32_GCC
#endif
#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
#define SPH_PPC64_GCC         SPH_DETECT_PPC64_GCC
#endif

/*
 * Decoding in the native byte order is considered fast, unless the
 * corresponding *_FAST macro was already set (to any value).
 */
#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
#define SPH_LITTLE_FAST              1
#endif
#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
#define SPH_BIG_FAST                 1
#endif

/*
 * The word-access code paths selected by SPH_UPTR need to know whether
 * a byte swap is required, hence the hard error below.
 */
#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
#error SPH_UPTR defined, but endianness is not known.
#endif

#if SPH_I386_GCC && !SPH_NO_ASM

/*
 * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
 * values.
 */

/*
 * Reverse the byte order of a 32-bit value.
 * The "0" input constraint ties the input to the output register, so
 * the opcode swaps x in place.
 */
static SPH_INLINE sph_u32
sph_bswap32(sph_u32 x)
{
	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
	return x;
}

#if SPH_64

/*
 * Reverse the byte order of a 64-bit value: each 32-bit half is
 * byte-swapped with sph_bswap32() and the two halves exchange places.
 */
static SPH_INLINE sph_u64
sph_bswap64(sph_u64 x)
{
	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
}

#endif

#elif SPH_AMD64_GCC && !SPH_NO_ASM

/*
 * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
 * byte-swap 32-bit and 64-bit values, respectively.
 */

/* Reverse the byte order of a 32-bit value (in-place bswapl). */
static SPH_INLINE sph_u32
sph_bswap32(sph_u32 x)
{
	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
	return x;
}

#if SPH_64

/* Reverse the byte order of a 64-bit value (in-place bswapq). */
static SPH_INLINE sph_u64
sph_bswap64(sph_u64 x)
{
	__asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
	return x;
}

#endif

/*
 * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
 * to generate proper opcodes for endianness swapping with the pure C
 * implementation below.
 *

#elif SPH_I386_MSVC && !SPH_NO_ASM

static __inline sph_u32 __declspec(naked) __fastcall
sph_bswap32(sph_u32 x)
{
	__asm {
		bswap  ecx
		mov    eax,ecx
		ret
	}
}

#if SPH_64

static SPH_INLINE sph_u64
sph_bswap64(sph_u64 x)
{
	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
}

#endif

 *
 * [end of disabled code]
 */

#else

/*
 * Portable byte swap of a 32-bit value: exchange the two 16-bit halves
 * (truncating to 32 bits in case sph_u32 is wider), then exchange the
 * two bytes inside each half.
 */
static SPH_INLINE sph_u32
sph_bswap32(sph_u32 x)
{
	x = SPH_T32((x << 16) | (x >> 16));
	x = ((x & SPH_C32(0xFF00FF00)) >> 8)
		| ((x & SPH_C32(0x00FF00FF)) << 8);
	return x;
}

#if SPH_64

/**
 * Byte-swap a 64-bit value.
* * @param x the input value * @return the byte-swapped value */ static SPH_INLINE sph_u64 sph_bswap64(sph_u64 x) { x = SPH_T64((x << 32) | (x >> 32)); x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); return x; } #endif #endif #if SPH_SPARCV9_GCC && !SPH_NO_ASM /* * On UltraSPARC systems, native ordering is big-endian, but it is * possible to perform little-endian read accesses by specifying the * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use * the opcode "lda [%reg]0x88,%dst", where %reg is the register which * contains the source address and %dst is the destination register, * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register * to get the address space name. The latter format is better since it * combines an addition and the actual access in a single opcode; but * it requires the setting (and subsequent resetting) of %asi, which is * slow. Some operations (i.e. MD5 compression function) combine many * successive little-endian read accesses, which may share the same * %asi setting. The macros below contain the appropriate inline * assembly. 
*/ #define SPH_SPARCV9_SET_ASI \ sph_u32 sph_sparcv9_asi; \ __asm__ __volatile__ ( \ "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); #define SPH_SPARCV9_RESET_ASI \ __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); #define SPH_SPARCV9_DEC32LE(base, idx) ({ \ sph_u32 sph_sparcv9_tmp; \ __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ : "=r" (sph_sparcv9_tmp) : "r" (base)); \ sph_sparcv9_tmp; \ }) #endif static SPH_INLINE void sph_enc16be(void *dst, unsigned val) { ((unsigned char *)dst)[0] = (val >> 8); ((unsigned char *)dst)[1] = val; } static SPH_INLINE unsigned sph_dec16be(const void *src) { return ((unsigned)(((const unsigned char *)src)[0]) << 8) | (unsigned)(((const unsigned char *)src)[1]); } static SPH_INLINE void sph_enc16le(void *dst, unsigned val) { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = val >> 8; } static SPH_INLINE unsigned sph_dec16le(const void *src) { return (unsigned)(((const unsigned char *)src)[0]) | ((unsigned)(((const unsigned char *)src)[1]) << 8); } /** * Encode a 32-bit value into the provided buffer (big endian convention). * * @param dst the destination buffer * @param val the 32-bit value to encode */ static SPH_INLINE void sph_enc32be(void *dst, sph_u32 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; #else if (((SPH_UPTR)dst & 3) == 0) { #if SPH_LITTLE_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; } else { ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; } #endif #else ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; #endif } /** * Encode a 32-bit value into the provided buffer (big endian convention). * The destination buffer must be properly aligned. 
* * @param dst the destination buffer (32-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc32be_aligned(void *dst, sph_u32 val) { #if SPH_LITTLE_ENDIAN *(sph_u32 *)dst = sph_bswap32(val); #elif SPH_BIG_ENDIAN *(sph_u32 *)dst = val; #else ((unsigned char *)dst)[0] = (val >> 24); ((unsigned char *)dst)[1] = (val >> 16); ((unsigned char *)dst)[2] = (val >> 8); ((unsigned char *)dst)[3] = val; #endif } /** * Decode a 32-bit value from the provided buffer (big endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32be(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif #else if (((SPH_UPTR)src & 3) == 0) { #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif } else { return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); } #endif #else return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); #endif } /** * Decode a 32-bit value from the provided buffer (big endian convention). * The source buffer must be properly aligned. 
* * @param src the source buffer (32-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32be_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #elif SPH_BIG_ENDIAN return *(const sph_u32 *)src; #else return ((sph_u32)(((const unsigned char *)src)[0]) << 24) | ((sph_u32)(((const unsigned char *)src)[1]) << 16) | ((sph_u32)(((const unsigned char *)src)[2]) << 8) | (sph_u32)(((const unsigned char *)src)[3]); #endif } /** * Encode a 32-bit value into the provided buffer (little endian convention). * * @param dst the destination buffer * @param val the 32-bit value to encode */ static SPH_INLINE void sph_enc32le(void *dst, sph_u32 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; #else if (((SPH_UPTR)dst & 3) == 0) { #if SPH_BIG_ENDIAN val = sph_bswap32(val); #endif *(sph_u32 *)dst = val; } else { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); } #endif #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); #endif } /** * Encode a 32-bit value into the provided buffer (little endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (32-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc32le_aligned(void *dst, sph_u32 val) { #if SPH_LITTLE_ENDIAN *(sph_u32 *)dst = val; #elif SPH_BIG_ENDIAN *(sph_u32 *)dst = sph_bswap32(val); #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); #endif } /** * Decode a 32-bit value from the provided buffer (little endian convention). 
* * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32le(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN return sph_bswap32(*(const sph_u32 *)src); #else return *(const sph_u32 *)src; #endif #else if (((SPH_UPTR)src & 3) == 0) { #if SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC && !SPH_NO_ASM sph_u32 tmp; /* * "__volatile__" is needed here because without it, * gcc-3.4.3 miscompiles the code and performs the * access before the test on the address, thus triggering * a bus error... */ __asm__ __volatile__ ( "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * On PowerPC, this turns out not to be worth the effort: the inline * assembly makes GCC optimizer uncomfortable, which tends to nullify * the decoding gains. * * For most hash functions, using this inline assembly trick changes * hashing speed by less than 5% and often _reduces_ it. The biggest * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is * less then 10%. The speed gain on CubeHash is probably due to the * chronic shortage of registers that CubeHash endures; for the other * functions, the generic code appears to be efficient enough already. * #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ( "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap32(*(const sph_u32 *)src); #endif #else return *(const sph_u32 *)src; #endif } else { return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); } #endif #else return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); #endif } /** * Decode a 32-bit value from the provided buffer (little endian convention). 
* The source buffer must be properly aligned. * * @param src the source buffer (32-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u32 sph_dec32le_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return *(const sph_u32 *)src; #elif SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. * #elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM sph_u32 tmp; __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap32(*(const sph_u32 *)src); #endif #else return (sph_u32)(((const unsigned char *)src)[0]) | ((sph_u32)(((const unsigned char *)src)[1]) << 8) | ((sph_u32)(((const unsigned char *)src)[2]) << 16) | ((sph_u32)(((const unsigned char *)src)[3]) << 24); #endif } #if SPH_64 /** * Encode a 64-bit value into the provided buffer (big endian convention). * * @param dst the destination buffer * @param val the 64-bit value to encode */ static SPH_INLINE void sph_enc64be(void *dst, sph_u64 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; #else if (((SPH_UPTR)dst & 7) == 0) { #if SPH_LITTLE_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; } else { ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; } #endif #else ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; #endif } /** * Encode a 
64-bit value into the provided buffer (big endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (64-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc64be_aligned(void *dst, sph_u64 val) { #if SPH_LITTLE_ENDIAN *(sph_u64 *)dst = sph_bswap64(val); #elif SPH_BIG_ENDIAN *(sph_u64 *)dst = val; #else ((unsigned char *)dst)[0] = (val >> 56); ((unsigned char *)dst)[1] = (val >> 48); ((unsigned char *)dst)[2] = (val >> 40); ((unsigned char *)dst)[3] = (val >> 32); ((unsigned char *)dst)[4] = (val >> 24); ((unsigned char *)dst)[5] = (val >> 16); ((unsigned char *)dst)[6] = (val >> 8); ((unsigned char *)dst)[7] = val; #endif } /** * Decode a 64-bit value from the provided buffer (big endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64be(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif #else if (((SPH_UPTR)src & 7) == 0) { #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif } else { return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); } #endif #else return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned 
char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); #endif } /** * Decode a 64-bit value from the provided buffer (big endian convention). * The source buffer must be properly aligned. * * @param src the source buffer (64-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64be_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #elif SPH_BIG_ENDIAN return *(const sph_u64 *)src; #else return ((sph_u64)(((const unsigned char *)src)[0]) << 56) | ((sph_u64)(((const unsigned char *)src)[1]) << 48) | ((sph_u64)(((const unsigned char *)src)[2]) << 40) | ((sph_u64)(((const unsigned char *)src)[3]) << 32) | ((sph_u64)(((const unsigned char *)src)[4]) << 24) | ((sph_u64)(((const unsigned char *)src)[5]) << 16) | ((sph_u64)(((const unsigned char *)src)[6]) << 8) | (sph_u64)(((const unsigned char *)src)[7]); #endif } /** * Encode a 64-bit value into the provided buffer (little endian convention). * * @param dst the destination buffer * @param val the 64-bit value to encode */ static SPH_INLINE void sph_enc64le(void *dst, sph_u64 val) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; #else if (((SPH_UPTR)dst & 7) == 0) { #if SPH_BIG_ENDIAN val = sph_bswap64(val); #endif *(sph_u64 *)dst = val; } else { ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val >> 56); } #endif #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val 
>> 56); #endif } /** * Encode a 64-bit value into the provided buffer (little endian convention). * The destination buffer must be properly aligned. * * @param dst the destination buffer (64-bit aligned) * @param val the value to encode */ static SPH_INLINE void sph_enc64le_aligned(void *dst, sph_u64 val) { #if SPH_LITTLE_ENDIAN *(sph_u64 *)dst = val; #elif SPH_BIG_ENDIAN *(sph_u64 *)dst = sph_bswap64(val); #else ((unsigned char *)dst)[0] = val; ((unsigned char *)dst)[1] = (val >> 8); ((unsigned char *)dst)[2] = (val >> 16); ((unsigned char *)dst)[3] = (val >> 24); ((unsigned char *)dst)[4] = (val >> 32); ((unsigned char *)dst)[5] = (val >> 40); ((unsigned char *)dst)[6] = (val >> 48); ((unsigned char *)dst)[7] = (val >> 56); #endif } /** * Decode a 64-bit value from the provided buffer (little endian convention). * * @param src the source buffer * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64le(const void *src) { #if defined SPH_UPTR #if SPH_UNALIGNED #if SPH_BIG_ENDIAN return sph_bswap64(*(const sph_u64 *)src); #else return *(const sph_u64 *)src; #endif #else if (((SPH_UPTR)src & 7) == 0) { #if SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ( "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. 
* #elif SPH_PPC32_GCC && !SPH_NO_ASM return (sph_u64)sph_dec32le_aligned(src) | ((sph_u64)sph_dec32le_aligned( (const char *)src + 4) << 32); #elif SPH_PPC64_GCC && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ( "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap64(*(const sph_u64 *)src); #endif #else return *(const sph_u64 *)src; #endif } else { return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); } #endif #else return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); #endif } /** * Decode a 64-bit value from the provided buffer (little endian convention). * The source buffer must be properly aligned. * * @param src the source buffer (64-bit aligned) * @return the decoded value */ static SPH_INLINE sph_u64 sph_dec64le_aligned(const void *src) { #if SPH_LITTLE_ENDIAN return *(const sph_u64 *)src; #elif SPH_BIG_ENDIAN #if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); return tmp; /* * Not worth it generally. 
* #elif SPH_PPC32_GCC && !SPH_NO_ASM return (sph_u64)sph_dec32le_aligned(src) | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); #elif SPH_PPC64_GCC && !SPH_NO_ASM sph_u64 tmp; __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); return tmp; */ #else return sph_bswap64(*(const sph_u64 *)src); #endif #else return (sph_u64)(((const unsigned char *)src)[0]) | ((sph_u64)(((const unsigned char *)src)[1]) << 8) | ((sph_u64)(((const unsigned char *)src)[2]) << 16) | ((sph_u64)(((const unsigned char *)src)[3]) << 24) | ((sph_u64)(((const unsigned char *)src)[4]) << 32) | ((sph_u64)(((const unsigned char *)src)[5]) << 40) | ((sph_u64)(((const unsigned char *)src)[6]) << 48) | ((sph_u64)(((const unsigned char *)src)[7]) << 56); #endif } #endif #endif /* Doxygen excluded block */ #endif