diff --git a/CL/cl.h b/CL/cl.h new file mode 100644 index 0000000..87fd6d7 --- /dev/null +++ b/CL/cl.h @@ -0,0 +1,1384 @@ +/******************************************************************************* + * Copyright (c) 2008 - 2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_device_svm_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_bitfield cl_queue_properties; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_bitfield cl_svm_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef __GNUC__ + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif + union { + cl_mem buffer; + cl_mem mem_object; + }; +} cl_image_desc; + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 + +/* OpenCL Version */ +#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1 +#define CL_VERSION_2_0 1 + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#define CL_QUEUE_SIZE 0x1094 + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#define CL_MEM_OBJECT_PIPE 0x10F7 + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#define CL_MEM_USES_SVM_POINTER 0x1109 + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +/* cl_pipe_info */ +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +/* cl_kernel_arg_type_qualifer */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context /* context */, + cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem /* buffer */, + cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context /* context */, + cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, + cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem /* pipe */, + cl_pipe_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem /* memobj */, + void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), + void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; + +/* SVM Allocation APIs */ +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context /* context */, + cl_svm_mem_flags /* flags */, + size_t /* size */, + cl_uint /* alignment */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +/* Sampler APIs */ +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context /* context */, + const cl_sampler_properties * /* normalized_coords */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; + + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel /* kernel */, + cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel /* kernel */, + cl_kernel_exec_info /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback( cl_event /* event */, + cl_int /* command_exec_callback_type */, + void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* size */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* size */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, + const size_t * /* region */, + size_t /* buffer_row_pitch */, + size_t /* buffer_slice_pitch */, + size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin */, + const size_t * /* dst_origin */, + const size_t * /* region */, + size_t /* src_row_pitch */, + size_t /* src_slice_pitch */, + size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue /* command_queue */, + void (CL_CALLBACK * /*user_func*/)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue /* command_queue */, + cl_uint /* num_svm_pointers */, + void *[] /* svm_pointers[] */, + void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void *[] /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue /* command_queue */, + cl_bool /* blocking_copy */, + void * /* dst_ptr */, + const void * /* src_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue /* command_queue */, + void * /* svm_ptr */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue /* command_queue */, + cl_bool /* blocking_map */, + cl_map_flags /* flags */, + void * /* svm_ptr */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue /* command_queue */, + void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, + const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_0_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_0_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_0_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ + diff --git a/CL/cl_platform.h b/CL/cl_platform.h new file mode 100644 index 0000000..d813b1f --- /dev/null +++ b/CL/cl_platform.h @@ -0,0 +1,1299 @@ +/********************************************************************************** + * Copyright (c) 2008-2013 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +/* $Revision: 14829 $ on $Date: 2011-05-26 08:22:50 -0700 (Thu, 26 May 2011) $ */ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#ifdef __APPLE__ + /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +#ifdef __APPLE__ + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 + + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 + #else + #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here! + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #endif +#else + #define CL_EXTENSION_WEAK_LINK + #define CL_API_SUFFIX__VERSION_1_0 + #define CL_EXT_SUFFIX__VERSION_1_0 + #define CL_API_SUFFIX__VERSION_1_1 + #define CL_EXT_SUFFIX__VERSION_1_1 + #define CL_API_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_2 + #define CL_API_SUFFIX__VERSION_2_0 + #define CL_EXT_SUFFIX__VERSION_2_0 + + #ifdef __GNUC__ + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #endif + #elif _WIN32 + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED __declspec(deprecated) + #endif + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #endif +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 0.00000011920928955078125f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 + +#define CL_M_E 2.718281828459045090796 +#define CL_M_LOG2E 1.442695040888963387005 +#define CL_M_LOG10E 0.434294481903251816668 +#define CL_M_LN2 0.693147180559945286227 +#define CL_M_LN10 2.302585092994045901094 +#define CL_M_PI 3.141592653589793115998 +#define CL_M_PI_2 1.570796326794896557999 +#define CL_M_PI_4 0.785398163397448278999 +#define CL_M_1_PI 0.318309886183790691216 +#define CL_M_2_PI 0.636619772367581382433 +#define CL_M_2_SQRTPI 1.128379167095512558561 +#define CL_M_SQRT2 1.414213562373095145475 +#define CL_M_SQRT1_2 0.707106781186547572737 + +#define CL_M_E_F 2.71828174591064f +#define CL_M_LOG2E_F 1.44269502162933f +#define CL_M_LOG10E_F 0.43429449200630f +#define CL_M_LN2_F 0.69314718246460f +#define CL_M_LN10_F 2.30258512496948f +#define CL_M_PI_F 3.14159274101257f +#define CL_M_PI_2_F 1.57079637050629f +#define CL_M_PI_4_F 0.78539818525314f +#define CL_M_1_PI_F 0.31830987334251f +#define CL_M_2_PI_F 0.63661974668503f +#define CL_M_2_SQRTPI_F 1.12837922573090f +#define CL_M_SQRT2_F 1.41421353816986f +#define CL_M_SQRT1_2_F 0.70710676908493f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + typedef vector unsigned char __cl_uchar16; + typedef vector signed char __cl_char16; + typedef vector unsigned short __cl_ushort8; + typedef vector signed short __cl_short8; + typedef vector unsigned int __cl_uint4; + typedef vector signed int __cl_int4; + typedef vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) && !defined( __ICC ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) && !defined( __ICC ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include + #if defined( __GNUC__ ) && !defined( __ICC ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) && !defined( __ICC ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if (defined( __GNUC__ ) || defined( __IBMC__ )) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && (_MSC_VER >= 1500) + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if (defined( __GNUC__ ) || defined( __IBMC__ )) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && (_MSC_VER >= 1500) +#pragma warning( pop ) +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/Makefile b/Makefile index 3821a9c..faada7d 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,23 @@ -INCLUDEPATHS=-I${ATISTREAMSDKROOT}/include/ -L${ATISTREAMSDKROOT}/ +INCLUDEPATHS=-I. -LIBS=-lOpenCL -ljansson -lcurl +LIBS=-lOpenCL -ljansson -lcurl -lpthread +# LDFLAGS = -L$(AMDAPPSDKROOT)/lib/x86_64 +LDFLAGS = -L/usr/local/cuda/lib64 DEFS= DEBUGFLAGS=-g CFLAGS=-O3 -Wformat $(DEBUGFLAGS) $(DEFS) $(INCLUDEPATHS) HEADERS= -OBJS=miner.o ocl.o findnonce.o util.o +OBJS=miner.o ocl.o util.o streebog.o -all: oclminer +all: gostoclminer %.o: %.c $(HEADERS) gcc -c $(CFLAGS) -o $@ $< -oclminer: $(OBJS) - gcc $(CFLAGS) -o $@ $^ $(LIBS) +gostoclminer: $(OBJS) + gcc $(LDFLAGS) -o $@ $^ $(LIBS) clean: - -rm *.o oclminer + -rm *.o gostoclminer diff --git a/gostminer.cl b/gostminer.cl new file mode 100644 index 0000000..59c880a --- /dev/null +++ b/gostminer.cl @@ -0,0 +1,939 @@ +#define WGS __attribute__((reqd_work_group_size(256, 1, 1))) + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +inline uint LODWORD(const sph_u64 x) +{ + return (uint)(x & 0xFFFFFFFF); +} + +inline uint HIDWORD(const sph_u64 x) +{ + return (uint)(x >> 32); +} + +// Tables for function F +__constant static sph_u64 T0[256] = { + 0xE6F87E5C5B711FD0, 0x258377800924FA16, 0xC849E07E852EA4A8, 0x5B4686A18F06C16A, + 0x0B32E9A2D77B416E, 0xABDA37A467815C66, 0xF61796A81A686676, 0xF5DC0B706391954B, + 0x4862F38DB7E64BF1, 0xFF5C629A68BD85C5, 0xCB827DA6FCD75795, 0x66D36DAF69B9F089, + 0x356C9F74483D83B0, 0x7CBCECB1238C99A1, 0x36A702AC31C4708D, 0x9EB6A8D02FBCDFD6, + 0x8B19FA51E5B3AE37, 0x9CCFB5408A127D0B, 0xBC0C78B508208F5A, 0xE533E3842288ECED, + 0xCEC2C7D377C15FD2, 0xEC7817B6505D0F5E, 0xB94CC2C08336871D, 0x8C205DB4CB0B04AD, + 0x763C855B28A0892F, 0x588D1B79F6FF3257, 0x3FECF69E4311933E, 0x0FC0D39F803A18C9, + 0xEE010A26F5F3AD83, 0x10EFE8F4411979A6, 0x5DCDA10C7DE93A10, 0x4A1BEE1D1248E92C, + 0x53BFF2DB21847339, 0xB4F50CCFA6A23D09, 0x5FB4BC9CD84798CD, 0xE88A2D8B071C56F9, + 0x7F7771695A756A9C, 0xC5F02E71A0BA1EBC, 0xA663F9AB4215E672, 0x2EB19E22DE5FBB78, + 0x0DB9CE0F2594BA14, 0x82520E6397664D84, 0x2F031E6A0208EA98, 0x5C7F2144A1BE6BF0, + 0x7A37CB1CD16362DB, 0x83E08E2B4B311C64, 0xCF70479BAB960E32, 0x856BA986B9DEE71E, + 0xB5478C877AF56CE9, 0xB8FE42885F61D6FD, 0x1BDD0156966238C8, 0x622157923EF8A92E, + 0xFC97FF42114476F8, 0x9D7D350856452CEB, 0x4C90C9B0E0A71256, 0x2308502DFBCB016C, + 0x2D7A03FAA7A64845, 0xF46E8B38BFC6C4AB, 0xBDBEF8FDD477DEBA, 0x3AAC4CEBC8079B79, + 0xF09CB105E8879D0C, 0x27FA6A10AC8A58CB, 0x8960E7C1401D0CEA, 0x1A6F811E4A356928, + 0x90C4FB0773D196FF, 0x43501A2F609D0A9F, 0xF7A516E0C63F3796, 0x1CE4A6B3B8DA9252, + 0x1324752C38E08A9B, 0xA5A864733BEC154F, 0x2BF124575549B33F, 0xD766DB15440DC5C7, + 0xA7D179E39E42B792, 0xDADF151A61997FD3, 0x86A0345EC0271423, 0x38D5517B6DA939A4, + 0x6518F077104003B4, 0x02791D90A5AEA2DD, 0x88D267899C4A5D0A, 0x930F66DF0A2865C2, + 0x4EE9D4204509B08B, 0x325538916685292A, 0x412907BFC533A842, 0xB27E2B62544DC673, + 0x6C5304456295E007, 0x5AF406E95351908A, 0x1F2F3B6BC123616F, 0xC37B09DC5255E5C6, + 0x3967D133B1FE6844, 0x298839C7F0E711E2, 0x409B87F71964F9A2, 0xE938ADC3DB4B0719, + 0x0C0B4E47F9C3EBF4, 0x5534D576D36B8843, 0x4610A05AEB8B02D8, 0x20C3CDF58232F251, + 0x6DE1840DBEC2B1E7, 0xA0E8DE06B0FA1D08, 0x7B854B540D34333B, 0x42E29A67BCCA5B7F, + 0xD8A6088AC437DD0E, 0xC63BB3A9D943ED81, 0x21714DBD5E65A3B1, 0x6761EDE7B5EEA169, + 0x2431F7C8D573ABF6, 0xD51FC685E1A3671A, 0x5E063CD40410C92D, 0x283AB98F2CB04002, + 0x8FEBC06CB2F2F790, 0x17D64F116FA1D33C, 0xE07359F1A99EE4AA, 0x784ED68C74CDC006, + 0x6E2A19D5C73B42DA, 0x8712B4161C7045C3, 0x371582E4ED93216D, 0xACE390414939F6FC, + 0x7EC5F12186223B7C, 0xC0B094042BAC16FB, 0xF9D745379A527EBF, 0x737C3F2EA3B68168, + 0x33E7B8D9BAD278CA, 0xA9A32A34C22FFEBB, 0xE48163CCFEDFBD0D, 0x8E5940246EA5A670, + 0x51C6EF4B842AD1E4, 0x22BAD065279C508C, 0xD91488C218608CEE, 0x319EA5491F7CDA17, + 0xD394E128134C9C60, 0x094BF43272D5E3B3, 0x9BF612A5A4AAD791, 0xCCBBDA43D26FFD0F, + 0x34DE1F3C946AD250, 0x4F5B5468995EE16B, 0xDF9FAF6FEA8F7794, 0x2648EA5870DD092B, + 0xBFC7E56D71D97C67, 0xDDE6B2FF4F21D549, 0x3C276B463AE86003, 0x91767B4FAF86C71F, + 0x68A13E7835D4B9A0, 0xB68C115F030C9FD4, 0x141DD2C916582001, 0x983D8F7DDD5324AC, + 0x64AA703FCC175254, 0xC2C989948E02B426, 0x3E5E76D69F46C2DE, 0x50746F03587D8004, + 0x45DB3D829272F1E5, 0x60584A029B560BF3, 0xFBAE58A73FFCDC62, 0xA15A5E4E6CAD4CE8, + 0x4BA96E55CE1FB8CC, 0x08F9747AAE82B253, 0xC102144CF7FB471B, 0x9F042898F3EB8E36, + 0x068B27ADF2EFFB7A, 0xEDCA97FE8C0A5EBE, 0x778E0513F4F7D8CF, 0x302C2501C32B8BF7, + 0x8D92DDFC175C554D, 0xF865C57F46052F5F, 0xEAF3301BA2B2F424, 0xAA68B7ECBBD60D86, + 0x998F0F350104754C, 0x0000000000000000, 0xF12E314D34D0CCEC, 0x710522BE061823B5, + 0xAF280D9930C005C1, 0x97FD5CE25D693C65, 0x19A41CC633CC9A15, 0x95844172F8C79EB8, + 0xDC5432B7937684A9, 0x9436C13A2490CF58, 0x802B13F332C8EF59, 0xC442AE397CED4F5C, + 0xFA1CD8EFE3AB8D82, 0xF2E5AC954D293FD1, 0x6AD823E8907A1B7D, 0x4D2249F83CF043B6, + 0x03CB9DD879F9F33D, 0xDE2D2F2736D82674, 0x2A43A41F891EE2DF, 0x6F98999D1B6C133A, + 0xD4AD46CD3DF436FA, 0xBB35DF50269825C0, 0x964FDCAA813E6D85, 0xEB41B0537EE5A5C4, + 0x0540BA758B160847, 0xA41AE43BE7BB44AF, 0xE3B8C429D0671797, 0x819993BBEE9FBEB9, + 0xAE9A8DD1EC975421, 0xF3572CDD917E6E31, 0x6393D7DAE2AFF8CE, 0x47A2201237DC5338, + 0xA32343DEC903EE35, 0x79FC56C4A89A91E6, 0x01B28048DC5751E0, 0x1296F564E4B7DB7B, + 0x75F7188351597A12, 0xDB6D9552BDCE2E33, 0x1E9DBB231D74308F, 0x520D7293FDD322D9, + 0xE20A44610C304677, 0xFEEEE2D2B4EAD425, 0xCA30FDEE20800675, 0x61EACA4A47015A13, + 0xE74AFE1487264E30, 0x2CC883B27BF119A5, 0x1664CF59B3F682DC, 0xA811AA7C1E78AF5B, + 0x1D5626FB648DC3B2, 0xB73E9117DF5BCE34, 0xD05F7CF06AB56F5D, 0xFD257F0ACD132718, + 0x574DC8E676C52A9E, 0x0739A7E52EB8AA9A, 0x5486553E0F3CD9A3, 0x56FF48AEAA927B7E, + 0xBE756525AD8E2D87, 0x7D0E6CF9FFDBC841, 0x3B1ECCA31450CA99, 0x6913BE30E983E840, + 0xAD511009956EA71C, 0xB1B5B6BA2DB4354E, 0x4469BDCA4E25A005, 0x15AF5281CA0F71E1, + 0x744598CB8D0E2BF2, 0x593F9B312AA863B7, 0xEFB38A6E29A4FC63, 0x6B6AA3A04C2D4A9D, + 0x3D95EB0EE6BF31E3, 0xA291C3961554BFD5, 0x18169C8EEF9BCBF5, 0x115D68BC9D4E2846, + 0xBA875F18FACF7420, 0xD1EDFCB8B6E23EBD, 0xB00736F2F1E364AE, 0x84D929CE6589B6FE, + 0x70B7A2F6DA4F7255, 0x0E7253D75C6D4929, 0x04F23A3D574159A7, 0x0A8069EA0B2C108E, + 0x49D073C56BB11A11, 0x8AAB7A1939E4FFD7, 0xCD095A0B0E38ACEF, 0xC9FB60365979F548, + 0x92BDE697D67F3422, 0xC78933E10514BC61, 0xE1C1D9B975C9B54A, 0xD2266160CF1BCD80, + 0x9A4492ED78FD8671, 0xB3CCAB2A881A9793, 0x72CEBF667FE1D088, 0xD6D45B5D985A9427 +}; +__constant static sph_u64 T1[256] = { + 0xC811A8058C3F55DE, 0x65F5B43196B50619, 0xF74F96B1D6706E43, 0x859D1E8BCB43D336, + 0x5AAB8A85CCFA3D84, 0xF9C7BF99C295FCFD, 0xA21FD5A1DE4B630F, 0xCDB3EF763B8B456D, + 0x803F59F87CF7C385, 0xB27C73BE5F31913C, 0x98E3AC6633B04821, 0xBF61674C26B8F818, + 0x0FFBC995C4C130C8, 0xAAA0862010761A98, 0x6057F342210116AA, 0xF63C760C0654CC35, + 0x2DDB45CC667D9042, 0xBCF45A964BD40382, 0x68E8A0C3EF3C6F3D, 0xA7BD92D269FF73BC, + 0x290AE20201ED2287, 0xB7DE34CDE885818F, 0xD901EEA7DD61059B, 0xD6FA273219A03553, + 0xD56F1AE874CCCEC9, 0xEA31245C2E83F554, 0x7034555DA07BE499, 0xCE26D2AC56E7BEF7, + 0xFD161857A5054E38, 0x6A0E7DA4527436D1, 0x5BD86A381CDE9FF2, 0xCAF7756231770C32, + 0xB09AAED9E279C8D0, 0x5DEF1091C60674DB, 0x111046A2515E5045, 0x23536CE4729802FC, + 0xC50CBCF7F5B63CFA, 0x73A16887CD171F03, 0x7D2941AFD9F28DBD, 0x3F5E3EB45A4F3B9D, + 0x84EEFE361B677140, 0x3DB8E3D3E7076271, 0x1A3A28F9F20FD248, 0x7EBC7C75B49E7627, + 0x74E5F293C7EB565C, 0x18DCF59E4F478BA4, 0x0C6EF44FA9ADCB52, 0xC699812D98DAC760, + 0x788B06DC6E469D0E, 0xFC65F8EA7521EC4E, 0x30A5F7219E8E0B55, 0x2BEC3F65BCA57B6B, + 0xDDD04969BAF1B75E, 0x99904CDBE394EA57, 0x14B201D1E6EA40F6, 0xBBB0C08241284ADD, + 0x50F20463BF8F1DFF, 0xE8D7F93B93CBACB8, 0x4D8CB68E477C86E8, 0xC1DD1B3992268E3F, + 0x7C5AA11209D62FCB, 0x2F3D98ABDB35C9AE, 0x671369562BFD5FF5, 0x15C1E16C36CEE280, + 0x1D7EB2EDF8F39B17, 0xDA94D37DB00DFE01, 0x877BC3EC760B8ADA, 0xCB8495DFE153AE44, + 0x05A24773B7B410B3, 0x12857B783C32ABDF, 0x8EB770D06812513B, 0x536739B9D2E3E665, + 0x584D57E271B26468, 0xD789C78FC9849725, 0xA935BBFA7D1AE102, 0x8B1537A3DFA64188, + 0xD0CD5D9BC378DE7A, 0x4AC82C9A4D80CFB7, 0x42777F1B83BDB620, 0x72D2883A1D33BD75, + 0x5E7A2D4BAB6A8F41, 0xF4DAAB6BBB1C95D9, 0x905CFFE7FD8D31B6, 0x83AA6422119B381F, + 0xC0AEFB8442022C49, 0xA0F908C663033AE3, 0xA428AF0804938826, 0xADE41C341A8A53C7, + 0xAE7121EE77E6A85D, 0xC47F5C4A25929E8C, 0xB538E9AA55CDD863, 0x06377AA9DAD8EB29, + 0xA18AE87BB3279895, 0x6EDFDA6A35E48414, 0x6B7D9D19825094A7, 0xD41CFA55A4E86CBF, + 0xE5CAEDC9EA42C59C, 0xA36C351C0E6FC179, 0x5181E4DE6FABBF89, 0xFFF0C530184D17D4, + 0x9D41EB1584045892, 0x1C0D525028D73961, 0xF178EC180CA8856A, 0x9A0571018EF811CD, + 0x4091A27C3EF5EFCC, 0x19AF15239F6329D2, 0x347450EFF91EB990, 0xE11B4A078DD27759, + 0xB9561DE5FC601331, 0x912F1F5A2DA993C0, 0x1654DCB65BA2191A, 0x3E2DDE098A6B99EB, + 0x8A66D71E0F82E3FE, 0x8C51ADB7D55A08D7, 0x4533E50F8941FF7F, 0x02E6DD67BD4859EC, + 0xE068AABA5DF6D52F, 0xC24826E3FF4A75A5, 0x6C39070D88ACDDF8, 0x6486548C4691A46F, + 0xD1BEBD26135C7C0C, 0xB30F93038F15334A, 0x82D9849FC1BF9A69, 0x9C320BA85420FAE4, + 0xFA528243AFF90767, 0x9ED4D6CFE968A308, 0xB825FD582C44B147, 0x9B7691BC5EDCB3BB, + 0xC7EA619048FE6516, 0x1063A61F817AF233, 0x47D538683409A693, 0x63C2CE984C6DED30, + 0x2A9FDFD86C81D91D, 0x7B1E3B06032A6694, 0x666089EBFBD9FD83, 0x0A598EE67375207B, + 0x07449A140AFC495F, 0x2CA8A571B6593234, 0x1F986F8A45BBC2FB, 0x381AA4A050B372C2, + 0x5423A3ADD81FAF3A, 0x17273C0B8B86BB6C, 0xFE83258DC869B5A2, 0x287902BFD1C980F1, + 0xF5A94BD66B3837AF, 0x88800A79B2CABA12, 0x55504310083B0D4C, 0xDF36940E07B9EEB2, + 0x04D1A7CE6790B2C5, 0x612413FFF125B4DC, 0x26F12B97C52C124F, 0x86082351A62F28AC, + 0xEF93632F9937E5E7, 0x3507B052293A1BE6, 0xE72C30AE570A9C70, 0xD3586041AE1425E0, + 0xDE4574B3D79D4CC4, 0x92BA228040C5685A, 0xF00B0CA5DC8C271C, 0xBE1287F1F69C5A6E, + 0xF39E317FB1E0DC86, 0x495D114020EC342D, 0x699B407E3F18CD4B, 0xDCA3A9D46AD51528, + 0x0D1D14F279896924, 0x0000000000000000, 0x593EB75FA196C61E, 0x2E4E78160B116BD8, + 0x6D4AE7B058887F8E, 0xE65FD013872E3E06, 0x7A6DDBBBD30EC4E2, 0xAC97FC89CAAEF1B1, + 0x09CCB33C1E19DBE1, 0x89F3EAC462EE1864, 0x7770CF49AA87ADC6, 0x56C57ECA6557F6D6, + 0x03953DDA6D6CFB9A, 0x36928D884456E07C, 0x1EEB8F37959F608D, 0x31D6179C4EAAA923, + 0x6FAC3AD7E5C02662, 0x43049FA653991456, 0xABD3669DC052B8EE, 0xAF02C153A7C20A2B, + 0x3CCB036E3723C007, 0x93C9C23D90E1CA2C, 0xC33BC65E2F6ED7D3, 0x4CFF56339758249E, + 0xB1E94E64325D6AA6, 0x37E16D359472420A, 0x79F8E661BE623F78, 0x5214D90402C74413, + 0x482EF1FDF0C8965B, 0x13F69BC5EC1609A9, 0x0E88292814E592BE, 0x4E198B542A107D72, + 0xCCC00FCBEBAFE71B, 0x1B49C844222B703E, 0x2564164DA840E9D5, 0x20C6513E1FF4F966, + 0xBAC3203F910CE8AB, 0xF2EDD1C261C47EF0, 0x814CB945ACD361F3, 0x95FEB8944A392105, + 0x5C9CF02C1622D6AD, 0x971865F3F77178E9, 0xBD87BA2B9BF0A1F4, 0x444005B259655D09, + 0xED75BE48247FBC0B, 0x7596122E17CFF42A, 0xB44B091785E97A15, 0x966B854E2755DA9F, + 0xEEE0839249134791, 0x32432A4623C652B9, 0xA8465B47AD3E4374, 0xF8B45F2412B15E8B, + 0x2417F6F078644BA3, 0xFB2162FE7FDDA511, 0x4BBBCC279DA46DC1, 0x0173E0BDD024A276, + 0x22208C59A2BCA08A, 0x8FC4906DB836F34D, 0xE4B90D743A6667EA, 0x7147B5E0705F46EF, + 0x2782CB2A1508B039, 0xEC065EF5F45B1E7D, 0x21B5B183CFD05B10, 0xDBE733C060295C77, + 0x9FA73672394C017E, 0xCF55321186C31C81, 0xD8720E1A0D45A7ED, 0x3B8F997A3DDF8958, + 0x3AFC79C7EDFB2B2E, 0xE9A4198643EF0ECE, 0x5F09CDF67B4E2D37, 0x4F6A6BE9FA34DF04, + 0xB6ADD47038A123F9, 0x8D224D0A057EAAA1, 0xC96248B85C1BF7A8, 0xE3FD9760309A2EB5, + 0x0B2A6E5BA351820D, 0xEB42C4E1FEA75722, 0x948D58299A1D8373, 0x7FCF9CC864BAD451, + 0xA55B4FB5D4B72A50, 0x08BF5381CE3D7997, 0x46A6D8D5E42D04E5, 0xD22B80FC7E308796, + 0x57B69E77B57354A0, 0x3969441D8097D0B4, 0x3330CAFBF3E2F0CF, 0xE28E77DDE0BE8CC3, + 0x62B12E259C494F46, 0xA6CE726FB9DBD1CA, 0x41E242C1EED14DBA, 0x76032FF47AA30FB0 +}; +__constant static sph_u64 T2[256] = { + 0x45B268A93ACDE4CC, 0xAF7F0BE884549D08, 0x048354B3C1468263, 0x925435C2C80EFED2, + 0xEE4E37F27FDFFBA7, 0x167A33920C60F14D, 0xFB123B52EA03E584, 0x4A0CAB53FDBB9007, + 0x9DEAF6380F788A19, 0xCB48EC558F0CB32A, 0xB59DC4B2D6FEF7E0, 0xDCDBCA22F4F3ECB6, + 0x11DF5813549A9C40, 0xE33FDEDF568ACED3, 0xA0C1C8124322E9C3, 0x07A56B8158FA6D0D, + 0x77279579B1E1F3DD, 0xD9B18B74422AC004, 0xB8EC2D9FFFABC294, 0xF4ACF8A82D75914F, + 0x7BBF69B1EF2B6878, 0xC4F62FAF487AC7E1, 0x76CE809CC67E5D0C, 0x6711D88F92E4C14C, + 0x627B99D9243DEDFE, 0x234AA5C3DFB68B51, 0x909B1F15262DBF6D, 0x4F66EA054B62BCB5, + 0x1AE2CF5A52AA6AE8, 0xBEA053FBD0CE0148, 0xED6808C0E66314C9, 0x43FE16CD15A82710, + 0xCD049231A06970F6, 0xE7BC8A6C97CC4CB0, 0x337CE835FCB3B9C0, 0x65DEF2587CC780F3, + 0x52214EDE4132BB50, 0x95F15E4390F493DF, 0x870839625DD2E0F1, 0x41313C1AFB8B66AF, + 0x91720AF051B211BC, 0x477D427ED4EEA573, 0x2E3B4CEEF6E3BE25, 0x82627834EB0BCC43, + 0x9C03E3DD78E724C8, 0x2877328AD9867DF9, 0x14B51945E243B0F2, 0x574B0F88F7EB97E2, + 0x88B6FA989AA4943A, 0x19C4F068CB168586, 0x50EE6409AF11FAEF, 0x7DF317D5C04EABA4, + 0x7A567C5498B4C6A9, 0xB6BBFB804F42188E, 0x3CC22BCF3BC5CD0B, 0xD04336EAAA397713, + 0xF02FAC1BEC33132C, 0x2506DBA7F0D3488D, 0xD7E65D6BF2C31A1E, 0x5EB9B2161FF820F5, + 0x842E0650C46E0F9F, 0x716BEB1D9E843001, 0xA933758CAB315ED4, 0x3FE414FDA2792265, + 0x27C9F1701EF00932, 0x73A4C1CA70A771BE, 0x94184BA6E76B3D0E, 0x40D829FF8C14C87E, + 0x0FBEC3FAC77674CB, 0x3616A9634A6A9572, 0x8F139119C25EF937, 0xF545ED4D5AEA3F9E, + 0xE802499650BA387B, 0x6437E7BD0B582E22, 0xE6559F89E053E261, 0x80AD52E305288DFC, + 0x6DC55A23E34B9935, 0xDE14E0F51AD0AD09, 0xC6390578A659865E, 0x96D7617109487CB1, + 0xE2D6CB3A21156002, 0x01E915E5779FAED1, 0xADB0213F6A77DCB7, 0x9880B76EB9A1A6AB, + 0x5D9F8D248644CF9B, 0xFD5E4536C5662658, 0xF1C6B9FE9BACBDFD, 0xEACD6341BE9979C4, + 0xEFA7221708405576, 0x510771ECD88E543E, 0xC2BA51CB671F043D, 0x0AD482AC71AF5879, + 0xFE787A045CDAC936, 0xB238AF338E049AED, 0xBD866CC94972EE26, 0x615DA6EBBD810290, + 0x3295FDD08B2C1711, 0xF834046073BF0AEA, 0xF3099329758FFC42, 0x1CAEB13E7DCFA934, + 0xBA2307481188832B, 0x24EFCE42874CE65C, 0x0E57D61FB0E9DA1A, 0xB3D1BAD6F99B343C, + 0xC0757B1C893C4582, 0x2B510DB8403A9297, 0x5C7698C1F1DB614A, 0x3E0D0118D5E68CB4, + 0xD60F488E855CB4CF, 0xAE961E0DF3CB33D9, 0x3A8E55AB14A00ED7, 0x42170328623789C1, + 0x838B6DD19C946292, 0x895FEF7DED3B3AEB, 0xCFCBB8E64E4A3149, 0x064C7E642F65C3DC, + 0x3D2B3E2A4C5A63DA, 0x5BD3F340A9210C47, 0xB474D157A1615931, 0xAC5934DA1DE87266, + 0x6EE365117AF7765B, 0xC86ED36716B05C44, 0x9BA6885C201D49C5, 0xB905387A88346C45, + 0x131072C4BAB9DDFF, 0xBF49461EA751AF99, 0xD52977BC1CE05BA1, 0xB0F785E46027DB52, + 0x546D30BA6E57788C, 0x305AD707650F56AE, 0xC987C682612FF295, 0xA5AB8944F5FBC571, + 0x7ED528E759F244CA, 0x8DDCBBCE2C7DB888, 0xAA154ABE328DB1BA, 0x1E619BE993ECE88B, + 0x09F2BD9EE813B717, 0x7401AA4B285D1CB3, 0x21858F143195CAEE, 0x48C381841398D1B8, + 0xFCB750D3B2F98889, 0x39A86A998D1CE1B9, 0x1F888E0CE473465A, 0x7899568376978716, + 0x02CF2AD7EE2341BF, 0x85C713B5B3F1A14E, 0xFF916FE12B4567E7, 0x7C1A0230B7D10575, + 0x0C98FCC85ECA9BA5, 0xA3E7F720DA9E06AD, 0x6A6031A2BBB1F438, 0x973E74947ED7D260, + 0x2CF4663918C0FF9A, 0x5F50A7F368678E24, 0x34D983B4A449D4CD, 0x68AF1B755592B587, + 0x7F3C3D022E6DEA1B, 0xABFC5F5B45121F6B, 0x0D71E92D29553574, 0xDFFDF5106D4F03D8, + 0x081BA87B9F8C19C6, 0xDB7EA1A3AC0981BB, 0xBBCA12AD66172DFA, 0x79704366010829C7, + 0x179326777BFF5F9C, 0x0000000000000000, 0xEB2476A4C906D715, 0x724DD42F0738DF6F, + 0xB752EE6538DDB65F, 0x37FFBC863DF53BA3, 0x8EFA84FCB5C157E6, 0xE9EB5C73272596AA, + 0x1B0BDABF2535C439, 0x86E12C872A4D4E20, 0x9969A28BCE3E087A, 0xFAFB2EB79D9C4B55, + 0x056A4156B6D92CB2, 0x5A3AE6A5DEBEA296, 0x22A3B026A8292580, 0x53C85B3B36AD1581, + 0xB11E900117B87583, 0xC51F3A4A3FE56930, 0xE019E1EDCF3621BD, 0xEC811D2591FCBA18, + 0x445B7D4C4D524A1D, 0xA8DA6069DCAEF005, 0x58F5CC72309DE329, 0xD4C062596B7FF570, + 0xCE22AD0339D59F98, 0x591CD99747024DF8, 0x8B90C5AA03187B54, 0xF663D27FC356D0F0, + 0xD8589E9135B56ED5, 0x35309651D3D67A1C, 0x12F96721CD26732E, 0xD28C1C3D441A36AC, + 0x492A946164077F69, 0x2D1D73DC6F5F514B, 0x6F0A70F40D68D88A, 0x60B4B30ECA1EAC41, + 0xD36509D83385987D, 0x0B3D97490630F6A8, 0x9ECCC90A96C46577, 0xA20EE2C5AD01A87C, + 0xE49AB55E0E70A3DE, 0xA4429CA182646BA0, 0xDA97B446DB962F6A, 0xCCED87D4D7F6DE27, + 0x2AB8185D37A53C46, 0x9F25DCEFE15BCBA6, 0xC19C6EF9FEA3EB53, 0xA764A3931BD884CE, + 0x2FD2590B817C10F4, 0x56A21A6D80743933, 0xE573A0BB79EF0D0F, 0x155C0CA095DC1E23, + 0x6C2C4FC694D437E4, 0x10364DF623053291, 0xDD32DFC7836C4267, 0x03263F3299BCEF6E, + 0x66F8CD6AE57B6F9D, 0x8C35AE2B5BE21659, 0x31B3C2E21290F87F, 0x93BD2027BF915003, + 0x69460E90220D1B56, 0x299E276FAE19D328, 0x63928C3C53A2432F, 0x7082FEF8E91B9ED0, + 0xBC6F792C3EED40F7, 0x4C40D537D2DE53DB, 0x75E8BFAE5FC2B262, 0x4DA9C0D2A541FD0A, + 0x4E8FFFE03CFD1264, 0x2620E495696FA7E3, 0xE1F0F408B8A98F6C, 0xD1AA230FDDA6D9C2, + 0xC7D0109DD1C6288F, 0x8A79D04F7487D585, 0x4694579BA3710BA2, 0x38417F7CFA834F68, + 0x1D47A4DB0A5007E5, 0x206C9AF1460A643F, 0xA128DDF734BD4712, 0x8144470672B7232D, + 0xF2E086CC02105293, 0x182DE58DBC892B57, 0xCAA1F9B0F8931DFB, 0x6B892447CC2E5AE9, + 0xF9DD11850420A43B, 0x4BE5BEB68A243ED6, 0x5584255F19C8D65D, 0x3B67404E633FA006, + 0xA68DB6766C472A1F, 0xF78AC79AB4C97E21, 0xC353442E1080AAEC, 0x9A4F9DB95782E714 +}; +__constant static sph_u64 T3[256] = { + 0x05BA7BC82C9B3220, 0x31A54665F8B65E4F, 0xB1B651F77547F4D4, 0x8BFA0D857BA46682, + 0x85A96C5AA16A98BB, 0x990FAEF908EB79C9, 0xA15E37A247F4A62D, 0x76857DCD5D27741E, + 0xF8C50B800A1820BC, 0xBE65DCB201F7A2B4, 0x666D1B986F9426E7, 0x4CC921BF53C4E648, + 0x95410A0F93D9CA42, 0x20CDCCAA647BA4EF, 0x429A4060890A1871, 0x0C4EA4F69B32B38B, + 0xCCDA362DDE354CD3, 0x96DC23BC7C5B2FA9, 0xC309BB68AA851AB3, 0xD26131A73648E013, + 0x021DC52941FC4DB2, 0xCD5ADAB7704BE48A, 0xA77965D984ED71E6, 0x32386FD61734BBA4, + 0xE82D6DD538AB7245, 0x5C2147EA6177B4B1, 0x5DA1AB70CF091CE8, 0xAC907FCE72B8BDFF, + 0x57C85DFD972278A8, 0xA4E44C6A6B6F940D, 0x3851995B4F1FDFE4, 0x62578CCAED71BC9E, + 0xD9882BB0C01D2C0A, 0x917B9D5D113C503B, 0xA2C31E11A87643C6, 0xE463C923A399C1CE, + 0xF71686C57EA876DC, 0x87B4A973E096D509, 0xAF0D567D9D3A5814, 0xB40C2A3F59DCC6F4, + 0x3602F88495D121DD, 0xD3E1DD3D9836484A, 0xF945E71AA46688E5, 0x7518547EB2A591F5, + 0x9366587450C01D89, 0x9EA81018658C065B, 0x4F54080CBC4603A3, 0x2D0384C65137BF3D, + 0xDC325078EC861E2A, 0xEA30A8FC79573FF7, 0x214D2030CA050CB6, 0x65F0322B8016C30C, + 0x69BE96DD1B247087, 0xDB95EE9981E161B8, 0xD1FC1814D9CA05F8, 0x820ED2BBCC0DE729, + 0x63D76050430F14C7, 0x3BCCB0E8A09D3A0F, 0x8E40764D573F54A2, 0x39D175C1E16177BD, + 0x12F5A37C734F1F4B, 0xAB37C12F1FDFC26D, 0x5648B167395CD0F1, 0x6C04ED1537BF42A7, + 0xED97161D14304065, 0x7D6C67DAAB72B807, 0xEC17FA87BA4EE83C, 0xDFAF79CB0304FBC1, + 0x733F060571BC463E, 0x78D61C1287E98A27, 0xD07CF48E77B4ADA1, 0xB9C262536C90DD26, + 0xE2449B5860801605, 0x8FC09AD7F941FCFB, 0xFAD8CEA94BE46D0E, 0xA343F28B0608EB9F, + 0x9B126BD04917347B, 0x9A92874AE7699C22, 0x1B017C42C4E69EE0, 0x3A4C5C720EE39256, + 0x4B6E9F5E3EA399DA, 0x6BA353F45AD83D35, 0xE7FEE0904C1B2425, 0x22D009832587E95D, + 0x842980C00F1430E2, 0xC6B3C0A0861E2893, 0x087433A419D729F2, 0x341F3DADD42D6C6F, + 0xEE0A3FAEFBB2A58E, 0x4AEE73C490DD3183, 0xAAB72DB5B1A16A34, 0xA92A04065E238FDF, + 0x7B4B35A1686B6FCC, 0x6A23BF6EF4A6956C, 0x191CB96B851AD352, 0x55D598D4D6DE351A, + 0xC9604DE5F2AE7EF3, 0x1CA6C2A3A981E172, 0xDE2F9551AD7A5398, 0x3025AAFF56C8F616, + 0x15521D9D1E2860D9, 0x506FE31CFA45073A, 0x189C55F12B647B0B, 0x0180EC9AAE7EA859, + 0x7CEC8B40050C105E, 0x2350E5198BF94104, 0xEF8AD33455CC0DD7, 0x07A7BEE16D677F92, + 0xE5E325B90DE76997, 0x5A061591A26E637A, 0xB611EF1618208B46, 0x09F4DF3EB7A981AB, + 0x1EBB078AE87DACC0, 0xB791038CB65E231F, 0x0FD38D4574B05660, 0x67EDF702C1EA8EBE, + 0xBA5F4BE0831238CD, 0xE3C477C2CEFEBE5C, 0x0DCE486C354C1BD2, 0x8C5DB36416C31910, + 0x26EA9ED1A7627324, 0x039D29B3EF82E5EB, 0x9F28FC82CBF2AE02, 0xA8AAE89CF05D2786, + 0x431AACFA2774B028, 0xCF471F9E31B7A938, 0x581BD0B8E3922EC8, 0xBC78199B400BEF06, + 0x90FB71C7BF42F862, 0x1F3BEB1046030499, 0x683E7A47B55AD8DE, 0x988F4263A695D190, + 0xD808C72A6E638453, 0x0627527BC319D7CB, 0xEBB04466D72997AE, 0xE67E0C0AE2658C7C, + 0x14D2F107B056C880, 0x7122C32C30400B8C, 0x8A7AE11FD5DACEDB, 0xA0DEDB38E98A0E74, + 0xAD109354DCC615A6, 0x0BE91A17F655CC19, 0x8DDD5FFEB8BDB149, 0xBFE53028AF890AED, + 0xD65BA6F5B4AD7A6A, 0x7956F0882997227E, 0x10E8665532B352F9, 0x0E5361DFDACEFE39, + 0xCEC7F3049FC90161, 0xFF62B561677F5F2E, 0x975CCF26D22587F0, 0x51EF0F86543BAF63, + 0x2F1E41EF10CBF28F, 0x52722635BBB94A88, 0xAE8DBAE73344F04D, 0x410769D36688FD9A, + 0xB3AB94DE34BBB966, 0x801317928DF1AA9B, 0xA564A0F0C5113C54, 0xF131D4BEBDB1A117, + 0x7F71A2F3EA8EF5B5, 0x40878549C8F655C3, 0x7EF14E6944F05DEC, 0xD44663DCF55137D8, + 0xF2ACFD0D523344FC, 0x0000000000000000, 0x5FBC6E598EF5515A, 0x16CF342EF1AA8532, + 0xB036BD6DDB395C8D, 0x13754FE6DD31B712, 0xBBDFA77A2D6C9094, 0x89E7C8AC3A582B30, + 0x3C6B0E09CDFA459D, 0xC4AE0589C7E26521, 0x49735A777F5FD468, 0xCAFD64561D2C9B18, + 0xDA1502032F9FC9E1, 0x8867243694268369, 0x3782141E3BAF8984, 0x9CB5D53124704BE9, + 0xD7DB4A6F1AD3D233, 0xA6F989432A93D9BF, 0x9D3539AB8A0EE3B0, 0x53F2CAAF15C7E2D1, + 0x6E19283C76430F15, 0x3DEBE2936384EDC4, 0x5E3C82C3208BF903, 0x33B8834CB94A13FD, + 0x6470DEB12E686B55, 0x359FD1377A53C436, 0x61CAA57902F35975, 0x043A975282E59A79, + 0xFD7F70482683129C, 0xC52EE913699CCD78, 0x28B9FF0E7DAC8D1D, 0x5455744E78A09D43, + 0xCB7D88CCB3523341, 0x44BD121B4A13CFBA, 0x4D49CD25FDBA4E11, 0x3E76CB208C06082F, + 0x3FF627BA2278A076, 0xC28957F204FBB2EA, 0x453DFE81E46D67E3, 0x94C1E6953DA7621B, + 0x2C83685CFF491764, 0xF32C1197FC4DECA5, 0x2B24D6BD922E68F6, 0xB22B78449AC5113F, + 0x48F3B6EDD1217C31, 0x2E9EAD75BEB55AD6, 0x174FD8B45FD42D6B, 0x4ED4E4961238ABFA, + 0x92E6B4EEFEBEB5D0, 0x46A0D7320BEF8208, 0x47203BA8A5912A51, 0x24F75BF8E69E3E96, + 0xF0B1382413CF094E, 0xFEE259FBC901F777, 0x276A724B091CDB7D, 0xBDF8F501EE75475F, + 0x599B3C224DEC8691, 0x6D84018F99C1EAFE, 0x7498B8E41CDB39AC, 0xE0595E71217C5BB7, + 0x2AA43A273C50C0AF, 0xF50B43EC3F543B6E, 0x838E3E2162734F70, 0xC09492DB4507FF58, + 0x72BFEA9FDFC2EE67, 0x11688ACF9CCDFAA0, 0x1A8190D86A9836B9, 0x7ACBD93BC615C795, + 0xC7332C3A286080CA, 0x863445E94EE87D50, 0xF6966A5FD0D6DE85, 0xE9AD814F96D5DA1C, + 0x70A22FB69E3EA3D5, 0x0A69F68D582B6440, 0xB8428EC9C2EE757F, 0x604A49E3AC8DF12C, + 0x5B86F90B0C10CB23, 0xE1D9B2EB8F02F3EE, 0x29391394D3D22544, 0xC8E0A17F5CD0D6AA, + 0xB58CC6A5F7A26EAD, 0x8193FB08238F02C2, 0xD5C68F465B2F9F81, 0xFCFF9CD288FDBAC5, + 0x77059157F359DC47, 0x1D262E3907FF492B, 0xFB582233E59AC557, 0xDDB2BCE242F8B673, + 0x2577B76248E096CF, 0x6F99C4A6D83DA74C, 0xC1147E41EB795701, 0xF48BAF76912A9337 +}; +__constant static sph_u64 T4[256] = { + 0x3EF29D249B2C0A19, 0xE9E16322B6F8622F, 0x5536994047757F7A, 0x9F4D56D5A47B0B33, + 0x822567466AA1174C, 0xB8F5057DEB082FB2, 0xCC48C10BF4475F53, 0x373088D4275DEC3A, + 0x968F4325180AED10, 0x173D232CF7016151, 0xAE4ED09F946FCC13, 0xFD4B4741C4539873, + 0x1B5B3F0DD9933765, 0x2FFCB0967B644052, 0xE02376D20A89840C, 0xA3AE3A70329B18D7, + 0x419CBD2335DE8526, 0xFAFEBF115B7C3199, 0x0397074F85AA9B0D, 0xC58AD4FB4836B970, + 0xBEC60BE3FC4104A8, 0x1EFF36DC4B708772, 0x131FDC33ED8453B6, 0x0844E33E341764D3, + 0x0FF11B6EAB38CD39, 0x64351F0A7761B85A, 0x3B5694F509CFBA0E, 0x30857084B87245D0, + 0x47AFB3BD2297AE3C, 0xF2BA5C2F6F6B554A, 0x74BDC4761F4F70E1, 0xCFDFC64471EDC45E, + 0xE610784C1DC0AF16, 0x7ACA29D63C113F28, 0x2DED411776A859AF, 0xAC5F211E99A3D5EE, + 0xD484F949A87EF33B, 0x3CE36CA596E013E4, 0xD120F0983A9D432C, 0x6BC40464DC597563, + 0x69D5F5E5D1956C9E, 0x9AE95F043698BB24, 0xC9ECC8DA66A4EF44, 0xD69508C8A5B2EAC6, + 0xC40C2235C0503B80, 0x38C193BA8C652103, 0x1CEEC75D46BC9E8F, 0xD331011937515AD1, + 0xD8E2E56886ECA50F, 0xB137108D5779C991, 0x709F3B6905CA4206, 0x4FEB50831680CAEF, + 0xEC456AF3241BD238, 0x58D673AFE181ABBE, 0x242F54E7CAD9BF8C, 0x0211F1810DCC19FD, + 0x90BC4DBB0F43C60A, 0x9518446A9DA0761D, 0xA1BFCBF13F57012A, 0x2BDE4F8961E172B5, + 0x27B853A84F732481, 0xB0B1E643DF1F4B61, 0x18CC38425C39AC68, 0xD2B7F7D7BF37D821, + 0x3103864A3014C720, 0x14AA246372ABFA5C, 0x6E600DB54EBAC574, 0x394765740403A3F3, + 0x09C215F0BC71E623, 0x2A58B947E987F045, 0x7B4CDF18B477BDD8, 0x9709B5EB906C6FE0, + 0x73083C268060D90B, 0xFEDC400E41F9037E, 0x284948C6E44BE9B8, 0x728ECAE808065BFB, + 0x06330E9E17492B1A, 0x5950856169E7294E, 0xBAE4F4FCE6C4364F, 0xCA7BCF95E30E7449, + 0x7D7FD186A33E96C2, 0x52836110D85AD690, 0x4DFAA1021B4CD312, 0x913ABB75872544FA, + 0xDD46ECB9140F1518, 0x3D659A6B1E869114, 0xC23F2CABD719109A, 0xD713FE062DD46836, + 0xD0A60656B2FBC1DC, 0x221C5A79DD909496, 0xEFD26DBCA1B14935, 0x0E77EDA0235E4FC9, + 0xCBFD395B6B68F6B9, 0x0DE0EAEFA6F4D4C4, 0x0422FF1F1A8532E7, 0xF969B85EDED6AA94, + 0x7F6E2007AEF28F3F, 0x3AD0623B81A938FE, 0x6624EE8B7AADA1A7, 0xB682E8DDC856607B, + 0xA78CC56F281E2A30, 0xC79B257A45FAA08D, 0x5B4174E0642B30B3, 0x5F638BFF7EAE0254, + 0x4BC9AF9C0C05F808, 0xCE59308AF98B46AE, 0x8FC58DA9CC55C388, 0x803496C7676D0EB1, + 0xF33CAAE1E70DD7BA, 0xBB6202326EA2B4BF, 0xD5020F87201871CB, 0x9D5CA754A9B712CE, + 0x841669D87DE83C56, 0x8A6184785EB6739F, 0x420BBA6CB0741E2B, 0xF12D5B60EAC1CE47, + 0x76AC35F71283691C, 0x2C6BB7D9FECEDB5F, 0xFCCDB18F4C351A83, 0x1F79C012C3160582, + 0xF0ABADAE62A74CB7, 0xE1A5801C82EF06FC, 0x67A21845F2CB2357, 0x5114665F5DF04D9D, + 0xBF40FD2D74278658, 0xA0393D3FB73183DA, 0x05A409D192E3B017, 0xA9FB28CF0B4065F9, + 0x25A9A22942BF3D7C, 0xDB75E22703463E02, 0xB326E10C5AB5D06C, 0xE7968E8295A62DE6, + 0xB973F3B3636EAD42, 0xDF571D3819C30CE5, 0xEE549B7229D7CBC5, 0x12992AFD65E2D146, + 0xF8EF4E9056B02864, 0xB7041E134030E28B, 0xC02EDD2ADAD50967, 0x932B4AF48AE95D07, + 0x6FE6FB7BC6DC4784, 0x239AACB755F61666, 0x401A4BEDBDB807D6, 0x485EA8D389AF6305, + 0xA41BC220ADB4B13D, 0x753B32B89729F211, 0x997E584BB3322029, 0x1D683193CEDA1C7F, + 0xFF5AB6C0C99F818E, 0x16BBD5E27F67E3A1, 0xA59D34EE25D233CD, 0x98F8AE853B54A2D9, + 0x6DF70AFACB105E79, 0x795D2E99B9BBA425, 0x8E437B6744334178, 0x0186F6CE886682F0, + 0xEBF092A3BB347BD2, 0xBCD7FA62F18D1D55, 0xADD9D7D011C5571E, 0x0BD3E471B1BDFFDE, + 0xAA6C2F808EEAFEF4, 0x5EE57D31F6C880A4, 0xF50FA47FF044FCA0, 0x1ADDC9C351F5B595, + 0xEA76646D3352F922, 0x0000000000000000, 0x85909F16F58EBEA6, 0x46294573AAF12CCC, + 0x0A5512BF39DB7D2E, 0x78DBD85731DD26D5, 0x29CFBE086C2D6B48, 0x218B5D36583A0F9B, + 0x152CD2ADFACD78AC, 0x83A39188E2C795BC, 0xC3B9DA655F7F926A, 0x9ECBA01B2C1D89C3, + 0x07B5F8509F2FA9EA, 0x7EE8D6C926940DCF, 0x36B67E1AAF3B6ECA, 0x86079859702425AB, + 0xFB7849DFD31AB369, 0x4C7C57CC932A51E2, 0xD96413A60E8A27FF, 0x263EA566C715A671, + 0x6C71FC344376DC89, 0x4A4F595284637AF8, 0xDAF314E98B20BCF2, 0x572768C14AB96687, + 0x1088DB7C682EC8BB, 0x887075F9537A6A62, 0x2E7A4658F302C2A2, 0x619116DBE582084D, + 0xA87DDE018326E709, 0xDCC01A779C6997E8, 0xEDC39C3DAC7D50C8, 0xA60A33A1A078A8C0, + 0xC1A82BE452B38B97, 0x3F746BEA134A88E9, 0xA228CCBEBAFD9A27, 0xABEAD94E068C7C04, + 0xF48952B178227E50, 0x5CF48CB0FB049959, 0x6017E0156DE48ABD, 0x4438B4F2A73D3531, + 0x8C528AE649FF5885, 0xB515EF924DFCFB76, 0x0C661C212E925634, 0xB493195CC59A7986, + 0x9CDA519A21D1903E, 0x32948105B5BE5C2D, 0x194ACE8CD45F2E98, 0x438D4CA238129CDB, + 0x9B6FA9CABEFE39D4, 0x81B26009EF0B8C41, 0xDED1EBF691A58E15, 0x4E6DA64D9EE6481F, + 0x54B06F8ECF13FD8A, 0x49D85E1D01C9E1F5, 0xAFC826511C094EE3, 0xF698A33075EE67AD, + 0x5AC7822EEC4DB243, 0x8DD47C28C199DA75, 0x89F68337DB1CE892, 0xCDCE37C57C21DDA3, + 0x530597DE503C5460, 0x6A42F2AA543FF793, 0x5D727A7E73621BA9, 0xE232875307459DF1, + 0x56A19E0FC2DFE477, 0xC61DD3B4CD9C227D, 0xE5877F03986A341B, 0x949EB2A415C6F4ED, + 0x6206119460289340, 0x6380E75AE84E11B0, 0x8BE772B6D6D0F16F, 0x50929091D596CF6D, + 0xE86795EC3E9EE0DF, 0x7CF927482B581432, 0xC86A3E14EEC26DB4, 0x7119CDA78DACC0F6, + 0xE40189CD100CB6EB, 0x92ADBC3A028FDFF7, 0xB2A017C2D2D3529C, 0x200DABF8D05C8D6B, + 0x34A78F9BA2F77737, 0xE3B4719D8F231F01, 0x45BE423C2F5BB7C1, 0xF71E55FEFD88E55D, + 0x6853032B59F3EE6E, 0x65B3E9C4FF073AAA, 0x772AC3399AE5EBEC, 0x87816E97F842A75B, + 0x110E2DB2E0484A4B, 0x331277CB3DD8DEDD, 0xBD510CAC79EB9FA5, 0x352179552A91F5C7 +}; +__constant static sph_u64 T5[256] = { + 0x8AB0A96846E06A6D, 0x43C7E80B4BF0B33A, 0x08C9B3546B161EE5, 0x39F1C235EBA990BE, + 0xC1BEF2376606C7B2, 0x2C209233614569AA, 0xEB01523B6FC3289A, 0x946953AB935ACEDD, + 0x272838F63E13340E, 0x8B0455ECA12BA052, 0x77A1B2C4978FF8A2, 0xA55122CA13E54086, + 0x2276135862D3F1CD, 0xDB8DDFDE08B76CFE, 0x5D1E12C89E4A178A, 0x0E56816B03969867, + 0xEE5F79953303ED59, 0xAFED748BAB78D71D, 0x6D929F2DF93E53EE, 0xF5D8A8F8BA798C2A, + 0xF619B1698E39CF6B, 0x95DDAF2F749104E2, 0xEC2A9C80E0886427, 0xCE5C8FD8825B95EA, + 0xC4E0D9993AC60271, 0x4699C3A5173076F9, 0x3D1B151F50A29F42, 0x9ED505EA2BC75946, + 0x34665ACFDC7F4B98, 0x61B1FB53292342F7, 0xC721C0080E864130, 0x8693CD1696FD7B74, + 0x872731927136B14B, 0xD3446C8A63A1721B, 0x669A35E8A6680E4A, 0xCAB658F239509A16, + 0xA4E5DE4EF42E8AB9, 0x37A7435EE83F08D9, 0x134E6239E26C7F96, 0x82791A3C2DF67488, + 0x3F6EF00A8329163C, 0x8E5A7E42FDEB6591, 0x5CAAEE4C7981DDB5, 0x19F234785AF1E80D, + 0x255DDDE3ED98BD70, 0x50898A32A99CCCAC, 0x28CA4519DA4E6656, 0xAE59880F4CB31D22, + 0x0D9798FA37D6DB26, 0x32F968F0B4FFCD1A, 0xA00F09644F258545, 0xFA3AD5175E24DE72, + 0xF46C547C5DB24615, 0x713E80FBFF0F7E20, 0x7843CF2B73D2AAFA, 0xBD17EA36AEDF62B4, + 0xFD111BACD16F92CF, 0x4ABAA7DBC72D67E0, 0xB3416B5DAD49FAD3, 0xBCA316B24914A88B, + 0x15D150068AECF914, 0xE27C1DEBE31EFC40, 0x4FE48C759BEDA223, 0x7EDCFD141B522C78, + 0x4E5070F17C26681C, 0xE696CAC15815F3BC, 0x35D2A64B3BB481A7, 0x800CFF29FE7DFDF6, + 0x1ED9FAC3D5BAA4B0, 0x6C2663A91EF599D1, 0x03C1199134404341, 0xF7AD4DED69F20554, + 0xCD9D9649B61BD6AB, 0xC8C3BDE7EADB1368, 0xD131899FB02AFB65, 0x1D18E352E1FAE7F1, + 0xDA39235AEF7CA6C1, 0xA1BBF5E0A8EE4F7A, 0x91377805CF9A0B1E, 0x3138716180BF8E5B, + 0xD9F83ACBDB3CE580, 0x0275E515D38B897E, 0x472D3F21F0FBBCC6, 0x2D946EB7868EA395, + 0xBA3C248D21942E09, 0xE7223645BFDE3983, 0xFF64FEB902E41BB1, 0xC97741630D10D957, + 0xC3CB1722B58D4ECC, 0xA27AEC719CAE0C3B, 0x99FECB51A48C15FB, 0x1465AC826D27332B, + 0xE1BD047AD75EBF01, 0x79F733AF941960C5, 0x672EC96C41A3C475, 0xC27FEBA6524684F3, + 0x64EFD0FD75E38734, 0xED9E60040743AE18, 0xFB8E2993B9EF144D, 0x38453EB10C625A81, + 0x6978480742355C12, 0x48CF42CE14A6EE9E, 0x1CAC1FD606312DCE, 0x7B82D6BA4792E9BB, + 0x9D141C7B1F871A07, 0x5616B80DC11C4A2E, 0xB849C198F21FA777, 0x7CA91801C8D9A506, + 0xB1348E487EC273AD, 0x41B20D1E987B3A44, 0x7460AB55A3CFBBE3, 0x84E628034576F20A, + 0x1B87D16D897A6173, 0x0FE27DEFE45D5258, 0x83CDE6B8CA3DBEB7, 0x0C23647ED01D1119, + 0x7A362A3EA0592384, 0xB61F40F3F1893F10, 0x75D457D1440471DC, 0x4558DA34237035B8, + 0xDCA6116587FC2043, 0x8D9B67D3C9AB26D0, 0x2B0B5C88EE0E2517, 0x6FE77A382AB5DA90, + 0x269CC472D9D8FE31, 0x63C41E46FAA8CB89, 0xB7ABBC771642F52F, 0x7D1DE4852F126F39, + 0xA8C6BA3024339BA0, 0x600507D7CEE888C8, 0x8FEE82C61A20AFAE, 0x57A2448926D78011, + 0xFCA5E72836A458F0, 0x072BCEBB8F4B4CBD, 0x497BBE4AF36D24A1, 0x3CAFE99BB769557D, + 0x12FA9EBD05A7B5A9, 0xE8C04BAA5B836BDB, 0x4273148FAC3B7905, 0x908384812851C121, + 0xE557D3506C55B0FD, 0x72FF996ACB4F3D61, 0x3EDA0C8E64E2DC03, 0xF0868356E6B949E9, + 0x04EAD72ABB0B0FFC, 0x17A4B5135967706A, 0xE3C8E16F04D5367F, 0xF84F30028DAF570C, + 0x1846C8FCBD3A2232, 0x5B8120F7F6CA9108, 0xD46FA231ECEA3EA6, 0x334D947453340725, + 0x58403966C28AD249, 0xBED6F3A79A9F21F5, 0x68CCB483A5FE962D, 0xD085751B57E1315A, + 0xFED0023DE52FD18E, 0x4B0E5B5F20E6ADDF, 0x1A332DE96EB1AB4C, 0xA3CE10F57B65C604, + 0x108F7BA8D62C3CD7, 0xAB07A3A11073D8E1, 0x6B0DAD1291BED56C, 0xF2F366433532C097, + 0x2E557726B2CEE0D4, 0x0000000000000000, 0xCB02A476DE9B5029, 0xE4E32FD48B9E7AC2, + 0x734B65EE2C84F75E, 0x6E5386BCCD7E10AF, 0x01B4FC84E7CBCA3F, 0xCFE8735C65905FD5, + 0x3613BFDA0FF4C2E6, 0x113B872C31E7F6E8, 0x2FE18BA255052AEB, 0xE974B72EBC48A1E4, + 0x0ABC5641B89D979B, 0xB46AA5E62202B66E, 0x44EC26B0C4BBFF87, 0xA6903B5B27A503C7, + 0x7F680190FC99E647, 0x97A84A3AA71A8D9C, 0xDD12EDE16037EA7C, 0xC554251DDD0DC84E, + 0x88C54C7D956BE313, 0x4D91696048662B5D, 0xB08072CC9909B992, 0xB5DE5962C5C97C51, + 0x81B803AD19B637C9, 0xB2F597D94A8230EC, 0x0B08AAC55F565DA4, 0xF1327FD2017283D6, + 0xAD98919E78F35E63, 0x6AB9519676751F53, 0x24E921670A53774F, 0xB9FD3D1C15D46D48, + 0x92F66194FBDA485F, 0x5A35DC7311015B37, 0xDED3F4705477A93D, 0xC00A0EB381CD0D8D, + 0xBB88D809C65FE436, 0x16104997BEACBA55, 0x21B70AC95693B28C, 0x59F4C5E225411876, + 0xD5DB5EB50B21F499, 0x55D7A19CF55C096F, 0xA97246B4C3F8519F, 0x8552D487A2BD3835, + 0x54635D181297C350, 0x23C2EFDC85183BF2, 0x9F61F96ECC0C9379, 0x534893A39DDC8FED, + 0x5EDF0B59AA0A54CB, 0xAC2C6D1A9F38945C, 0xD7AEBBA0D8AA7DE7, 0x2ABFA00C09C5EF28, + 0xD84CC64F3CF72FBF, 0x2003F64DB15878B3, 0xA724C7DFC06EC9F8, 0x069F323F68808682, + 0xCC296ACD51D01C94, 0x055E2BAE5CC0C5C3, 0x6270E2C21D6301B6, 0x3B842720382219C0, + 0xD2F0900E846AB824, 0x52FC6F277A1745D2, 0xC6953C8CE94D8B0F, 0xE009F8FE3095753E, + 0x655B2C7992284D0B, 0x984A37D54347DFC4, 0xEAB5AEBF8808E2A5, 0x9A3FD2C090CC56BA, + 0x9CA0E0FFF84CD038, 0x4C2595E4AFADE162, 0xDF6708F4B3BC6302, 0xBF620F237D54EBCA, + 0x93429D101C118260, 0x097D4FD08CDDD4DA, 0x8C2F9B572E60ECEF, 0x708A7C7F18C4B41F, + 0x3A30DBA4DFE9D3FF, 0x4006F19A7FB0F07B, 0x5F6BF7DD4DC19EF4, 0x1F6D064732716E8F, + 0xF9FBCC866A649D33, 0x308C8DE567744464, 0x8971B0F972A0292C, 0xD61A47243F61B7D8, + 0xEFEB8511D4C82766, 0x961CB6BE40D147A3, 0xAAB35F25F7B812DE, 0x76154E407044329D, + 0x513D76B64E570693, 0xF3479AC7D2F90AA8, 0x9B8B2E4477079C85, 0x297EB99D3D85AC69 +}; +__constant static sph_u64 T6[256] = { + 0x7E37E62DFC7D40C3, 0x776F25A4EE939E5B, 0xE045C850DD8FB5AD, 0x86ED5BA711FF1952, + 0xE91D0BD9CF616B35, 0x37E0AB256E408FFB, 0x9607F6C031025A7A, 0x0B02F5E116D23C9D, + 0xF3D8486BFB50650C, 0x621CFF27C40875F5, 0x7D40CB71FA5FD34A, 0x6DAA6616DAA29062, + 0x9F5F354923EC84E2, 0xEC847C3DC507C3B3, 0x025A3668043CE205, 0xA8BF9E6C4DAC0B19, + 0xFA808BE2E9BEBB94, 0xB5B99C5277C74FA3, 0x78D9BC95F0397BCC, 0xE332E50CDBAD2624, + 0xC74FCE129332797E, 0x1729ECEB2EA709AB, 0xC2D6B9F69954D1F8, 0x5D898CBFBAB8551A, + 0x859A76FB17DD8ADB, 0x1BE85886362F7FB5, 0xF6413F8FF136CD8A, 0xD3110FA5BBB7E35C, + 0x0A2FEED514CC4D11, 0xE83010EDCD7F1AB9, 0xA1E75DE55F42D581, 0xEEDE4A55C13B21B6, + 0xF2F5535FF94E1480, 0x0CC1B46D1888761E, 0xBCE15FDB6529913B, 0x2D25E8975A7181C2, + 0x71817F1CE2D7A554, 0x2E52C5CB5C53124B, 0xF9F7A6BEEF9C281D, 0x9E722E7D21F2F56E, + 0xCE170D9B81DCA7E6, 0x0E9B82051CB4941B, 0x1E712F623C49D733, 0x21E45CFA42F9F7DC, + 0xCB8E7A7F8BBA0F60, 0x8E98831A010FB646, 0x474CCF0D8E895B23, 0xA99285584FB27A95, + 0x8CC2B57205335443, 0x42D5B8E984EFF3A5, 0x012D1B34021E718C, 0x57A6626AAE74180B, + 0xFF19FC06E3D81312, 0x35BA9D4D6A7C6DFE, 0xC9D44C178F86ED65, 0x506523E6A02E5288, + 0x03772D5C06229389, 0x8B01F4FE0B691EC0, 0xF8DABD8AED825991, 0x4C4E3AEC985B67BE, + 0xB10DF0827FBF96A9, 0x6A69279AD4F8DAE1, 0xE78689DCD3D5FF2E, 0x812E1A2B1FA553D1, + 0xFBAD90D6EBA0CA18, 0x1AC543B234310E39, 0x1604F7DF2CB97827, 0xA6241C6951189F02, + 0x753513CCEAAF7C5E, 0x64F2A59FC84C4EFA, 0x247D2B1E489F5F5A, 0xDB64D718AB474C48, + 0x79F4A7A1F2270A40, 0x1573DA832A9BEBAE, 0x3497867968621C72, 0x514838D2A2302304, + 0xF0AF6537FD72F685, 0x1D06023E3A6B44BA, 0x678588C3CE6EDD73, 0x66A893F7CC70ACFF, + 0xD4D24E29B5EDA9DF, 0x3856321470EA6A6C, 0x07C3418C0E5A4A83, 0x2BCBB22F5635BACD, + 0x04B46CD00878D90A, 0x06EE5AB80C443B0F, 0x3B211F4876C8F9E5, 0x0958C38912EEDE98, + 0xD14B39CDBF8B0159, 0x397B292072F41BE0, 0x87C0409313E168DE, 0xAD26E98847CAA39F, + 0x4E140C849C6785BB, 0xD5FF551DB7F3D853, 0xA0CA46D15D5CA40D, 0xCD6020C787FE346F, + 0x84B76DCF15C3FB57, 0xDEFDA0FCA121E4CE, 0x4B8D7B6096012D3D, 0x9AC642AD298A2C64, + 0x0875D8BD10F0AF14, 0xB357C6EA7B8374AC, 0x4D6321D89A451632, 0xEDA96709C719B23F, + 0xF76C24BBF328BC06, 0xC662D526912C08F2, 0x3CE25EC47892B366, 0xB978283F6F4F39BD, + 0xC08C8F9E9D6833FD, 0x4F3917B09E79F437, 0x593DE06FB2C08C10, 0xD6887841B1D14BDA, + 0x19B26EEE32139DB0, 0xB494876675D93E2F, 0x825937771987C058, 0x90E9AC783D466175, + 0xF1827E03FF6C8709, 0x945DC0A8353EB87F, 0x4516F9658AB5B926, 0x3F9573987EB020EF, + 0xB855330B6D514831, 0x2AE6A91B542BCB41, 0x6331E413C6160479, 0x408F8E8180D311A0, + 0xEFF35161C325503A, 0xD06622F9BD9570D5, 0x8876D9A20D4B8D49, 0xA5533135573A0C8B, + 0xE168D364DF91C421, 0xF41B09E7F50A2F8F, 0x12B09B0F24C1A12D, 0xDA49CC2CA9593DC4, + 0x1F5C34563E57A6BF, 0x54D14F36A8568B82, 0xAF7CDFE043F6419A, 0xEA6A2685C943F8BC, + 0xE5DCBFB4D7E91D2B, 0xB27ADDDE799D0520, 0x6B443CAED6E6AB6D, 0x7BAE91C9F61BE845, + 0x3EB868AC7CAE5163, 0x11C7B65322E332A4, 0xD23C1491B9A992D0, 0x8FB5982E0311C7CA, + 0x70AC6428E0C9D4D8, 0x895BC2960F55FCC5, 0x76423E90EC8DEFD7, 0x6FF0507EDE9E7267, + 0x3DCF45F07A8CC2EA, 0x4AA06054941F5CB1, 0x5810FB5BB0DEFD9C, 0x5EFEA1E3BC9AC693, + 0x6EDD4B4ADC8003EB, 0x741808F8E8B10DD2, 0x145EC1B728859A22, 0x28BC9F7350172944, + 0x270A06424EBDCCD3, 0x972AEDF4331C2BF6, 0x059977E40A66A886, 0x2550302A4A812ED6, + 0xDD8A8DA0A7037747, 0xC515F87A970E9B7B, 0x3023EAA9601AC578, 0xB7E3AA3A73FBADA6, + 0x0FB699311EAAE597, 0x0000000000000000, 0x310EF19D6204B4F4, 0x229371A644DB6455, + 0x0DECAF591A960792, 0x5CA4978BB8A62496, 0x1C2B190A38753536, 0x41A295B582CD602C, + 0x3279DCC16426277D, 0xC1A194AA9F764271, 0x139D803B26DFD0A1, 0xAE51C4D441E83016, + 0xD813FA44AD65DFC1, 0xAC0BF2BC45D4D213, 0x23BE6A9246C515D9, 0x49D74D08923DCF38, + 0x9D05032127D066E7, 0x2F7FDEFF5E4D63C7, 0xA47E2A0155247D07, 0x99B16FF12FA8BFED, + 0x4661D4398C972AAF, 0xDFD0BBC8A33F9542, 0xDCA79694A51D06CB, 0xB020EBB67DA1E725, + 0xBA0F0563696DAA34, 0xE4F1A480D5F76CA7, 0xC438E34E9510EAF7, 0x939E81243B64F2FC, + 0x8DEFAE46072D25CF, 0x2C08F3A3586FF04E, 0xD7A56375B3CF3A56, 0x20C947CE40E78650, + 0x43F8A3DD86F18229, 0x568B795EAC6A6987, 0x8003011F1DBB225D, 0xF53612D3F7145E03, + 0x189F75DA300DEC3C, 0x9570DB9C3720C9F3, 0xBB221E576B73DBB8, 0x72F65240E4F536DD, + 0x443BE25188ABC8AA, 0xE21FFE38D9B357A8, 0xFD43CA6EE7E4F117, 0xCAA3614B89A47EEC, + 0xFE34E732E1C6629E, 0x83742C431B99B1D4, 0xCF3A16AF83C2D66A, 0xAAE5A8044990E91C, + 0x26271D764CA3BD5F, 0x91C4B74C3F5810F9, 0x7C6DD045F841A2C6, 0x7F1AFD19FE63314F, + 0xC8F957238D989CE9, 0xA709075D5306EE8E, 0x55FC5402AA48FA0E, 0x48FA563C9023BEB4, + 0x65DFBEABCA523F76, 0x6C877D22D8BCE1EE, 0xCC4D3BF385E045E3, 0xBEBB69B36115733E, + 0x10EAAD6720FD4328, 0xB6CEB10E71E5DC2A, 0xBDCC44EF6737E0B7, 0x523F158EA412B08D, + 0x989C74C52DB6CE61, 0x9BEB59992B945DE8, 0x8A2CEFCA09776F4C, 0xA3BD6B8D5B7E3784, + 0xEB473DB1CB5D8930, 0xC3FBA2C29B4AA074, 0x9C28181525CE176B, 0x683311F2D0C438E4, + 0x5FD3BAD7BE84B71F, 0xFC6ED15AE5FA809B, 0x36CDB0116C5EFE77, 0x29918447520958C8, + 0xA29070B959604608, 0x53120EBAA60CC101, 0x3A0C047C74D68869, 0x691E0AC6D2DA4968, + 0x73DB4974E6EB4751, 0x7A838AFDF40599C9, 0x5A4ACD33B4E21F99, 0x6046C94FC03497F0, + 0xE6AB92E8D1CB8EA2, 0x3354C7F5663856F1, 0xD93EE170AF7BAE4D, 0x616BD27BC22AE67C, + 0x92B39A10397A8370, 0xABC8B3304B8E9890, 0xBF967287630B02B2, 0x5B67D607B6FC6E15 +}; +__constant static sph_u64 T7[256] = { + 0xD031C397CE553FE6, 0x16BA5B01B006B525, 0xA89BADE6296E70C8, 0x6A1F525D77D3435B, + 0x6E103570573DFA0B, 0x660EFB2A17FC95AB, 0x76327A9E97634BF6, 0x4BAD9D6462458BF5, + 0xF1830CAEDBC3F748, 0xC5C8F542669131FF, 0x95044A1CDC48B0CB, 0x892962DF3CF8B866, + 0xB0B9E208E930C135, 0xA14FB3F0611A767C, 0x8D2605F21C160136, 0xD6B71922FECC549E, + 0x37089438A5907D8B, 0x0B5DA38E5803D49C, 0x5A5BCC9CEA6F3CBC, 0xEDAE246D3B73FFE5, + 0xD2B87E0FDE22EDCE, 0x5E54ABB1CA8185EC, 0x1DE7F88FE80561B9, 0xAD5E1A870135A08C, + 0x2F2ADBD665CECC76, 0x5780B5A782F58358, 0x3EDC8A2EEDE47B3F, 0xC9D95C3506BEE70F, + 0x83BE111D6C4E05EE, 0xA603B90959367410, 0x103C81B4809FDE5D, 0x2C69B6027D0C774A, + 0x399080D7D5C87953, 0x09D41E16487406B4, 0xCDD63B1826505E5F, 0xF99DC2F49B0298E8, + 0x9CD0540A943CB67F, 0xBCA84B7F891F17C5, 0x723D1DB3B78DF2A6, 0x78AA6E71E73B4F2E, + 0x1433E699A071670D, 0x84F21BE454620782, 0x98DF3327B4D20F2F, 0xF049DCE2D3769E5C, + 0xDB6C60199656EB7A, 0x648746B2078B4783, 0x32CD23598DCBADCF, 0x1EA4955BF0C7DA85, + 0xE9A143401B9D46B5, 0xFD92A5D9BBEC21B8, 0xC8138C790E0B8E1B, 0x2EE00B9A6D7BA562, + 0xF85712B893B7F1FC, 0xEB28FED80BEA949D, 0x564A65EB8A40EA4C, 0x6C9988E8474A2823, + 0x4535898B121D8F2D, 0xABD8C03231ACCBF4, 0xBA2E91CAB9867CBD, 0x7960BE3DEF8E263A, + 0x0C11A977602FD6F0, 0xCB50E1AD16C93527, 0xEAE22E94035FFD89, 0x2866D12F5DE2CE1A, + 0xFF1B1841AB9BF390, 0x9F9339DE8CFE0D43, 0x964727C8C48A0BF7, 0x524502C6AAAE531C, + 0x9B9C5EF3AC10B413, 0x4FA2FA4942AB32A5, 0x3F165A62E551122B, 0xC74148DA76E6E3D7, + 0x924840E5E464B2A7, 0xD372AE43D69784DA, 0x233B72A105E11A86, 0xA48A04914941A638, + 0xB4B68525C9DE7865, 0xDDEABAACA6CF8002, 0x0A9773C250B6BD88, 0xC284FFBB5EBD3393, + 0x8BA0DF472C8F6A4E, 0x2AEF6CB74D951C32, 0x427983722A318D41, 0x73F7CDFFBF389BB2, + 0x074C0AF9382C026C, 0x8A6A0F0B243A035A, 0x6FDAE53C5F88931F, 0xC68B98967E538AC3, + 0x44FF59C71AA8E639, 0xE2FCE0CE439E9229, 0xA20CDE2479D8CD40, 0x19E89FA2C8EBD8E9, + 0xF446BBCFF398270C, 0x43B3533E2284E455, 0xD82F0DCD8E945046, 0x51066F12B26CE820, + 0xE73957AF6BC5426D, 0x081ECE5A40C16FA0, 0x3B193D4FC5BFAB7B, 0x7FE66488DF174D42, + 0x0E9814EF705804D8, 0x8137AC857C39D7C6, 0xB1733244E185A821, 0x695C3F896F11F867, + 0xF6CF0657E3EFF524, 0x1AABF276D02963D5, 0x2DA3664E75B91E5E, 0x0289BD981077D228, + 0x90C1FD7DF413608F, 0x3C5537B6FD93A917, 0xAA12107E3919A2E0, 0x0686DAB530996B78, + 0xDAA6B0559EE3826E, 0xC34E2FF756085A87, 0x6D5358A44FFF4137, 0xFC587595B35948AC, + 0x7CA5095CC7D5F67E, 0xFB147F6C8B754AC0, 0xBFEB26AB91DDACF9, 0x6896EFC567A49173, + 0xCA9A31E11E7C5C33, 0xBBE44186B13315A9, 0x0DDB793B689ABFE4, 0x70B4A02BA7FA208E, + 0xE47A3A7B7307F951, 0x8CECD5BE14A36822, 0xEEED49B923B144D9, 0x17708B4DB8B3DC31, + 0x6088219F2765FED3, 0xB3FA8FDCF1F27A09, 0x910B2D31FCA6099B, 0x0F52C4A378ED6DCC, + 0x50CCBF5EBAD98134, 0x6BD582117F662A4F, 0x94CE9A50D4FDD9DF, 0x2B25BCFB45207526, + 0x67C42B661F49FCBF, 0x492420FC723259DD, 0x03436DD418C2BB3C, 0x1F6E4517F872B391, + 0xA08563BC69AF1F68, 0xD43EA4BAEEBB86B6, 0x01CAD04C08B56914, 0xAC94CACB0980C998, + 0x54C3D8739A373864, 0x26FEC5C02DBACAC2, 0xDEA9D778BE0D3B3E, 0x040F672D20EEB950, + 0xE5B0EA377BB29045, 0xF30AB136CBB42560, 0x62019C0737122CFB, 0xE86B930C13282FA1, + 0xCC1CEB542EE5374B, 0x538FD28AA21B3A08, 0x1B61223AD89C0AC1, 0x36C24474AD25149F, + 0x7A23D3E9F74C9D06, 0xBE21F6E79968C5ED, 0xCF5F868036278C77, 0xF705D61BEB5A9C30, + 0x4D2B47D152DCE08D, 0x5F9E7BFDC234ECF8, 0x247778583DCD18EA, 0x867BA67C4415D5AA, + 0x4CE1979D5A698999, 0x0000000000000000, 0xEC64F42133C696F1, 0xB57C5569C16B1171, + 0xC1C7926F467F88AF, 0x654D96FE0F3E2E97, 0x15F936D5A8C40E19, 0xB8A72C52A9F1AE95, + 0xA9517DAA21DB19DC, 0x58D27104FA18EE94, 0x5918A148F2AD8780, 0x5CDD1629DAF657C4, + 0x8274C15164FB6CFA, 0xD1FB13DBC6E056F2, 0x7D6FD910CF609F6A, 0xB63F38BDD9A9AA4D, + 0x3D9FE7FAF526C003, 0x74BBC706871499DE, 0xDF630734B6B8522A, 0x3AD3ED03CD0AC26F, + 0xFADEAF2083C023D4, 0xC00D42234ECAE1BB, 0x8538CBA85CD76E96, 0xC402250E6E2458EB, + 0x47BC3413026A5D05, 0xAFD7A71F114272A4, 0x978DF784CC3F62E3, 0xB96DFC1EA144C781, + 0x21B2CF391596C8AE, 0x318E4E8D950916F3, 0xCE9556CC3E92E563, 0x385A509BDD7D1047, + 0x358129A0B5E7AFA3, 0xE6F387E363702B79, 0xE0755D5653E94001, 0x7BE903A5FFF9F412, + 0x12B53C2C90E80C75, 0x3307F315857EC4DB, 0x8FAFB86A0C61D31E, 0xD9E5DD8186213952, + 0x77F8AAD29FD622E2, 0x25BDA814357871FE, 0x7571174A8FA1F0CA, 0x137FEC60985D6561, + 0x30449EC19DBC7FE7, 0xA540D4DD41F4CF2C, 0xDC206AE0AE7AE916, 0x5B911CD0E2DA55A8, + 0xB2305F90F947131D, 0x344BF9ECBD52C6B7, 0x5D17C665D2433ED0, 0x18224FEEC05EB1FD, + 0x9E59E992844B6457, 0x9A568EBFA4A5DD07, 0xA3C60E68716DA454, 0x7E2CB4C4D7A22456, + 0x87B176304CA0BCBE, 0x413AEEA632F3367D, 0x9915E36BBC67663B, 0x40F03EEA3A465F69, + 0x1C2D28C3E0B008AD, 0x4E682A054A1E5BB1, 0x05C5B761285BD044, 0xE1BF8D1A5B5C2915, + 0xF2C0617AC3014C74, 0xB7F5E8F1D11CC359, 0x63CB4C4B3FA745EF, 0x9D1A84469C89DF6B, + 0xE33630824B2BFB3D, 0xD5F474F6E60EEFA2, 0xF58C6B83FB2D4E18, 0x4676E45F0ADF3411, + 0x20781F751D23A1BA, 0xBD629B3381AA7ED1, 0xAE1D775319F71BB0, 0xFED1C80DA32E9A84, + 0x5509083F92825170, 0x29AC01635557A70E, 0xA7C9694551831D04, 0x8E65682604D4BA0A, + 0x11F651F8882AB749, 0xD77DC96EF6793D8A, 0xEF2799F52B042DCD, 0x48EEF0B07A8730C9, + 0x22F1A2ED0D547392, 0x6142F1D32FD097C7, 0x4A674D286AF0E2E1, 0x80FD7CC9748CBED2, + 0x717E7067AF4F499A, 0x938290A9ECD1DBB3, 0x88E3B293344DD172, 0x2734158C250FA3D6 +}; + + +// KeySchedule +__constant static sph_u64 CC[12][8] = {{ + 0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71, + 0x0169679291e07c4b, 0x15d360a4082a42a2, 0x234d74cc36747605, 0x0745a6f2596580dd +}, { + 0x1a2f9da98ab5a36f, 0xd7b5700f469de34f, 0x982b230a72eafef3, 0x3101b5160f5ed561, + 0x5899d6126b17b59a, 0xcaa70adbc261b55c, 0x56cdcbd71ba2dd55, 0xb79bb121700479e6 +}, { + 0xc72fce2bacdc74f5, 0x35843d6a28fc390a, 0x8b1f9c525f5ef106, 0x7b7b29b11475eaf2, + 0xb19e3590e40fe2d3, 0x09db6260373ac9c1, 0x31db7a8643f4b6c2, 0xb20aba0af5961e99 +}, { + 0xd26615e8b3df1fef, 0xdde4715da0e148f9, 0x7d3c5c337e858e48, 0x3f355e68ad1c729d, + 0x75d603ed822cd7a9, 0xbe0352933313b7d8, 0xf137e893a1ea5334, 0x2ed1e384bcbe0c22 +}, { + 0x994747adac6bea4b, 0x6323a96c0c413f9a, 0x4a1086161f1c157f, 0xbdff0f80d7359e35, + 0xa3f53a254717cdbf, 0x161a2723b700ffdf, 0xf563eaa97ea2567a, 0x57fe6c7cfd581760 +}, { + 0xd9d33a1daeae4fae, 0xc039307a3bc3a46f, 0x6ca44251f9c4662d, 0xc68ef09ab49a7f18, + 0xb4b79a1cb7a6facf, 0xb6c6bec2661ff20a, 0x354f903672c571bf, 0x6e7d64467a4068fa +}, { + 0xecc5aaee160ec7f4, 0x540924bffe86ac51, 0xc987bfe6c7c69e39, 0xc9937a19333e47d3, + 0x372c822dc5ab9209, 0x04054a2883694706, 0xf34a3ca24c451735, 0x93d4143a4d568688 +}, { + 0xa7c9934d425b1f9b, 0x41416e0c02aae703, 0x1ede369c71f8b74e, 0x9ac4db4d3b44b489, + 0x90069b92cb2b89f4, 0x2fc4a5d12b8dd169, 0xd9a8515935c2ac36, 0x1ee702bfd40d7fa4 +}, { + 0x9b223116545a8f37, 0xde5f16ecd89a4c94, 0x244289251b3a7d3a, 0x84090de0b755d93c, + 0xb1ceb2db0b440a80, 0x549c07a69a8a2b7b, 0x602a1fcb92dc380e, 0xdb5a238351446172 +}, { + 0x526f0580a6debeab, 0xf3f3e4b248e52a38, 0xdb788aff1ce74189, 0x0361331b8ae1ff1f, + 0x4b3369af0267e79f, 0xf452763b306c1e7a, 0xc3b63b15d1fa9836, 0xed9c4598fbc7b474 +}, { + 0xfb89c8efd09ecd7b, 0x94fe5a63cdc60230, 0x6107abebbb6bfad8, 0x7966841421800120, + 0xcab948eaef711d8a, 0x986e477d1dcdbaef, 0x5dd86fc04a59a2de, 0x1b2df381cda4ca6b +}, { + 0xba3116f167e78e37, 0x7ab14904b08013d2, 0x771ddfbc323ca4cd, 0x9b9f2130d41220f8, + 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48 +}}; + +__constant static sph_u64 F0[8] = // GOST_F(0) +{ + 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, + 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3 +}; + +__constant static sph_u64 CC_F0[12][8] = +{ + { 0x8FD72F640708B0D0, 0x0DE874C7EBC3F213, 0xE92EEF3AD202E9E0, 0xC1E9DA0708013DA7, 0x9727DAB2F014BE88, 0x103051A02BCD6935, 0x33EC7E1DBD28F736, 0x1ECF460CF78AD1F4 }, + { 0x0B2D9F89C775449D, 0x6B6EEFC6DAB7E8B0, 0xF1A0D31667F6EC44, 0x2A71132D5E108166, 0x0E9357C2EC87931A, 0xC99F5C1B4A01612D, 0x7E60B16E637D4EE2, 0xA9FCB827F9BA6D81 }, + { 0x231FECA5AB3D285C, 0x70C6E1483C838C3B, 0x9C21C3C40CE4E2DA, 0x2FA796BD5688E573, 0x04C0E3FF55809FDF, 0x5FF978BFB8E3CDC8, 0xC54A19D6A3D07033, 0x0FCA83FDDE872478 }, + { 0xBDF9312726339F10, 0x51A5BA1793BC9C56, 0xC4428DA14F96D2D4, 0xEC925222374EAB1F, 0x79477893747DD92F, 0xC495E19A46886304, 0x9C23F893BA7CFA36, 0x0C47268881FC5FEB }, + { 0xCF117966029B2CB3, 0x07179ABE77088A8F, 0x671EF4CC2650E257, 0x7474B8B170DAB5C6, 0x4224FEBECF35113E, 0x993D156C675C5537, 0x2DEE3A5782C39B45, 0xE7C586F2990DD385 }, + { 0x8608FD95B1C1138A, 0x8BB0847D9E9849AC, 0x5E76623F4F0EB0C7, 0x34C2BDBAFC5060CE, 0xE9E814475907826C, 0x22C9ED94D6AAC7C9, 0xE6B75E28171EB0D6, 0xF1329E5534E60215 }, + { 0x86BB4814B1C3CE52, 0xE8F226C9FBDDD017, 0xCEDED67991CB3087, 0x76C33E32FDBFACA5, 0xDBB13BE1A9F7474C, 0x3D0273470342C356, 0x8E7246C51CF07F61, 0xAC8C125DDEF8DF71 }, + { 0x6D73E747795B8CF3, 0x4E4AA65EA0072050, 0xA14A1582CB43C2B9, 0x748EF2B7BB63B938, 0x126789534410D7D4, 0xD4D48FF40301D791, 0xC67DFBE315C41FC0, 0x35E7A1A1AF88601C }, + { 0x9BD33EA0FAB34007, 0xF51B7CDBE3D67D25, 0xD3ABDA0CE4186E6B, 0x8E61DDADCBCE1706, 0x58994565B41BE6A5, 0x7A87ABC1240CD31D, 0xFAFE6C28487968D0, 0x15B368609FF9EEA7 }, + { 0xAE33263CCF115818, 0x93B2DBE9CADFCFC8, 0x0A91952BF91B0147, 0x458E67CA5F1ED73A, 0x94C2E5F288F074E3, 0x377895E85C69E996, 0xF11A4456AAB37B10, 0x163131934816821A }, + { 0xD07E4A2366BF469D, 0x5EF1A3D220213B6C, 0x3C5BB78971D8ED0F, 0x0DE05E6B9006F2D2, 0xC58CFB00B8EAA1C9, 0xEFCDB54D1F250B76, 0xFD135634FA527042, 0x4CEE791290516407 }, + { 0xD800B9264010790F, 0x974C4823E2B668D7, 0xA605A4B385C5E361, 0x3F6C92DA5A56D8D2, 0x82B9D67C12EF8277, 0x0AB6B4582561BF90, 0x46954FD98FC2CBA3, 0x70BE45CB21B6760D } +}; + + +__constant static sph_u64 F1[8] = // GOST_F(1) +{ + 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523, + 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523 +}; + +__constant static sph_u64 CC_F1[12][8] = +{ + { 0xeaebb276318fee18, 0xea4c693382cbd63b, 0xbf26be88df699734, 0x49a504a9b6fa1c45, 0xb1666aa693de22da, 0x113563ea5e6b7e9c, 0xcdbf01848cd611e6, 0xb95e4a9dc30c7d0c }, + { 0x919565a231cfa4aa, 0x46fde791cec8ae57, 0xe3c56411e2de27bf, 0x1f9d9e511aba0b94, 0x57773e25f11309ce, 0x2ce14b67cd005091, 0x00fb26ba738ef6c7, 0x2d5f800141af74fd }, + { 0xf57a17cc650afe61, 0x26d3deadafe23502, 0xf87b7436229a32a5, 0x85459ccaae2842a5, 0x0d3a74dda91e80cd, 0x330e2b60f01ed098, 0x56c16add5dfb6720, 0x8692832019310082 }, + { 0x6f63d34f5f688399, 0xa826bf5fb7abd51f, 0x3ecb2eaa144393e2, 0x4e7d6cc0863c69e4, 0x61e175af40d59b16, 0xba60d963cd6a540a, 0x69bf99c14c3995d5, 0x5a3de79f30d5a599 }, + { 0x25f0e72cae7257f0, 0xfdb8c6bc7f9a6c15, 0x326e9413d635e7f1, 0xeaff2028e5942992, 0x1a55b07e905d6162, 0x882060860a9970d1, 0xe2b0cd223cc898af, 0x56a1f7c0137c29be }, + { 0x4e6e5462c344d15a, 0xb7fb298868e7b346, 0x33741921c3e95374, 0xacb5e26b0e8d2b0b, 0x59f16751b3b69ec8, 0xa659593ea405b0b7, 0x98408efc8cb1a951, 0x8dbbcf819b3df0fc }, + { 0x8d0aa21b9aec6c6a, 0x2b3534b940a84fb6, 0x2a1230d58e638c51, 0xc9daefb8e02f3383, 0xc709f5a9e5878201, 0x6f42d5dc6a746c8d, 0x3fb7df9057ada0b0, 0xaa6d0139a591f1c1 }, + { 0xb3a97a7336702199, 0x51bd05f743668d8a, 0xc50f8f941f5351f3, 0xbdd89dee5fa35fe3, 0x9c4e220a589d4cbb, 0xed49fc69200e2ed8, 0x38354437945f7d36, 0x0904ddf5a8b68f2b }, + { 0x1afa89fcc0636790, 0xda9d9eecd88892e6, 0xfec3d6bfe830769a, 0xafae622e5dc303d7, 0x7f7a31a7805db3f0, 0x916752f22230f876, 0x7b33cb8f67df8fca, 0xd205cb3c39e54fd7 }, + { 0x648e61636c99ce88, 0x8533e43ee0c8a504, 0xbb9189e6eee32a4e, 0x6edbda389dc2f3bf, 0xdf6ddca6e9daa1d6, 0xd3962f27af34ce52, 0xe1e63f4c628c9c15, 0xd5ad89fc0b5c693d }, + { 0x0646bda91e280a3e, 0x3a6f57000155ec3e, 0x579182cf68a16a50, 0x382fa3cafc78b976, 0x45ca8299c7305fb5, 0x778479d865838e62, 0x2a119981c6495ae7, 0xdbf255760f5a7b1d }, + { 0xeb1ab39e4073b2f0, 0x22216718aefb32e4, 0xf9926a2b4248c862, 0x838bd14eb5ba6c3f, 0xa33f1ec5ff1cb214, 0xdb6aef763e43ff19, 0xa17f903ce0f5f90e, 0x03bf0065a0ecf9fc } +}; + + +#define GOST_FS(_state, _return_state) \ +({ \ + unsigned char *state=(unsigned char *)_state; \ + sph_u64 *return_state=(sph_u64 *)_return_state; \ + sph_u64 r = 0; \ + r ^= T0S[state[56]]; \ + r ^= T1S[state[48]]; \ + r ^= T2S[state[40]]; \ + r ^= T3S[state[32]]; \ + r ^= T4S[state[24]]; \ + r ^= T5S[state[16]]; \ + r ^= T6S[state[8]]; \ + r ^= T7S[state[0]]; \ + *return_state = r; \ + r = 0; \ +\ + r ^= T0S[state[57]]; \ + r ^= T1S[state[49]]; \ + r ^= T2S[state[41]]; \ + r ^= T3S[state[33]]; \ + r ^= T4S[state[25]]; \ + r ^= T5S[state[17]]; \ + r ^= T6S[state[9]]; \ + r ^= T7S[state[1]]; \ + *(return_state+1) = r; \ + r = 0; \ +\ + r ^= T0S[state[58]]; \ + r ^= T1S[state[50]]; \ + r ^= T2S[state[42]]; \ + r ^= T3S[state[34]]; \ + r ^= T4S[state[26]]; \ + r ^= T5S[state[18]]; \ + r ^= T6S[state[10]]; \ + r ^= T7S[state[2]]; \ + *(return_state+2) = r; \ + r = 0; \ +\ + r ^= T0S[state[59]]; \ + r ^= T1S[state[51]]; \ + r ^= T2S[state[43]]; \ + r ^= T3S[state[35]]; \ + r ^= T4S[state[27]]; \ + r ^= T5S[state[19]]; \ + r ^= T6S[state[11]]; \ + r ^= T7S[state[3]]; \ + *(return_state+3) = r; \ + r = 0; \ +\ + r ^= T0S[state[60]]; \ + r ^= T1S[state[52]]; \ + r ^= T2S[state[44]]; \ + r ^= T3S[state[36]]; \ + r ^= T4S[state[28]]; \ + r ^= T5S[state[20]]; \ + r ^= T6S[state[12]]; \ + r ^= T7S[state[4]]; \ + *(return_state+4) = r; \ + r = 0; \ +\ + r ^= T0S[state[61]]; \ + r ^= T1S[state[53]]; \ + r ^= T2S[state[45]]; \ + r ^= T3S[state[37]]; \ + r ^= T4S[state[29]]; \ + r ^= T5S[state[21]]; \ + r ^= T6S[state[13]]; \ + r ^= T7S[state[5]]; \ + *(return_state+5) = r; \ + r = 0; \ +\ + r ^= T0S[state[62]]; \ + r ^= T1S[state[54]]; \ + r ^= T2S[state[46]]; \ + r ^= T3S[state[38]]; \ + r ^= T4S[state[30]]; \ + r ^= T5S[state[22]]; \ + r ^= T6S[state[14]]; \ + r ^= T7S[state[6]]; \ + *(return_state+6) = r; \ + r = 0; \ +\ + r ^= T0S[state[63]]; \ + r ^= T1S[state[55]]; \ + r ^= T2S[state[47]]; \ + r ^= T3S[state[39]]; \ + r ^= T4S[state[31]]; \ + r ^= T5S[state[23]]; \ + r ^= T6S[state[15]]; \ + r ^= T7S[state[7]]; \ + *(return_state+7) = r; \ +}) + +inline void GOST_Copy512(sph_u64* dst, const sph_u64* src) +{ + _Pragma("unroll") for (int i=0; i<8; i++) + dst[i] = src[i]; +} + + +#define GOST_F(state) \ +({ \ + sph_u64 t[8]; \ + GOST_FS(state, t); \ + GOST_Copy512(state, t); \ +}) + +inline void GOST_Xor512(sph_u64* C, const sph_u64* A, const sph_u64* B) +{ + C[0] = A[0] ^ B[0]; + C[1] = A[1] ^ B[1]; + C[2] = A[2] ^ B[2]; + C[3] = A[3] ^ B[3]; + C[4] = A[4] ^ B[4]; + C[5] = A[5] ^ B[5]; + C[6] = A[6] ^ B[6]; + C[7] = A[7] ^ B[7]; +} + +inline void GOST_Xor512_constant(sph_u64* C, const sph_u64* A, __constant const sph_u64* B) +{ + C[0] = A[0] ^ B[0]; + C[1] = A[1] ^ B[1]; + C[2] = A[2] ^ B[2]; + C[3] = A[3] ^ B[3]; + C[4] = A[4] ^ B[4]; + C[5] = A[5] ^ B[5]; + C[6] = A[6] ^ B[6]; + C[7] = A[7] ^ B[7]; +} + +inline void GOST_Xor512_3(sph_u64* C, const sph_u64* A, const sph_u64* B) +{ + C[0] ^= A[0] ^ B[0]; + C[1] ^= A[1] ^ B[1]; + C[2] ^= A[2] ^ B[2]; + C[3] ^= A[3] ^ B[3]; + C[4] ^= A[4] ^ B[4]; + C[5] ^= A[5] ^ B[5]; + C[6] ^= A[6] ^ B[6]; + C[7] ^= A[7] ^ B[7]; +} + +inline void GOST_Xor512_c(sph_u64* C, const sph_u64* A, const sph_u64* B, sph_u64 c) +{ + C[0] = A[0] ^ B[0] ^ c; + C[1] = A[1] ^ B[1] ^ c; + C[2] = A[2] ^ B[2] ^ c; + C[3] = A[3] ^ B[3] ^ c; + C[4] = A[4] ^ B[4] ^ c; + C[5] = A[5] ^ B[5] ^ c; + C[6] = A[6] ^ B[6] ^ c; + C[7] = A[7] ^ B[7] ^ c; +} + +#define GOST_E12(K, state) \ +({ \ + sph_u64 state1[8], K1[8]; \ + GOST_Xor512_constant(state1, K, CC[0]); \ + GOST_FS(state1, K1); \ + GOST_FS(state, state1); \ + GOST_Xor512(state, state1, K1); \ + _Pragma("unroll") for(int i=1; i<12; i++) \ + { \ + GOST_Xor512_constant(state1, K1, CC[i]); \ + GOST_FS(state1, K1); \ + GOST_FS(state, state1); \ + GOST_Xor512(state, state1, K1); \ + } \ +}) + +#define GOST_E(K, m, state /* out only */) \ +({ \ + GOST_Xor512(state, m, K); /* state = m ^ K */ \ + GOST_E12(K, state); \ +}) + +#define GOST_E_F0(m, state /* out only */) \ +({ \ + GOST_Xor512_constant(state, m, F0); /* state = m ^ F0 */ \ + sph_u64 state1[8]; \ + _Pragma("unroll") for(int i=0; i<12; i++) \ + { \ + GOST_FS(state, state1); \ + GOST_Xor512_constant(state, state1, CC_F0[i]); \ + } \ +}) + +#define GOST_E_F1(m, state /* out only */) \ +({ \ + GOST_Xor512_constant(state, m, F1); /* state = m ^ F0 */ \ + sph_u64 state1[8]; \ + _Pragma("unroll") for(int i=0; i<12; i++) \ + { \ + GOST_FS(state, state1); \ + GOST_Xor512_constant(state, state1, CC_F1[i]); \ + } \ +}) + +#define GOST_g_N(h, M, N) \ +({ \ + sph_u64 K[8]; \ + GOST_Xor512(K, N, h); /* K = N ^ h */ \ + GOST_F(K); \ + sph_u64 t[8]; \ + GOST_E(K, M, t); \ + GOST_Xor512_3(h, t, M); /* h = h ^ t ^ M */ \ +}) + +#define GOST_g_0(h, M) \ +({ \ + sph_u64 K[8]; \ + GOST_FS(h, K); \ + sph_u64 t[8]; \ + GOST_E(K, M, t); \ + GOST_Xor512_3(h, t, M); /* h = h ^ t ^ M */ \ +}) + +#define GOST_g_0_0(h, M) /* input h assumed zero, for iv 512 */ \ +({ \ + GOST_E_F0 (M, h); \ + GOST_Xor512 (h, h, M); /* h = h ^ M */ \ +}) + +#define GOST_g_0_1(h, M) /* input h assumed all bytes one, for iv 256 */ \ +({ \ + GOST_E_F1 (M, h); \ + GOST_Xor512_c(h, h, M, 0x0101010101010101); /* h = h ^ M ^ 1 */ \ +}) + + +inline void GOST_Add128(void *x, void * const a, void * const b) +{ + unsigned short t = 0; + _Pragma("unroll") for(int i = 15; i >= 0; i--) + { + t = ((unsigned char *)a)[i] + ((unsigned char *)b)[i] + (t >> 8); + ((unsigned char *)x)[i] = t & 0xFF; + } +} + +typedef struct { + uint data[20]; + uint target[8]; +} dev_blk_ctx; + +__kernel __attribute__((vec_type_hint(uint))) WGS void gostminer( + __constant dev_blk_ctx *ctx, __global uint *output) +{ + // move to shared memory + __local sph_u64 T0S[256]; + __local sph_u64 T1S[256]; + __local sph_u64 T2S[256]; + __local sph_u64 T3S[256]; + __local sph_u64 T4S[256]; + __local sph_u64 T5S[256]; + __local sph_u64 T6S[256]; + __local sph_u64 T7S[256]; + + int x = get_local_id(0); + T0S[x] = T0[x]; T1S[x] = T1[x]; T2S[x] = T2[x]; T3S[x] = T3[x]; + T4S[x] = T4[x]; T5S[x] = T5[x]; T6S[x] = T6[x]; T7S[x] = T7[x]; + barrier(CLK_LOCAL_MEM_FENCE); + + const uint myid = get_global_id(0); + const uint nonce = ctx->data[19] + myid; + sph_u64 N[8] = {0}; + // first hash (GOST 34.11-512 over 80 bytes) + sph_u64 block2[8]; + for (int i = 0; i < 15; i++) ((uint *)block2)[i] = ctx->data[i+4]; + ((uint *)block2)[15] = nonce; // change nonce + sph_u64 hash1[8]; + // second block + GOST_g_0_0 (hash1, block2); // zero iv for 512 assumed + N[7] = 0x0002000000000000; // 512 + // first block + sph_u64 block1[10] = { 0, 0, 0, 0, 0, 0, 0, 0x0100000000000000, 0, 0 }; + block1[8] = ((__constant sph_u64*)ctx->data)[0]; + block1[9] = ((__constant sph_u64*)ctx->data)[1]; + GOST_g_N(hash1, block1 + 2, N); + N[7] |= 0x8000000000000000; // +128 + GOST_g_0(hash1, N); + GOST_Add128(block2 + 6, block2 + 6, block1 + 8); + block2[5] += 0x0100000000000000; + GOST_g_0(hash1, block2); + // second hash (GOST 34.11-256 over 64 bytes) + sph_u64 hash[8]; + // second block + GOST_g_0_1(hash, hash1); // // iv for 256 assumed (all bytes one) + N[7] = 0x0002000000000000; // 512 + // first block + GOST_g_N(hash, block1, N); + GOST_g_0(hash, N); + hash1[7] += 0x0100000000000000; + GOST_g_0(hash, hash1); + // result is first 32 bytes of hash + output[myid] = (SWAP4 (LODWORD (hash[0])) <= ctx->target[7] && SWAP4 (HIDWORD (hash[0])) <= ctx->target[6]) ? nonce : 0; +} diff --git a/miner.c b/miner.c index 6bf777f..b14eb81 100644 --- a/miner.c +++ b/miner.c @@ -16,8 +16,14 @@ #include #include #include + +#ifdef _WIN32 +#include +#else #include #include +#endif + #include #include #include @@ -25,13 +31,13 @@ #include #include "miner.h" -#include "findnonce.h" #include "ocl.h" +#include "streebog.h" #define VERSION "0.1" -#define PROGRAM_NAME "oclminer" -#define DEF_RPC_URL "http://127.0.0.1:8332/" +#define PROGRAM_NAME "gostoclminer" +#define DEF_RPC_URL "http://127.0.0.1:9376/" #define DEF_RPC_USERPASS "rpcuser:rpcpass" enum { @@ -126,12 +132,6 @@ static bool jobj_binary(const json_t *obj, const char *key, static bool work_decode(const json_t *val, struct work_t *work) { - if (!jobj_binary(val, "midstate", - work->midstate, sizeof(work->midstate))) { - fprintf(stderr, "JSON inval midstate\n"); - goto err_out; - } - if (!jobj_binary(val, "data", work->data, sizeof(work->data))) { fprintf(stderr, "JSON inval data\n"); goto err_out; @@ -185,8 +185,9 @@ static void submit_work(struct work_t *work) res = json_object_get(val, "result"); - printf("PROOF OF WORK RESULT: %s\n", - json_is_true(res) ? "true (yay!!!)" : "false (booooo)"); + printf("PROOF OF WORK RESULT: %s\n", json_is_true(res) ? "true (yay!!!)" : "false (booooo)"); + if (!json_is_true(res)) + printf ("REASON: %s\n", json_string_value(json_object_get(val, "reject-reason"))); json_decref(val); @@ -320,9 +321,15 @@ static void *miner_thread(void *thr_id_int) continue; } - precalc_hash(&work[frame].blk, (uint32_t *)(work[frame].midstate), (uint32_t *)(work[frame].data + 64)); + memcpy (work[frame].blk.data, work[frame].data, 80); + int k; + for (k = 0; k < 19; k++) work[frame].blk.data[k] = swap32 (work[frame].blk.data[k]); + memcpy (work[frame].blk.target, work[frame].target, 32); + + /*work[frame].blk.target[6] = 0xFFFFFFFF; + work[frame].blk.target[7] = 0x000000FF;*/ - work[frame].blk.nonce = 0; + work[frame].blk.data[19] = 0; work[frame].valid = true; work[frame].ready = 0; @@ -332,9 +339,9 @@ static void *miner_thread(void *thr_id_int) gettimeofday(&tv_start, NULL); - int threads = 102400 * 4; + int threads = 65536; // TODO: globalThreads[0] = threads; - localThreads[0] = 128; + localThreads[0] = 256; status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0, sizeof(dev_blk_ctx), (void *)&work[frame].blk, 0, NULL, NULL); @@ -348,56 +355,46 @@ static void *miner_thread(void *thr_id_int) clFlush(clState->commandQueue); - hashes_done = 1024 * threads; + hashes_done = 1024 * threads; - if (work[res_frame].ready) { - rc = false; - - uint32_t bestG = ~0; - uint32_t nonce; + if (work[res_frame].ready) + { int j; - for(j = 0; j < work[res_frame].ready; j++) { - if(res[j]) { - uint32_t start = (work[res_frame].res_nonce + j)<<10; - uint32_t my_g, my_nonce; - my_g = postcalc_hash(&work[res_frame].blk, &work[res_frame], start, start + 1026, &my_nonce, opt_pool, &h0count); - - if (!opt_pool) { - if (opt_debug) - fprintf(stderr, "DEBUG: H0 within %u .. %u, best G = %08x, nonce = %08x\n", start, start + 1024, my_g, my_nonce); - - if(my_g < bestG) { - bestG = my_g; - nonce = my_nonce; - if (opt_debug) - fprintf(stderr, "new best\n"); - } + for(j = 0; j < work[res_frame].ready; j++) + { + if(res[j]) + { + uint32_t hash[8]; + work[frame].blk.data[19] = res[j]; + gostd_hash (hash, work[frame].blk.data); + work[frame].blk.data[19] = 0; + int k; + for (k = 0; k < 8; k++) printf ("%08x ", hash[k]); + printf ("\n"); + if (swap32 (hash[0]) <= work[frame].blk.target[7]) + { + uint32_t *target1 = (uint32_t *)(work[res_frame].target + 24); + uint32_t *target2 = (uint32_t *)(work[res_frame].target + 28); + printf("Found solution for %08x %08x: %08x\n", *target1, *target2, res[j]); + submit_nonce(&work[res_frame], swap32 (res[j])); + block++; + h0count++; + need_work = true; + break; } - - rc = true; + else + printf ("result for %08x does not validate on CPU!", res[j]); } } work[res_frame].ready = false; - - uint32_t *target = (uint32_t *)(work[res_frame].target + 24); - - if(!opt_pool && rc && bestG <= *target) { - printf("Found solution for %08x: %08x %u\n", *target, bestG, nonce); - - submit_nonce(&work[res_frame], nonce); - - block++; - - need_work = true; - } - } - if (h0count != 0) { + if (h0count != 0) + { double secs; secs = time2secs(&tv_start0); - hashrates[thr_id] = h0count * pow(2, 256) / (secs * 1e6 * (pow(2, 224) - 1.0)); + hashrates[thr_id] = h0count * pow (2, 256) / (secs * 1e6 * (pow (2, 224) - 1.0)); } status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, @@ -406,11 +403,11 @@ static void *miner_thread(void *thr_id_int) res_frame = frame; work[res_frame].ready = threads; - work[res_frame].res_nonce = work[res_frame].blk.nonce; + work[res_frame].res_nonce = work[res_frame].blk.data[19]; - work[frame].blk.nonce += threads; + work[frame].blk.data[19] += threads; - if (work[frame].blk.nonce > 4000000 - threads) + if (work[frame].blk.data[19] > 4000000 - threads) need_work = true; failures = 0; @@ -423,8 +420,8 @@ static void show_usage(void) { int i; - printf("oclminer version %s\n\n", VERSION); - printf("Usage:\tminerd [options]\n\nSupported options:\n"); + printf("gostoclminer version %s\n\n", VERSION); + printf("Usage:\tgostoclminer [options]\n\nSupported options:\n"); for (i = 0; i < ARRAY_SIZE(options_help); i++) { struct option_help *h; diff --git a/miner.h b/miner.h index 636ca29..f2c0825 100644 --- a/miner.h +++ b/miner.h @@ -1,5 +1,6 @@ #ifndef __MINER_H__ #define __MINER_H__ +#include #include "ocl.h" #ifndef ARRAY_SIZE @@ -14,5 +15,6 @@ extern char *bin2hex(unsigned char *p, size_t len); extern int hex2bin(unsigned char *p, const char *hexstr, size_t len); extern int timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y); +extern uint32_t swap32(uint32_t value); #endif /* __MINER_H__ */ diff --git a/ocl.c b/ocl.c index 1a4e51c..41b9021 100644 --- a/ocl.c +++ b/ocl.c @@ -3,14 +3,19 @@ #include #include #include + +#ifdef _WIN32 +#include +#else #include #include -#include +#include +#endif + #include #include #include -#include "findnonce.h" #include "ocl.h" char *file_contents(const char *filename, int *length) @@ -210,7 +215,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { // Load CL file, build CL program object, create CL kernel object ///////////////////////////////////////////////////////////////// // - const char * filename = "oclminer.cl"; + const char * filename = "gostminer.cl"; int pl; char *source = file_contents(filename, &pl); size_t sourceSize[] = {(size_t)pl}; @@ -223,7 +228,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { } /* create a cl program executable for all the devices specified */ - status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); + status = clBuildProgram(clState->program, 1, &devices[gpu], "", NULL, NULL); if(status != CL_SUCCESS) { printf("Error: Building Program (clBuildProgram)\n"); @@ -237,7 +242,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { } /* get a kernel object handle for a kernel with the given name */ - clState->kernel = clCreateKernel(clState->program, "oclminer", &status); + clState->kernel = clCreateKernel(clState->program, "gostminer", &status); if(status != CL_SUCCESS) { printf("Error: Creating Kernel from program. (clCreateKernel)\n"); @@ -254,16 +259,16 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { return NULL; } - clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(dev_blk_ctx), NULL, &status); + clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof (dev_blk_ctx), NULL, &status); if(status != CL_SUCCESS) { printf("Error: clCreateBuffer (inputBuffer)\n"); - return; + return NULL; } clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * MAXTHREADS, NULL, &status); if(status != CL_SUCCESS) { printf("Error: clCreateBuffer (outputBuffer)\n"); - return; + return NULL; } return clState; diff --git a/ocl.h b/ocl.h index 68c1670..63445ba 100644 --- a/ocl.h +++ b/ocl.h @@ -6,6 +6,8 @@ #include #endif +#define MAXTHREADS 256000 + typedef struct { cl_context context; cl_kernel kernel; @@ -15,6 +17,26 @@ typedef struct { cl_mem outputBuffer; } _clState; +typedef struct { + uint32_t data[20]; + uint32_t target[8]; +} dev_blk_ctx; + + +struct work_t { + unsigned char data[128]; + unsigned char hash1[64]; + unsigned char midstate[32]; + unsigned char target[32]; + + unsigned char hash[32]; + uint32_t output[MAXTHREADS]; + uint32_t res_nonce; + uint32_t valid; + uint32_t ready; + dev_blk_ctx blk; +}; + extern char *file_contents(const char *filename, int *length); extern int clDevicesNum(); extern _clState *initCl(int gpu, char *name, size_t nameSize); diff --git a/sph_types.h b/sph_types.h new file mode 100644 index 0000000..7295b0b --- /dev/null +++ b/sph_types.h @@ -0,0 +1,1976 @@ +/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ +/** + * Basic type definitions. + * + * This header file defines the generic integer types that will be used + * for the implementation of hash functions; it also contains helper + * functions which encode and decode multi-byte integer values, using + * either little-endian or big-endian conventions. + * + * This file contains a compile-time test on the size of a byte + * (the unsigned char C type). If bytes are not octets, + * i.e. if they do not have a size of exactly 8 bits, then compilation + * is aborted. Architectures where bytes are not octets are relatively + * rare, even in the embedded devices market. We forbid non-octet bytes + * because there is no clear convention on how octet streams are encoded + * on such systems. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_types.h + * @author Thomas Pornin + */ + +#ifndef SPH_TYPES_H__ +#define SPH_TYPES_H__ + +#include + +/* + * All our I/O functions are defined over octet streams. We do not know + * how to handle input data if bytes are not octets. + */ +#if CHAR_BIT != 8 +#error This code requires 8-bit bytes +#endif + +/* ============= BEGIN documentation block for Doxygen ============ */ + +#ifdef DOXYGEN_IGNORE + +/** @mainpage sphlib C code documentation + * + * @section overview Overview + * + * sphlib is a library which contains implementations of + * various cryptographic hash functions. These pages have been generated + * with doxygen and + * document the API for the C implementations. + * + * The API is described in appropriate header files, which are available + * in the "Files" section. Each hash function family has its own header, + * whose name begins with "sph_" and contains the family + * name. For instance, the API for the RIPEMD hash functions is available + * in the header file sph_ripemd.h. + * + * @section principles API structure and conventions + * + * @subsection io Input/output conventions + * + * In all generality, hash functions operate over strings of bits. + * Individual bits are rarely encountered in C programming or actual + * communication protocols; most protocols converge on the ubiquitous + * "octet" which is a group of eight bits. Data is thus expressed as a + * stream of octets. The C programming language contains the notion of a + * "byte", which is a data unit managed under the type "unsigned + * char". The C standard prescribes that a byte should hold at + * least eight bits, but possibly more. Most modern architectures, even + * in the embedded world, feature eight-bit bytes, i.e. map bytes to + * octets. + * + * Nevertheless, for some of the implemented hash functions, an extra + * API has been added, which allows the input of arbitrary sequences of + * bits: when the computation is about to be closed, 1 to 7 extra bits + * can be added. The functions for which this API is implemented include + * the SHA-2 functions and all SHA-3 candidates. + * + * sphlib defines hash function which may hash octet streams, + * i.e. streams of bits where the number of bits is a multiple of eight. + * The data input functions in the sphlib API expect data + * as anonymous pointers ("const void *") with a length + * (of type "size_t") which gives the input data chunk length + * in bytes. A byte is assumed to be an octet; the sph_types.h + * header contains a compile-time test which prevents compilation on + * architectures where this property is not met. + * + * The hash function output is also converted into bytes. All currently + * implemented hash functions have an output width which is a multiple of + * eight, and this is likely to remain true for new designs. + * + * Most hash functions internally convert input data into 32-bit of 64-bit + * words, using either little-endian or big-endian conversion. The hash + * output also often consists of such words, which are encoded into output + * bytes with a similar endianness convention. Some hash functions have + * been only loosely specified on that subject; when necessary, + * sphlib has been tested against published "reference" + * implementations in order to use the same conventions. + * + * @subsection shortname Function short name + * + * Each implemented hash function has a "short name" which is used + * internally to derive the identifiers for the functions and context + * structures which the function uses. For instance, MD5 has the short + * name "md5". Short names are listed in the next section, + * for the implemented hash functions. In subsequent sections, the + * short name will be assumed to be "XXX": replace with the + * actual hash function name to get the C identifier. + * + * Note: some functions within the same family share the same core + * elements, such as update function or context structure. Correspondingly, + * some of the defined types or functions may actually be macros which + * transparently evaluate to another type or function name. + * + * @subsection context Context structure + * + * Each implemented hash fonction has its own context structure, available + * under the type name "sph_XXX_context" for the hash function + * with short name "XXX". This structure holds all needed + * state for a running hash computation. + * + * The contents of these structures are meant to be opaque, and private + * to the implementation. However, these contents are specified in the + * header files so that application code which uses sphlib + * may access the size of those structures. + * + * The caller is responsible for allocating the context structure, + * whether by dynamic allocation (malloc() or equivalent), + * static allocation (a global permanent variable), as an automatic + * variable ("on the stack"), or by any other mean which ensures proper + * structure alignment. sphlib code performs no dynamic + * allocation by itself. + * + * The context must be initialized before use, using the + * sph_XXX_init() function. This function sets the context + * state to proper initial values for hashing. + * + * Since all state data is contained within the context structure, + * sphlib is thread-safe and reentrant: several hash + * computations may be performed in parallel, provided that they do not + * operate on the same context. Moreover, a running computation can be + * cloned by copying the context (with a simple memcpy()): + * the context and its clone are then independant and may be updated + * with new data and/or closed without interfering with each other. + * Similarly, a context structure can be moved in memory at will: + * context structures contain no pointer, in particular no pointer to + * themselves. + * + * @subsection dataio Data input + * + * Hashed data is input with the sph_XXX() fonction, which + * takes as parameters a pointer to the context, a pointer to the data + * to hash, and the number of data bytes to hash. The context is updated + * with the new data. + * + * Data can be input in one or several calls, with arbitrary input lengths. + * However, it is best, performance wise, to input data by relatively big + * chunks (say a few kilobytes), because this allows sphlib to + * optimize things and avoid internal copying. + * + * When all data has been input, the context can be closed with + * sph_XXX_close(). The hash output is computed and written + * into the provided buffer. The caller must take care to provide a + * buffer of appropriate length; e.g., when using SHA-1, the output is + * a 20-byte word, therefore the output buffer must be at least 20-byte + * long. + * + * For some hash functions, the sph_XXX_addbits_and_close() + * function can be used instead of sph_XXX_close(). This + * function can take a few extra bits to be added at + * the end of the input message. This allows hashing messages with a + * bit length which is not a multiple of 8. The extra bits are provided + * as an unsigned integer value, and a bit count. The bit count must be + * between 0 and 7, inclusive. The extra bits are provided as bits 7 to + * 0 (bits of numerical value 128, 64, 32... downto 0), in that order. + * For instance, to add three bits of value 1, 1 and 0, the unsigned + * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count + * will be 3. + * + * The SPH_SIZE_XXX macro is defined for each hash function; + * it evaluates to the function output size, expressed in bits. For instance, + * SPH_SIZE_sha1 evaluates to 160. + * + * When closed, the context is automatically reinitialized and can be + * immediately used for another computation. It is not necessary to call + * sph_XXX_init() after a close. Note that + * sph_XXX_init() can still be called to "reset" a context, + * i.e. forget previously input data, and get back to the initial state. + * + * @subsection alignment Data alignment + * + * "Alignment" is a property of data, which is said to be "properly + * aligned" when its emplacement in memory is such that the data can + * be optimally read by full words. This depends on the type of access; + * basically, some hash functions will read data by 32-bit or 64-bit + * words. sphlib does not mandate such alignment for input + * data, but using aligned data can substantially improve performance. + * + * As a rule, it is best to input data by chunks whose length (in bytes) + * is a multiple of eight, and which begins at "generally aligned" + * addresses, such as the base address returned by a call to + * malloc(). + * + * @section functions Implemented functions + * + * We give here the list of implemented functions. They are grouped by + * family; to each family corresponds a specific header file. Each + * individual function has its associated "short name". Please refer to + * the documentation for that header file to get details on the hash + * function denomination and provenance. + * + * Note: the functions marked with a '(64)' in the list below are + * available only if the C compiler provides an integer type of length + * 64 bits or more. Such a type is mandatory in the latest C standard + * (ISO 9899:1999, aka "C99") and is present in several older compilers + * as well, so chances are that such a type is available. + * + * - HAVAL family: file sph_haval.h + * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 + * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 + * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 + * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 + * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 + * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 + * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 + * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 + * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 + * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 + * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 + * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 + * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 + * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 + * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 + * - MD2: file sph_md2.h, short name: md2 + * - MD4: file sph_md4.h, short name: md4 + * - MD5: file sph_md5.h, short name: md5 + * - PANAMA: file sph_panama.h, short name: panama + * - RadioGatun family: file sph_radiogatun.h + * - RadioGatun[32]: short name: radiogatun32 + * - RadioGatun[64]: short name: radiogatun64 (64) + * - RIPEMD family: file sph_ripemd.h + * - RIPEMD: short name: ripemd + * - RIPEMD-128: short name: ripemd128 + * - RIPEMD-160: short name: ripemd160 + * - SHA-0: file sph_sha0.h, short name: sha0 + * - SHA-1: file sph_sha1.h, short name: sha1 + * - SHA-2 family, 32-bit hashes: file sph_sha2.h + * - SHA-224: short name: sha224 + * - SHA-256: short name: sha256 + * - SHA-384: short name: sha384 (64) + * - SHA-512: short name: sha512 (64) + * - Tiger family: file sph_tiger.h + * - Tiger: short name: tiger (64) + * - Tiger2: short name: tiger2 (64) + * - WHIRLPOOL family: file sph_whirlpool.h + * - WHIRLPOOL-0: short name: whirlpool0 (64) + * - WHIRLPOOL-1: short name: whirlpool1 (64) + * - WHIRLPOOL: short name: whirlpool (64) + * + * The fourteen second-round SHA-3 candidates are also implemented; + * when applicable, the implementations follow the "final" specifications + * as published for the third round of the SHA-3 competition (BLAKE, + * Groestl, JH, Keccak and Skein have been tweaked for third round). + * + * - BLAKE family: file sph_blake.h + * - BLAKE-224: short name: blake224 + * - BLAKE-256: short name: blake256 + * - BLAKE-384: short name: blake384 + * - BLAKE-512: short name: blake512 + * - BMW (Blue Midnight Wish) family: file sph_bmw.h + * - BMW-224: short name: bmw224 + * - BMW-256: short name: bmw256 + * - BMW-384: short name: bmw384 (64) + * - BMW-512: short name: bmw512 (64) + * - CubeHash family: file sph_cubehash.h (specified as + * CubeHash16/32 in the CubeHash specification) + * - CubeHash-224: short name: cubehash224 + * - CubeHash-256: short name: cubehash256 + * - CubeHash-384: short name: cubehash384 + * - CubeHash-512: short name: cubehash512 + * - ECHO family: file sph_echo.h + * - ECHO-224: short name: echo224 + * - ECHO-256: short name: echo256 + * - ECHO-384: short name: echo384 + * - ECHO-512: short name: echo512 + * - Fugue family: file sph_fugue.h + * - Fugue-224: short name: fugue224 + * - Fugue-256: short name: fugue256 + * - Fugue-384: short name: fugue384 + * - Fugue-512: short name: fugue512 + * - Groestl family: file sph_groestl.h + * - Groestl-224: short name: groestl224 + * - Groestl-256: short name: groestl256 + * - Groestl-384: short name: groestl384 + * - Groestl-512: short name: groestl512 + * - Hamsi family: file sph_hamsi.h + * - Hamsi-224: short name: hamsi224 + * - Hamsi-256: short name: hamsi256 + * - Hamsi-384: short name: hamsi384 + * - Hamsi-512: short name: hamsi512 + * - JH family: file sph_jh.h + * - JH-224: short name: jh224 + * - JH-256: short name: jh256 + * - JH-384: short name: jh384 + * - JH-512: short name: jh512 + * - Keccak family: file sph_keccak.h + * - Keccak-224: short name: keccak224 + * - Keccak-256: short name: keccak256 + * - Keccak-384: short name: keccak384 + * - Keccak-512: short name: keccak512 + * - Luffa family: file sph_luffa.h + * - Luffa-224: short name: luffa224 + * - Luffa-256: short name: luffa256 + * - Luffa-384: short name: luffa384 + * - Luffa-512: short name: luffa512 + * - Shabal family: file sph_shabal.h + * - Shabal-192: short name: shabal192 + * - Shabal-224: short name: shabal224 + * - Shabal-256: short name: shabal256 + * - Shabal-384: short name: shabal384 + * - Shabal-512: short name: shabal512 + * - SHAvite-3 family: file sph_shavite.h + * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"): + * short name: shabal224 + * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"): + * short name: shabal256 + * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"): + * short name: shabal384 + * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"): + * short name: shabal512 + * - SIMD family: file sph_simd.h + * - SIMD-224: short name: simd224 + * - SIMD-256: short name: simd256 + * - SIMD-384: short name: simd384 + * - SIMD-512: short name: simd512 + * - Skein family: file sph_skein.h + * - Skein-224 (nominally specified as Skein-512-224): short name: + * skein224 (64) + * - Skein-256 (nominally specified as Skein-512-256): short name: + * skein256 (64) + * - Skein-384 (nominally specified as Skein-512-384): short name: + * skein384 (64) + * - Skein-512 (nominally specified as Skein-512-512): short name: + * skein512 (64) + * + * For the second-round SHA-3 candidates, the functions are as specified + * for round 2, i.e. with the "tweaks" that some candidates added + * between round 1 and round 2. Also, some of the submitted packages for + * round 2 contained errors, in the specification, reference code, or + * both. sphlib implements the corrected versions. + */ + +/** @hideinitializer + * Unsigned integer type whose length is at least 32 bits; on most + * architectures, it will have a width of exactly 32 bits. Unsigned C + * types implement arithmetics modulo a power of 2; use the + * SPH_T32() macro to ensure that the value is truncated + * to exactly 32 bits. Unless otherwise specified, all macros and + * functions which accept sph_u32 values assume that these + * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures + * where sph_u32 is larger than that. + */ +typedef __arch_dependant__ sph_u32; + +/** @hideinitializer + * Signed integer type corresponding to sph_u32; it has + * width 32 bits or more. + */ +typedef __arch_dependant__ sph_s32; + +/** @hideinitializer + * Unsigned integer type whose length is at least 64 bits; on most + * architectures which feature such a type, it will have a width of + * exactly 64 bits. C99-compliant platform will have this type; it + * is also defined when the GNU compiler (gcc) is used, and on + * platforms where unsigned long is large enough. If this + * type is not available, then some hash functions which depends on + * a 64-bit type will not be available (most notably SHA-384, SHA-512, + * Tiger and WHIRLPOOL). + */ +typedef __arch_dependant__ sph_u64; + +/** @hideinitializer + * Signed integer type corresponding to sph_u64; it has + * width 64 bits or more. + */ +typedef __arch_dependant__ sph_s64; + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u32. Depending on + * how this type is defined, a suffix such as UL may + * be appended to the argument. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C32(x) + +/** + * Truncate a 32-bit value to exactly 32 bits. On most systems, this is + * a no-op, recognized as such by the compiler. + * + * @param x the value to truncate (of type sph_u32) + */ +#define SPH_T32(x) + +/** + * Rotate a 32-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTL32(x, n) + +/** + * Rotate a 32-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTR32(x, n) + +/** + * This macro is defined on systems for which a 64-bit type has been + * detected, and is used for sph_u64. + */ +#define SPH_64 + +/** + * This macro is defined on systems for the "native" integer size is + * 64 bits (64-bit values fit in one register). + */ +#define SPH_64_TRUE + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u64. Depending on + * how this type is defined, a suffix such as ULL may + * be appended to the argument. This macro is defined only if a + * 64-bit type was detected and used for sph_u64. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C64(x) + +/** + * Truncate a 64-bit value to exactly 64 bits. On most systems, this is + * a no-op, recognized as such by the compiler. This macro is defined only + * if a 64-bit type was detected and used for sph_u64. + * + * @param x the value to truncate (of type sph_u64) + */ +#define SPH_T64(x) + +/** + * Rotate a 64-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTL64(x, n) + +/** + * Rotate a 64-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTR64(x, n) + +/** + * This macro evaluates to inline or an equivalent construction, + * if available on the compilation platform, or to nothing otherwise. This + * is used to declare inline functions, for which the compiler should + * endeavour to include the code directly in the caller. Inline functions + * are typically defined in header files as replacement for macros. + */ +#define SPH_INLINE + +/** + * This macro is defined if the platform has been detected as using + * little-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit). + */ +#define SPH_LITTLE_ENDIAN + +/** + * This macro is defined if the platform has been detected as using + * big-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit). + */ +#define SPH_BIG_ENDIAN + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in little-endian + * convention. This is the case for little-endian platforms, and also + * for the big-endian platforms which have special little-endian access + * opcodes (e.g. Ultrasparc). + */ +#define SPH_LITTLE_FAST + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in big-endian + * convention. This is the case for little-endian platforms, and also + * for the little-endian platforms which have special big-endian access + * opcodes. + */ +#define SPH_BIG_FAST + +/** + * On some platforms, this macro is defined to an unsigned integer type + * into which pointer values may be cast. The resulting value can then + * be tested for being a multiple of 2, 4 or 8, indicating an aligned + * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses. + */ +#define SPH_UPTR + +/** + * When defined, this macro indicates that unaligned memory accesses + * are possible with only a minor penalty, and thus should be prefered + * over strategies which first copy data to an aligned buffer. + */ +#define SPH_UNALIGNED + +/** + * Byte-swap a 32-bit word (i.e. 0x12345678 becomes + * 0x78563412). This is an inline function which resorts + * to inline assembly on some platforms, for better performance. + * + * @param x the 32-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u32 sph_bswap32(sph_u32 x); + +/** + * Byte-swap a 64-bit word. This is an inline function which resorts + * to inline assembly on some platforms, for better performance. This + * function is defined only if a suitable 64-bit type was found for + * sph_u64 + * + * @param x the 64-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u64 sph_bswap64(sph_u64 x); + +/** + * Decode a 16-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16le(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16le(void *dst, unsigned val); + +/** + * Decode a 16-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16be(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16be(void *dst, unsigned val); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32le() function. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32le() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le_aligned(void *dst, sph_u32 val); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32be() function. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32be() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be_aligned(void *dst, sph_u32 val); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64le() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64le() function. This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le_aligned(void *dst, sph_u64 val); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64be() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64be() function. This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be_aligned(void *dst, sph_u64 val); + +#endif + +/* ============== END documentation block for Doxygen ============= */ + +#ifndef DOXYGEN_IGNORE + +/* + * We want to define the types "sph_u32" and "sph_u64" which hold + * unsigned values of at least, respectively, 32 and 64 bits. These + * tests should select appropriate types for most platforms. The + * macro "SPH_64" is defined if the 64-bit is supported. + */ + +#undef SPH_64 +#undef SPH_64_TRUE + +#if defined __STDC__ && __STDC_VERSION__ >= 199901L + +/* + * On C99 implementations, we can use to get an exact 64-bit + * type, if any, or otherwise use a wider type (which must exist, for + * C99 conformance). + */ + +#include + +#ifdef UINT32_MAX +typedef uint32_t sph_u32; +typedef int32_t sph_s32; +#else +typedef uint_fast32_t sph_u32; +typedef int_fast32_t sph_s32; +#endif +#if !SPH_NO_64 +#ifdef UINT64_MAX +typedef uint64_t sph_u64; +typedef int64_t sph_s64; +#else +typedef uint_fast64_t sph_u64; +typedef int_fast64_t sph_s64; +#endif +#endif + +#define SPH_C32(x) ((sph_u32)(x)) +#if !SPH_NO_64 +#define SPH_C64(x) ((sph_u64)(x)) +#define SPH_64 1 +#endif + +#else + +/* + * On non-C99 systems, we use "unsigned int" if it is wide enough, + * "unsigned long" otherwise. This supports all "reasonable" architectures. + * We have to be cautious: pre-C99 preprocessors handle constants + * differently in '#if' expressions. Hence the shifts to test UINT_MAX. + */ + +#if ((UINT_MAX >> 11) >> 11) >= 0x3FF + +typedef unsigned int sph_u32; +typedef int sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## U)) + +#else + +typedef unsigned long sph_u32; +typedef long sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## UL)) + +#endif + +#if !SPH_NO_64 + +/* + * We want a 64-bit type. We use "unsigned long" if it is wide enough (as + * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), + * "unsigned long long" otherwise, if available. We use ULLONG_MAX to + * test whether "unsigned long long" is available; we also know that + * gcc features this type, even if the libc header do not know it. + */ + +#if ((ULONG_MAX >> 31) >> 31) >= 3 + +typedef unsigned long sph_u64; +typedef long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## UL)) + +#define SPH_64 1 + +#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ + +typedef unsigned long long sph_u64; +typedef long long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## ULL)) + +#define SPH_64 1 + +#else + +/* + * No 64-bit type... + */ + +#endif + +#endif + +#endif + +/* + * If the "unsigned long" type has length 64 bits or more, then this is + * a "true" 64-bit architectures. This is also true with Visual C on + * amd64, even though the "long" type is limited to 32 bits. + */ +#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) +#define SPH_64_TRUE 1 +#endif + +/* + * Implementation note: some processors have specific opcodes to perform + * a rotation. Recent versions of gcc recognize the expression above and + * use the relevant opcodes, when appropriate. + */ + +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#if SPH_64 + +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#endif + +#ifndef DOXYGEN_IGNORE +/* + * Define SPH_INLINE to be an "inline" qualifier, if available. We define + * some small macro-like functions which benefit greatly from being inlined. + */ +#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ +#define SPH_INLINE inline +#elif defined _MSC_VER +#define SPH_INLINE __inline +#else +#define SPH_INLINE +#endif +#endif + +/* + * We define some macros which qualify the architecture. These macros + * may be explicit set externally (e.g. as compiler parameters). The + * code below sets those macros if they are not already defined. + * + * Most macros are boolean, thus evaluate to either zero or non-zero. + * The SPH_UPTR macro is special, in that it evaluates to a C type, + * or is not defined. + * + * SPH_UPTR if defined: unsigned type to cast pointers into + * + * SPH_UNALIGNED non-zero if unaligned accesses are efficient + * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian + * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian + * SPH_LITTLE_FAST non-zero if little-endian decoding is fast + * SPH_BIG_FAST non-zero if big-endian decoding is fast + * + * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit + * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN + * _must_ be non-zero in those situations. The 32-bit and 64-bit types + * _must_ also have an exact width. + * + * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode + * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode + * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc + * SPH_I386_GCC x86-compatible (32-bit) with gcc + * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C + * SPH_AMD64_GCC x86-compatible (64-bit) with gcc + * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C + * SPH_PPC32_GCC PowerPC, 32-bit, with gcc + * SPH_PPC64_GCC PowerPC, 64-bit, with gcc + * + * TODO: enhance automatic detection, for more architectures and compilers. + * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with + * some very fast functions (e.g. MD4) when using unaligned input data. + * The CPU-specific-with-GCC macros are useful only for inline assembly, + * normally restrained to this header file. + */ + +/* + * 32-bit x86, aka "i386 compatible". + */ +#if defined __i386__ || defined _M_IX86 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#ifdef __GNUC__ +#define SPH_DETECT_I386_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_I386_MSVC 1 +#endif + +/* + * 64-bit x86, hereafter known as "amd64". + */ +#elif defined __x86_64 || defined _M_X64 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_AMD64_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_AMD64_MSVC 1 +#endif + +/* + * 64-bit Sparc architecture (implies v9). + */ +#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \ + || defined __sparcv9 + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_SPARCV9_GCC_64 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * 32-bit Sparc. + */ +#elif (defined __sparc__ || defined __sparc) \ + && !(defined __sparcv9 || defined __arch64__) + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#if defined __GNUC__ && defined __sparc_v9__ +#define SPH_DETECT_SPARCV9_GCC_32 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * ARM, little-endian. + */ +#elif defined __arm__ && __ARMEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, little-endian. + */ +#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, big-endian. + */ +#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__ + +#define SPH_DETECT_BIG_ENDIAN 1 + +/* + * PowerPC. + */ +#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \ + || defined _ARCH_PPC + +/* + * Note: we do not declare cross-endian access to be "fast": even if + * using inline assembly, implementation should still assume that + * keeping the decoded word in a temporary is faster than decoding + * it again. + */ +#if defined __GNUC__ +#if SPH_64_TRUE +#define SPH_DETECT_PPC64_GCC 1 +#else +#define SPH_DETECT_PPC32_GCC 1 +#endif +#endif + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif + +/* + * Itanium, 64-bit. + */ +#elif defined __ia64 || defined __ia64__ \ + || defined __itanium__ || defined _M_IA64 + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#else +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif +#if defined __LP64__ || defined _LP64 +#define SPH_DETECT_UPTR sph_u64 +#else +#define SPH_DETECT_UPTR sph_u32 +#endif + +#endif + +#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64 +#define SPH_DETECT_SPARCV9_GCC 1 +#endif + +#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED +#define SPH_UNALIGNED SPH_DETECT_UNALIGNED +#endif +#if defined SPH_DETECT_UPTR && !defined SPH_UPTR +#define SPH_UPTR SPH_DETECT_UPTR +#endif +#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN +#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN +#endif +#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN +#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN +#endif +#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST +#endif +#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST +#define SPH_BIG_FAST SPH_DETECT_BIG_FAST +#endif +#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32 +#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32 +#endif +#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64 +#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64 +#endif +#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC +#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC +#endif +#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC +#define SPH_I386_GCC SPH_DETECT_I386_GCC +#endif +#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC +#define SPH_I386_MSVC SPH_DETECT_I386_MSVC +#endif +#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC +#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC +#endif +#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC +#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC +#endif +#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC +#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC +#endif +#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC +#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC +#endif + +#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST 1 +#endif +#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST +#define SPH_BIG_FAST 1 +#endif + +#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN) +#error SPH_UPTR defined, but endianness is not known. +#endif + +#if SPH_I386_GCC && !SPH_NO_ASM + +/* + * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * values. + */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + +#elif SPH_AMD64_GCC && !SPH_NO_ASM + +/* + * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * and 64-bit values. + */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x)); + return x; +} + +#endif + +/* + * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough + * to generate proper opcodes for endianness swapping with the pure C + * implementation below. + * + +#elif SPH_I386_MSVC && !SPH_NO_ASM + +static __inline sph_u32 __declspec(naked) __fastcall +sph_bswap32(sph_u32 x) +{ + __asm { + bswap ecx + mov eax,ecx + ret + } +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + + * + * [end of disabled code] + */ + +#else + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + x = SPH_T32((x << 16) | (x >> 16)); + x = ((x & SPH_C32(0xFF00FF00)) >> 8) + | ((x & SPH_C32(0x00FF00FF)) << 8); + return x; +} + +#if SPH_64 + +/** + * Byte-swap a 64-bit value. + * + * @param x the input value + * @return the byte-swapped value + */ +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + x = SPH_T64((x << 32) | (x >> 32)); + x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) + | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); + x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) + | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); + return x; +} + +#endif + +#endif + +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + +/* + * On UltraSPARC systems, native ordering is big-endian, but it is + * possible to perform little-endian read accesses by specifying the + * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use + * the opcode "lda [%reg]0x88,%dst", where %reg is the register which + * contains the source address and %dst is the destination register, + * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register + * to get the address space name. The latter format is better since it + * combines an addition and the actual access in a single opcode; but + * it requires the setting (and subsequent resetting) of %asi, which is + * slow. Some operations (i.e. MD5 compression function) combine many + * successive little-endian read accesses, which may share the same + * %asi setting. The macros below contain the appropriate inline + * assembly. + */ + +#define SPH_SPARCV9_SET_ASI \ + sph_u32 sph_sparcv9_asi; \ + __asm__ __volatile__ ( \ + "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_RESET_ASI \ + __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_DEC32LE(base, idx) ({ \ + sph_u32 sph_sparcv9_tmp; \ + __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ + : "=r" (sph_sparcv9_tmp) : "r" (base)); \ + sph_sparcv9_tmp; \ + }) + +#endif + +static SPH_INLINE void +sph_enc16be(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = (val >> 8); + ((unsigned char *)dst)[1] = val; +} + +static SPH_INLINE unsigned +sph_dec16be(const void *src) +{ + return ((unsigned)(((const unsigned char *)src)[0]) << 8) + | (unsigned)(((const unsigned char *)src)[1]); +} + +static SPH_INLINE void +sph_enc16le(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = val >> 8; +} + +static SPH_INLINE unsigned +sph_dec16le(const void *src) +{ + return (unsigned)(((const unsigned char *)src)[0]) + | ((unsigned)(((const unsigned char *)src)[1]) << 8); +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). + * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32be(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32be_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif + } else { + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); + } +#endif +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u32 *)src; +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). + * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32le(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32le_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + /* + * "__volatile__" is needed here because without it, + * gcc-3.4.3 miscompiles the code and performs the + * access before the test on the address, thus triggering + * a bus error... + */ + __asm__ __volatile__ ( + "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * On PowerPC, this turns out not to be worth the effort: the inline + * assembly makes GCC optimizer uncomfortable, which tends to nullify + * the decoding gains. + * + * For most hash functions, using this inline assembly trick changes + * hashing speed by less than 5% and often _reduces_ it. The biggest + * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is + * less then 10%. The speed gain on CubeHash is probably due to the + * chronic shortage of registers that CubeHash endures; for the other + * functions, the generic code appears to be efficient enough already. + * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ( + "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return *(const sph_u32 *)src; +#endif + } else { + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); + } +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u32 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +#if SPH_64 + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). + * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64be(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64be_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif + } else { + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); + } +#endif +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u64 *)src; +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian convention). + * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64le(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64le_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned( + (const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return *(const sph_u64 *)src; +#endif + } else { + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); + } +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u64 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +#endif + +#endif /* Doxygen excluded block */ + +#endif diff --git a/streebog.c b/streebog.c new file mode 100644 index 0000000..a789787 --- /dev/null +++ b/streebog.c @@ -0,0 +1,1054 @@ +/* Streebog GOST hash function for sib algo SibCoin */ + +#include +#include +#include +#include + +#include "streebog.h" + +#ifdef __cplusplus +extern "C"{ +#endif + + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +//-------------------------------------------------------------------------------------------- +// +// Streebog 512 implementation +// +//-------------------------------------------------------------------------------------------- + + +// Tables for function F +static const sph_u64 TG[8][256] = {{ + 0xE6F87E5C5B711FD0,0x258377800924FA16,0xC849E07E852EA4A8,0x5B4686A18F06C16A, + 0x0B32E9A2D77B416E,0xABDA37A467815C66,0xF61796A81A686676,0xF5DC0B706391954B, + 0x4862F38DB7E64BF1,0xFF5C629A68BD85C5,0xCB827DA6FCD75795,0x66D36DAF69B9F089, + 0x356C9F74483D83B0,0x7CBCECB1238C99A1,0x36A702AC31C4708D,0x9EB6A8D02FBCDFD6, + 0x8B19FA51E5B3AE37,0x9CCFB5408A127D0B,0xBC0C78B508208F5A,0xE533E3842288ECED, + 0xCEC2C7D377C15FD2,0xEC7817B6505D0F5E,0xB94CC2C08336871D,0x8C205DB4CB0B04AD, + 0x763C855B28A0892F,0x588D1B79F6FF3257,0x3FECF69E4311933E,0x0FC0D39F803A18C9, + 0xEE010A26F5F3AD83,0x10EFE8F4411979A6,0x5DCDA10C7DE93A10,0x4A1BEE1D1248E92C, + 0x53BFF2DB21847339,0xB4F50CCFA6A23D09,0x5FB4BC9CD84798CD,0xE88A2D8B071C56F9, + 0x7F7771695A756A9C,0xC5F02E71A0BA1EBC,0xA663F9AB4215E672,0x2EB19E22DE5FBB78, + 0x0DB9CE0F2594BA14,0x82520E6397664D84,0x2F031E6A0208EA98,0x5C7F2144A1BE6BF0, + 0x7A37CB1CD16362DB,0x83E08E2B4B311C64,0xCF70479BAB960E32,0x856BA986B9DEE71E, + 0xB5478C877AF56CE9,0xB8FE42885F61D6FD,0x1BDD0156966238C8,0x622157923EF8A92E, + 0xFC97FF42114476F8,0x9D7D350856452CEB,0x4C90C9B0E0A71256,0x2308502DFBCB016C, + 0x2D7A03FAA7A64845,0xF46E8B38BFC6C4AB,0xBDBEF8FDD477DEBA,0x3AAC4CEBC8079B79, + 0xF09CB105E8879D0C,0x27FA6A10AC8A58CB,0x8960E7C1401D0CEA,0x1A6F811E4A356928, + 0x90C4FB0773D196FF,0x43501A2F609D0A9F,0xF7A516E0C63F3796,0x1CE4A6B3B8DA9252, + 0x1324752C38E08A9B,0xA5A864733BEC154F,0x2BF124575549B33F,0xD766DB15440DC5C7, + 0xA7D179E39E42B792,0xDADF151A61997FD3,0x86A0345EC0271423,0x38D5517B6DA939A4, + 0x6518F077104003B4,0x02791D90A5AEA2DD,0x88D267899C4A5D0A,0x930F66DF0A2865C2, + 0x4EE9D4204509B08B,0x325538916685292A,0x412907BFC533A842,0xB27E2B62544DC673, + 0x6C5304456295E007,0x5AF406E95351908A,0x1F2F3B6BC123616F,0xC37B09DC5255E5C6, + 0x3967D133B1FE6844,0x298839C7F0E711E2,0x409B87F71964F9A2,0xE938ADC3DB4B0719, + 0x0C0B4E47F9C3EBF4,0x5534D576D36B8843,0x4610A05AEB8B02D8,0x20C3CDF58232F251, + 0x6DE1840DBEC2B1E7,0xA0E8DE06B0FA1D08,0x7B854B540D34333B,0x42E29A67BCCA5B7F, + 0xD8A6088AC437DD0E,0xC63BB3A9D943ED81,0x21714DBD5E65A3B1,0x6761EDE7B5EEA169, + 0x2431F7C8D573ABF6,0xD51FC685E1A3671A,0x5E063CD40410C92D,0x283AB98F2CB04002, + 0x8FEBC06CB2F2F790,0x17D64F116FA1D33C,0xE07359F1A99EE4AA,0x784ED68C74CDC006, + 0x6E2A19D5C73B42DA,0x8712B4161C7045C3,0x371582E4ED93216D,0xACE390414939F6FC, + 0x7EC5F12186223B7C,0xC0B094042BAC16FB,0xF9D745379A527EBF,0x737C3F2EA3B68168, + 0x33E7B8D9BAD278CA,0xA9A32A34C22FFEBB,0xE48163CCFEDFBD0D,0x8E5940246EA5A670, + 0x51C6EF4B842AD1E4,0x22BAD065279C508C,0xD91488C218608CEE,0x319EA5491F7CDA17, + 0xD394E128134C9C60,0x094BF43272D5E3B3,0x9BF612A5A4AAD791,0xCCBBDA43D26FFD0F, + 0x34DE1F3C946AD250,0x4F5B5468995EE16B,0xDF9FAF6FEA8F7794,0x2648EA5870DD092B, + 0xBFC7E56D71D97C67,0xDDE6B2FF4F21D549,0x3C276B463AE86003,0x91767B4FAF86C71F, + 0x68A13E7835D4B9A0,0xB68C115F030C9FD4,0x141DD2C916582001,0x983D8F7DDD5324AC, + 0x64AA703FCC175254,0xC2C989948E02B426,0x3E5E76D69F46C2DE,0x50746F03587D8004, + 0x45DB3D829272F1E5,0x60584A029B560BF3,0xFBAE58A73FFCDC62,0xA15A5E4E6CAD4CE8, + 0x4BA96E55CE1FB8CC,0x08F9747AAE82B253,0xC102144CF7FB471B,0x9F042898F3EB8E36, + 0x068B27ADF2EFFB7A,0xEDCA97FE8C0A5EBE,0x778E0513F4F7D8CF,0x302C2501C32B8BF7, + 0x8D92DDFC175C554D,0xF865C57F46052F5F,0xEAF3301BA2B2F424,0xAA68B7ECBBD60D86, + 0x998F0F350104754C,0x0000000000000000,0xF12E314D34D0CCEC,0x710522BE061823B5, + 0xAF280D9930C005C1,0x97FD5CE25D693C65,0x19A41CC633CC9A15,0x95844172F8C79EB8, + 0xDC5432B7937684A9,0x9436C13A2490CF58,0x802B13F332C8EF59,0xC442AE397CED4F5C, + 0xFA1CD8EFE3AB8D82,0xF2E5AC954D293FD1,0x6AD823E8907A1B7D,0x4D2249F83CF043B6, + 0x03CB9DD879F9F33D,0xDE2D2F2736D82674,0x2A43A41F891EE2DF,0x6F98999D1B6C133A, + 0xD4AD46CD3DF436FA,0xBB35DF50269825C0,0x964FDCAA813E6D85,0xEB41B0537EE5A5C4, + 0x0540BA758B160847,0xA41AE43BE7BB44AF,0xE3B8C429D0671797,0x819993BBEE9FBEB9, + 0xAE9A8DD1EC975421,0xF3572CDD917E6E31,0x6393D7DAE2AFF8CE,0x47A2201237DC5338, + 0xA32343DEC903EE35,0x79FC56C4A89A91E6,0x01B28048DC5751E0,0x1296F564E4B7DB7B, + 0x75F7188351597A12,0xDB6D9552BDCE2E33,0x1E9DBB231D74308F,0x520D7293FDD322D9, + 0xE20A44610C304677,0xFEEEE2D2B4EAD425,0xCA30FDEE20800675,0x61EACA4A47015A13, + 0xE74AFE1487264E30,0x2CC883B27BF119A5,0x1664CF59B3F682DC,0xA811AA7C1E78AF5B, + 0x1D5626FB648DC3B2,0xB73E9117DF5BCE34,0xD05F7CF06AB56F5D,0xFD257F0ACD132718, + 0x574DC8E676C52A9E,0x0739A7E52EB8AA9A,0x5486553E0F3CD9A3,0x56FF48AEAA927B7E, + 0xBE756525AD8E2D87,0x7D0E6CF9FFDBC841,0x3B1ECCA31450CA99,0x6913BE30E983E840, + 0xAD511009956EA71C,0xB1B5B6BA2DB4354E,0x4469BDCA4E25A005,0x15AF5281CA0F71E1, + 0x744598CB8D0E2BF2,0x593F9B312AA863B7,0xEFB38A6E29A4FC63,0x6B6AA3A04C2D4A9D, + 0x3D95EB0EE6BF31E3,0xA291C3961554BFD5,0x18169C8EEF9BCBF5,0x115D68BC9D4E2846, + 0xBA875F18FACF7420,0xD1EDFCB8B6E23EBD,0xB00736F2F1E364AE,0x84D929CE6589B6FE, + 0x70B7A2F6DA4F7255,0x0E7253D75C6D4929,0x04F23A3D574159A7,0x0A8069EA0B2C108E, + 0x49D073C56BB11A11,0x8AAB7A1939E4FFD7,0xCD095A0B0E38ACEF,0xC9FB60365979F548, + 0x92BDE697D67F3422,0xC78933E10514BC61,0xE1C1D9B975C9B54A,0xD2266160CF1BCD80, + 0x9A4492ED78FD8671,0xB3CCAB2A881A9793,0x72CEBF667FE1D088,0xD6D45B5D985A9427 +},{ + 0xC811A8058C3F55DE,0x65F5B43196B50619,0xF74F96B1D6706E43,0x859D1E8BCB43D336, + 0x5AAB8A85CCFA3D84,0xF9C7BF99C295FCFD,0xA21FD5A1DE4B630F,0xCDB3EF763B8B456D, + 0x803F59F87CF7C385,0xB27C73BE5F31913C,0x98E3AC6633B04821,0xBF61674C26B8F818, + 0x0FFBC995C4C130C8,0xAAA0862010761A98,0x6057F342210116AA,0xF63C760C0654CC35, + 0x2DDB45CC667D9042,0xBCF45A964BD40382,0x68E8A0C3EF3C6F3D,0xA7BD92D269FF73BC, + 0x290AE20201ED2287,0xB7DE34CDE885818F,0xD901EEA7DD61059B,0xD6FA273219A03553, + 0xD56F1AE874CCCEC9,0xEA31245C2E83F554,0x7034555DA07BE499,0xCE26D2AC56E7BEF7, + 0xFD161857A5054E38,0x6A0E7DA4527436D1,0x5BD86A381CDE9FF2,0xCAF7756231770C32, + 0xB09AAED9E279C8D0,0x5DEF1091C60674DB,0x111046A2515E5045,0x23536CE4729802FC, + 0xC50CBCF7F5B63CFA,0x73A16887CD171F03,0x7D2941AFD9F28DBD,0x3F5E3EB45A4F3B9D, + 0x84EEFE361B677140,0x3DB8E3D3E7076271,0x1A3A28F9F20FD248,0x7EBC7C75B49E7627, + 0x74E5F293C7EB565C,0x18DCF59E4F478BA4,0x0C6EF44FA9ADCB52,0xC699812D98DAC760, + 0x788B06DC6E469D0E,0xFC65F8EA7521EC4E,0x30A5F7219E8E0B55,0x2BEC3F65BCA57B6B, + 0xDDD04969BAF1B75E,0x99904CDBE394EA57,0x14B201D1E6EA40F6,0xBBB0C08241284ADD, + 0x50F20463BF8F1DFF,0xE8D7F93B93CBACB8,0x4D8CB68E477C86E8,0xC1DD1B3992268E3F, + 0x7C5AA11209D62FCB,0x2F3D98ABDB35C9AE,0x671369562BFD5FF5,0x15C1E16C36CEE280, + 0x1D7EB2EDF8F39B17,0xDA94D37DB00DFE01,0x877BC3EC760B8ADA,0xCB8495DFE153AE44, + 0x05A24773B7B410B3,0x12857B783C32ABDF,0x8EB770D06812513B,0x536739B9D2E3E665, + 0x584D57E271B26468,0xD789C78FC9849725,0xA935BBFA7D1AE102,0x8B1537A3DFA64188, + 0xD0CD5D9BC378DE7A,0x4AC82C9A4D80CFB7,0x42777F1B83BDB620,0x72D2883A1D33BD75, + 0x5E7A2D4BAB6A8F41,0xF4DAAB6BBB1C95D9,0x905CFFE7FD8D31B6,0x83AA6422119B381F, + 0xC0AEFB8442022C49,0xA0F908C663033AE3,0xA428AF0804938826,0xADE41C341A8A53C7, + 0xAE7121EE77E6A85D,0xC47F5C4A25929E8C,0xB538E9AA55CDD863,0x06377AA9DAD8EB29, + 0xA18AE87BB3279895,0x6EDFDA6A35E48414,0x6B7D9D19825094A7,0xD41CFA55A4E86CBF, + 0xE5CAEDC9EA42C59C,0xA36C351C0E6FC179,0x5181E4DE6FABBF89,0xFFF0C530184D17D4, + 0x9D41EB1584045892,0x1C0D525028D73961,0xF178EC180CA8856A,0x9A0571018EF811CD, + 0x4091A27C3EF5EFCC,0x19AF15239F6329D2,0x347450EFF91EB990,0xE11B4A078DD27759, + 0xB9561DE5FC601331,0x912F1F5A2DA993C0,0x1654DCB65BA2191A,0x3E2DDE098A6B99EB, + 0x8A66D71E0F82E3FE,0x8C51ADB7D55A08D7,0x4533E50F8941FF7F,0x02E6DD67BD4859EC, + 0xE068AABA5DF6D52F,0xC24826E3FF4A75A5,0x6C39070D88ACDDF8,0x6486548C4691A46F, + 0xD1BEBD26135C7C0C,0xB30F93038F15334A,0x82D9849FC1BF9A69,0x9C320BA85420FAE4, + 0xFA528243AFF90767,0x9ED4D6CFE968A308,0xB825FD582C44B147,0x9B7691BC5EDCB3BB, + 0xC7EA619048FE6516,0x1063A61F817AF233,0x47D538683409A693,0x63C2CE984C6DED30, + 0x2A9FDFD86C81D91D,0x7B1E3B06032A6694,0x666089EBFBD9FD83,0x0A598EE67375207B, + 0x07449A140AFC495F,0x2CA8A571B6593234,0x1F986F8A45BBC2FB,0x381AA4A050B372C2, + 0x5423A3ADD81FAF3A,0x17273C0B8B86BB6C,0xFE83258DC869B5A2,0x287902BFD1C980F1, + 0xF5A94BD66B3837AF,0x88800A79B2CABA12,0x55504310083B0D4C,0xDF36940E07B9EEB2, + 0x04D1A7CE6790B2C5,0x612413FFF125B4DC,0x26F12B97C52C124F,0x86082351A62F28AC, + 0xEF93632F9937E5E7,0x3507B052293A1BE6,0xE72C30AE570A9C70,0xD3586041AE1425E0, + 0xDE4574B3D79D4CC4,0x92BA228040C5685A,0xF00B0CA5DC8C271C,0xBE1287F1F69C5A6E, + 0xF39E317FB1E0DC86,0x495D114020EC342D,0x699B407E3F18CD4B,0xDCA3A9D46AD51528, + 0x0D1D14F279896924,0x0000000000000000,0x593EB75FA196C61E,0x2E4E78160B116BD8, + 0x6D4AE7B058887F8E,0xE65FD013872E3E06,0x7A6DDBBBD30EC4E2,0xAC97FC89CAAEF1B1, + 0x09CCB33C1E19DBE1,0x89F3EAC462EE1864,0x7770CF49AA87ADC6,0x56C57ECA6557F6D6, + 0x03953DDA6D6CFB9A,0x36928D884456E07C,0x1EEB8F37959F608D,0x31D6179C4EAAA923, + 0x6FAC3AD7E5C02662,0x43049FA653991456,0xABD3669DC052B8EE,0xAF02C153A7C20A2B, + 0x3CCB036E3723C007,0x93C9C23D90E1CA2C,0xC33BC65E2F6ED7D3,0x4CFF56339758249E, + 0xB1E94E64325D6AA6,0x37E16D359472420A,0x79F8E661BE623F78,0x5214D90402C74413, + 0x482EF1FDF0C8965B,0x13F69BC5EC1609A9,0x0E88292814E592BE,0x4E198B542A107D72, + 0xCCC00FCBEBAFE71B,0x1B49C844222B703E,0x2564164DA840E9D5,0x20C6513E1FF4F966, + 0xBAC3203F910CE8AB,0xF2EDD1C261C47EF0,0x814CB945ACD361F3,0x95FEB8944A392105, + 0x5C9CF02C1622D6AD,0x971865F3F77178E9,0xBD87BA2B9BF0A1F4,0x444005B259655D09, + 0xED75BE48247FBC0B,0x7596122E17CFF42A,0xB44B091785E97A15,0x966B854E2755DA9F, + 0xEEE0839249134791,0x32432A4623C652B9,0xA8465B47AD3E4374,0xF8B45F2412B15E8B, + 0x2417F6F078644BA3,0xFB2162FE7FDDA511,0x4BBBCC279DA46DC1,0x0173E0BDD024A276, + 0x22208C59A2BCA08A,0x8FC4906DB836F34D,0xE4B90D743A6667EA,0x7147B5E0705F46EF, + 0x2782CB2A1508B039,0xEC065EF5F45B1E7D,0x21B5B183CFD05B10,0xDBE733C060295C77, + 0x9FA73672394C017E,0xCF55321186C31C81,0xD8720E1A0D45A7ED,0x3B8F997A3DDF8958, + 0x3AFC79C7EDFB2B2E,0xE9A4198643EF0ECE,0x5F09CDF67B4E2D37,0x4F6A6BE9FA34DF04, + 0xB6ADD47038A123F9,0x8D224D0A057EAAA1,0xC96248B85C1BF7A8,0xE3FD9760309A2EB5, + 0x0B2A6E5BA351820D,0xEB42C4E1FEA75722,0x948D58299A1D8373,0x7FCF9CC864BAD451, + 0xA55B4FB5D4B72A50,0x08BF5381CE3D7997,0x46A6D8D5E42D04E5,0xD22B80FC7E308796, + 0x57B69E77B57354A0,0x3969441D8097D0B4,0x3330CAFBF3E2F0CF,0xE28E77DDE0BE8CC3, + 0x62B12E259C494F46,0xA6CE726FB9DBD1CA,0x41E242C1EED14DBA,0x76032FF47AA30FB0 +},{ + 0x45B268A93ACDE4CC,0xAF7F0BE884549D08,0x048354B3C1468263,0x925435C2C80EFED2, + 0xEE4E37F27FDFFBA7,0x167A33920C60F14D,0xFB123B52EA03E584,0x4A0CAB53FDBB9007, + 0x9DEAF6380F788A19,0xCB48EC558F0CB32A,0xB59DC4B2D6FEF7E0,0xDCDBCA22F4F3ECB6, + 0x11DF5813549A9C40,0xE33FDEDF568ACED3,0xA0C1C8124322E9C3,0x07A56B8158FA6D0D, + 0x77279579B1E1F3DD,0xD9B18B74422AC004,0xB8EC2D9FFFABC294,0xF4ACF8A82D75914F, + 0x7BBF69B1EF2B6878,0xC4F62FAF487AC7E1,0x76CE809CC67E5D0C,0x6711D88F92E4C14C, + 0x627B99D9243DEDFE,0x234AA5C3DFB68B51,0x909B1F15262DBF6D,0x4F66EA054B62BCB5, + 0x1AE2CF5A52AA6AE8,0xBEA053FBD0CE0148,0xED6808C0E66314C9,0x43FE16CD15A82710, + 0xCD049231A06970F6,0xE7BC8A6C97CC4CB0,0x337CE835FCB3B9C0,0x65DEF2587CC780F3, + 0x52214EDE4132BB50,0x95F15E4390F493DF,0x870839625DD2E0F1,0x41313C1AFB8B66AF, + 0x91720AF051B211BC,0x477D427ED4EEA573,0x2E3B4CEEF6E3BE25,0x82627834EB0BCC43, + 0x9C03E3DD78E724C8,0x2877328AD9867DF9,0x14B51945E243B0F2,0x574B0F88F7EB97E2, + 0x88B6FA989AA4943A,0x19C4F068CB168586,0x50EE6409AF11FAEF,0x7DF317D5C04EABA4, + 0x7A567C5498B4C6A9,0xB6BBFB804F42188E,0x3CC22BCF3BC5CD0B,0xD04336EAAA397713, + 0xF02FAC1BEC33132C,0x2506DBA7F0D3488D,0xD7E65D6BF2C31A1E,0x5EB9B2161FF820F5, + 0x842E0650C46E0F9F,0x716BEB1D9E843001,0xA933758CAB315ED4,0x3FE414FDA2792265, + 0x27C9F1701EF00932,0x73A4C1CA70A771BE,0x94184BA6E76B3D0E,0x40D829FF8C14C87E, + 0x0FBEC3FAC77674CB,0x3616A9634A6A9572,0x8F139119C25EF937,0xF545ED4D5AEA3F9E, + 0xE802499650BA387B,0x6437E7BD0B582E22,0xE6559F89E053E261,0x80AD52E305288DFC, + 0x6DC55A23E34B9935,0xDE14E0F51AD0AD09,0xC6390578A659865E,0x96D7617109487CB1, + 0xE2D6CB3A21156002,0x01E915E5779FAED1,0xADB0213F6A77DCB7,0x9880B76EB9A1A6AB, + 0x5D9F8D248644CF9B,0xFD5E4536C5662658,0xF1C6B9FE9BACBDFD,0xEACD6341BE9979C4, + 0xEFA7221708405576,0x510771ECD88E543E,0xC2BA51CB671F043D,0x0AD482AC71AF5879, + 0xFE787A045CDAC936,0xB238AF338E049AED,0xBD866CC94972EE26,0x615DA6EBBD810290, + 0x3295FDD08B2C1711,0xF834046073BF0AEA,0xF3099329758FFC42,0x1CAEB13E7DCFA934, + 0xBA2307481188832B,0x24EFCE42874CE65C,0x0E57D61FB0E9DA1A,0xB3D1BAD6F99B343C, + 0xC0757B1C893C4582,0x2B510DB8403A9297,0x5C7698C1F1DB614A,0x3E0D0118D5E68CB4, + 0xD60F488E855CB4CF,0xAE961E0DF3CB33D9,0x3A8E55AB14A00ED7,0x42170328623789C1, + 0x838B6DD19C946292,0x895FEF7DED3B3AEB,0xCFCBB8E64E4A3149,0x064C7E642F65C3DC, + 0x3D2B3E2A4C5A63DA,0x5BD3F340A9210C47,0xB474D157A1615931,0xAC5934DA1DE87266, + 0x6EE365117AF7765B,0xC86ED36716B05C44,0x9BA6885C201D49C5,0xB905387A88346C45, + 0x131072C4BAB9DDFF,0xBF49461EA751AF99,0xD52977BC1CE05BA1,0xB0F785E46027DB52, + 0x546D30BA6E57788C,0x305AD707650F56AE,0xC987C682612FF295,0xA5AB8944F5FBC571, + 0x7ED528E759F244CA,0x8DDCBBCE2C7DB888,0xAA154ABE328DB1BA,0x1E619BE993ECE88B, + 0x09F2BD9EE813B717,0x7401AA4B285D1CB3,0x21858F143195CAEE,0x48C381841398D1B8, + 0xFCB750D3B2F98889,0x39A86A998D1CE1B9,0x1F888E0CE473465A,0x7899568376978716, + 0x02CF2AD7EE2341BF,0x85C713B5B3F1A14E,0xFF916FE12B4567E7,0x7C1A0230B7D10575, + 0x0C98FCC85ECA9BA5,0xA3E7F720DA9E06AD,0x6A6031A2BBB1F438,0x973E74947ED7D260, + 0x2CF4663918C0FF9A,0x5F50A7F368678E24,0x34D983B4A449D4CD,0x68AF1B755592B587, + 0x7F3C3D022E6DEA1B,0xABFC5F5B45121F6B,0x0D71E92D29553574,0xDFFDF5106D4F03D8, + 0x081BA87B9F8C19C6,0xDB7EA1A3AC0981BB,0xBBCA12AD66172DFA,0x79704366010829C7, + 0x179326777BFF5F9C,0x0000000000000000,0xEB2476A4C906D715,0x724DD42F0738DF6F, + 0xB752EE6538DDB65F,0x37FFBC863DF53BA3,0x8EFA84FCB5C157E6,0xE9EB5C73272596AA, + 0x1B0BDABF2535C439,0x86E12C872A4D4E20,0x9969A28BCE3E087A,0xFAFB2EB79D9C4B55, + 0x056A4156B6D92CB2,0x5A3AE6A5DEBEA296,0x22A3B026A8292580,0x53C85B3B36AD1581, + 0xB11E900117B87583,0xC51F3A4A3FE56930,0xE019E1EDCF3621BD,0xEC811D2591FCBA18, + 0x445B7D4C4D524A1D,0xA8DA6069DCAEF005,0x58F5CC72309DE329,0xD4C062596B7FF570, + 0xCE22AD0339D59F98,0x591CD99747024DF8,0x8B90C5AA03187B54,0xF663D27FC356D0F0, + 0xD8589E9135B56ED5,0x35309651D3D67A1C,0x12F96721CD26732E,0xD28C1C3D441A36AC, + 0x492A946164077F69,0x2D1D73DC6F5F514B,0x6F0A70F40D68D88A,0x60B4B30ECA1EAC41, + 0xD36509D83385987D,0x0B3D97490630F6A8,0x9ECCC90A96C46577,0xA20EE2C5AD01A87C, + 0xE49AB55E0E70A3DE,0xA4429CA182646BA0,0xDA97B446DB962F6A,0xCCED87D4D7F6DE27, + 0x2AB8185D37A53C46,0x9F25DCEFE15BCBA6,0xC19C6EF9FEA3EB53,0xA764A3931BD884CE, + 0x2FD2590B817C10F4,0x56A21A6D80743933,0xE573A0BB79EF0D0F,0x155C0CA095DC1E23, + 0x6C2C4FC694D437E4,0x10364DF623053291,0xDD32DFC7836C4267,0x03263F3299BCEF6E, + 0x66F8CD6AE57B6F9D,0x8C35AE2B5BE21659,0x31B3C2E21290F87F,0x93BD2027BF915003, + 0x69460E90220D1B56,0x299E276FAE19D328,0x63928C3C53A2432F,0x7082FEF8E91B9ED0, + 0xBC6F792C3EED40F7,0x4C40D537D2DE53DB,0x75E8BFAE5FC2B262,0x4DA9C0D2A541FD0A, + 0x4E8FFFE03CFD1264,0x2620E495696FA7E3,0xE1F0F408B8A98F6C,0xD1AA230FDDA6D9C2, + 0xC7D0109DD1C6288F,0x8A79D04F7487D585,0x4694579BA3710BA2,0x38417F7CFA834F68, + 0x1D47A4DB0A5007E5,0x206C9AF1460A643F,0xA128DDF734BD4712,0x8144470672B7232D, + 0xF2E086CC02105293,0x182DE58DBC892B57,0xCAA1F9B0F8931DFB,0x6B892447CC2E5AE9, + 0xF9DD11850420A43B,0x4BE5BEB68A243ED6,0x5584255F19C8D65D,0x3B67404E633FA006, + 0xA68DB6766C472A1F,0xF78AC79AB4C97E21,0xC353442E1080AAEC,0x9A4F9DB95782E714 +},{ + 0x05BA7BC82C9B3220,0x31A54665F8B65E4F,0xB1B651F77547F4D4,0x8BFA0D857BA46682, + 0x85A96C5AA16A98BB,0x990FAEF908EB79C9,0xA15E37A247F4A62D,0x76857DCD5D27741E, + 0xF8C50B800A1820BC,0xBE65DCB201F7A2B4,0x666D1B986F9426E7,0x4CC921BF53C4E648, + 0x95410A0F93D9CA42,0x20CDCCAA647BA4EF,0x429A4060890A1871,0x0C4EA4F69B32B38B, + 0xCCDA362DDE354CD3,0x96DC23BC7C5B2FA9,0xC309BB68AA851AB3,0xD26131A73648E013, + 0x021DC52941FC4DB2,0xCD5ADAB7704BE48A,0xA77965D984ED71E6,0x32386FD61734BBA4, + 0xE82D6DD538AB7245,0x5C2147EA6177B4B1,0x5DA1AB70CF091CE8,0xAC907FCE72B8BDFF, + 0x57C85DFD972278A8,0xA4E44C6A6B6F940D,0x3851995B4F1FDFE4,0x62578CCAED71BC9E, + 0xD9882BB0C01D2C0A,0x917B9D5D113C503B,0xA2C31E11A87643C6,0xE463C923A399C1CE, + 0xF71686C57EA876DC,0x87B4A973E096D509,0xAF0D567D9D3A5814,0xB40C2A3F59DCC6F4, + 0x3602F88495D121DD,0xD3E1DD3D9836484A,0xF945E71AA46688E5,0x7518547EB2A591F5, + 0x9366587450C01D89,0x9EA81018658C065B,0x4F54080CBC4603A3,0x2D0384C65137BF3D, + 0xDC325078EC861E2A,0xEA30A8FC79573FF7,0x214D2030CA050CB6,0x65F0322B8016C30C, + 0x69BE96DD1B247087,0xDB95EE9981E161B8,0xD1FC1814D9CA05F8,0x820ED2BBCC0DE729, + 0x63D76050430F14C7,0x3BCCB0E8A09D3A0F,0x8E40764D573F54A2,0x39D175C1E16177BD, + 0x12F5A37C734F1F4B,0xAB37C12F1FDFC26D,0x5648B167395CD0F1,0x6C04ED1537BF42A7, + 0xED97161D14304065,0x7D6C67DAAB72B807,0xEC17FA87BA4EE83C,0xDFAF79CB0304FBC1, + 0x733F060571BC463E,0x78D61C1287E98A27,0xD07CF48E77B4ADA1,0xB9C262536C90DD26, + 0xE2449B5860801605,0x8FC09AD7F941FCFB,0xFAD8CEA94BE46D0E,0xA343F28B0608EB9F, + 0x9B126BD04917347B,0x9A92874AE7699C22,0x1B017C42C4E69EE0,0x3A4C5C720EE39256, + 0x4B6E9F5E3EA399DA,0x6BA353F45AD83D35,0xE7FEE0904C1B2425,0x22D009832587E95D, + 0x842980C00F1430E2,0xC6B3C0A0861E2893,0x087433A419D729F2,0x341F3DADD42D6C6F, + 0xEE0A3FAEFBB2A58E,0x4AEE73C490DD3183,0xAAB72DB5B1A16A34,0xA92A04065E238FDF, + 0x7B4B35A1686B6FCC,0x6A23BF6EF4A6956C,0x191CB96B851AD352,0x55D598D4D6DE351A, + 0xC9604DE5F2AE7EF3,0x1CA6C2A3A981E172,0xDE2F9551AD7A5398,0x3025AAFF56C8F616, + 0x15521D9D1E2860D9,0x506FE31CFA45073A,0x189C55F12B647B0B,0x0180EC9AAE7EA859, + 0x7CEC8B40050C105E,0x2350E5198BF94104,0xEF8AD33455CC0DD7,0x07A7BEE16D677F92, + 0xE5E325B90DE76997,0x5A061591A26E637A,0xB611EF1618208B46,0x09F4DF3EB7A981AB, + 0x1EBB078AE87DACC0,0xB791038CB65E231F,0x0FD38D4574B05660,0x67EDF702C1EA8EBE, + 0xBA5F4BE0831238CD,0xE3C477C2CEFEBE5C,0x0DCE486C354C1BD2,0x8C5DB36416C31910, + 0x26EA9ED1A7627324,0x039D29B3EF82E5EB,0x9F28FC82CBF2AE02,0xA8AAE89CF05D2786, + 0x431AACFA2774B028,0xCF471F9E31B7A938,0x581BD0B8E3922EC8,0xBC78199B400BEF06, + 0x90FB71C7BF42F862,0x1F3BEB1046030499,0x683E7A47B55AD8DE,0x988F4263A695D190, + 0xD808C72A6E638453,0x0627527BC319D7CB,0xEBB04466D72997AE,0xE67E0C0AE2658C7C, + 0x14D2F107B056C880,0x7122C32C30400B8C,0x8A7AE11FD5DACEDB,0xA0DEDB38E98A0E74, + 0xAD109354DCC615A6,0x0BE91A17F655CC19,0x8DDD5FFEB8BDB149,0xBFE53028AF890AED, + 0xD65BA6F5B4AD7A6A,0x7956F0882997227E,0x10E8665532B352F9,0x0E5361DFDACEFE39, + 0xCEC7F3049FC90161,0xFF62B561677F5F2E,0x975CCF26D22587F0,0x51EF0F86543BAF63, + 0x2F1E41EF10CBF28F,0x52722635BBB94A88,0xAE8DBAE73344F04D,0x410769D36688FD9A, + 0xB3AB94DE34BBB966,0x801317928DF1AA9B,0xA564A0F0C5113C54,0xF131D4BEBDB1A117, + 0x7F71A2F3EA8EF5B5,0x40878549C8F655C3,0x7EF14E6944F05DEC,0xD44663DCF55137D8, + 0xF2ACFD0D523344FC,0x0000000000000000,0x5FBC6E598EF5515A,0x16CF342EF1AA8532, + 0xB036BD6DDB395C8D,0x13754FE6DD31B712,0xBBDFA77A2D6C9094,0x89E7C8AC3A582B30, + 0x3C6B0E09CDFA459D,0xC4AE0589C7E26521,0x49735A777F5FD468,0xCAFD64561D2C9B18, + 0xDA1502032F9FC9E1,0x8867243694268369,0x3782141E3BAF8984,0x9CB5D53124704BE9, + 0xD7DB4A6F1AD3D233,0xA6F989432A93D9BF,0x9D3539AB8A0EE3B0,0x53F2CAAF15C7E2D1, + 0x6E19283C76430F15,0x3DEBE2936384EDC4,0x5E3C82C3208BF903,0x33B8834CB94A13FD, + 0x6470DEB12E686B55,0x359FD1377A53C436,0x61CAA57902F35975,0x043A975282E59A79, + 0xFD7F70482683129C,0xC52EE913699CCD78,0x28B9FF0E7DAC8D1D,0x5455744E78A09D43, + 0xCB7D88CCB3523341,0x44BD121B4A13CFBA,0x4D49CD25FDBA4E11,0x3E76CB208C06082F, + 0x3FF627BA2278A076,0xC28957F204FBB2EA,0x453DFE81E46D67E3,0x94C1E6953DA7621B, + 0x2C83685CFF491764,0xF32C1197FC4DECA5,0x2B24D6BD922E68F6,0xB22B78449AC5113F, + 0x48F3B6EDD1217C31,0x2E9EAD75BEB55AD6,0x174FD8B45FD42D6B,0x4ED4E4961238ABFA, + 0x92E6B4EEFEBEB5D0,0x46A0D7320BEF8208,0x47203BA8A5912A51,0x24F75BF8E69E3E96, + 0xF0B1382413CF094E,0xFEE259FBC901F777,0x276A724B091CDB7D,0xBDF8F501EE75475F, + 0x599B3C224DEC8691,0x6D84018F99C1EAFE,0x7498B8E41CDB39AC,0xE0595E71217C5BB7, + 0x2AA43A273C50C0AF,0xF50B43EC3F543B6E,0x838E3E2162734F70,0xC09492DB4507FF58, + 0x72BFEA9FDFC2EE67,0x11688ACF9CCDFAA0,0x1A8190D86A9836B9,0x7ACBD93BC615C795, + 0xC7332C3A286080CA,0x863445E94EE87D50,0xF6966A5FD0D6DE85,0xE9AD814F96D5DA1C, + 0x70A22FB69E3EA3D5,0x0A69F68D582B6440,0xB8428EC9C2EE757F,0x604A49E3AC8DF12C, + 0x5B86F90B0C10CB23,0xE1D9B2EB8F02F3EE,0x29391394D3D22544,0xC8E0A17F5CD0D6AA, + 0xB58CC6A5F7A26EAD,0x8193FB08238F02C2,0xD5C68F465B2F9F81,0xFCFF9CD288FDBAC5, + 0x77059157F359DC47,0x1D262E3907FF492B,0xFB582233E59AC557,0xDDB2BCE242F8B673, + 0x2577B76248E096CF,0x6F99C4A6D83DA74C,0xC1147E41EB795701,0xF48BAF76912A9337 +},{ + 0x3EF29D249B2C0A19,0xE9E16322B6F8622F,0x5536994047757F7A,0x9F4D56D5A47B0B33, + 0x822567466AA1174C,0xB8F5057DEB082FB2,0xCC48C10BF4475F53,0x373088D4275DEC3A, + 0x968F4325180AED10,0x173D232CF7016151,0xAE4ED09F946FCC13,0xFD4B4741C4539873, + 0x1B5B3F0DD9933765,0x2FFCB0967B644052,0xE02376D20A89840C,0xA3AE3A70329B18D7, + 0x419CBD2335DE8526,0xFAFEBF115B7C3199,0x0397074F85AA9B0D,0xC58AD4FB4836B970, + 0xBEC60BE3FC4104A8,0x1EFF36DC4B708772,0x131FDC33ED8453B6,0x0844E33E341764D3, + 0x0FF11B6EAB38CD39,0x64351F0A7761B85A,0x3B5694F509CFBA0E,0x30857084B87245D0, + 0x47AFB3BD2297AE3C,0xF2BA5C2F6F6B554A,0x74BDC4761F4F70E1,0xCFDFC64471EDC45E, + 0xE610784C1DC0AF16,0x7ACA29D63C113F28,0x2DED411776A859AF,0xAC5F211E99A3D5EE, + 0xD484F949A87EF33B,0x3CE36CA596E013E4,0xD120F0983A9D432C,0x6BC40464DC597563, + 0x69D5F5E5D1956C9E,0x9AE95F043698BB24,0xC9ECC8DA66A4EF44,0xD69508C8A5B2EAC6, + 0xC40C2235C0503B80,0x38C193BA8C652103,0x1CEEC75D46BC9E8F,0xD331011937515AD1, + 0xD8E2E56886ECA50F,0xB137108D5779C991,0x709F3B6905CA4206,0x4FEB50831680CAEF, + 0xEC456AF3241BD238,0x58D673AFE181ABBE,0x242F54E7CAD9BF8C,0x0211F1810DCC19FD, + 0x90BC4DBB0F43C60A,0x9518446A9DA0761D,0xA1BFCBF13F57012A,0x2BDE4F8961E172B5, + 0x27B853A84F732481,0xB0B1E643DF1F4B61,0x18CC38425C39AC68,0xD2B7F7D7BF37D821, + 0x3103864A3014C720,0x14AA246372ABFA5C,0x6E600DB54EBAC574,0x394765740403A3F3, + 0x09C215F0BC71E623,0x2A58B947E987F045,0x7B4CDF18B477BDD8,0x9709B5EB906C6FE0, + 0x73083C268060D90B,0xFEDC400E41F9037E,0x284948C6E44BE9B8,0x728ECAE808065BFB, + 0x06330E9E17492B1A,0x5950856169E7294E,0xBAE4F4FCE6C4364F,0xCA7BCF95E30E7449, + 0x7D7FD186A33E96C2,0x52836110D85AD690,0x4DFAA1021B4CD312,0x913ABB75872544FA, + 0xDD46ECB9140F1518,0x3D659A6B1E869114,0xC23F2CABD719109A,0xD713FE062DD46836, + 0xD0A60656B2FBC1DC,0x221C5A79DD909496,0xEFD26DBCA1B14935,0x0E77EDA0235E4FC9, + 0xCBFD395B6B68F6B9,0x0DE0EAEFA6F4D4C4,0x0422FF1F1A8532E7,0xF969B85EDED6AA94, + 0x7F6E2007AEF28F3F,0x3AD0623B81A938FE,0x6624EE8B7AADA1A7,0xB682E8DDC856607B, + 0xA78CC56F281E2A30,0xC79B257A45FAA08D,0x5B4174E0642B30B3,0x5F638BFF7EAE0254, + 0x4BC9AF9C0C05F808,0xCE59308AF98B46AE,0x8FC58DA9CC55C388,0x803496C7676D0EB1, + 0xF33CAAE1E70DD7BA,0xBB6202326EA2B4BF,0xD5020F87201871CB,0x9D5CA754A9B712CE, + 0x841669D87DE83C56,0x8A6184785EB6739F,0x420BBA6CB0741E2B,0xF12D5B60EAC1CE47, + 0x76AC35F71283691C,0x2C6BB7D9FECEDB5F,0xFCCDB18F4C351A83,0x1F79C012C3160582, + 0xF0ABADAE62A74CB7,0xE1A5801C82EF06FC,0x67A21845F2CB2357,0x5114665F5DF04D9D, + 0xBF40FD2D74278658,0xA0393D3FB73183DA,0x05A409D192E3B017,0xA9FB28CF0B4065F9, + 0x25A9A22942BF3D7C,0xDB75E22703463E02,0xB326E10C5AB5D06C,0xE7968E8295A62DE6, + 0xB973F3B3636EAD42,0xDF571D3819C30CE5,0xEE549B7229D7CBC5,0x12992AFD65E2D146, + 0xF8EF4E9056B02864,0xB7041E134030E28B,0xC02EDD2ADAD50967,0x932B4AF48AE95D07, + 0x6FE6FB7BC6DC4784,0x239AACB755F61666,0x401A4BEDBDB807D6,0x485EA8D389AF6305, + 0xA41BC220ADB4B13D,0x753B32B89729F211,0x997E584BB3322029,0x1D683193CEDA1C7F, + 0xFF5AB6C0C99F818E,0x16BBD5E27F67E3A1,0xA59D34EE25D233CD,0x98F8AE853B54A2D9, + 0x6DF70AFACB105E79,0x795D2E99B9BBA425,0x8E437B6744334178,0x0186F6CE886682F0, + 0xEBF092A3BB347BD2,0xBCD7FA62F18D1D55,0xADD9D7D011C5571E,0x0BD3E471B1BDFFDE, + 0xAA6C2F808EEAFEF4,0x5EE57D31F6C880A4,0xF50FA47FF044FCA0,0x1ADDC9C351F5B595, + 0xEA76646D3352F922,0x0000000000000000,0x85909F16F58EBEA6,0x46294573AAF12CCC, + 0x0A5512BF39DB7D2E,0x78DBD85731DD26D5,0x29CFBE086C2D6B48,0x218B5D36583A0F9B, + 0x152CD2ADFACD78AC,0x83A39188E2C795BC,0xC3B9DA655F7F926A,0x9ECBA01B2C1D89C3, + 0x07B5F8509F2FA9EA,0x7EE8D6C926940DCF,0x36B67E1AAF3B6ECA,0x86079859702425AB, + 0xFB7849DFD31AB369,0x4C7C57CC932A51E2,0xD96413A60E8A27FF,0x263EA566C715A671, + 0x6C71FC344376DC89,0x4A4F595284637AF8,0xDAF314E98B20BCF2,0x572768C14AB96687, + 0x1088DB7C682EC8BB,0x887075F9537A6A62,0x2E7A4658F302C2A2,0x619116DBE582084D, + 0xA87DDE018326E709,0xDCC01A779C6997E8,0xEDC39C3DAC7D50C8,0xA60A33A1A078A8C0, + 0xC1A82BE452B38B97,0x3F746BEA134A88E9,0xA228CCBEBAFD9A27,0xABEAD94E068C7C04, + 0xF48952B178227E50,0x5CF48CB0FB049959,0x6017E0156DE48ABD,0x4438B4F2A73D3531, + 0x8C528AE649FF5885,0xB515EF924DFCFB76,0x0C661C212E925634,0xB493195CC59A7986, + 0x9CDA519A21D1903E,0x32948105B5BE5C2D,0x194ACE8CD45F2E98,0x438D4CA238129CDB, + 0x9B6FA9CABEFE39D4,0x81B26009EF0B8C41,0xDED1EBF691A58E15,0x4E6DA64D9EE6481F, + 0x54B06F8ECF13FD8A,0x49D85E1D01C9E1F5,0xAFC826511C094EE3,0xF698A33075EE67AD, + 0x5AC7822EEC4DB243,0x8DD47C28C199DA75,0x89F68337DB1CE892,0xCDCE37C57C21DDA3, + 0x530597DE503C5460,0x6A42F2AA543FF793,0x5D727A7E73621BA9,0xE232875307459DF1, + 0x56A19E0FC2DFE477,0xC61DD3B4CD9C227D,0xE5877F03986A341B,0x949EB2A415C6F4ED, + 0x6206119460289340,0x6380E75AE84E11B0,0x8BE772B6D6D0F16F,0x50929091D596CF6D, + 0xE86795EC3E9EE0DF,0x7CF927482B581432,0xC86A3E14EEC26DB4,0x7119CDA78DACC0F6, + 0xE40189CD100CB6EB,0x92ADBC3A028FDFF7,0xB2A017C2D2D3529C,0x200DABF8D05C8D6B, + 0x34A78F9BA2F77737,0xE3B4719D8F231F01,0x45BE423C2F5BB7C1,0xF71E55FEFD88E55D, + 0x6853032B59F3EE6E,0x65B3E9C4FF073AAA,0x772AC3399AE5EBEC,0x87816E97F842A75B, + 0x110E2DB2E0484A4B,0x331277CB3DD8DEDD,0xBD510CAC79EB9FA5,0x352179552A91F5C7 +},{ + 0x8AB0A96846E06A6D,0x43C7E80B4BF0B33A,0x08C9B3546B161EE5,0x39F1C235EBA990BE, + 0xC1BEF2376606C7B2,0x2C209233614569AA,0xEB01523B6FC3289A,0x946953AB935ACEDD, + 0x272838F63E13340E,0x8B0455ECA12BA052,0x77A1B2C4978FF8A2,0xA55122CA13E54086, + 0x2276135862D3F1CD,0xDB8DDFDE08B76CFE,0x5D1E12C89E4A178A,0x0E56816B03969867, + 0xEE5F79953303ED59,0xAFED748BAB78D71D,0x6D929F2DF93E53EE,0xF5D8A8F8BA798C2A, + 0xF619B1698E39CF6B,0x95DDAF2F749104E2,0xEC2A9C80E0886427,0xCE5C8FD8825B95EA, + 0xC4E0D9993AC60271,0x4699C3A5173076F9,0x3D1B151F50A29F42,0x9ED505EA2BC75946, + 0x34665ACFDC7F4B98,0x61B1FB53292342F7,0xC721C0080E864130,0x8693CD1696FD7B74, + 0x872731927136B14B,0xD3446C8A63A1721B,0x669A35E8A6680E4A,0xCAB658F239509A16, + 0xA4E5DE4EF42E8AB9,0x37A7435EE83F08D9,0x134E6239E26C7F96,0x82791A3C2DF67488, + 0x3F6EF00A8329163C,0x8E5A7E42FDEB6591,0x5CAAEE4C7981DDB5,0x19F234785AF1E80D, + 0x255DDDE3ED98BD70,0x50898A32A99CCCAC,0x28CA4519DA4E6656,0xAE59880F4CB31D22, + 0x0D9798FA37D6DB26,0x32F968F0B4FFCD1A,0xA00F09644F258545,0xFA3AD5175E24DE72, + 0xF46C547C5DB24615,0x713E80FBFF0F7E20,0x7843CF2B73D2AAFA,0xBD17EA36AEDF62B4, + 0xFD111BACD16F92CF,0x4ABAA7DBC72D67E0,0xB3416B5DAD49FAD3,0xBCA316B24914A88B, + 0x15D150068AECF914,0xE27C1DEBE31EFC40,0x4FE48C759BEDA223,0x7EDCFD141B522C78, + 0x4E5070F17C26681C,0xE696CAC15815F3BC,0x35D2A64B3BB481A7,0x800CFF29FE7DFDF6, + 0x1ED9FAC3D5BAA4B0,0x6C2663A91EF599D1,0x03C1199134404341,0xF7AD4DED69F20554, + 0xCD9D9649B61BD6AB,0xC8C3BDE7EADB1368,0xD131899FB02AFB65,0x1D18E352E1FAE7F1, + 0xDA39235AEF7CA6C1,0xA1BBF5E0A8EE4F7A,0x91377805CF9A0B1E,0x3138716180BF8E5B, + 0xD9F83ACBDB3CE580,0x0275E515D38B897E,0x472D3F21F0FBBCC6,0x2D946EB7868EA395, + 0xBA3C248D21942E09,0xE7223645BFDE3983,0xFF64FEB902E41BB1,0xC97741630D10D957, + 0xC3CB1722B58D4ECC,0xA27AEC719CAE0C3B,0x99FECB51A48C15FB,0x1465AC826D27332B, + 0xE1BD047AD75EBF01,0x79F733AF941960C5,0x672EC96C41A3C475,0xC27FEBA6524684F3, + 0x64EFD0FD75E38734,0xED9E60040743AE18,0xFB8E2993B9EF144D,0x38453EB10C625A81, + 0x6978480742355C12,0x48CF42CE14A6EE9E,0x1CAC1FD606312DCE,0x7B82D6BA4792E9BB, + 0x9D141C7B1F871A07,0x5616B80DC11C4A2E,0xB849C198F21FA777,0x7CA91801C8D9A506, + 0xB1348E487EC273AD,0x41B20D1E987B3A44,0x7460AB55A3CFBBE3,0x84E628034576F20A, + 0x1B87D16D897A6173,0x0FE27DEFE45D5258,0x83CDE6B8CA3DBEB7,0x0C23647ED01D1119, + 0x7A362A3EA0592384,0xB61F40F3F1893F10,0x75D457D1440471DC,0x4558DA34237035B8, + 0xDCA6116587FC2043,0x8D9B67D3C9AB26D0,0x2B0B5C88EE0E2517,0x6FE77A382AB5DA90, + 0x269CC472D9D8FE31,0x63C41E46FAA8CB89,0xB7ABBC771642F52F,0x7D1DE4852F126F39, + 0xA8C6BA3024339BA0,0x600507D7CEE888C8,0x8FEE82C61A20AFAE,0x57A2448926D78011, + 0xFCA5E72836A458F0,0x072BCEBB8F4B4CBD,0x497BBE4AF36D24A1,0x3CAFE99BB769557D, + 0x12FA9EBD05A7B5A9,0xE8C04BAA5B836BDB,0x4273148FAC3B7905,0x908384812851C121, + 0xE557D3506C55B0FD,0x72FF996ACB4F3D61,0x3EDA0C8E64E2DC03,0xF0868356E6B949E9, + 0x04EAD72ABB0B0FFC,0x17A4B5135967706A,0xE3C8E16F04D5367F,0xF84F30028DAF570C, + 0x1846C8FCBD3A2232,0x5B8120F7F6CA9108,0xD46FA231ECEA3EA6,0x334D947453340725, + 0x58403966C28AD249,0xBED6F3A79A9F21F5,0x68CCB483A5FE962D,0xD085751B57E1315A, + 0xFED0023DE52FD18E,0x4B0E5B5F20E6ADDF,0x1A332DE96EB1AB4C,0xA3CE10F57B65C604, + 0x108F7BA8D62C3CD7,0xAB07A3A11073D8E1,0x6B0DAD1291BED56C,0xF2F366433532C097, + 0x2E557726B2CEE0D4,0x0000000000000000,0xCB02A476DE9B5029,0xE4E32FD48B9E7AC2, + 0x734B65EE2C84F75E,0x6E5386BCCD7E10AF,0x01B4FC84E7CBCA3F,0xCFE8735C65905FD5, + 0x3613BFDA0FF4C2E6,0x113B872C31E7F6E8,0x2FE18BA255052AEB,0xE974B72EBC48A1E4, + 0x0ABC5641B89D979B,0xB46AA5E62202B66E,0x44EC26B0C4BBFF87,0xA6903B5B27A503C7, + 0x7F680190FC99E647,0x97A84A3AA71A8D9C,0xDD12EDE16037EA7C,0xC554251DDD0DC84E, + 0x88C54C7D956BE313,0x4D91696048662B5D,0xB08072CC9909B992,0xB5DE5962C5C97C51, + 0x81B803AD19B637C9,0xB2F597D94A8230EC,0x0B08AAC55F565DA4,0xF1327FD2017283D6, + 0xAD98919E78F35E63,0x6AB9519676751F53,0x24E921670A53774F,0xB9FD3D1C15D46D48, + 0x92F66194FBDA485F,0x5A35DC7311015B37,0xDED3F4705477A93D,0xC00A0EB381CD0D8D, + 0xBB88D809C65FE436,0x16104997BEACBA55,0x21B70AC95693B28C,0x59F4C5E225411876, + 0xD5DB5EB50B21F499,0x55D7A19CF55C096F,0xA97246B4C3F8519F,0x8552D487A2BD3835, + 0x54635D181297C350,0x23C2EFDC85183BF2,0x9F61F96ECC0C9379,0x534893A39DDC8FED, + 0x5EDF0B59AA0A54CB,0xAC2C6D1A9F38945C,0xD7AEBBA0D8AA7DE7,0x2ABFA00C09C5EF28, + 0xD84CC64F3CF72FBF,0x2003F64DB15878B3,0xA724C7DFC06EC9F8,0x069F323F68808682, + 0xCC296ACD51D01C94,0x055E2BAE5CC0C5C3,0x6270E2C21D6301B6,0x3B842720382219C0, + 0xD2F0900E846AB824,0x52FC6F277A1745D2,0xC6953C8CE94D8B0F,0xE009F8FE3095753E, + 0x655B2C7992284D0B,0x984A37D54347DFC4,0xEAB5AEBF8808E2A5,0x9A3FD2C090CC56BA, + 0x9CA0E0FFF84CD038,0x4C2595E4AFADE162,0xDF6708F4B3BC6302,0xBF620F237D54EBCA, + 0x93429D101C118260,0x097D4FD08CDDD4DA,0x8C2F9B572E60ECEF,0x708A7C7F18C4B41F, + 0x3A30DBA4DFE9D3FF,0x4006F19A7FB0F07B,0x5F6BF7DD4DC19EF4,0x1F6D064732716E8F, + 0xF9FBCC866A649D33,0x308C8DE567744464,0x8971B0F972A0292C,0xD61A47243F61B7D8, + 0xEFEB8511D4C82766,0x961CB6BE40D147A3,0xAAB35F25F7B812DE,0x76154E407044329D, + 0x513D76B64E570693,0xF3479AC7D2F90AA8,0x9B8B2E4477079C85,0x297EB99D3D85AC69 +},{ + 0x7E37E62DFC7D40C3,0x776F25A4EE939E5B,0xE045C850DD8FB5AD,0x86ED5BA711FF1952, + 0xE91D0BD9CF616B35,0x37E0AB256E408FFB,0x9607F6C031025A7A,0x0B02F5E116D23C9D, + 0xF3D8486BFB50650C,0x621CFF27C40875F5,0x7D40CB71FA5FD34A,0x6DAA6616DAA29062, + 0x9F5F354923EC84E2,0xEC847C3DC507C3B3,0x025A3668043CE205,0xA8BF9E6C4DAC0B19, + 0xFA808BE2E9BEBB94,0xB5B99C5277C74FA3,0x78D9BC95F0397BCC,0xE332E50CDBAD2624, + 0xC74FCE129332797E,0x1729ECEB2EA709AB,0xC2D6B9F69954D1F8,0x5D898CBFBAB8551A, + 0x859A76FB17DD8ADB,0x1BE85886362F7FB5,0xF6413F8FF136CD8A,0xD3110FA5BBB7E35C, + 0x0A2FEED514CC4D11,0xE83010EDCD7F1AB9,0xA1E75DE55F42D581,0xEEDE4A55C13B21B6, + 0xF2F5535FF94E1480,0x0CC1B46D1888761E,0xBCE15FDB6529913B,0x2D25E8975A7181C2, + 0x71817F1CE2D7A554,0x2E52C5CB5C53124B,0xF9F7A6BEEF9C281D,0x9E722E7D21F2F56E, + 0xCE170D9B81DCA7E6,0x0E9B82051CB4941B,0x1E712F623C49D733,0x21E45CFA42F9F7DC, + 0xCB8E7A7F8BBA0F60,0x8E98831A010FB646,0x474CCF0D8E895B23,0xA99285584FB27A95, + 0x8CC2B57205335443,0x42D5B8E984EFF3A5,0x012D1B34021E718C,0x57A6626AAE74180B, + 0xFF19FC06E3D81312,0x35BA9D4D6A7C6DFE,0xC9D44C178F86ED65,0x506523E6A02E5288, + 0x03772D5C06229389,0x8B01F4FE0B691EC0,0xF8DABD8AED825991,0x4C4E3AEC985B67BE, + 0xB10DF0827FBF96A9,0x6A69279AD4F8DAE1,0xE78689DCD3D5FF2E,0x812E1A2B1FA553D1, + 0xFBAD90D6EBA0CA18,0x1AC543B234310E39,0x1604F7DF2CB97827,0xA6241C6951189F02, + 0x753513CCEAAF7C5E,0x64F2A59FC84C4EFA,0x247D2B1E489F5F5A,0xDB64D718AB474C48, + 0x79F4A7A1F2270A40,0x1573DA832A9BEBAE,0x3497867968621C72,0x514838D2A2302304, + 0xF0AF6537FD72F685,0x1D06023E3A6B44BA,0x678588C3CE6EDD73,0x66A893F7CC70ACFF, + 0xD4D24E29B5EDA9DF,0x3856321470EA6A6C,0x07C3418C0E5A4A83,0x2BCBB22F5635BACD, + 0x04B46CD00878D90A,0x06EE5AB80C443B0F,0x3B211F4876C8F9E5,0x0958C38912EEDE98, + 0xD14B39CDBF8B0159,0x397B292072F41BE0,0x87C0409313E168DE,0xAD26E98847CAA39F, + 0x4E140C849C6785BB,0xD5FF551DB7F3D853,0xA0CA46D15D5CA40D,0xCD6020C787FE346F, + 0x84B76DCF15C3FB57,0xDEFDA0FCA121E4CE,0x4B8D7B6096012D3D,0x9AC642AD298A2C64, + 0x0875D8BD10F0AF14,0xB357C6EA7B8374AC,0x4D6321D89A451632,0xEDA96709C719B23F, + 0xF76C24BBF328BC06,0xC662D526912C08F2,0x3CE25EC47892B366,0xB978283F6F4F39BD, + 0xC08C8F9E9D6833FD,0x4F3917B09E79F437,0x593DE06FB2C08C10,0xD6887841B1D14BDA, + 0x19B26EEE32139DB0,0xB494876675D93E2F,0x825937771987C058,0x90E9AC783D466175, + 0xF1827E03FF6C8709,0x945DC0A8353EB87F,0x4516F9658AB5B926,0x3F9573987EB020EF, + 0xB855330B6D514831,0x2AE6A91B542BCB41,0x6331E413C6160479,0x408F8E8180D311A0, + 0xEFF35161C325503A,0xD06622F9BD9570D5,0x8876D9A20D4B8D49,0xA5533135573A0C8B, + 0xE168D364DF91C421,0xF41B09E7F50A2F8F,0x12B09B0F24C1A12D,0xDA49CC2CA9593DC4, + 0x1F5C34563E57A6BF,0x54D14F36A8568B82,0xAF7CDFE043F6419A,0xEA6A2685C943F8BC, + 0xE5DCBFB4D7E91D2B,0xB27ADDDE799D0520,0x6B443CAED6E6AB6D,0x7BAE91C9F61BE845, + 0x3EB868AC7CAE5163,0x11C7B65322E332A4,0xD23C1491B9A992D0,0x8FB5982E0311C7CA, + 0x70AC6428E0C9D4D8,0x895BC2960F55FCC5,0x76423E90EC8DEFD7,0x6FF0507EDE9E7267, + 0x3DCF45F07A8CC2EA,0x4AA06054941F5CB1,0x5810FB5BB0DEFD9C,0x5EFEA1E3BC9AC693, + 0x6EDD4B4ADC8003EB,0x741808F8E8B10DD2,0x145EC1B728859A22,0x28BC9F7350172944, + 0x270A06424EBDCCD3,0x972AEDF4331C2BF6,0x059977E40A66A886,0x2550302A4A812ED6, + 0xDD8A8DA0A7037747,0xC515F87A970E9B7B,0x3023EAA9601AC578,0xB7E3AA3A73FBADA6, + 0x0FB699311EAAE597,0x0000000000000000,0x310EF19D6204B4F4,0x229371A644DB6455, + 0x0DECAF591A960792,0x5CA4978BB8A62496,0x1C2B190A38753536,0x41A295B582CD602C, + 0x3279DCC16426277D,0xC1A194AA9F764271,0x139D803B26DFD0A1,0xAE51C4D441E83016, + 0xD813FA44AD65DFC1,0xAC0BF2BC45D4D213,0x23BE6A9246C515D9,0x49D74D08923DCF38, + 0x9D05032127D066E7,0x2F7FDEFF5E4D63C7,0xA47E2A0155247D07,0x99B16FF12FA8BFED, + 0x4661D4398C972AAF,0xDFD0BBC8A33F9542,0xDCA79694A51D06CB,0xB020EBB67DA1E725, + 0xBA0F0563696DAA34,0xE4F1A480D5F76CA7,0xC438E34E9510EAF7,0x939E81243B64F2FC, + 0x8DEFAE46072D25CF,0x2C08F3A3586FF04E,0xD7A56375B3CF3A56,0x20C947CE40E78650, + 0x43F8A3DD86F18229,0x568B795EAC6A6987,0x8003011F1DBB225D,0xF53612D3F7145E03, + 0x189F75DA300DEC3C,0x9570DB9C3720C9F3,0xBB221E576B73DBB8,0x72F65240E4F536DD, + 0x443BE25188ABC8AA,0xE21FFE38D9B357A8,0xFD43CA6EE7E4F117,0xCAA3614B89A47EEC, + 0xFE34E732E1C6629E,0x83742C431B99B1D4,0xCF3A16AF83C2D66A,0xAAE5A8044990E91C, + 0x26271D764CA3BD5F,0x91C4B74C3F5810F9,0x7C6DD045F841A2C6,0x7F1AFD19FE63314F, + 0xC8F957238D989CE9,0xA709075D5306EE8E,0x55FC5402AA48FA0E,0x48FA563C9023BEB4, + 0x65DFBEABCA523F76,0x6C877D22D8BCE1EE,0xCC4D3BF385E045E3,0xBEBB69B36115733E, + 0x10EAAD6720FD4328,0xB6CEB10E71E5DC2A,0xBDCC44EF6737E0B7,0x523F158EA412B08D, + 0x989C74C52DB6CE61,0x9BEB59992B945DE8,0x8A2CEFCA09776F4C,0xA3BD6B8D5B7E3784, + 0xEB473DB1CB5D8930,0xC3FBA2C29B4AA074,0x9C28181525CE176B,0x683311F2D0C438E4, + 0x5FD3BAD7BE84B71F,0xFC6ED15AE5FA809B,0x36CDB0116C5EFE77,0x29918447520958C8, + 0xA29070B959604608,0x53120EBAA60CC101,0x3A0C047C74D68869,0x691E0AC6D2DA4968, + 0x73DB4974E6EB4751,0x7A838AFDF40599C9,0x5A4ACD33B4E21F99,0x6046C94FC03497F0, + 0xE6AB92E8D1CB8EA2,0x3354C7F5663856F1,0xD93EE170AF7BAE4D,0x616BD27BC22AE67C, + 0x92B39A10397A8370,0xABC8B3304B8E9890,0xBF967287630B02B2,0x5B67D607B6FC6E15 +},{ + 0xD031C397CE553FE6,0x16BA5B01B006B525,0xA89BADE6296E70C8,0x6A1F525D77D3435B, + 0x6E103570573DFA0B,0x660EFB2A17FC95AB,0x76327A9E97634BF6,0x4BAD9D6462458BF5, + 0xF1830CAEDBC3F748,0xC5C8F542669131FF,0x95044A1CDC48B0CB,0x892962DF3CF8B866, + 0xB0B9E208E930C135,0xA14FB3F0611A767C,0x8D2605F21C160136,0xD6B71922FECC549E, + 0x37089438A5907D8B,0x0B5DA38E5803D49C,0x5A5BCC9CEA6F3CBC,0xEDAE246D3B73FFE5, + 0xD2B87E0FDE22EDCE,0x5E54ABB1CA8185EC,0x1DE7F88FE80561B9,0xAD5E1A870135A08C, + 0x2F2ADBD665CECC76,0x5780B5A782F58358,0x3EDC8A2EEDE47B3F,0xC9D95C3506BEE70F, + 0x83BE111D6C4E05EE,0xA603B90959367410,0x103C81B4809FDE5D,0x2C69B6027D0C774A, + 0x399080D7D5C87953,0x09D41E16487406B4,0xCDD63B1826505E5F,0xF99DC2F49B0298E8, + 0x9CD0540A943CB67F,0xBCA84B7F891F17C5,0x723D1DB3B78DF2A6,0x78AA6E71E73B4F2E, + 0x1433E699A071670D,0x84F21BE454620782,0x98DF3327B4D20F2F,0xF049DCE2D3769E5C, + 0xDB6C60199656EB7A,0x648746B2078B4783,0x32CD23598DCBADCF,0x1EA4955BF0C7DA85, + 0xE9A143401B9D46B5,0xFD92A5D9BBEC21B8,0xC8138C790E0B8E1B,0x2EE00B9A6D7BA562, + 0xF85712B893B7F1FC,0xEB28FED80BEA949D,0x564A65EB8A40EA4C,0x6C9988E8474A2823, + 0x4535898B121D8F2D,0xABD8C03231ACCBF4,0xBA2E91CAB9867CBD,0x7960BE3DEF8E263A, + 0x0C11A977602FD6F0,0xCB50E1AD16C93527,0xEAE22E94035FFD89,0x2866D12F5DE2CE1A, + 0xFF1B1841AB9BF390,0x9F9339DE8CFE0D43,0x964727C8C48A0BF7,0x524502C6AAAE531C, + 0x9B9C5EF3AC10B413,0x4FA2FA4942AB32A5,0x3F165A62E551122B,0xC74148DA76E6E3D7, + 0x924840E5E464B2A7,0xD372AE43D69784DA,0x233B72A105E11A86,0xA48A04914941A638, + 0xB4B68525C9DE7865,0xDDEABAACA6CF8002,0x0A9773C250B6BD88,0xC284FFBB5EBD3393, + 0x8BA0DF472C8F6A4E,0x2AEF6CB74D951C32,0x427983722A318D41,0x73F7CDFFBF389BB2, + 0x074C0AF9382C026C,0x8A6A0F0B243A035A,0x6FDAE53C5F88931F,0xC68B98967E538AC3, + 0x44FF59C71AA8E639,0xE2FCE0CE439E9229,0xA20CDE2479D8CD40,0x19E89FA2C8EBD8E9, + 0xF446BBCFF398270C,0x43B3533E2284E455,0xD82F0DCD8E945046,0x51066F12B26CE820, + 0xE73957AF6BC5426D,0x081ECE5A40C16FA0,0x3B193D4FC5BFAB7B,0x7FE66488DF174D42, + 0x0E9814EF705804D8,0x8137AC857C39D7C6,0xB1733244E185A821,0x695C3F896F11F867, + 0xF6CF0657E3EFF524,0x1AABF276D02963D5,0x2DA3664E75B91E5E,0x0289BD981077D228, + 0x90C1FD7DF413608F,0x3C5537B6FD93A917,0xAA12107E3919A2E0,0x0686DAB530996B78, + 0xDAA6B0559EE3826E,0xC34E2FF756085A87,0x6D5358A44FFF4137,0xFC587595B35948AC, + 0x7CA5095CC7D5F67E,0xFB147F6C8B754AC0,0xBFEB26AB91DDACF9,0x6896EFC567A49173, + 0xCA9A31E11E7C5C33,0xBBE44186B13315A9,0x0DDB793B689ABFE4,0x70B4A02BA7FA208E, + 0xE47A3A7B7307F951,0x8CECD5BE14A36822,0xEEED49B923B144D9,0x17708B4DB8B3DC31, + 0x6088219F2765FED3,0xB3FA8FDCF1F27A09,0x910B2D31FCA6099B,0x0F52C4A378ED6DCC, + 0x50CCBF5EBAD98134,0x6BD582117F662A4F,0x94CE9A50D4FDD9DF,0x2B25BCFB45207526, + 0x67C42B661F49FCBF,0x492420FC723259DD,0x03436DD418C2BB3C,0x1F6E4517F872B391, + 0xA08563BC69AF1F68,0xD43EA4BAEEBB86B6,0x01CAD04C08B56914,0xAC94CACB0980C998, + 0x54C3D8739A373864,0x26FEC5C02DBACAC2,0xDEA9D778BE0D3B3E,0x040F672D20EEB950, + 0xE5B0EA377BB29045,0xF30AB136CBB42560,0x62019C0737122CFB,0xE86B930C13282FA1, + 0xCC1CEB542EE5374B,0x538FD28AA21B3A08,0x1B61223AD89C0AC1,0x36C24474AD25149F, + 0x7A23D3E9F74C9D06,0xBE21F6E79968C5ED,0xCF5F868036278C77,0xF705D61BEB5A9C30, + 0x4D2B47D152DCE08D,0x5F9E7BFDC234ECF8,0x247778583DCD18EA,0x867BA67C4415D5AA, + 0x4CE1979D5A698999,0x0000000000000000,0xEC64F42133C696F1,0xB57C5569C16B1171, + 0xC1C7926F467F88AF,0x654D96FE0F3E2E97,0x15F936D5A8C40E19,0xB8A72C52A9F1AE95, + 0xA9517DAA21DB19DC,0x58D27104FA18EE94,0x5918A148F2AD8780,0x5CDD1629DAF657C4, + 0x8274C15164FB6CFA,0xD1FB13DBC6E056F2,0x7D6FD910CF609F6A,0xB63F38BDD9A9AA4D, + 0x3D9FE7FAF526C003,0x74BBC706871499DE,0xDF630734B6B8522A,0x3AD3ED03CD0AC26F, + 0xFADEAF2083C023D4,0xC00D42234ECAE1BB,0x8538CBA85CD76E96,0xC402250E6E2458EB, + 0x47BC3413026A5D05,0xAFD7A71F114272A4,0x978DF784CC3F62E3,0xB96DFC1EA144C781, + 0x21B2CF391596C8AE,0x318E4E8D950916F3,0xCE9556CC3E92E563,0x385A509BDD7D1047, + 0x358129A0B5E7AFA3,0xE6F387E363702B79,0xE0755D5653E94001,0x7BE903A5FFF9F412, + 0x12B53C2C90E80C75,0x3307F315857EC4DB,0x8FAFB86A0C61D31E,0xD9E5DD8186213952, + 0x77F8AAD29FD622E2,0x25BDA814357871FE,0x7571174A8FA1F0CA,0x137FEC60985D6561, + 0x30449EC19DBC7FE7,0xA540D4DD41F4CF2C,0xDC206AE0AE7AE916,0x5B911CD0E2DA55A8, + 0xB2305F90F947131D,0x344BF9ECBD52C6B7,0x5D17C665D2433ED0,0x18224FEEC05EB1FD, + 0x9E59E992844B6457,0x9A568EBFA4A5DD07,0xA3C60E68716DA454,0x7E2CB4C4D7A22456, + 0x87B176304CA0BCBE,0x413AEEA632F3367D,0x9915E36BBC67663B,0x40F03EEA3A465F69, + 0x1C2D28C3E0B008AD,0x4E682A054A1E5BB1,0x05C5B761285BD044,0xE1BF8D1A5B5C2915, + 0xF2C0617AC3014C74,0xB7F5E8F1D11CC359,0x63CB4C4B3FA745EF,0x9D1A84469C89DF6B, + 0xE33630824B2BFB3D,0xD5F474F6E60EEFA2,0xF58C6B83FB2D4E18,0x4676E45F0ADF3411, + 0x20781F751D23A1BA,0xBD629B3381AA7ED1,0xAE1D775319F71BB0,0xFED1C80DA32E9A84, + 0x5509083F92825170,0x29AC01635557A70E,0xA7C9694551831D04,0x8E65682604D4BA0A, + 0x11F651F8882AB749,0xD77DC96EF6793D8A,0xEF2799F52B042DCD,0x48EEF0B07A8730C9, + 0x22F1A2ED0D547392,0x6142F1D32FD097C7,0x4A674D286AF0E2E1,0x80FD7CC9748CBED2, + 0x717E7067AF4F499A,0x938290A9ECD1DBB3,0x88E3B293344DD172,0x2734158C250FA3D6 +}}; + +// Constant values for KeySchedule function +const unsigned char C[12][64] = {{ + 0xB1,0x08,0x5B,0xDA,0x1E,0xCA,0xDA,0xE9,0xEB,0xCB,0x2F,0x81,0xC0,0x65,0x7C,0x1F, + 0x2F,0x6A,0x76,0x43,0x2E,0x45,0xD0,0x16,0x71,0x4E,0xB8,0x8D,0x75,0x85,0xC4,0xFC, + 0x4B,0x7C,0xE0,0x91,0x92,0x67,0x69,0x01,0xA2,0x42,0x2A,0x08,0xA4,0x60,0xD3,0x15, + 0x05,0x76,0x74,0x36,0xCC,0x74,0x4D,0x23,0xDD,0x80,0x65,0x59,0xF2,0xA6,0x45,0x07 +},{ + 0x6F,0xA3,0xB5,0x8A,0xA9,0x9D,0x2F,0x1A,0x4F,0xE3,0x9D,0x46,0x0F,0x70,0xB5,0xD7, + 0xF3,0xFE,0xEA,0x72,0x0A,0x23,0x2B,0x98,0x61,0xD5,0x5E,0x0F,0x16,0xB5,0x01,0x31, + 0x9A,0xB5,0x17,0x6B,0x12,0xD6,0x99,0x58,0x5C,0xB5,0x61,0xC2,0xDB,0x0A,0xA7,0xCA, + 0x55,0xDD,0xA2,0x1B,0xD7,0xCB,0xCD,0x56,0xE6,0x79,0x04,0x70,0x21,0xB1,0x9B,0xB7 +},{ + 0xF5,0x74,0xDC,0xAC,0x2B,0xCE,0x2F,0xC7,0x0A,0x39,0xFC,0x28,0x6A,0x3D,0x84,0x35, + 0x06,0xF1,0x5E,0x5F,0x52,0x9C,0x1F,0x8B,0xF2,0xEA,0x75,0x14,0xB1,0x29,0x7B,0x7B, + 0xD3,0xE2,0x0F,0xE4,0x90,0x35,0x9E,0xB1,0xC1,0xC9,0x3A,0x37,0x60,0x62,0xDB,0x09, + 0xC2,0xB6,0xF4,0x43,0x86,0x7A,0xDB,0x31,0x99,0x1E,0x96,0xF5,0x0A,0xBA,0x0A,0xB2 +},{ + 0xEF,0x1F,0xDF,0xB3,0xE8,0x15,0x66,0xD2,0xF9,0x48,0xE1,0xA0,0x5D,0x71,0xE4,0xDD, + 0x48,0x8E,0x85,0x7E,0x33,0x5C,0x3C,0x7D,0x9D,0x72,0x1C,0xAD,0x68,0x5E,0x35,0x3F, + 0xA9,0xD7,0x2C,0x82,0xED,0x03,0xD6,0x75,0xD8,0xB7,0x13,0x33,0x93,0x52,0x03,0xBE, + 0x34,0x53,0xEA,0xA1,0x93,0xE8,0x37,0xF1,0x22,0x0C,0xBE,0xBC,0x84,0xE3,0xD1,0x2E +},{ + 0x4B,0xEA,0x6B,0xAC,0xAD,0x47,0x47,0x99,0x9A,0x3F,0x41,0x0C,0x6C,0xA9,0x23,0x63, + 0x7F,0x15,0x1C,0x1F,0x16,0x86,0x10,0x4A,0x35,0x9E,0x35,0xD7,0x80,0x0F,0xFF,0xBD, + 0xBF,0xCD,0x17,0x47,0x25,0x3A,0xF5,0xA3,0xDF,0xFF,0x00,0xB7,0x23,0x27,0x1A,0x16, + 0x7A,0x56,0xA2,0x7E,0xA9,0xEA,0x63,0xF5,0x60,0x17,0x58,0xFD,0x7C,0x6C,0xFE,0x57 +},{ + 0xAE,0x4F,0xAE,0xAE,0x1D,0x3A,0xD3,0xD9,0x6F,0xA4,0xC3,0x3B,0x7A,0x30,0x39,0xC0, + 0x2D,0x66,0xC4,0xF9,0x51,0x42,0xA4,0x6C,0x18,0x7F,0x9A,0xB4,0x9A,0xF0,0x8E,0xC6, + 0xCF,0xFA,0xA6,0xB7,0x1C,0x9A,0xB7,0xB4,0x0A,0xF2,0x1F,0x66,0xC2,0xBE,0xC6,0xB6, + 0xBF,0x71,0xC5,0x72,0x36,0x90,0x4F,0x35,0xFA,0x68,0x40,0x7A,0x46,0x64,0x7D,0x6E +},{ + 0xF4,0xC7,0x0E,0x16,0xEE,0xAA,0xC5,0xEC,0x51,0xAC,0x86,0xFE,0xBF,0x24,0x09,0x54, + 0x39,0x9E,0xC6,0xC7,0xE6,0xBF,0x87,0xC9,0xD3,0x47,0x3E,0x33,0x19,0x7A,0x93,0xC9, + 0x09,0x92,0xAB,0xC5,0x2D,0x82,0x2C,0x37,0x06,0x47,0x69,0x83,0x28,0x4A,0x05,0x04, + 0x35,0x17,0x45,0x4C,0xA2,0x3C,0x4A,0xF3,0x88,0x86,0x56,0x4D,0x3A,0x14,0xD4,0x93 +},{ + 0x9B,0x1F,0x5B,0x42,0x4D,0x93,0xC9,0xA7,0x03,0xE7,0xAA,0x02,0x0C,0x6E,0x41,0x41, + 0x4E,0xB7,0xF8,0x71,0x9C,0x36,0xDE,0x1E,0x89,0xB4,0x44,0x3B,0x4D,0xDB,0xC4,0x9A, + 0xF4,0x89,0x2B,0xCB,0x92,0x9B,0x06,0x90,0x69,0xD1,0x8D,0x2B,0xD1,0xA5,0xC4,0x2F, + 0x36,0xAC,0xC2,0x35,0x59,0x51,0xA8,0xD9,0xA4,0x7F,0x0D,0xD4,0xBF,0x02,0xE7,0x1E +},{ + 0x37,0x8F,0x5A,0x54,0x16,0x31,0x22,0x9B,0x94,0x4C,0x9A,0xD8,0xEC,0x16,0x5F,0xDE, + 0x3A,0x7D,0x3A,0x1B,0x25,0x89,0x42,0x24,0x3C,0xD9,0x55,0xB7,0xE0,0x0D,0x09,0x84, + 0x80,0x0A,0x44,0x0B,0xDB,0xB2,0xCE,0xB1,0x7B,0x2B,0x8A,0x9A,0xA6,0x07,0x9C,0x54, + 0x0E,0x38,0xDC,0x92,0xCB,0x1F,0x2A,0x60,0x72,0x61,0x44,0x51,0x83,0x23,0x5A,0xDB +},{ + 0xAB,0xBE,0xDE,0xA6,0x80,0x05,0x6F,0x52,0x38,0x2A,0xE5,0x48,0xB2,0xE4,0xF3,0xF3, + 0x89,0x41,0xE7,0x1C,0xFF,0x8A,0x78,0xDB,0x1F,0xFF,0xE1,0x8A,0x1B,0x33,0x61,0x03, + 0x9F,0xE7,0x67,0x02,0xAF,0x69,0x33,0x4B,0x7A,0x1E,0x6C,0x30,0x3B,0x76,0x52,0xF4, + 0x36,0x98,0xFA,0xD1,0x15,0x3B,0xB6,0xC3,0x74,0xB4,0xC7,0xFB,0x98,0x45,0x9C,0xED +},{ + 0x7B,0xCD,0x9E,0xD0,0xEF,0xC8,0x89,0xFB,0x30,0x02,0xC6,0xCD,0x63,0x5A,0xFE,0x94, + 0xD8,0xFA,0x6B,0xBB,0xEB,0xAB,0x07,0x61,0x20,0x01,0x80,0x21,0x14,0x84,0x66,0x79, + 0x8A,0x1D,0x71,0xEF,0xEA,0x48,0xB9,0xCA,0xEF,0xBA,0xCD,0x1D,0x7D,0x47,0x6E,0x98, + 0xDE,0xA2,0x59,0x4A,0xC0,0x6F,0xD8,0x5D,0x6B,0xCA,0xA4,0xCD,0x81,0xF3,0x2D,0x1B +},{ + 0x37,0x8E,0xE7,0x67,0xF1,0x16,0x31,0xBA,0xD2,0x13,0x80,0xB0,0x04,0x49,0xB1,0x7A, + 0xCD,0xA4,0x3C,0x32,0xBC,0xDF,0x1D,0x77,0xF8,0x20,0x12,0xD4,0x30,0x21,0x9F,0x9B, + 0x5D,0x80,0xEF,0x9D,0x18,0x91,0xCC,0x86,0xE7,0x1D,0xA4,0xAA,0x88,0xE1,0x28,0x52, + 0xFA,0xF4,0x17,0xD5,0xD9,0xB2,0x1B,0x99,0x48,0xBC,0x92,0x4A,0xF1,0x1B,0xD7,0x20 +}}; + + +static void AddModulo512(const void *a,const void *b,void *c) +{ + const unsigned char *A=a, *B=b; + unsigned char *C=c; + int t = 0; +#ifdef FULL_UNROLL +#define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF; + + ADDBYTE_8(63) + ADDBYTE_8(62) + ADDBYTE_8(61) + ADDBYTE_8(60) + ADDBYTE_8(59) + ADDBYTE_8(58) + ADDBYTE_8(57) + ADDBYTE_8(56) + ADDBYTE_8(55) + ADDBYTE_8(54) + ADDBYTE_8(53) + ADDBYTE_8(52) + ADDBYTE_8(51) + ADDBYTE_8(50) + ADDBYTE_8(49) + ADDBYTE_8(48) + ADDBYTE_8(47) + ADDBYTE_8(46) + ADDBYTE_8(45) + ADDBYTE_8(44) + ADDBYTE_8(43) + ADDBYTE_8(42) + ADDBYTE_8(41) + ADDBYTE_8(40) + ADDBYTE_8(39) + ADDBYTE_8(38) + ADDBYTE_8(37) + ADDBYTE_8(36) + ADDBYTE_8(35) + ADDBYTE_8(34) + ADDBYTE_8(33) + ADDBYTE_8(32) + ADDBYTE_8(31) + ADDBYTE_8(30) + ADDBYTE_8(29) + ADDBYTE_8(28) + ADDBYTE_8(27) + ADDBYTE_8(26) + ADDBYTE_8(25) + ADDBYTE_8(24) + ADDBYTE_8(23) + ADDBYTE_8(22) + ADDBYTE_8(21) + ADDBYTE_8(20) + ADDBYTE_8(19) + ADDBYTE_8(18) + ADDBYTE_8(17) + ADDBYTE_8(16) + ADDBYTE_8(15) + ADDBYTE_8(14) + ADDBYTE_8(13) + ADDBYTE_8(12) + ADDBYTE_8(11) + ADDBYTE_8(10) + ADDBYTE_8(9) + ADDBYTE_8(8) + ADDBYTE_8(7) + ADDBYTE_8(6) + ADDBYTE_8(5) + ADDBYTE_8(4) + ADDBYTE_8(3) + ADDBYTE_8(2) + ADDBYTE_8(1) + ADDBYTE_8(0) + +#else + int i = 0; + + for(i=63;i>=0;i--) + { + t = A[i] + B[i] + (t >> 8); + C[i] = t & 0xFF; + } +#endif +} + +static void AddXor512(const void *a,const void *b,void *c) +{ + const unsigned long long *A=a, *B=b; + unsigned long long *C=c; +#ifdef FULL_UNROLL + C[0] = A[0] ^ B[0]; + C[1] = A[1] ^ B[1]; + C[2] = A[2] ^ B[2]; + C[3] = A[3] ^ B[3]; + C[4] = A[4] ^ B[4]; + C[5] = A[5] ^ B[5]; + C[6] = A[6] ^ B[6]; + C[7] = A[7] ^ B[7]; +#else + int i = 0; + + for(i=0; i<8; i++) { + C[i] = A[i] ^ B[i]; + } +#endif +} + +static void F(unsigned char *state) +{ + unsigned long long return_state[8]; + register unsigned long long r = 0; + r ^= TG[0][state[56]]; + r ^= TG[1][state[48]]; + r ^= TG[2][state[40]]; + r ^= TG[3][state[32]]; + r ^= TG[4][state[24]]; + r ^= TG[5][state[16]]; + r ^= TG[6][state[8]]; + r ^= TG[7][state[0]]; + return_state[0] = r; + r = 0; + + r ^= TG[0][state[57]]; + r ^= TG[1][state[49]]; + r ^= TG[2][state[41]]; + r ^= TG[3][state[33]]; + r ^= TG[4][state[25]]; + r ^= TG[5][state[17]]; + r ^= TG[6][state[9]]; + r ^= TG[7][state[1]]; + return_state[1] = r; + r = 0; + + r ^= TG[0][state[58]]; + r ^= TG[1][state[50]]; + r ^= TG[2][state[42]]; + r ^= TG[3][state[34]]; + r ^= TG[4][state[26]]; + r ^= TG[5][state[18]]; + r ^= TG[6][state[10]]; + r ^= TG[7][state[2]]; + return_state[2] = r; + r = 0; + + r ^= TG[0][state[59]]; + r ^= TG[1][state[51]]; + r ^= TG[2][state[43]]; + r ^= TG[3][state[35]]; + r ^= TG[4][state[27]]; + r ^= TG[5][state[19]]; + r ^= TG[6][state[11]]; + r ^= TG[7][state[3]]; + return_state[3] = r; + r = 0; + + r ^= TG[0][state[60]]; + r ^= TG[1][state[52]]; + r ^= TG[2][state[44]]; + r ^= TG[3][state[36]]; + r ^= TG[4][state[28]]; + r ^= TG[5][state[20]]; + r ^= TG[6][state[12]]; + r ^= TG[7][state[4]]; + return_state[4] = r; + r = 0; + + r ^= TG[0][state[61]]; + r ^= TG[1][state[53]]; + r ^= TG[2][state[45]]; + r ^= TG[3][state[37]]; + r ^= TG[4][state[29]]; + r ^= TG[5][state[21]]; + r ^= TG[6][state[13]]; + r ^= TG[7][state[5]]; + return_state[5] = r; + r = 0; + + r ^= TG[0][state[62]]; + r ^= TG[1][state[54]]; + r ^= TG[2][state[46]]; + r ^= TG[3][state[38]]; + r ^= TG[4][state[30]]; + r ^= TG[5][state[22]]; + r ^= TG[6][state[14]]; + r ^= TG[7][state[6]]; + return_state[6] = r; + r = 0; + + r ^= TG[0][state[63]]; + r ^= TG[1][state[55]]; + r ^= TG[2][state[47]]; + r ^= TG[3][state[39]]; + r ^= TG[4][state[31]]; + r ^= TG[5][state[23]]; + r ^= TG[6][state[15]]; + r ^= TG[7][state[7]]; + return_state[7] = r; + + memcpy(state,(unsigned char*)return_state,64); +} + +#define KeySchedule(K,i) AddXor512(K,C[i],K); F(K); + +static void E(unsigned char *K,const unsigned char *m, unsigned char *state) +{ +#ifdef FULL_UNROLL + AddXor512(m,K,state); + + F(state); + KeySchedule(K,0); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,1); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,2); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,3); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,4); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,5); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,6); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,7); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,8); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,9); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,10); + AddXor512(state,K,state); + + F(state); + KeySchedule(K,11); + AddXor512(state,K,state); +#else + int i = 0; + + AddXor512(m,K,state); + + for(i=0;i<12;i++) { + F(state); + KeySchedule(K,i); + AddXor512(state,K,state); + } +#endif +} + +static void g_N(const unsigned char *N,unsigned char *h,const unsigned char *m) +{ + unsigned char t[64], K[64]; + + AddXor512(N,h,K); + + F(K); + + E(K,m,t); + + AddXor512(t,h,t); + AddXor512(t,m,h); +} + +static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out) +{ + unsigned char v512[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00 + }; + unsigned char v0[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; + unsigned char Sigma[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; + unsigned char N[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; + unsigned char m[64], *hash = IV; + unsigned long long len = length; + + // Stage 2 + while (len >= 512) + { + memcpy(m, message + len/8 - 63 - ( (len & 0x7) == 0 ), 64); + + g_N(N,hash,m); + AddModulo512(N,v512,N); + AddModulo512(Sigma,m,Sigma); + len -= 512; + } + + memset(m,0,64); + memcpy(m + 63 - len/8 + ( (len & 0x7) == 0 ), message, len/8 + 1 - ( (len & 0x7) == 0 )); + + // Stage 3 + m[ 63 - len/8 ] |= (1 << (len & 0x7)); + + g_N(N,hash,m); + v512[63] = len & 0xFF; + v512[62] = (unsigned char) (len >> 8); + AddModulo512(N,v512,N); + + AddModulo512(Sigma,m,Sigma); + + g_N(v0,hash,N); + g_N(v0,hash,Sigma); + + memcpy(out, hash, 64); +} + +static void hash_512(const unsigned char *message, unsigned long long length, unsigned char *out) +{ + unsigned char IV[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; + + hash_X(IV,message,length,out); +} + +static void hash_256(const unsigned char *message, unsigned long long length, unsigned char *out) +{ + unsigned char IV[64] = { + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, + 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 + }; + unsigned char hash[64]; + + hash_X(IV,message,length,hash); + + memcpy(out,hash,32); +} + + + + + +/* see sph_gost.h */ +void +sph_gost256_init(void *cc) +{ + //gost_init(cc, 256); +} + +/* see sph_gost.h */ +void +sph_gost256(void *cc, const void *data, size_t len) +{ + hash_256(data, 8*len, cc); +} + +/* see sph_gost.h */ +void +sph_gost256_close(void *cc, void *dst) +{ + //sph_gost256_addbits_and_close(cc, 0, 0, dst); + memcpy(dst, cc, 32); +} + +/* see sph_gost.h */ +void +sph_gost256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + //gost_close32(cc, ub, n, dst); +} + +/* see sph_gost.h */ +void +sph_gost512_init(void *cc) +{ + //gost_init(cc, 512); +} + +/* see sph_gost.h */ +void +sph_gost512(void *cc, const void *data, size_t len) +{ + hash_512(data, 8*len, cc); +} + +/* see sph_gost.h */ +void +sph_gost512_close(void *cc, void *dst) +{ + //sph_gost512_addbits_and_close(cc, 0, 0, dst); + memcpy(dst, cc, 64); +} + +/* see sph_gost.h */ +void +sph_gost512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + //gost_close64(cc, ub, n, dst); +} + +void gostd_hash(void *output, const void *input) +{ + unsigned char hash[64]; + + sph_gost512(hash, (const void*)input, 80); + sph_gost256(hash, (const void*)hash, 64); + + memcpy(output, hash, 32); +} + +#ifdef __cplusplus +} +#endif diff --git a/streebog.h b/streebog.h new file mode 100644 index 0000000..3e71dcf --- /dev/null +++ b/streebog.h @@ -0,0 +1,187 @@ +/* $Id: sph_gost.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * GOST interface. This is the interface for GOST R 12 with the + * recommended parameters for SHA-3, with output lengths 256 + * and 512 bits. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_gost.h + * @author Mish + */ + +#ifndef SPH_GOST_H__ +#define SPH_GOST_H__ + +#ifdef __cplusplus +extern "C"{ +#endif + +#include +#include "sph_types.h" + +/** + * Output size (in bits) for GOST-256. + */ +#define SPH_SIZE_gost256 256 + +/** + * Output size (in bits) for GOST-512. + */ +#define SPH_SIZE_gost512 512 + +/** + * This structure is a context for Keccak computations: it contains the + * intermediate values and some data from the last entered block. Once a + * GOST computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running GOST computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ + +/** + * This structure is a context for Gost-256 computations. + */ + +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[32]; /* first field, for alignment */ + size_t ptr; + sph_u32 V[3][8]; +#endif +} sph_gost256_context; + +/** + * This structure is a context for Gost-512 computations. + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + size_t ptr; + sph_u32 V[5][8]; +#endif +} sph_gost512_context; + + +/** + * Initialize a GOST-256 context. This process performs no memory allocation. + * + * @param cc the GOST-256 context (pointer to a + * sph_gost256_context) + */ +void sph_gost256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Gost-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_gost256(void *cc, const void *data, size_t len); + +/** + * Terminate the current GOST-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the GOST-256 context + * @param dst the destination buffer + */ +void sph_gost256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the GOST-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_gost256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Gost-512 context. This process performs no memory allocation. + * + * @param cc the GOST-512 context (pointer to a + * sph_gost512_context) + */ +void sph_gost512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the GOST-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_gost512(void *cc, const void *data, size_t len); + +/** + * Terminate the current GOST-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the GOST-512 context + * @param dst the destination buffer + */ +void sph_gost512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the GOST-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_gost512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +void gostd_hash(void *output, const void *input); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/util.c b/util.c index 6088cf1..951ae68 100644 --- a/util.c +++ b/util.c @@ -129,7 +129,7 @@ json_t *json_rpc_call(const char *url, const char *userpass, const char *rpc_req if (rc) goto err_out; - val = json_loads(all_data.buf, &err); + val = json_loads(all_data.buf, 0, &err); if (!val) { fprintf(stderr, "JSON failed(%d): %s\n", err.line, err.text); goto err_out; @@ -226,3 +226,20 @@ timeval_subtract ( return x->tv_sec < y->tv_sec; } +uint32_t swap32(uint32_t value) +{ +#if defined(__x86_64__) || defined(__i386__) + __asm__ + ( + "bswap %0" + : "=r"(value) + : "0"(value) + : + ); + return value; +#else + value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8); + return (value<<16) | (value>>16); +#endif +} +