GOSTCoin CUDA miner project, compatible with most NVIDIA cards, containing only the gostd algorithm
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

249 lines
8.2 KiB

/*
* Copyright 2008-2012 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thrust/detail/config.h>
// don't attempt to #include this file without omp support
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
#include <omp.h>
#endif // omp support
#include <thrust/iterator/iterator_traits.h>
#include <thrust/system/detail/generic/select_system.h>
#include <thrust/system/cpp/detail/sort.h>
#include <thrust/system/cpp/detail/merge.h>
#include <thrust/system/cpp/detail/execution_policy.h>
#include <thrust/detail/temporary_array.h>
namespace thrust
{
namespace system
{
namespace omp
{
namespace detail
{
namespace sort_detail
{
// Merges the adjacent ranges [first, middle) and [middle, last) and writes
// the merged sequence back starting at first.
//
// Both halves are first copied into temporary arrays constructed with exec,
// so the cpp-system merge reads only from those copies while it overwrites
// the original storage. Elements are ordered with comp.
// NOTE(review): presumably both halves must already be sorted w.r.t. comp,
// as is usual for a merge -- confirm against thrust::system::cpp::detail::merge.
template <typename DerivedPolicy,
          typename RandomAccessIterator,
          typename StrictWeakOrdering>
void inplace_merge(execution_policy<DerivedPolicy> &exec,
                   RandomAccessIterator first,
                   RandomAccessIterator middle,
                   RandomAccessIterator last,
                   StrictWeakOrdering comp)
{
  typedef typename thrust::iterator_value<RandomAccessIterator>::type element_type;
  typedef thrust::detail::temporary_array<element_type, DerivedPolicy> scratch_type;

  // snapshot the left half, then the right half
  scratch_type left_copy(exec, first, middle);
  scratch_type right_copy(exec, middle, last);

  // merge the two snapshots over the original range
  thrust::system::cpp::detail::merge(exec,
                                     left_copy.begin(), left_copy.end(),
                                     right_copy.begin(), right_copy.end(),
                                     first,
                                     comp);
}
// Key/value counterpart of inplace_merge.
//
// The key ranges [first1, middle1) and [middle1, last1) are merged back into
// the storage starting at first1. The value range starting at first2 is split
// at the same offsets, and its two halves are written back starting at first2
// by the same merge_by_key call. All four halves are copied into temporary
// arrays constructed with exec before the cpp-system merge_by_key overwrites
// the original storage. Keys are ordered with comp.
template <typename DerivedPolicy,
          typename RandomAccessIterator1,
          typename RandomAccessIterator2,
          typename StrictWeakOrdering>
void inplace_merge_by_key(execution_policy<DerivedPolicy> &exec,
                          RandomAccessIterator1 first1,
                          RandomAccessIterator1 middle1,
                          RandomAccessIterator1 last1,
                          RandomAccessIterator2 first2,
                          StrictWeakOrdering comp)
{
  typedef typename thrust::iterator_value<RandomAccessIterator1>::type key_type;
  typedef typename thrust::iterator_value<RandomAccessIterator2>::type mapped_type;
  typedef thrust::detail::temporary_array<key_type, DerivedPolicy> key_scratch;
  typedef thrust::detail::temporary_array<mapped_type, DerivedPolicy> value_scratch;

  // locate the split point and the end of the value range from the key offsets
  RandomAccessIterator2 values_middle = first2 + (middle1 - first1);
  RandomAccessIterator2 values_last = first2 + (last1 - first1);

  // snapshot both key halves, then both value halves
  key_scratch left_keys(exec, first1, middle1);
  key_scratch right_keys(exec, middle1, last1);
  value_scratch left_values(exec, first2, values_middle);
  value_scratch right_values(exec, values_middle, values_last);

  // merge the snapshots over the original key and value storage
  thrust::system::cpp::detail::merge_by_key(exec,
                                            left_keys.begin(), left_keys.end(),
                                            right_keys.begin(), right_keys.end(),
                                            left_values.begin(), right_values.begin(),
                                            first1, first2,
                                            comp);
}
} // end sort_detail
// Sorts [first, last) with comp using an OpenMP parallel region.
//
// Outline:
//   1. The range is split into tiles by a uniform_decomposition built from the
//      element count and the number of threads in the team.
//   2. Each thread that owns a tile sorts it with the cpp-system stable_sort.
//   3. Neighbouring sorted runs are merged pairwise, round after round, with
//      sort_detail::inplace_merge until a single run remains.
//
// exec  - execution policy forwarded to the cpp-system sort, the merge helper
//         and its temporary arrays.
// first - beginning of the range to sort.
// last  - end of the range to sort.
// comp  - comparison object forwarded to the sort and merge calls.
//
// When THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE is not THRUST_TRUE, the body
// consists only of the static assertion below.
template<typename DerivedPolicy,
typename RandomAccessIterator,
typename StrictWeakOrdering>
void stable_sort(execution_policy<DerivedPolicy> &exec,
RandomAccessIterator first,
RandomAccessIterator last,
StrictWeakOrdering comp)
{
// we're attempting to launch an omp kernel, assert we're compiling with omp support
// ========================================================================
// X Note to the user: If you've found this line due to a compiler error, X
// X you need to enable OpenMP support in your compiler. X
// ========================================================================
THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator,
(THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
typedef typename thrust::iterator_difference<RandomAccessIterator>::type IndexType;
// nothing to do for an empty range; return before opening the parallel region
if (first == last)
return;
#pragma omp parallel
{
// Every thread builds its own decomposition object from the same arguments:
// the element count, 1, and the team size.
// NOTE(review): the second argument (1) is presumably a minimum tile
// granularity -- confirm against uniform_decomposition's definition.
thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(last - first, 1, omp_get_num_threads());
// index of the calling thread within the team
IndexType p_i = omp_get_thread_num();
// every thread sorts its own tile; threads whose index is not below
// decomp.size() have no tile and skip this step
if (p_i < decomp.size())
{
thrust::system::cpp::detail::stable_sort(exec,
first + decomp[p_i].begin(),
first + decomp[p_i].end(),
comp);
}
// wait until every tile has been sorted before any merging starts
#pragma omp barrier
// nseg: number of sorted runs still to be combined (halved, rounding up, each round)
// h:    thread-index stride; only threads whose index is a multiple of h merge in a round
IndexType nseg = decomp.size();
IndexType h = 2;
// keep track of which sub-range we're processing:
//   a - first tile of the run this thread owns
//   b - last tile of that run
//   c - last tile of the neighbouring run to the right
IndexType a=p_i, b=p_i, c=p_i+1;
// nseg and h are updated identically on every thread, so all threads execute
// the same number of iterations and reach the barrier at the end of each one
while( nseg>1 )
{
// clamp the right-hand run to the last existing tile
if(c >= decomp.size())
c = decomp.size() - 1;
// after clamping, c > b holds only if there is a run to the right of ours
if((p_i % h) == 0 && c > b)
{
// merge tiles [a, b] with tiles (b, c]
thrust::system::omp::detail::sort_detail::inplace_merge
(exec,
first + decomp[a].begin(),
first + decomp[b].end(),
first + decomp[c].end(),
comp);
// our run now ends at tile c; the next neighbour run ends h tiles further on
b = c;
c += h;
}
nseg = (nseg + 1) / 2;
h *= 2;
// finish this round of merges on all threads before starting the next
#pragma omp barrier
}
}
#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
}
// Sorts the keys in [keys_first, keys_last) with comp using an OpenMP parallel
// region, passing the range starting at values_first (offset identically to
// the keys) to the same sort and merge calls.
//
// Outline:
//   1. The key range is split into tiles by a uniform_decomposition built from
//      the key count and the number of threads in the team.
//   2. Each thread that owns a tile sorts it with the cpp-system
//      stable_sort_by_key.
//   3. Neighbouring sorted runs are merged pairwise, round after round, with
//      sort_detail::inplace_merge_by_key until a single run remains.
//
// exec         - execution policy forwarded to the cpp-system sort, the merge
//                helper and its temporary arrays.
// keys_first   - beginning of the key range.
// keys_last    - end of the key range.
// values_first - beginning of the value range; it is indexed with the same
//                tile offsets as the keys.
// comp         - comparison object forwarded to the sort and merge calls.
//
// When THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE is not THRUST_TRUE, the body
// consists only of the static assertion below.
template<typename DerivedPolicy,
typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename StrictWeakOrdering>
void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
RandomAccessIterator1 keys_first,
RandomAccessIterator1 keys_last,
RandomAccessIterator2 values_first,
StrictWeakOrdering comp)
{
// we're attempting to launch an omp kernel, assert we're compiling with omp support
// ========================================================================
// X Note to the user: If you've found this line due to a compiler error, X
// X you need to enable OpenMP support in your compiler. X
// ========================================================================
THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator1,
(THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
typedef typename thrust::iterator_difference<RandomAccessIterator1>::type IndexType;
// nothing to do for an empty key range; return before opening the parallel region
if (keys_first == keys_last)
return;
#pragma omp parallel
{
// Every thread builds its own decomposition object from the same arguments:
// the key count, 1, and the team size.
// NOTE(review): the second argument (1) is presumably a minimum tile
// granularity -- confirm against uniform_decomposition's definition.
thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(keys_last - keys_first, 1, omp_get_num_threads());
// index of the calling thread within the team
IndexType p_i = omp_get_thread_num();
// every thread sorts its own tile; threads whose index is not below
// decomp.size() have no tile and skip this step
if (p_i < decomp.size())
{
thrust::system::cpp::detail::stable_sort_by_key(exec,
keys_first + decomp[p_i].begin(),
keys_first + decomp[p_i].end(),
values_first + decomp[p_i].begin(),
comp);
}
// wait until every tile has been sorted before any merging starts
#pragma omp barrier
// nseg: number of sorted runs still to be combined (halved, rounding up, each round)
// h:    thread-index stride; only threads whose index is a multiple of h merge in a round
IndexType nseg = decomp.size();
IndexType h = 2;
// keep track of which sub-range we're processing:
//   a - first tile of the run this thread owns
//   b - last tile of that run
//   c - last tile of the neighbouring run to the right
IndexType a=p_i, b=p_i, c=p_i+1;
// nseg and h are updated identically on every thread, so all threads execute
// the same number of iterations and reach the barrier at the end of each one
while( nseg>1 )
{
// clamp the right-hand run to the last existing tile
if(c >= decomp.size())
c = decomp.size() - 1;
// after clamping, c > b holds only if there is a run to the right of ours
if((p_i % h) == 0 && c > b)
{
// merge key tiles [a, b] with key tiles (b, c]; the value range is passed
// from the same starting offset as the keys
thrust::system::omp::detail::sort_detail::inplace_merge_by_key
(exec,
keys_first + decomp[a].begin(),
keys_first + decomp[b].end(),
keys_first + decomp[c].end(),
values_first + decomp[a].begin(),
comp);
// our run now ends at tile c; the next neighbour run ends h tiles further on
b = c;
c += h;
}
nseg = (nseg + 1) / 2;
h *= 2;
// finish this round of merges on all threads before starting the next
#pragma omp barrier
}
}
#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
}
} // end namespace detail
} // end namespace omp
} // end namespace system
} // end namespace thrust