You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
249 lines
8.2 KiB
249 lines
8.2 KiB
/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
|
|
|
|
|
#include <thrust/detail/config.h> |
|
|
|
// don't attempt to #include this file without omp support |
|
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) |
|
#include <omp.h> |
|
#endif // omp support |
|
|
|
#include <thrust/iterator/iterator_traits.h> |
|
#include <thrust/system/detail/generic/select_system.h> |
|
#include <thrust/system/cpp/detail/sort.h> |
|
#include <thrust/system/cpp/detail/merge.h> |
|
#include <thrust/system/cpp/detail/execution_policy.h> |
|
#include <thrust/detail/temporary_array.h> |
|
|
|
namespace thrust |
|
{ |
|
namespace system |
|
{ |
|
namespace omp |
|
{ |
|
namespace detail |
|
{ |
|
namespace sort_detail |
|
{ |
|
|
|
|
|
template <typename DerivedPolicy, |
|
typename RandomAccessIterator, |
|
typename StrictWeakOrdering> |
|
void inplace_merge(execution_policy<DerivedPolicy> &exec, |
|
RandomAccessIterator first, |
|
RandomAccessIterator middle, |
|
RandomAccessIterator last, |
|
StrictWeakOrdering comp) |
|
{ |
|
typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type; |
|
|
|
thrust::detail::temporary_array<value_type,DerivedPolicy> a(exec, first, middle); |
|
thrust::detail::temporary_array<value_type,DerivedPolicy> b(exec, middle, last); |
|
|
|
thrust::system::cpp::detail::merge(exec, a.begin(), a.end(), b.begin(), b.end(), first, comp); |
|
} |
|
|
|
|
|
template <typename DerivedPolicy, |
|
typename RandomAccessIterator1, |
|
typename RandomAccessIterator2, |
|
typename StrictWeakOrdering> |
|
void inplace_merge_by_key(execution_policy<DerivedPolicy> &exec, |
|
RandomAccessIterator1 first1, |
|
RandomAccessIterator1 middle1, |
|
RandomAccessIterator1 last1, |
|
RandomAccessIterator2 first2, |
|
StrictWeakOrdering comp) |
|
{ |
|
typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type1; |
|
typedef typename thrust::iterator_value<RandomAccessIterator2>::type value_type2; |
|
|
|
RandomAccessIterator2 middle2 = first2 + (middle1 - first1); |
|
RandomAccessIterator2 last2 = first2 + (last1 - first1); |
|
|
|
thrust::detail::temporary_array<value_type1,DerivedPolicy> lhs1(exec, first1, middle1); |
|
thrust::detail::temporary_array<value_type1,DerivedPolicy> rhs1(exec, middle1, last1); |
|
thrust::detail::temporary_array<value_type2,DerivedPolicy> lhs2(exec, first2, middle2); |
|
thrust::detail::temporary_array<value_type2,DerivedPolicy> rhs2(exec, middle2, last2); |
|
|
|
thrust::system::cpp::detail::merge_by_key |
|
(exec, |
|
lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(), |
|
lhs2.begin(), rhs2.begin(), |
|
first1, first2, comp); |
|
} |
|
|
|
|
|
} // end sort_detail |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename RandomAccessIterator, |
|
typename StrictWeakOrdering> |
|
void stable_sort(execution_policy<DerivedPolicy> &exec, |
|
RandomAccessIterator first, |
|
RandomAccessIterator last, |
|
StrictWeakOrdering comp) |
|
{ |
|
// we're attempting to launch an omp kernel, assert we're compiling with omp support |
|
// ======================================================================== |
|
// X Note to the user: If you've found this line due to a compiler error, X |
|
// X you need to enable OpenMP support in your compiler. X |
|
// ======================================================================== |
|
THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator, |
|
(THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) ); |
|
|
|
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) |
|
typedef typename thrust::iterator_difference<RandomAccessIterator>::type IndexType; |
|
|
|
if (first == last) |
|
return; |
|
|
|
#pragma omp parallel |
|
{ |
|
thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(last - first, 1, omp_get_num_threads()); |
|
|
|
// process id |
|
IndexType p_i = omp_get_thread_num(); |
|
|
|
// every thread sorts its own tile |
|
if (p_i < decomp.size()) |
|
{ |
|
thrust::system::cpp::detail::stable_sort(exec, |
|
first + decomp[p_i].begin(), |
|
first + decomp[p_i].end(), |
|
comp); |
|
} |
|
|
|
#pragma omp barrier |
|
|
|
IndexType nseg = decomp.size(); |
|
IndexType h = 2; |
|
|
|
// keep track of which sub-range we're processing |
|
IndexType a=p_i, b=p_i, c=p_i+1; |
|
|
|
while( nseg>1 ) |
|
{ |
|
if(c >= decomp.size()) |
|
c = decomp.size() - 1; |
|
|
|
if((p_i % h) == 0 && c > b) |
|
{ |
|
thrust::system::omp::detail::sort_detail::inplace_merge |
|
(exec, |
|
first + decomp[a].begin(), |
|
first + decomp[b].end(), |
|
first + decomp[c].end(), |
|
comp); |
|
b = c; |
|
c += h; |
|
} |
|
|
|
nseg = (nseg + 1) / 2; |
|
h *= 2; |
|
|
|
#pragma omp barrier |
|
} |
|
} |
|
#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE |
|
} |
|
|
|
|
|
template<typename DerivedPolicy, |
|
typename RandomAccessIterator1, |
|
typename RandomAccessIterator2, |
|
typename StrictWeakOrdering> |
|
void stable_sort_by_key(execution_policy<DerivedPolicy> &exec, |
|
RandomAccessIterator1 keys_first, |
|
RandomAccessIterator1 keys_last, |
|
RandomAccessIterator2 values_first, |
|
StrictWeakOrdering comp) |
|
{ |
|
// we're attempting to launch an omp kernel, assert we're compiling with omp support |
|
// ======================================================================== |
|
// X Note to the user: If you've found this line due to a compiler error, X |
|
// X you need to enable OpenMP support in your compiler. X |
|
// ======================================================================== |
|
THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator1, |
|
(THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) ); |
|
|
|
#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) |
|
typedef typename thrust::iterator_difference<RandomAccessIterator1>::type IndexType; |
|
|
|
if (keys_first == keys_last) |
|
return; |
|
|
|
#pragma omp parallel |
|
{ |
|
thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(keys_last - keys_first, 1, omp_get_num_threads()); |
|
|
|
// process id |
|
IndexType p_i = omp_get_thread_num(); |
|
|
|
// every thread sorts its own tile |
|
if (p_i < decomp.size()) |
|
{ |
|
thrust::system::cpp::detail::stable_sort_by_key(exec, |
|
keys_first + decomp[p_i].begin(), |
|
keys_first + decomp[p_i].end(), |
|
values_first + decomp[p_i].begin(), |
|
comp); |
|
} |
|
|
|
#pragma omp barrier |
|
|
|
IndexType nseg = decomp.size(); |
|
IndexType h = 2; |
|
|
|
// keep track of which sub-range we're processing |
|
IndexType a=p_i, b=p_i, c=p_i+1; |
|
|
|
while( nseg>1 ) |
|
{ |
|
if(c >= decomp.size()) |
|
c = decomp.size() - 1; |
|
|
|
if((p_i % h) == 0 && c > b) |
|
{ |
|
thrust::system::omp::detail::sort_detail::inplace_merge_by_key |
|
(exec, |
|
keys_first + decomp[a].begin(), |
|
keys_first + decomp[b].end(), |
|
keys_first + decomp[c].end(), |
|
values_first + decomp[a].begin(), |
|
comp); |
|
b = c; |
|
c += h; |
|
} |
|
|
|
nseg = (nseg + 1) / 2; |
|
h *= 2; |
|
|
|
#pragma omp barrier |
|
} |
|
} |
|
#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE |
|
} |
|
|
|
|
|
} // end namespace detail |
|
} // end namespace omp |
|
} // end namespace system |
|
} // end namespace thrust |
|
|
|
|