You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
67 lines
1.8 KiB
67 lines
1.8 KiB
/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
|
|
|
#pragma once |
|
|
|
#include <thrust/detail/config.h> |
|
#include <thrust/iterator/iterator_traits.h> |
|
|
|
namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace block
{

/* Reduces [data, data + n) using binary_op and stores the result in data[0].
 *
 * Upon return the elements in [data + 1, data + n) have unspecified values.
 *
 * context   : supplies block_dimension(), thread_index() and barrier();
 *             these are the only operations used on it.
 * data      : indexable storage that is read and overwritten in place.
 * n         : number of elements to reduce; may be smaller than, equal to,
 *             or larger than context.block_dimension().
 * binary_op : callable combining two elements; its result is assigned back
 *             into data.
 *
 * Elements are combined out of their original order (slot t is paired with
 * slot n - t - 1, and strided tail elements are folded into slot t), so the
 * result only matches a left-to-right reduction when binary_op is both
 * associative and commutative.
 *
 * NOTE(review): barrier() is invoked unconditionally inside the loops below,
 * so presumably every thread of the block must call this function with the
 * same n -- confirm against the Context implementation.
 */
template <typename Context, typename ValueIterator, typename BinaryFunction>
__device__ __thrust_forceinline__
void reduce_n(Context context, ValueIterator data, unsigned int n, BinaryFunction binary_op)
{
  if (context.block_dimension() < n)
  {
    // More elements than threads: each thread folds the elements that lie a
    // multiple of block_dimension() past its own slot into that slot.
    for (unsigned int i = context.block_dimension() + context.thread_index(); i < n; i += context.block_dimension())
      data[context.thread_index()] = binary_op(data[context.thread_index()], data[i]);

    context.barrier();

    // Everything at index >= block_dimension() has now been consumed, so only
    // the leading block_dimension() slots remain to be reduced.  Without this
    // the loop below would re-read the already-folded tail and count those
    // elements twice.
    n = context.block_dimension();
  }

  // Repeatedly fold the upper half of the live range onto the lower half,
  // mirroring around the middle; for odd n the middle element is carried over
  // untouched because the live range shrinks to n - half (the rounded-up half).
  while (n > 1)
  {
    unsigned int half = n / 2;

    if (context.thread_index() < half)
      data[context.thread_index()] = binary_op(data[context.thread_index()], data[n - context.thread_index() - 1]);

    context.barrier();

    n = n - half;
  }
}

} // end namespace block
} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust
|
|
|
|