GOSTcoin support for ccminer CUDA miner project, compatible with most nvidia cards
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

779 lines
36 KiB

/*
* Copyright 2008-2012 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*! \file reduce.h
* \brief Functions for reducing a range to a single value
*/
#pragma once
#include <thrust/detail/config.h>
#include <thrust/detail/execution_policy.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/pair.h>
namespace thrust
{
/*! \addtogroup reductions
* \{
*/
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
* reduction. \p reduce is similar to the C++ Standard Template Library's
* <tt>std::accumulate</tt>. The primary difference between the two functions
* is that <tt>std::accumulate</tt> guarantees the order of summation, while
* \p reduce requires associativity of the binary operation to parallelize
* the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case operator+) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param first The beginning of the sequence.
* \param last The end of the sequence.
* \return The result of the reduction.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and if \c x and \c y are objects of \p InputIterator's \c value_type,
* then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
* \c value_type. If \c T is \c InputIterator's \c value_type, then
* <tt>T(0)</tt> is defined.
*
* The following code snippet demonstrates how to use \p reduce to compute
* the sum of a sequence of integers using the \p thrust::host execution policy for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/execution_policy.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(thrust::host, data, data + 6);
*
* // result == 9
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
*/
template<typename DerivedPolicy,
         typename InputIterator>
  typename thrust::iterator_traits<InputIterator>::value_type
    reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
           InputIterator first,
           InputIterator last);
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
* reduction. \p reduce is similar to the C++ Standard Template Library's
* <tt>std::accumulate</tt>. The primary difference between the two functions
* is that <tt>std::accumulate</tt> guarantees the order of summation, while
* \p reduce requires associativity of the binary operation to parallelize
* the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case operator+) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* \param first The beginning of the sequence.
* \param last The end of the sequence.
* \return The result of the reduction.
*
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and if \c x and \c y are objects of \p InputIterator's \c value_type,
* then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
* \c value_type. If \c T is \c InputIterator's \c value_type, then
* <tt>T(0)</tt> is defined.
*
* The following code snippet demonstrates how to use \p reduce to compute
* the sum of a sequence of integers.
*
* \code
* #include <thrust/reduce.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(data, data + 6);
*
* // result == 9
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
*/
template<typename InputIterator>
  typename thrust::iterator_traits<InputIterator>::value_type
    reduce(InputIterator first,
           InputIterator last);
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \p init as the initial value of the
* reduction. \p reduce is similar to the C++ Standard Template Library's
* <tt>std::accumulate</tt>. The primary difference between the two functions
* is that <tt>std::accumulate</tt> guarantees the order of summation, while
* \p reduce requires associativity of the binary operation to parallelize
* the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case operator+) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param first The beginning of the input sequence.
* \param last The end of the input sequence.
* \param init The initial value.
* \return The result of the reduction.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and if \c x and \c y are objects of \p InputIterator's \c value_type,
* then <tt>x + y</tt> is defined and is convertible to \p T.
* \tparam T is convertible to \p InputIterator's \c value_type.
*
* The following code snippet demonstrates how to use \p reduce to compute
* the sum of a sequence of integers including an initialization value using the \p thrust::host
* execution policy for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/execution_policy.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(thrust::host, data, data + 6, 1);
*
* // result == 10
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
*/
template<typename DerivedPolicy,
         typename InputIterator,
         typename T>
  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
           InputIterator first,
           InputIterator last,
           T init);
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \p init as the initial value of the
* reduction. \p reduce is similar to the C++ Standard Template Library's
* <tt>std::accumulate</tt>. The primary difference between the two functions
* is that <tt>std::accumulate</tt> guarantees the order of summation, while
* \p reduce requires associativity of the binary operation to parallelize
* the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case operator+) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* \param first The beginning of the input sequence.
* \param last The end of the input sequence.
* \param init The initial value.
* \return The result of the reduction.
*
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and if \c x and \c y are objects of \p InputIterator's \c value_type,
* then <tt>x + y</tt> is defined and is convertible to \p T.
* \tparam T is convertible to \p InputIterator's \c value_type.
*
* The following code snippet demonstrates how to use \p reduce to compute
* the sum of a sequence of integers including an initialization value.
*
* \code
* #include <thrust/reduce.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(data, data + 6, 1);
*
* // result == 10
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
*/
template<typename InputIterator,
         typename T>
  T reduce(InputIterator first,
           InputIterator last,
           T init);
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \p init as the initial value of the
* reduction and \p binary_op as the binary function used for summation. \p reduce
* is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
* The primary difference between the two functions is that <tt>std::accumulate</tt>
* guarantees the order of summation, while \p reduce requires associativity of
* \p binary_op to parallelize the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case \p binary_op) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param first The beginning of the input sequence.
* \param last The end of the input sequence.
* \param init The initial value.
* \param binary_op The binary function used to 'sum' values.
* \return The result of the reduction.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and \c InputIterator's \c value_type is convertible to \c T.
* \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
* and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
* \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
* and \p BinaryFunction's \c result_type is convertible to \p T.
*
* The following code snippet demonstrates how to use \p reduce to
* compute the maximum value of a sequence of integers using the \p thrust::host execution policy
* for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/functional.h>
* #include <thrust/execution_policy.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(thrust::host,
* data, data + 6,
* -1,
* thrust::maximum<int>());
* // result == 3
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
* \see transform_reduce
*/
template<typename DerivedPolicy,
         typename InputIterator,
         typename T,
         typename BinaryFunction>
  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
           InputIterator first,
           InputIterator last,
           T init,
           BinaryFunction binary_op);
/*! \p reduce is a generalization of summation: it computes the sum (or some
* other binary operation) of all the elements in the range <tt>[first,
* last)</tt>. This version of \p reduce uses \p init as the initial value of the
* reduction and \p binary_op as the binary function used for summation. \p reduce
* is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
* The primary difference between the two functions is that <tt>std::accumulate</tt>
* guarantees the order of summation, while \p reduce requires associativity of
* \p binary_op to parallelize the reduction.
*
* Note that \p reduce also assumes that the binary reduction operator (in this
* case \p binary_op) is commutative. If the reduction operator is not commutative
* then \p thrust::reduce should not be used. Instead, one could use
* \p inclusive_scan (which does not require commutativity) and select the
* last element of the output array.
*
* \param first The beginning of the input sequence.
* \param last The end of the input sequence.
* \param init The initial value.
* \param binary_op The binary function used to 'sum' values.
* \return The result of the reduction.
*
* \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
* and \c InputIterator's \c value_type is convertible to \c T.
* \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
* and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
* \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
* and \p BinaryFunction's \c result_type is convertible to \p T.
*
* The following code snippet demonstrates how to use \p reduce to
* compute the maximum value of a sequence of integers.
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/functional.h>
* ...
* int data[6] = {1, 0, 2, 2, 1, 3};
* int result = thrust::reduce(data, data + 6,
* -1,
* thrust::maximum<int>());
* // result == 3
* \endcode
*
* \see http://www.sgi.com/tech/stl/accumulate.html
* \see transform_reduce
*/
template<typename InputIterator,
         typename T,
         typename BinaryFunction>
  T reduce(InputIterator first,
           InputIterator last,
           T init,
           BinaryFunction binary_op);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c plus and the result copied to \c values_output.
*
* This version of \p reduce_by_key uses the function object \c equal_to
* to test for equality and \c plus to reduce values with equal keys.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
* execution policy for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/execution_policy.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
                  InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c plus and the result copied to \c values_output.
*
* This version of \p reduce_by_key uses the function object \c equal_to
* to test for equality and \c plus to reduce values with equal keys.
*
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys.
*
* \code
* #include <thrust/reduce.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* new_end = thrust::reduce_by_key(A, A + N, B, C, D);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c plus and the result copied to \c values_output.
*
* This version of \p reduce_by_key uses the function object \c binary_pred
* to test for equality and \c plus to reduce values with equal keys.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \param binary_pred The binary predicate used to determine equality.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
* \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
* execution policy for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/execution_policy.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* thrust::equal_to<int> binary_pred;
* new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
                  InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output,
                  BinaryPredicate binary_pred);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c plus and the result copied to \c values_output.
*
* This version of \p reduce_by_key uses the function object \c binary_pred
* to test for equality and \c plus to reduce values with equal keys.
*
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \param binary_pred The binary predicate used to determine equality.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
* \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys.
*
* \code
* #include <thrust/reduce.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* thrust::equal_to<int> binary_pred;
* new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output,
                  BinaryPredicate binary_pred);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c BinaryFunction \c binary_op and the result copied to \c values_output.
* Specifically, if consecutive key iterators \c i and \c (i + 1) are
* such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
* values are reduced to a single value with \c binary_op.
*
* This version of \p reduce_by_key uses the function object \c binary_pred
* to test for equality and \c binary_op to reduce values with equal keys.
*
* The algorithm's execution is parallelized as determined by \p exec.
*
* \param exec The execution policy to use for parallelization.
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \param binary_pred The binary predicate used to determine equality.
* \param binary_op The binary function used to accumulate values.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam DerivedPolicy The name of the derived execution policy.
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
* \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
* \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
* and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
* execution policy for parallelization:
*
* \code
* #include <thrust/reduce.h>
* #include <thrust/execution_policy.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* thrust::equal_to<int> binary_pred;
* thrust::plus<int> binary_op;
* new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename DerivedPolicy,
         typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate,
         typename BinaryFunction>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
                  InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output,
                  BinaryPredicate binary_pred,
                  BinaryFunction binary_op);
/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
* For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
* that are equal, \p reduce_by_key copies the first element of the group to the
* \c keys_output. The corresponding values in the range are reduced using the
* \c BinaryFunction \c binary_op and the result copied to \c values_output.
* Specifically, if consecutive key iterators \c i and \c (i + 1) are
* such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
* values are reduced to a single value with \c binary_op.
*
* This version of \p reduce_by_key uses the function object \c binary_pred
* to test for equality and \c binary_op to reduce values with equal keys.
*
* \param keys_first The beginning of the input key range.
* \param keys_last The end of the input key range.
* \param values_first The beginning of the input value range.
* \param keys_output The beginning of the output key range.
* \param values_output The beginning of the output value range.
* \param binary_pred The binary predicate used to determine equality.
* \param binary_op The binary function used to accumulate values.
* \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
*
* \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
* \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
* \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
* and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
* \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
* \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
* and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
*
* \pre The input ranges shall not overlap either output range.
*
* The following code snippet demonstrates how to use \p reduce_by_key to
* compact a sequence of key/value pairs and sum values with equal keys.
*
* \code
* #include <thrust/reduce.h>
* ...
* const int N = 7;
* int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
* int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
* int C[N]; // output keys
* int D[N]; // output values
*
* thrust::pair<int*,int*> new_end;
* thrust::equal_to<int> binary_pred;
* thrust::plus<int> binary_op;
* new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op);
*
* // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
* // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
* \endcode
*
* \see reduce
* \see unique_copy
* \see unique_by_key
* \see unique_by_key_copy
*/
template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate,
         typename BinaryFunction>
  thrust::pair<OutputIterator1,OutputIterator2>
    reduce_by_key(InputIterator1 keys_first,
                  InputIterator1 keys_last,
                  InputIterator2 values_first,
                  OutputIterator1 keys_output,
                  OutputIterator2 values_output,
                  BinaryPredicate binary_pred,
                  BinaryFunction binary_op);
/*! \} // end reductions
*/
} // end namespace thrust
#include <thrust/detail/reduce.inl>