You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
4.2 KiB
169 lines
4.2 KiB
/*
 *  Copyright 2008-2012 NVIDIA Corporation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
|
|
#include <thrust/detail/config.h> |
|
#include <thrust/system/cuda/detail/runtime_introspection.h> |
|
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h> |
|
#include <thrust/detail/util/blocking.h> |
|
#include <thrust/detail/minmax.h> |
|
#include <thrust/system_error.h> |
|
#include <thrust/system/cuda/error.h> |
|
|
|
|
|
namespace thrust |
|
{ |
|
namespace system |
|
{ |
|
namespace cuda |
|
{ |
|
namespace detail |
|
{ |
|
namespace runtime_introspection_detail |
|
{ |
|
|
|
|
|
// Asks the CUDA runtime for the properties of device `device_id` and copies
// the selected fields into `p`.
//
// Throws thrust::system_error (cuda_category) when cudaGetDeviceProperties
// reports an error; `p` is not written in that case because the assignment
// happens only after the query has succeeded.
inline void get_device_properties(device_properties_t &p, int device_id)
{
  cudaDeviceProp raw;

  const cudaError_t status = cudaGetDeviceProperties(&raw, device_id);

  if(status != cudaSuccess)
  {
    throw thrust::system_error(status, thrust::cuda_category());
  }

  // The initializers below are positional: their order has to line up with
  // the member order of device_properties_t, so do not reshuffle them.
  device_properties_t selected = {
    raw.major,
    {
      raw.maxGridSize[0],
      raw.maxGridSize[1],
      raw.maxGridSize[2]
    },
    raw.maxThreadsPerBlock,
    raw.maxThreadsPerMultiProcessor,
    raw.minor,
    raw.multiProcessorCount,
    raw.regsPerBlock,
    raw.sharedMemPerBlock,
    raw.warpSize
  };

  p = selected;
} // end get_device_properties()
|
|
|
|
|
} // end runtime_introspection_detail |
|
|
|
|
|
// Returns the properties of device `device_id`, caching the result for the
// first `max_num_devices` device ids because the underlying runtime query is
// slow.
//
// Ids outside [0, max_num_devices) are never cached: every call queries the
// runtime directly. In particular a negative id is forwarded to the runtime
// query (which is expected to reject it and make get_device_properties throw)
// instead of being used as an index into the cache arrays.
//
// Throws thrust::system_error (via get_device_properties) when the runtime
// query fails.
inline device_properties_t device_properties(int device_id)
{
  // cache the result of get_device_properties, because it is slow
  // only cache the first few devices
  static const int max_num_devices = 16;

  static bool properties_exist[max_num_devices] = {0};
  static device_properties_t device_properties[max_num_devices] = {};

  // No cache slot exists for this id. The lower-bound check matters: without
  // it a negative id would index the static arrays out of bounds.
  if(device_id < 0 || device_id >= max_num_devices)
  {
    device_properties_t result;
    runtime_introspection_detail::get_device_properties(result, device_id);
    return result;
  }

  if(!properties_exist[device_id])
  {
    runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id);

    // disallow the compiler to move the write to properties_exist[device_id]
    // before the initialization of device_properties[device_id]
    __thrust_compiler_fence();

    properties_exist[device_id] = true;
  }

  return device_properties[device_id];
}
|
|
|
inline int current_device() |
|
{ |
|
int result = -1; |
|
|
|
cudaError_t error = cudaGetDevice(&result); |
|
|
|
if(error) |
|
throw thrust::system_error(error, thrust::cuda_category()); |
|
|
|
if(result < 0) |
|
throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category()); |
|
|
|
return result; |
|
} |
|
|
|
// Convenience overload: returns the properties of the device reported by
// current_device(). Any exception raised by current_device() or by
// device_properties(int) propagates to the caller.
inline device_properties_t device_properties(void)
{
  const int active_device = current_device();

  return device_properties(active_device);
}
|
|
|
template <typename KernelFunction> |
|
inline function_attributes_t function_attributes(KernelFunction kernel) |
|
{ |
|
// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined |
|
#ifdef __CUDACC__ |
|
typedef void (*fun_ptr_type)(); |
|
|
|
fun_ptr_type fun_ptr = reinterpret_cast<fun_ptr_type>(kernel); |
|
|
|
cudaFuncAttributes attributes; |
|
|
|
cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr); |
|
|
|
if(error) |
|
{ |
|
throw thrust::system_error(error, thrust::cuda_category()); |
|
} |
|
|
|
// be careful about how this is initialized! |
|
function_attributes_t result = { |
|
attributes.constSizeBytes, |
|
attributes.localSizeBytes, |
|
attributes.maxThreadsPerBlock, |
|
attributes.numRegs, |
|
attributes.sharedSizeBytes |
|
}; |
|
|
|
return result; |
|
#else |
|
return function_attributes_t(); |
|
#endif // __CUDACC__ |
|
} |
|
|
|
inline size_t compute_capability(const device_properties_t &properties) |
|
{ |
|
return 10 * properties.major + properties.minor; |
|
} |
|
|
|
inline size_t compute_capability(void) |
|
{ |
|
return compute_capability(device_properties()); |
|
} |
|
|
|
|
|
} // end namespace detail |
|
} // end namespace cuda |
|
} // end namespace system |
|
} // end namespace thrust |
|
|
|
|