1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-23 13:04:13 +00:00
ccminer/compat/thrust/system/cuda/detail/runtime_introspection.inl

170 lines
4.2 KiB
Plaintext
Raw Normal View History

2014-03-18 22:17:40 +01:00
/*
* Copyright 2008-2012 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thrust/detail/config.h>
#include <thrust/system/cuda/detail/runtime_introspection.h>
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
#include <thrust/detail/util/blocking.h>
#include <thrust/detail/minmax.h>
#include <thrust/system_error.h>
#include <thrust/system/cuda/error.h>
namespace thrust
{
namespace system
{
namespace cuda
{
namespace detail
{
namespace runtime_introspection_detail
{
// Queries the CUDA runtime for the properties of device device_id and copies
// the fields listed below into p.
//
// Throws thrust::system_error (in thrust::cuda_category()) when
// cudaGetDeviceProperties() reports an error; p is not written in that case.
inline void get_device_properties(device_properties_t &p, int device_id)
{
  cudaDeviceProp raw;

  const cudaError_t status = cudaGetDeviceProperties(&raw, device_id);
  if(status != cudaSuccess)
  {
    throw thrust::system_error(status, thrust::cuda_category());
  }

  // be careful about how this is initialized!
  // the initializers are positional, so their order has to follow the
  // member order of device_properties_t
  device_properties_t snapshot = {
    raw.major,
    {
      raw.maxGridSize[0],
      raw.maxGridSize[1],
      raw.maxGridSize[2]
    },
    raw.maxThreadsPerBlock,
    raw.maxThreadsPerMultiProcessor,
    raw.minor,
    raw.multiProcessorCount,
    raw.regsPerBlock,
    raw.sharedMemPerBlock,
    raw.warpSize
  };

  p = snapshot;
} // end get_device_properties()
} // end runtime_introspection_detail
// Returns the properties of the device with ordinal device_id.
//
// Querying the runtime is slow, so the results for ordinals in
// [0, max_num_devices) are cached in function-local statics and reused on
// later calls. Any other ordinal -- too large, or negative -- cannot be used
// as a cache index and is forwarded straight to
// runtime_introspection_detail::get_device_properties(), which throws
// thrust::system_error if the runtime query reports an error.
inline device_properties_t device_properties(int device_id)
{
  // cache the result of get_device_properties, because it is slow
  // only cache the first few devices
  static const int max_num_devices = 16;

  static bool properties_exist[max_num_devices] = {0};
  static device_properties_t device_properties[max_num_devices] = {};

  // ordinals outside the cache bounds must never index the arrays above;
  // a negative device_id would otherwise read and write out of bounds
  if(device_id < 0 || device_id >= max_num_devices)
  {
    device_properties_t result;
    runtime_introspection_detail::get_device_properties(result, device_id);
    return result;
  }

  if(!properties_exist[device_id])
  {
    runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id);

    // disallow the compiler to move the write to properties_exist[device_id]
    // before the initialization of device_properties[device_id]
    __thrust_compiler_fence();

    properties_exist[device_id] = true;
  }

  return device_properties[device_id];
}
inline int current_device()
{
int result = -1;
cudaError_t error = cudaGetDevice(&result);
if(error)
throw thrust::system_error(error, thrust::cuda_category());
if(result < 0)
throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category());
return result;
}
// Convenience overload: returns the properties of the device reported by
// current_device(). Exceptions thrown by current_device() or by the
// per-device overload propagate to the caller.
inline device_properties_t device_properties(void)
{
  const int active_device = current_device();
  return device_properties(active_device);
}
// Returns the attributes the CUDA runtime reports for the given kernel.
//
// When the translation unit is not compiled with __CUDACC__ defined,
// cudaFuncGetAttributes() is unavailable and a value-initialized
// function_attributes_t is returned instead.
//
// Throws thrust::system_error (in thrust::cuda_category()) if
// cudaFuncGetAttributes() reports an error.
template <typename KernelFunction>
inline function_attributes_t function_attributes(KernelFunction kernel)
{
#ifndef __CUDACC__
  // cudaFuncGetAttributes() only exists when __CUDACC__ is defined
  return function_attributes_t();
#else
  // the runtime is handed the kernel as a plain function pointer
  typedef void (*kernel_entry_t)();
  kernel_entry_t entry = reinterpret_cast<kernel_entry_t>(kernel);

  cudaFuncAttributes raw;
  const cudaError_t status = cudaFuncGetAttributes(&raw, entry);
  if(status != cudaSuccess)
  {
    throw thrust::system_error(status, thrust::cuda_category());
  }

  // be careful about how this is initialized!
  // the initializers are positional, so their order has to follow the
  // member order of function_attributes_t
  function_attributes_t result = {
    raw.constSizeBytes,
    raw.localSizeBytes,
    raw.maxThreadsPerBlock,
    raw.numRegs,
    raw.sharedSizeBytes
  };

  return result;
#endif // __CUDACC__
}
// Encodes the compute capability held in properties as a single number:
// ten times the major revision plus the minor revision
// (e.g. major 3, minor 5 yields 35).
inline size_t compute_capability(const device_properties_t &properties)
{
  return static_cast<size_t>(properties.minor + 10 * properties.major);
}
inline size_t compute_capability(void)
{
return compute_capability(device_properties());
}
} // end namespace detail
} // end namespace cuda
} // end namespace system
} // end namespace thrust