25#include "concepts.hpp"
27#include "program_state.hpp"
28#include "hip_runtime_api.h"
37hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices,
40hipError_t hipLaunchCooperativeKernel(
const void* f, dim3 gridDim,
41 dim3 blockDim,
void** args,
42 size_t sharedMem, hipStream_t stream,
45hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
50#pragma GCC visibility push(hidden)
53template <
typename T,
typename std::enable_if<std::is_
integral<T>{}>::type* =
nullptr>
54inline T round_up_to_next_multiple_nonnegative(T x, T y) {
62 typename std::enable_if<n ==
sizeof...(Ts)>::type* =
nullptr>
64 const std::tuple<Ts...>&,
65 const kernargs_size_align&,
73 typename std::enable_if<n !=
sizeof...(Ts)>::type* =
nullptr>
75 const std::tuple<Ts...>& formals,
76 const kernargs_size_align& size_align,
78 using T =
typename std::tuple_element<n, std::tuple<Ts...>>::type;
81 !std::is_reference<T>{},
82 "A __global__ function cannot have a reference as one of its "
84 #if defined(HIP_STRICT)
86 std::is_trivially_copyable<T>{},
87 "Only TriviallyCopyable types can be arguments to a __global__ "
91 kernarg.resize(round_up_to_next_multiple_nonnegative(
92 kernarg.size(), size_align.alignment(n)) + size_align.size(n));
95 kernarg.data() + kernarg.size() - size_align.size(n),
96 &std::get<n>(formals),
98 return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
101template <
typename... Formals,
typename... Actuals>
103 void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
104 static_assert(
sizeof...(Formals) ==
sizeof...(Actuals),
105 "The count of formal arguments must match the count of actuals.");
107 if (
sizeof...(Formals) == 0)
return {};
109 std::tuple<Formals...> to_formals{std::move(actuals)};
111 kernarg.reserve(
sizeof(to_formals));
113 auto& ps = hip_impl::get_program_state();
114 return make_kernarg<0>(to_formals,
115 ps.get_kernargs_size_align(
116 reinterpret_cast<std::uintptr_t
>(kernel)),
121HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream);
124__attribute__((visibility(
"hidden")))
125void hipLaunchKernelGGLImpl(
126 std::uintptr_t function_address,
127 const dim3& numBlocks,
128 const dim3& dimBlocks,
129 std::uint32_t sharedMemBytes,
133 const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address,
134 target_agent(stream));
136 hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
137 dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
138 stream,
nullptr, kernarg);
145hipError_t hipOccupancyMaxPotentialBlockSize(
int* gridSize,
int* blockSize,
146 T kernel,
size_t dynSharedMemPerBlk = 0,
int blockSizeLimit = 0) {
148 using namespace hip_impl;
150 hip_impl::hip_init();
151 auto f = get_program_state().kernel_descriptor(
reinterpret_cast<std::uintptr_t
>(kernel),
154 return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
155 dynSharedMemPerBlk, blockSizeLimit);
160hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(
int* gridSize,
int* blockSize,
161 T kernel,
size_t dynSharedMemPerBlk = 0,
int blockSizeLimit = 0,
unsigned int flags = 0 ) {
163 using namespace hip_impl;
165 hip_impl::hip_init();
166 if(flags != hipOccupancyDefault)
return hipErrorNotSupported;
167 auto f = get_program_state().kernel_descriptor(
reinterpret_cast<std::uintptr_t
>(kernel),
170 return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
171 dynSharedMemPerBlk, blockSizeLimit);
174template <
typename... Args,
typename F = void (*)(Args...)>
176void hipLaunchKernelGGL(F kernel,
const dim3& numBlocks,
const dim3& dimBlocks,
177 std::uint32_t sharedMemBytes, hipStream_t stream,
179 hip_impl::hip_init();
180 auto kernarg = hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
181 std::size_t kernarg_size =
kernarg.size();
184 HIP_LAUNCH_PARAM_BUFFER_POINTER,
186 HIP_LAUNCH_PARAM_BUFFER_SIZE,
188 HIP_LAUNCH_PARAM_END};
190 hip_impl::hipLaunchKernelGGLImpl(
reinterpret_cast<std::uintptr_t
>(kernel),
191 numBlocks, dimBlocks, sharedMemBytes,
197__attribute__((visibility(
"hidden")))
198hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim,
199 void** args,
size_t sharedMem,
200 hipStream_t stream) {
201 hip_impl::hip_init();
202 auto& ps = hip_impl::get_program_state();
203 return hipLaunchCooperativeKernel(
reinterpret_cast<void*
>(f), gridDim,
204 blockDim, args, sharedMem, stream, ps);
208__attribute__((visibility(
"hidden")))
209hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
211 unsigned int flags) {
213 hip_impl::hip_init();
214 auto& ps = hip_impl::get_program_state();
215 return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps);
218#pragma GCC visibility pop
Definition program_state.hpp:48
Definition program_state.hpp:63