Skip to content

Commit

Permalink
[SYCL][NFC] Optimize verifyUsedKernelBundle usage (#15496)
Browse files Browse the repository at this point in the history
This PR intended to reduce amount of template instantiations performed
during both host & device compilation passes, as well as reduce amount
of LLVM IR emitted during host compilation pass.

`handler::verifyUsedKernelBundle` is a one-line helper which was inlined
to avoid one instantiation per-kernel and emission of associated LLVM
IR. Its uses (now inlined) were also moved under `#ifndef
__SYCL_DEVICE_ONLY__` to reduce amount of code device compiler has to
parse.
  • Loading branch information
AlexeySachkov authored Sep 27, 2024
1 parent 4040a7e commit f126cf5
Showing 1 changed file with 26 additions and 16 deletions.
42 changes: 26 additions & 16 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,10 +774,6 @@ class __SYCL_EXPORT handler {
///
/// \param KernelName is the name of the SYCL kernel to check that the used
/// kernel bundle contains.
template <typename KernelNameT> void verifyUsedKernelBundle() {
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<KernelNameT>()});
}
void verifyUsedKernelBundleInternal(detail::string_view KernelName);

/// Stores lambda to the template-free object
Expand Down Expand Up @@ -1233,7 +1229,6 @@ class __SYCL_EXPORT handler {
// conflicts they should be included in the name.
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();

// Range rounding can be disabled by the user.
// Range rounding is not done on the host device.
Expand All @@ -1254,6 +1249,8 @@ class __SYCL_EXPORT handler {
kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
PropertiesT>(Wrapper);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
// We are executing over the rounded range, but there are still
// items/ids that are are constructed in ther range rounded
// kernel use items/ids in the user range, which means that
Expand All @@ -1279,6 +1276,8 @@ class __SYCL_EXPORT handler {
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
detail::checkValueRange<Dims>(UserRange);
setNDRangeDescriptor(std::move(UserRange));
Expand Down Expand Up @@ -1315,7 +1314,6 @@ class __SYCL_EXPORT handler {
// conflicts they should be included in the name.
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
static_assert(
Expand All @@ -1329,6 +1327,8 @@ class __SYCL_EXPORT handler {
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(ExecutionRange);
setNDRangeDescriptor(std::move(ExecutionRange));
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
Expand Down Expand Up @@ -1405,14 +1405,15 @@ class __SYCL_EXPORT handler {
// conflicts they should be included in the name.
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
(void)NumWorkGroups;
(void)Props;
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
detail::checkValueRange<Dims>(NumWorkGroups);
setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true);
Expand Down Expand Up @@ -1446,7 +1447,6 @@ class __SYCL_EXPORT handler {
// conflicts they should be included in the name.
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
(void)NumWorkGroups;
Expand All @@ -1455,6 +1455,8 @@ class __SYCL_EXPORT handler {
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
processProperties<detail::isKernelESIMD<NameT>(), PropertiesT>(Props);
nd_range<Dims> ExecRange =
nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
Expand Down Expand Up @@ -1728,9 +1730,10 @@ class __SYCL_EXPORT handler {
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;

verifyUsedKernelBundle<NameT>();
kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
// No need to check if range is out of INT_MAX limits as it's compile-time
// known constant.
setNDRangeDescriptor(range<1>{1});
Expand Down Expand Up @@ -2024,7 +2027,6 @@ class __SYCL_EXPORT handler {
throwIfActionIsCreated();
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
using TransformedArgType = std::conditional_t<
std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
Expand All @@ -2033,6 +2035,8 @@ class __SYCL_EXPORT handler {
(void)WorkItemOffset;
kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
Expand Down Expand Up @@ -2165,10 +2169,11 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
(void)Kernel;
kernel_single_task<NameT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
// No need to check if range is out of INT_MAX limits as it's compile-time
// known constant
setNDRangeDescriptor(range<1>{1});
Expand Down Expand Up @@ -2200,12 +2205,13 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
(void)Kernel;
(void)NumWorkItems;
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(NumWorkItems);
setNDRangeDescriptor(std::move(NumWorkItems));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
Expand Down Expand Up @@ -2239,13 +2245,14 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
(void)Kernel;
(void)NumWorkItems;
(void)WorkItemOffset;
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
Expand Down Expand Up @@ -2278,13 +2285,14 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
(void)Kernel;
(void)NDRange;
kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(NDRange);
setNDRangeDescriptor(std::move(NDRange));
MKernel = detail::getSyclObjImpl(std::move(Kernel));
Expand Down Expand Up @@ -2321,13 +2329,14 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
(void)Kernel;
(void)NumWorkGroups;
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
detail::checkValueRange<Dims>(NumWorkGroups);
setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true);
MKernel = detail::getSyclObjImpl(std::move(Kernel));
Expand Down Expand Up @@ -2361,14 +2370,15 @@ class __SYCL_EXPORT handler {
setHandlerKernelBundle(Kernel);
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
verifyUsedKernelBundle<NameT>();
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
(void)Kernel;
(void)NumWorkGroups;
(void)WorkGroupSize;
kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
verifyUsedKernelBundleInternal(
detail::string_view{detail::getKernelName<NameT>()});
nd_range<Dims> ExecRange =
nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
detail::checkValueRange<Dims>(ExecRange);
Expand Down

0 comments on commit f126cf5

Please sign in to comment.