Skip to content

Commit

Permalink
[CPU][ARM] Enable fast math in ACL deconvolution executor (#26615)
Browse files Browse the repository at this point in the history
### Details:
- The ACL deconvolution `fast_math` option is enabled in `PERFORMANCE` mode.
- This option enables fast math computation in ACL. When this flag is
set, ACL may dispatch the fastest implementation available, which can
also introduce a drop in accuracy.
- Accuracy testing on a dataset subset highlights some deviations from
the reference values. Results are attached to the ticket.

### Tickets:
 - CVS-152534
  • Loading branch information
alvoron authored Oct 21, 2024
1 parent 2be7e5f commit d0056bd
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 2 deletions.
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,12 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
inferencePrecision = ov::element::undefined;
}
}
// enable ACL fast math in PERFORMANCE mode
#if defined(OV_CPU_WITH_ACL)
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
aclFastMath = true;
}
#endif
// disable dynamic quantization and kv quantization for best accuracy
if (executionMode == ov::hint::ExecutionMode::ACCURACY) {
if (!fcDynamicQuantizationGroupSizeSetExplicitly) {
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ struct Config {
uint64_t fcDynamicQuantizationGroupSize = 32;
ov::element::Type kvCachePrecision = ov::element::f16;
bool fcDynamicQuantizationGroupSizeSetExplicitly = false;
#if defined(OV_CPU_WITH_ACL)
bool aclFastMath = false;
#endif
#if defined(OPENVINO_ARCH_X86_64)
size_t rtCacheCapacity = 5000ul;
#else
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/nodes/deconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ Deconvolution::Deconvolution(const std::shared_ptr<ov::Node>& op,
for (size_t i = 0; i < deconvAttrs.dilation.size(); i++) {
deconvAttrs.kernel.push_back(weightDims[withGroups + 2 + i]);
}
#if defined(OV_CPU_WITH_ACL)
deconvAttrs.aclFastMath = context->getConfig().aclFastMath;
#endif

externOutShape = inputShapes.size() == 3;
biasPort = externOutShape ? 3 : 2;
Expand Down
5 changes: 3 additions & 2 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,

deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
configureThreadSafe([&] {
deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);
deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info, deconvAttrs.aclFastMath);
});
return true;
}
Expand Down Expand Up @@ -271,7 +271,8 @@ bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
&weiTensorInfo,
deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
&dstTensorInfo,
deconv_info);
deconv_info,
deconvAttrs.aclFastMath);
if (!status) {
DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
return false;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/deconv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct DeconvAttrs {
std::vector<ptrdiff_t> paddingR;
ov::CoordinateDiff outputPadding;
bool withBiasesParam = false;
#if defined(OV_CPU_WITH_ACL)
bool aclFastMath = false;
#endif
};

class DeconvExecutor {
Expand Down

0 comments on commit d0056bd

Please sign in to comment.