diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9b8a8546b072c0..7b8ecf29a9de6e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; // Convert from FP8 to half-precision/BFloat16 multi-vector - def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector - def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { @@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // SVE FP8 widening conversions + + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00000000000000..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvt1_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvt2_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt2.nxv8bf16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z20test_svcvt2_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt2.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svcvt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt2_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtlt1_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z22test_svcvtlt1_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svcvtlt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtlt1_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtlt2_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z22test_svcvtlt2_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svcvtlt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtlt2_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvt1_f16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8f16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvt1_f16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8f16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt1_f16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvt2_f16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt2.nxv8f16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvt2_f16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt2.nxv8f16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt2_f16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtlt1_f16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z21test_svcvtlt1_f16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvtlt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtlt1_f16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtlt2_f16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16( [[ZN]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z21test_svcvtlt2_f16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16( [[ZN]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtlt2_f16,_mf8,_fpm)(zn, fpm); +} diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c new file mode 100644 index 00000000000000..aafd42f798d935 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c @@ -0,0 +1,24 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm %s + +#include + +void test_features(svmfloat8_t zn, fpm_t fpm) { + svcvt1_bf16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvt2_bf16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtlt1_bf16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvtlt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtlt2_bf16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvtlt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvt1_f16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvt2_f16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtlt1_f16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtlt2_f16_mf8_fpm(zn, fpm); + // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 54a88bac2fb4be..8d30b9bd49ff98 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3860,6 +3860,17 @@ def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; // let TargetPrefix = "aarch64" in { + // SVE Widening Conversions + class SVE2_FP8_Cvt + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_nxv16i8_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt; + def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt; + class SME2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], @@ -3886,4 +3897,4 @@ let TargetPrefix = "aarch64" in { // FP8 outer product def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic; def int_aarch64_sme_fp8_fmopa_za32 : SME_FP8_OuterProduct_Intrinsic; -} \ No newline at end of file +} diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 574432869471ad..da585dd3a21c88 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4369,14 +4369,14 @@ let Predicates = [HasNonStreamingSVE2p2orSME2p2] in { //===----------------------------------------------------------------------===// let Predicates = [HasSVE2orSME2, HasFP8] in { // FP8 upconvert -defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">; -defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">; -defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">; -defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">; -defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">; -defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">; -defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">; -defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">; +defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt", nxv8f16, int_aarch64_sve_fp8_cvt1>; +defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt", nxv8f16, int_aarch64_sve_fp8_cvt2>; +defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt", nxv8bf16, int_aarch64_sve_fp8_cvt1>; +defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt", nxv8bf16, int_aarch64_sve_fp8_cvt2>; +defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt1>; +defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt2>; +defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt1>; +defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>; // FP8 downconvert defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index e9595ff89ddb8b..9ae66518dfb4ed 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10769,10 +10769,15 @@ class sve2_fp8_cvt_single opc, string mnemonic, let Inst{9-5} = Zn; let Inst{4-0} = Zd; let Uses = [FPMR, FPCR]; + + let mayLoad = 1; + let mayStore = 0; } -multiclass sve2_fp8_cvt_single opc, string mnemonic> { +multiclass sve2_fp8_cvt_single opc, string mnemonic, ValueType vtd, SDPatternOperator op> { def _BtoH : sve2_fp8_cvt_single; + + def : SVE_1_Op_Pat(NAME # _BtoH)>; } // FP8 downconvert diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll new file mode 100644 index 00000000000000..a1093c28467ab3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s +; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define @cvt1_bf16( %s) { +; CHECK-LABEL: cvt1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bf1cvt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( %s) + ret %r +} + +define @cvt2_bf16( %s) { +; CHECK-LABEL: cvt2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bf2cvt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvt2.nxv8bf16( %s) + ret %r +} + +define @cvtlt1_bf16( %s) { +; CHECK-LABEL: cvtlt1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bf1cvtlt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16( %s) + ret %r +} + +define @cvtlt2_bf16( %s) { +; CHECK-LABEL: cvtlt2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bf2cvtlt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16( %s) + ret %r +} + +define @cvt1_f16( %s) { +; CHECK-LABEL: cvt1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: f1cvt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvt1.nxv8f16( %s) + ret %r +} + +define @cvt2_f16( %s) { +; CHECK-LABEL: cvt2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: f2cvt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvt2.nxv8f16( %s) + ret %r +} + + +define @cvtlt1_f16( %s) { +; CHECK-LABEL: cvtlt1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: f1cvtlt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16( %s) + ret %r +} + +define @cvtlt2_f16( %s) { +; CHECK-LABEL: cvtlt2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: f2cvtlt z0.h, z0.b +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16( %s) + ret %r +}