[llvm][RISCV] Support Zvfofp8min llvm intrinsics and codegen #172585
Conversation
This is a follow-up patch for llvm#157014 to support LLVM intrinsics and codegen.
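For reference, the new intrinsics take the same operand shape as the existing RVV conversion intrinsics: a passthru vector, the source vector, a rounding-mode immediate on the narrowing forms, and the VL. Below is a minimal sketch of an unmasked FP32-to-OFP8 conversion, written for riscv64 (so the tests' iXLen placeholder becomes i64); the function name is illustrative, while the intrinsic name and operand order are taken verbatim from the tests in the patch below.

; Narrow FP32 to OFP8 in the alternate element format; OFP8 results are
; carried in i8 vector elements.
define <vscale x 1 x i8> @ofp8_from_f32(<vscale x 1 x float> %src, i64 %vl) {
  %r = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.q.alt.nxv1i8.nxv1f32(
      <vscale x 1 x i8> undef,     ; passthru
      <vscale x 1 x float> %src,   ; FP32 source
      i64 7,                       ; rounding mode 7 = DYN (use the frm CSR)
      i64 %vl)                     ; vector length
  ret <vscale x 1 x i8> %r
}

With -mattr=+v,+zvfbfmin,+experimental-zvfofp8min this lowers to vfncvt.f.f.q under an e8alt vtype, as the CHECK lines in the tests show.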
@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes

This is a follow-up patch for #157014 to support LLVM intrinsics and codegen.

Patch is 82.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172585.diff

10 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 77fcc46ea5a89..9088e5e6a357b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1958,6 +1958,26 @@ let TargetPrefix = "riscv" in {
let TargetPrefix = "riscv" in
def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
+
+//===----------------------------------------------------------------------===//
+// Zvfofp8min - OFP8 conversion extension
+// The Zvfofp8min extension provides basic support for the two 8-bit
+// floating-point formats defined in the Open Compute Project OFP8
+// specification, OFP8 E4M3 and OFP8 E5M2.
+let TargetPrefix = "riscv" in {
+ // OFP8 to BF16 conversion instructions
+ defm vfwcvt_f_f_v_alt : RISCVConversion;
+ // BF16 to OFP8 conversion instructions
+ defm vfncvt_sat_f_f_w : RISCVConversionRoundingMode;
+ defm vfncvt_f_f_w_alt : RISCVConversionRoundingMode;
+ defm vfncvt_sat_f_f_w_alt : RISCVConversionRoundingMode;
+ // FP32 to OFP8 conversion instructions
+ defm vfncvt_f_f_q : RISCVConversionRoundingMode;
+ defm vfncvt_f_f_q_alt : RISCVConversionRoundingMode;
+ defm vfncvt_sat_f_f_q : RISCVConversionRoundingMode;
+ defm vfncvt_sat_f_f_q_alt : RISCVConversionRoundingMode;
+} // TargetPrefix = "riscv"
+
// Vendor extensions
//===----------------------------------------------------------------------===//
include "llvm/IR/IntrinsicsRISCVXTHead.td"
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 7b9c4b3e800cd..f2c5f6947aa00 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -216,11 +216,13 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
// Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
- // altfmt=1 without zvfbfa extension, or non-zero in bits 9 and above.
+ // altfmt=1 without zvfbfa or zvfofp8min extension, or non-zero in bits 9 and
+ // above.
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED ||
RISCVVType::getSEW(Imm) > 64 ||
(RISCVVType::isAltFmt(Imm) &&
!(STI.hasFeature(RISCV::FeatureStdExtZvfbfa) ||
+ STI.hasFeature(RISCV::FeatureStdExtZvfofp8min) ||
STI.hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))) ||
(Imm >> 9) != 0) {
O << formatImm(Imm);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e07d7b5ee5563..a32f6a566493f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5851,8 +5851,9 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction,
}
multiclass VPatConversionWF_VF<string intrinsic, string instruction,
+ list<VTypeInfoToWide> wlist = AllWidenableFloatVectors,
bit isSEWAware = 0> {
- foreach fvtiToFWti = AllWidenableFloatVectors in {
+ foreach fvtiToFWti = wlist in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
    // Define vfwcvt.f.f.v for f16 when Zvfhmin is enabled.
@@ -7177,6 +7178,7 @@ defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU",
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X",
isSEWAware=1>;
defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F",
+ wlist=AllWidenableFloatVectors,
isSEWAware=1>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfofp8min.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfofp8min.td
index 86cab697cbf55..b067488ea662f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfofp8min.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfofp8min.td
@@ -24,3 +24,130 @@ let Predicates = [HasStdExtZvfofp8min], Constraints = "@earlyclobber $vd",
defm VFNCVT_F_F_Q : VNCVTF_FV_VS2<"vfncvt.f.f.q", 0b010010, 0b11001>;
defm VFNCVT_SAT_F_F_Q : VNCVTF_FV_VS2<"vfncvt.sat.f.f.q", 0b010010, 0b11011>;
}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+defvar MxListQ = [V_MF8, V_MF4, V_MF2, V_M1, V_M2];
+
+defset list<VTypeInfoToWide> AllWidenableIntToBFloatVectors = {
+ def : VTypeInfoToWide<VI8MF8, VBF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VBF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VBF16M1>;
+ def : VTypeInfoToWide<VI8M1, VBF16M2>;
+ def : VTypeInfoToWide<VI8M2, VBF16M4>;
+ def : VTypeInfoToWide<VI8M4, VBF16M8>;
+}
+
+defset list<VTypeInfoToWide> AllWidenableInt8ToFloat32Vectors = {
+ def : VTypeInfoToWide<VI8MF8, VF32MF2>;
+ def : VTypeInfoToWide<VI8MF4, VF32M1>;
+ def : VTypeInfoToWide<VI8MF2, VF32M2>;
+ def : VTypeInfoToWide<VI8M1, VF32M4>;
+ def : VTypeInfoToWide<VI8M2, VF32M8>;
+}
+
+class QVRClass<LMULInfo m> {
+ LMULInfo c = !cond(!eq(m, V_MF8): V_MF2,
+ !eq(m, V_MF4): V_M1,
+ !eq(m, V_MF2): V_M2,
+ !eq(m, V_M1): V_M4,
+ !eq(m, V_M2): V_M8);
+}
+
+multiclass VPseudoVWCVTD_V_NoSched_Zvfofp8min {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in {
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8,
+ TargetConstraintType=3>;
+ }
+}
+
+multiclass VPseudoVNCVTD_W_RM_NoSched_Zvfofp8min {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in {
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=8,
+ TargetConstraintType=2>;
+ }
+}
+
+multiclass VPseudoVNCVTD_Q_RM_NoSched_Zvfofp8min {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListQ in {
+ defm _Q : VPseudoConversionRoundingMode<m.vrclass, QVRClass<m>.c.vrclass, m,
+ constraint, sew=8,
+ TargetConstraintType=2>;
+ }
+}
+
+let mayRaiseFPException = true, Predicates = [HasStdExtZvfofp8min] in {
+ defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V_NoSched_Zvfofp8min;
+ let AltFmtType = IS_ALTFMT in
+ defm PseudoVFWCVTBF16_F_F_ALT : VPseudoVWCVTD_V_NoSched_Zvfofp8min;
+
+ defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVTBF16_SAT_F_F : VPseudoVNCVTD_W_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_Q_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVT_SAT_F_F : VPseudoVNCVTD_Q_RM_NoSched_Zvfofp8min;
+ let AltFmtType = IS_ALTFMT in {
+ defm PseudoVFNCVTBF16_F_F_ALT : VPseudoVNCVTD_W_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVTBF16_SAT_F_F_ALT : VPseudoVNCVTD_W_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_Q_RM_NoSched_Zvfofp8min;
+ defm PseudoVFNCVT_SAT_F_F_ALT : VPseudoVNCVTD_Q_RM_NoSched_Zvfofp8min;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Patterns
+//===----------------------------------------------------------------------===//
+multiclass VPatConversionQF_RM<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableInt8ToFloat32Vectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasStdExtZvfofp8min] in
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "Q",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfofp8min] in {
+ // OFP8 to BF16 conversion instructions
+ defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v",
+ "PseudoVFWCVTBF16_F_F",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v_alt",
+ "PseudoVFWCVTBF16_F_F_ALT",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ // BF16 to OFP8 conversion instructions
+ defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w",
+ "PseudoVFNCVTBF16_F_F",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_sat_f_f_w",
+ "PseudoVFNCVTBF16_SAT_F_F",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w_alt",
+ "PseudoVFNCVTBF16_F_F_ALT",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_sat_f_f_w_alt",
+ "PseudoVFNCVTBF16_SAT_F_F_ALT",
+ wlist=AllWidenableIntToBFloatVectors,
+ isSEWAware=1>;
+ // FP32 to OFP8 conversion instructions
+ defm : VPatConversionQF_RM<"int_riscv_vfncvt_f_f_q",
+ "PseudoVFNCVT_F_F", isSEWAware=1>;
+ defm : VPatConversionQF_RM<"int_riscv_vfncvt_sat_f_f_q",
+ "PseudoVFNCVT_SAT_F_F", isSEWAware=1>;
+ defm : VPatConversionQF_RM<"int_riscv_vfncvt_f_f_q_alt",
+ "PseudoVFNCVT_F_F_ALT", isSEWAware=1>;
+ defm : VPatConversionQF_RM<"int_riscv_vfncvt_sat_f_f_q_alt",
+ "PseudoVFNCVT_SAT_F_F_ALT", isSEWAware=1>;
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-alt.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-alt.ll
new file mode 100644
index 0000000000000..feafd4ba2d01a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-alt.ll
@@ -0,0 +1,357 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfbfmin,+experimental-zvfofp8min \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfbfmin,+experimental-zvfofp8min \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_f.f.w.alt_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.w.alt_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.f.f.w.alt.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> undef,
+ <vscale x 32 x bfloat> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_f.f.w.alt_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w.alt_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_f.f.q.alt_nxv1i8_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.q.alt_nxv1i8_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.f.f.q v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.q.alt.nxv1i8.nxv1f32(
+ <vscale x 1 x i8> undef,
+ <vscale x 1 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_f.f.q.alt_nxv1i8_nxv1f32(<vscale x 1 x i8> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.q.alt_nxv1i8_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.f.f.q v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.q.alt.mask.nxv1i8.nxv1f32(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 7, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_f.f.q.alt_nxv2i8_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_f.f.q.alt_nxv2i8_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.f.f.q v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.f.f.q.alt.nxv2i8.nxv2f32(
+ <vscale x 2 x i8> undef,
+ <vscale x 2 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_f.f.q.alt_nxv2i8_nxv2f32(<vscale x 2 x i8> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.q.alt_nxv2i8_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.f.f.q v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.f.f.q.alt.mask.nxv2i8.nxv2f32(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x ...
[truncated]
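As a quick way to exercise the new lowering locally, here is a standalone masked BF16-to-OFP8 test, adapted directly from the vfncvt-f-f-alt.ll file above (only the function name differs; the RUN flags, intrinsic signature, and CHECK lines mirror the originals):

; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfbfmin,+experimental-zvfofp8min \
; RUN:   -verify-machineinstrs -target-abi=lp64d < %s | FileCheck %s
define <vscale x 1 x i8> @masked_bf16_to_ofp8(<vscale x 1 x i8> %pt, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i64 %vl) nounwind {
; CHECK: vsetvli zero, a0, e8alt, mf8, ta, mu
; CHECK: vfncvtbf16.f.f.w v8, v9, v0.t
  %r = call <vscale x 1 x i8> @llvm.riscv.vfncvt.f.f.w.alt.mask.nxv1i8.nxv1bf16(
      <vscale x 1 x i8> %pt,       ; merge/passthru operand
      <vscale x 1 x bfloat> %v,    ; BF16 source
      <vscale x 1 x i1> %m,        ; mask in v0
      i64 7, i64 %vl, i64 1)       ; frm=DYN, vl, policy=1 (ta, mu)
  ret <vscale x 1 x i8> %r
}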
@llvm/pr-subscribers-llvm-ir

Author: Brandon Wu (4vtomat)

(Same changes summary and patch as the @llvm/pr-subscribers-backend-risc-v notification above.)
✅ With the latest revision this PR passed the undef deprecator.