Skip to content

Commit

Permalink
[CIR] [Lowering] [X86_64] Support VAArg for LongDouble (#1101)
Browse files Browse the repository at this point in the history
This is the following of #1100.

After #1100, when we want to use
LongDouble for VAArg, we will be in trouble due to details in X86_64's
ABI and this patch tries to address this.

The practical impact of this patch is that, together with
#1088 and a small follow-up fix,
we can build and run all of the C benchmarks in SpecCPU 2017. I think that is a
milestone.
  • Loading branch information
ChuanqiXu9 authored Nov 13, 2024
1 parent ef39145 commit 4ceef42
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 8 deletions.
3 changes: 2 additions & 1 deletion clang/lib/CIR/Dialect/IR/CIRTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
llvm::TypeSize
FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
mlir::DataLayoutEntryListRef params) const {
return llvm::TypeSize::getFixed(16);
return llvm::TypeSize::getFixed(128);
}

uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
Expand All @@ -768,6 +768,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
llvm::TypeSize
FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                             mlir::DataLayoutEntryListRef params) const {
  // IEEE binary128 is 128 bits wide and occupies 128 bits of storage. The
  // previous value of 16 confused the size in bytes with the size in bits;
  // the sibling FP80Type::getTypeSizeInBits already reports its storage size
  // in bits (128) for the same reason.
  return llvm::TypeSize::getFixed(128);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
return CXXABI.getRecordArgABI(RT);
}

CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
  // Only record (struct) types can have a non-default argument-passing ABI;
  // forward those to the StructType overload and default everything else.
  if (auto structTy = mlir::dyn_cast<StructType>(ty))
    return getRecordArgABI(structTy, CXXABI);
  return CIRCXXABI::RAA_Default;
}

} // namespace cir
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);

CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI);
CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);

} // namespace cir

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const mlir::Type T) const {
Align = Target->getDoubleAlign();
break;
}
if (auto longDoubleTy = mlir::dyn_cast<LongDoubleType>(T)) {
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
Width = AuxTarget->getLongDoubleWidth();
Align = AuxTarget->getLongDoubleAlign();
} else {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
}
break;
}
cir_cconv_unreachable("Unknown builtin type!");
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
if (!mo)
return nullptr;

mlir::PatternRewriter rewriter(mo.getContext());
return cir::createLowerModule(mo, rewriter);
}
Expand Down Expand Up @@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
// Let's hope LLVM's va_arg instruction can take care of it.
// Remove this when X86_64ABIInfo::classify can take care of every type.
if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
StructType>(op.getType()))
StructType, LongDoubleType>(op.getType()))
return nullptr;

// Assume that va_list type is correct; should be pointer to LLVM type:
Expand All @@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
if (!lowerModule)
return nullptr;

mlir::Type ty = op.getType();

// FIXME: How should we access the X86AVXABILevel?
Expand Down Expand Up @@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
mlir::Block *contBlock = currentBlock->splitBlock(op);
mlir::Block *inRegBlock = builder.createBlock(contBlock);
mlir::Block *inMemBlock = builder.createBlock(contBlock);

builder.setInsertionPointToEnd(currentBlock);
builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);

Expand Down
116 changes: 113 additions & 3 deletions clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
Current = Class::SSE;
return;

} else if (mlir::isa<LongDoubleType>(Ty)) {
const llvm::fltSemantics *LDF =
&getContext().getTargetInfo().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad()) {
Lo = Class::SSE;
Hi = Class::SSEUp;
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
Lo = Class::X87;
Hi = Class::X87Up;
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
Current = Class::SSE;
} else {
llvm_unreachable("unexpected long double representation!");
}
return;
} else if (mlir::isa<BoolType>(Ty)) {
Current = Class::Integer;
} else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
Expand Down Expand Up @@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
cir_cconv_unreachable("NYI");
}

/// Build the ABIArgInfo for an argument of type \p ty that must be passed
/// in memory (the MEMORY/X87 classification cases), given \p freeIntRegs
/// integer registers still available.
ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
                                            unsigned freeIntRegs) const {
  // If this is a scalar LLVM value then assume LLVM will pass it in the right
  // place naturally.
  //
  // This assumption is optimistic, as there could be free registers available
  // when we need to pass this argument in memory, and LLVM could try to pass
  // the argument in the free register. This does not seem to happen currently,
  // but this code would be much safer if we could mark the argument with
  // 'onstack'. See PR12193.
  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
      /*!Ty->isBitIntType()*/) {
    // FIXME: Handling enum type?

    // Scalars are passed direct, widening small integers per the ABI.
    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
                                              : ABIArgInfo::getDirect());
  }

  // Records whose C++ semantics (non-trivial copy ctor/dtor) force a
  // particular passing convention take their natural alignment indirectly.
  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);

  // Compute the byval alignment. We specify the alignment of the byval in all
  // cases so that the mid-level optimizer knows the alignment of the byval.
  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);

  // Attempt to avoid passing indirect results using byval when possible. This
  // is important for good codegen.
  //
  // We do this by coercing the value into a scalar type which the backend can
  // handle naturally (i.e., without using byval).
  //
  // For simplicity, we currently only do this when we have exhausted all of the
  // free integer registers. Doing this when there are free integer registers
  // would require more care, as we would have to ensure that the coerced value
  // did not claim the unused register. That would require either reordering the
  // arguments to the function (so that any subsequent inreg values came first),
  // or only doing this optimization when there were no following arguments that
  // might be inreg.
  //
  // We currently expect it to be rare (particularly in well written code) for
  // arguments to be passed on the stack when there are still free integer
  // registers available (this would typically imply large structs being passed
  // by value), so this seems like a fair tradeoff for now.
  //
  // We can revisit this if the backend grows support for 'onstack' parameter
  // attributes. See PR12193.
  if (freeIntRegs == 0) {
    uint64_t size = getContext().getTypeSize(ty);

    // If this type fits in an eightbyte, coerce it into the matching integral
    // type, which will end up on the stack (with alignment 8).
    if (align == 8 && size <= 64)
      return ABIArgInfo::getDirect(
          cir::IntType::get(LT.getMLIRContext(), size, false));
  }

  return ABIArgInfo::getIndirect(align);
}

/// Return a type that will be passed by the backend in the low 8 bytes of an
/// XMM register, corresponding to the SSE class.
mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
Expand All @@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
if (!T0 || mlir::isa<mlir::Float64Type>(T0))
return T0; // NOTE(cir): Not sure if this is correct.
return cir::DoubleType::get(LT.getMLIRContext());

mlir::Type T1 = {};
unsigned T0Size = TD.getTypeAllocSize(T0);
Expand Down Expand Up @@ -539,13 +613,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
++neededSSE;
break;
}
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
// on the stack.
case Class::Memory:

// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
// COMPLEX_X87, it is passed in memory.
case Class::X87:
case Class::ComplexX87:
if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
++neededInt;
return getIndirectResult(Ty, freeIntRegs);

case Class::SSEUp:
case Class::X87Up:
llvm_unreachable("Invalid classification for lo word.");

default:
cir_cconv_assert_or_abort(!cir::MissingFeatures::X86ArgTypeClassification(),
"NYI");
}

mlir::Type HighPart = {};
switch (Hi) {
case Class::Memory:
case Class::X87:
case Class::ComplexX87:
llvm_unreachable("Invalid classification for hi word.");

case Class::NoClass:
break;

Expand All @@ -558,8 +653,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
return ABIArgInfo::getDirect(HighPart, 8);
break;

default:
cir_cconv_unreachable("NYI");
// X87Up generally doesn't occur here (long double is passed in
// memory), except in situations involving unions.
case Class::X87Up:
case Class::SSE:
++neededSSE;
HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);

if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;

// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
// eightbyte is passed in the upper half of the last used SSE
// register. This only happens when 128-bit vectors are passed.
case Class::SSEUp:
llvm_unreachable("NYI && We need to implement GetByteVectorType");
break;
}

// If a high part was specified, merge it together with the low part. It is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
mlir::Type SourceTy,
unsigned SourceOffset) const;

/// Give a source type \arg Ty, return a suitable result such that the
/// argument will be passed in memory.
///
/// \param freeIntRegs - The number of free integer registers remaining
/// available.
::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
unsigned freeIntRegs) const;

/// The 0.98 ABI revision clarified a lot of ambiguities,
/// unfortunately in ways that were not always consistent with
/// certain previous compilers. In particular, platforms which
Expand Down
53 changes: 53 additions & 0 deletions clang/test/CIR/Lowering/var-arg-x86_64.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// REQUIRES: system-linux
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
Expand Down Expand Up @@ -76,3 +77,55 @@ double f1(int n, ...) {
// CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
// CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
// CIR: store [[CASTED_ARG]], [[RES]]
// Exercises va_arg lowering for 'long double' on x86-64 (x86_fp80), which is
// classified X87/X87Up and is therefore read from the 16-byte-aligned
// overflow (stack) area of the va_list — see the checks below.
long double f2(int n, ...) {
  va_list valist;
  va_start(valist, n);
  long double res = va_arg(valist, long double);
  va_end(valist);
  return res;
}

// CHECK: define {{.*}}@f2
// CHECK: [[RESULT:%.+]] = alloca x86_fp80
// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
// CHECK: [[RES:%.+]] = alloca x86_fp80
// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
// Ptr Mask Operations
// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
// CHECK: [[DIFF:%.+]] = sub i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], [[MASKED]]
// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
// CHECK: ret x86_fp80 [[RETURN_VALUE]]

// CIR: cir.func @f2
// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!ty___va_list_tag x 1>, !cir.ptr<!cir.array<!ty___va_list_tag x 1>>, ["valist"]
// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
// CIR: cir.va.start [[VASTED_VA_LIST]]
// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
// CIR: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
// CIR: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<15>
// CIR: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
// CIR: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
// CIR: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
// CIR: cir.va.end

0 comments on commit 4ceef42

Please sign in to comment.