[CIR] [Lowering] [X86_64] Support VAArg for LongDouble #1101

Merged
3 changes: 2 additions & 1 deletion clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -747,7 +747,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
llvm::TypeSize
FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
mlir::DataLayoutEntryListRef params) const {
return llvm::TypeSize::getFixed(16);
return llvm::TypeSize::getFixed(128);
}

uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
@@ -768,6 +768,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
llvm::TypeSize
FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
mlir::DataLayoutEntryListRef params) const {
// FIXME: We probably want it to return 128. But we're lacking a test now.
return llvm::TypeSize::getFixed(16);
}
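For reference, getTypeSizeInBits reports a size in bits, so the old value of 16 reads like a byte count where a bit count was expected; on x86-64 the 80-bit extended type occupies a 16-byte (128-bit) slot. A minimal sketch of the expected values, assuming an x86_64-unknown-linux-gnu target (not part of the patch):

/* Hypothetical compile-time check, assuming x86_64-unknown-linux-gnu where
   long double is the 80-bit x87 format stored in a 16-byte slot. */
_Static_assert(sizeof(long double) == 16, "x86_fp80 occupies a 16-byte slot");
_Static_assert(_Alignof(long double) == 16, "and is 16-byte aligned");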

@@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI,
return CXXABI.getRecordArgABI(RT);
}

CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
auto sTy = mlir::dyn_cast<StructType>(ty);
if (!sTy)
return CIRCXXABI::RAA_Default;
return getRecordArgABI(sTy, CXXABI);
}

} // namespace cir
@@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);

CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI);
CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);

} // namespace cir

@@ -94,6 +94,18 @@ clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const mlir::Type T) const {
Align = Target->getDoubleAlign();
break;
}
if (auto longDoubleTy = mlir::dyn_cast<LongDoubleType>(T)) {
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
Width = AuxTarget->getLongDoubleWidth();
Align = AuxTarget->getLongDoubleAlign();
} else {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
}
break;
}
cir_cconv_unreachable("Unknown builtin type!");
break;
}
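The width/alignment pair looked up above differs per target, which is why it cannot be hard-coded; a compile-time sketch of the usual values under the standard ABIs (hypothetical checks, not part of the patch):

#if defined(__x86_64__)
_Static_assert(sizeof(long double) * 8 == 128 && _Alignof(long double) == 16,
               "x86-64 SysV: long double width 128, align 128");
#elif defined(__i386__)
_Static_assert(sizeof(long double) * 8 == 96 && _Alignof(long double) == 4,
               "i386 SysV: long double width 96, align 32");
#elif defined(__aarch64__) && !defined(__APPLE__)
_Static_assert(sizeof(long double) * 8 == 128 && _Alignof(long double) == 16,
               "AAPCS64: IEEE binary128, width 128, align 128");
#endif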
@@ -46,7 +46,6 @@ std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
if (!mo)
return nullptr;

mlir::PatternRewriter rewriter(mo.getContext());
return cir::createLowerModule(mo, rewriter);
}
@@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
// Let's hope LLVM's va_arg instruction can take care of it.
// Remove this when X86_64ABIInfo::classify can take care of every type.
if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
StructType>(op.getType()))
StructType, LongDoubleType>(op.getType()))
return nullptr;

// Assume that va_list type is correct; should be pointer to LLVM type:
@@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
if (!lowerModule)
return nullptr;

mlir::Type ty = op.getType();

// FIXME: How should we access the X86AVXABILevel?
@@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
mlir::Block *contBlock = currentBlock->splitBlock(op);
mlir::Block *inRegBlock = builder.createBlock(contBlock);
mlir::Block *inMemBlock = builder.createBlock(contBlock);

builder.setInsertionPointToEnd(currentBlock);
builder.create<BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);

116 changes: 113 additions & 3 deletions clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
Current = Class::SSE;
return;

} else if (mlir::isa<LongDoubleType>(Ty)) {
const llvm::fltSemantics *LDF =
&getContext().getTargetInfo().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad()) {
Lo = Class::SSE;
Hi = Class::SSEUp;
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
Lo = Class::X87;
Hi = Class::X87Up;
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
Current = Class::SSE;
} else {
llvm_unreachable("unexpected long double representation!");
}
return;
} else if (mlir::isa<BoolType>(Ty)) {
Current = Class::Integer;
} else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
@@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
cir_cconv_unreachable("NYI");
}
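A sketch of what each of the three long double classifications above means at a call site, assuming the standard SysV AMD64 conventions (illustrative declarations, not taken from the patch):

long double ld_id(long double x);  /* x87DoubleExtended (x86_64 Linux default):
                                      Lo = X87, Hi = X87Up, so x is passed on
                                      the stack and the result returns in
                                      st(0). */
__float128 q_id(__float128 x);     /* IEEEquad: Lo = SSE, Hi = SSEUp, so the
                                      full 128-bit value travels in a single
                                      xmm register. */
/* IEEEdouble (e.g. -mlong-double-64): long double is just a double, so it is
   classified SSE and passed/returned in xmm0 like any other double. */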

ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
unsigned freeIntRegs) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
//
// This assumption is optimistic, as there could be free registers available
// when we need to pass this argument in memory, and LLVM could try to pass
// the argument in the free register. This does not seem to happen currently,
// but this code would be much safer if we could mark the argument with
// 'onstack'. See PR12193.
if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
/*!Ty->isBitIntType()*/) {
// FIXME: Handling enum type?

return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
: ABIArgInfo::getDirect());
}

if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);

// Compute the byval alignment. We specify the alignment of the byval in all
// cases so that the mid-level optimizer knows the alignment of the byval.
unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);

// Attempt to avoid passing indirect results using byval when possible. This
// is important for good codegen.
//
// We do this by coercing the value into a scalar type which the backend can
// handle naturally (i.e., without using byval).
//
// For simplicity, we currently only do this when we have exhausted all of the
// free integer registers. Doing this when there are free integer registers
// would require more care, as we would have to ensure that the coerced value
  // did not claim the unused register. That would require either reordering the
// arguments to the function (so that any subsequent inreg values came first),
// or only doing this optimization when there were no following arguments that
// might be inreg.
//
// We currently expect it to be rare (particularly in well written code) for
// arguments to be passed on the stack when there are still free integer
// registers available (this would typically imply large structs being passed
// by value), so this seems like a fair tradeoff for now.
//
// We can revisit this if the backend grows support for 'onstack' parameter
// attributes. See PR12193.
if (freeIntRegs == 0) {
uint64_t size = getContext().getTypeSize(ty);

// If this type fits in an eightbyte, coerce it into the matching integral
// type, which will end up on the stack (with alignment 8).
if (align == 8 && size <= 64)
return ABIArgInfo::getDirect(
cir::IntType::get(LT.getMLIRContext(), size, false));
}

return ABIArgInfo::getIndirect(align);
}
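Two concrete cases for getIndirectResult, as a sketch assuming an x86_64-unknown-linux-gnu target (the struct is a hypothetical example, not taken from the tests):

void takes_ld(long double x);
/* classify() marks long double as X87, so classifyArgumentType() routes it
   here; LongDoubleType is a scalar, so the early return above yields
   ABIArgInfo::getDirect() and the LLVM backend itself places the x86_fp80
   argument on the stack. */

struct Big { long a, b, c; };   /* 24 bytes -> MEMORY class */
void takes_big(struct Big s);
/* Aggregate path: no special C++ record ABI, align = max(8, 8) = 8, and size
   192 > 64 means the eightbyte coercion does not apply, so the result is
   ABIArgInfo::getIndirect(8), i.e. a byval argument aligned to 8 bytes. */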

/// Return a type that will be passed by the backend in the low 8 bytes of an
/// XMM register, corresponding to the SSE class.
mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
@@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
if (!T0 || mlir::isa<mlir::Float64Type>(T0))
return T0; // NOTE(cir): Not sure if this is correct.
return cir::DoubleType::get(LT.getMLIRContext());

mlir::Type T1 = {};
unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -539,13 +613,34 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
++neededSSE;
break;
}
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
// on the stack.
case Class::Memory:

// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
// COMPLEX_X87, it is passed in memory.
case Class::X87:
case Class::ComplexX87:
if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
++neededInt;
return getIndirectResult(Ty, freeIntRegs);

case Class::SSEUp:
case Class::X87Up:
llvm_unreachable("Invalid classification for lo word.");

default:
cir_cconv_assert_or_abort(!cir::MissingFeatures::X86ArgTypeClassification(),
"NYI");
}

mlir::Type HighPart = {};
switch (Hi) {
case Class::Memory:
case Class::X87:
case Class::ComplexX87:
llvm_unreachable("Invalid classification for hi word.");

case Class::NoClass:
break;

Expand All @@ -558,8 +653,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
return ABIArgInfo::getDirect(HighPart, 8);
break;

default:
cir_cconv_unreachable("NYI");
// X87Up generally doesn't occur here (long double is passed in
// memory), except in situations involving unions.
case Class::X87Up:
case Class::SSE:
++neededSSE;
HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);

if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;

// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
// eightbyte is passed in the upper half of the last used SSE
// register. This only happens when 128-bit vectors are passed.
case Class::SSEUp:
llvm_unreachable("NYI && We need to implement GetByteVectorType");
break;
}

// If a high part was specified, merge it together with the low part. It is
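Rule 4 (SSEUP) above only fires for 128-bit vectors, which is exactly the case the remaining NYI guards against; a sketch assuming the SysV AMD64 vector conventions (not part of the patch):

#include <immintrin.h>

/* Lo = SSE, Hi = SSEUp: both eightbytes of the vector travel in a single xmm
   register (xmm0 for both the argument and the return value). Lowering this
   would require the missing GetByteVectorType. */
__m128 m128_id(__m128 v) { return v; }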
@@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
mlir::Type SourceTy,
unsigned SourceOffset) const;

/// Given a source type \arg Ty, return a suitable result such that the
/// argument will be passed in memory.
///
/// \param freeIntRegs - The number of free integer registers remaining
/// available.
::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
unsigned freeIntRegs) const;

/// The 0.98 ABI revision clarified a lot of ambiguities,
/// unfortunately in ways that were not always consistent with
/// certain previous compilers. In particular, platforms which
53 changes: 53 additions & 0 deletions clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -1,3 +1,4 @@
// REQUIRES: system-linux
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
@@ -76,3 +77,55 @@ double f1(int n, ...) {
// CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
// CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
// CIR: store [[CASTED_ARG]], [[RES]]
long double f2(int n, ...) {
va_list valist;
va_start(valist, n);
long double res = va_arg(valist, long double);
va_end(valist);
return res;
}

// CHECK: define {{.*}}@f2
// CHECK: [[RESULT:%.+]] = alloca x86_fp80
// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
// CHECK: [[RES:%.+]] = alloca x86_fp80
// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
// Ptr Mask Operations
// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
// CHECK: [[DIFF:%.+]] = sub i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], [[MASKED]]
// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
// CHECK: ret x86_fp80 [[RETURN_VALUE]]

// CIR: cir.func @f2
// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!ty___va_list_tag x 1>, !cir.ptr<!cir.array<!ty___va_list_tag x 1>>, ["valist"]
// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
// CIR: cir.va.start [[VASTED_VA_LIST]]
// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
// CIR: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
// CIR: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<15>
// CIR: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
// CIR: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
// CIR: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
// CIR: cir.va.end
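Both check blocks verify the same rounding idiom for the 16-byte-aligned va_arg slot; written out as plain C (a sketch, not additional test content):

#include <stdint.h>

/* Round the overflow_arg_area pointer up to the next 16-byte boundary, which
   is what the "+ 15" / "& -16" pair above implements; the area pointer is
   then advanced by 16, the slot size of an x86_fp80. */
static void *round_up_16(void *p) {
  uintptr_t u = (uintptr_t)p;
  return (void *)((u + 15) & ~(uintptr_t)15); /* e.g. 0x...1008 -> 0x...1010 */
}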