Skip to content

Commit

Permalink
implement inverse_f_cdf() function
Browse files Browse the repository at this point in the history
  • Loading branch information
svm1 committed Oct 16, 2024
1 parent a976ba5 commit a5dc8a9
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 0 deletions.
6 changes: 6 additions & 0 deletions velox/docs/functions/presto/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,12 @@ Probability Functions: inverse_cdf
probability (p): P(N < n). The a, b parameters must be positive real values (all of type DOUBLE).
The probability p must lie on the interval [0, 1].

.. function:: inverse_f_cdf(df1, df2, p) -> double

Compute the inverse of the F cdf with the given ``df1`` (numerator degrees of freedom) and ``df2`` (denominator degrees of freedom) parameters
for the cumulative probability (p): P(N < n). The numerator and denominator df parameters must be positive real numbers (all of type DOUBLE).
The probability p must lie on the interval [0, 1].

.. function:: inverse_weibull_cdf(a, b, p) -> double

Compute the inverse of the Weibull cdf with given parameters ``a``, ``b`` for the probability ``p``.
Expand Down
15 changes: 15 additions & 0 deletions velox/functions/prestosql/Probability.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,21 @@ struct InverseBetaCDFFunction {
}
};

/// Implements inverse_f_cdf(df1, df2, p): the quantile (inverse cumulative
/// distribution function) of the F distribution.
template <typename T>
struct InverseFCDFFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Stores in 'result' the quantile of the F distribution with 'df1'
  /// numerator and 'df2' denominator degrees of freedom at the cumulative
  /// probability 'p'.
  ///
  /// The checks below fail with their respective messages when 'p' is not in
  /// [0, 1] or when either degrees-of-freedom argument is not greater than 0.
  FOLLY_ALWAYS_INLINE void
  call(double& result, double df1, double df2, double p) {
    // The probability is validated before the degrees of freedom, so its
    // message is the one reported when several arguments are invalid at once.
    VELOX_USER_CHECK((p >= 0) && (p <= 1), "p must be in the interval [0, 1]");
    VELOX_USER_CHECK_GT(df1, 0, "numerator df must be greater than 0");
    VELOX_USER_CHECK_GT(df2, 0, "denominator df must be greater than 0");

    // Delegate the quantile computation to Boost.Math.
    result = boost::math::quantile(
        boost::math::fisher_f_distribution<>(df1, df2), p);
  }
};

template <typename T>
struct ChiSquaredCDFFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ void registerProbTrigFunctions(const std::string& prefix) {
{prefix + "f_cdf"});
registerFunction<InverseBetaCDFFunction, double, double, double, double>(
{prefix + "inverse_beta_cdf"});
registerFunction<InverseFCDFFunction, double, double, double, double>(
{prefix + "inverse_f_cdf"});
registerFunction<InverseNormalCDFFunction, double, double, double, double>(
{prefix + "inverse_normal_cdf"});
registerFunction<PoissonCDFFunction, double, double, int32_t>(
Expand Down
51 changes: 51 additions & 0 deletions velox/functions/prestosql/tests/ProbabilityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,57 @@ TEST_F(ProbabilityTest, invBetaCDF) {
VELOX_ASSERT_THROW(invBetaCDF(3, 5, 1.1), "p must be in the interval [0, 1]");
}

// Verifies inverse_f_cdf(df1, df2, p): regular values, null propagation,
// extreme degrees of freedom, and the validation errors for each argument.
TEST_F(ProbabilityTest, inverseFCDF) {
  const auto inverseFCDF = [&](std::optional<double> df1,
                               std::optional<double> df2,
                               std::optional<double> p) {
    return evaluateOnce<double>("inverse_f_cdf(c0, c1, c2)", df1, df2, p);
  };

  // Regular inputs, including the lower boundary p = 0.
  EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.0), 0.0);
  EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.5), 0.79876977693223561);
  EXPECT_EQ(inverseFCDF(2.0, 5.0, 0.9), 3.779716078773951);

  // A null in any argument yields null. Note that p = 3.7797 is outside
  // [0, 1] in the last two cases, yet null is expected rather than an error.
  EXPECT_EQ(inverseFCDF(2.0, 5.0, std::nullopt), std::nullopt);
  EXPECT_EQ(inverseFCDF(2.0, std::nullopt, 3.7797), std::nullopt);
  EXPECT_EQ(inverseFCDF(std::nullopt, 5.0, 3.7797), std::nullopt);

  // Extreme degrees of freedom and the upper boundary p = 1.
  EXPECT_EQ(inverseFCDF(kDoubleMax, 5.0, 1), kInf);
  EXPECT_EQ(inverseFCDF(1, kDoubleMax, 1), kInf);
  EXPECT_EQ(inverseFCDF(82.6, 901.10, 1), kInf);
  EXPECT_EQ(inverseFCDF(kDoubleMin, 50.620, 1), kInf);
  EXPECT_EQ(
      inverseFCDF(kBigIntMax, 5.0, 0.93256230095450132), 3.7797000000000009);
  EXPECT_EQ(inverseFCDF(76.901, kBigIntMax, 1), kInf);
  EXPECT_EQ(inverseFCDF(2.0, 5.0, 1), kInf);

  // Test invalid inputs for df1.
  VELOX_ASSERT_THROW(
      inverseFCDF(0, 3, 0.5), "numerator df must be greater than 0");
  VELOX_ASSERT_THROW(
      inverseFCDF(kBigIntMin, 5.0, 0.999),
      "numerator df must be greater than 0");

  // Test invalid inputs for df2.
  VELOX_ASSERT_THROW(
      inverseFCDF(3, 0, 0.5), "denominator df must be greater than 0");
  VELOX_ASSERT_THROW(
      inverseFCDF(2.0, kBigIntMin, 0.0001),
      "denominator df must be greater than 0");

  // Test invalid inputs for p, on both sides of the [0, 1] interval.
  VELOX_ASSERT_THROW(
      inverseFCDF(3, 5, -0.1), "p must be in the interval [0, 1]");
  VELOX_ASSERT_THROW(
      inverseFCDF(3, 5, 1.1), "p must be in the interval [0, 1]");
  VELOX_ASSERT_THROW(
      inverseFCDF(2.0, 5.0, kBigIntMin), "p must be in the interval [0, 1]");

  // Test a combination of invalid inputs: the p check is reported first.
  VELOX_ASSERT_THROW(
      inverseFCDF(-1.2, 0, -0.1), "p must be in the interval [0, 1]");
  VELOX_ASSERT_THROW(
      inverseFCDF(1, -kInf, -0.1), "p must be in the interval [0, 1]");
}

TEST_F(ProbabilityTest, chiSquaredCDF) {
const auto chiSquaredCDF = [&](std::optional<double> df,
std::optional<double> value) {
Expand Down

0 comments on commit a5dc8a9

Please sign in to comment.