From f87e80aebac30bc413aa19e027a9d8f3a29f5c78 Mon Sep 17 00:00:00 2001 From: marcelosousa <601882+marcelosousa@users.noreply.github.com> Date: Mon, 18 Apr 2022 15:23:17 +0100 Subject: [PATCH] Fix percentile computation --- data_test.go | 2 +- percentile.go | 32 ++++++------- percentile_test.go | 113 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 117 insertions(+), 30 deletions(-) diff --git a/data_test.go b/data_test.go index abfb503..e4e4d24 100644 --- a/data_test.go +++ b/data_test.go @@ -166,7 +166,7 @@ func assertPercentiles(fn func(i float64) (float64, error), i float64, f float64 } func TestPercentileMethods(t *testing.T) { - assertPercentiles(data1.Percentile, 75, 4.2, t) + assertPercentiles(data1.Percentile, 75, 4.4, t) assertPercentiles(data1.PercentileNearestRank, 75, 4.2, t) } diff --git a/percentile.go b/percentile.go index f564178..f4248e2 100644 --- a/percentile.go +++ b/percentile.go @@ -11,39 +11,33 @@ func Percentile(input Float64Data, percent float64) (percentile float64, err err return math.NaN(), EmptyInputErr } - if length == 1 { - return input[0], nil - } - - if percent <= 0 || percent > 100 { + if percent < 0 || percent > 100 { return math.NaN(), BoundsErr } // Start by sorting a copy of the slice c := sortedCopy(input) - // Multiply percent by length of input - index := (percent / 100) * float64(len(c)) + // Calculate rank + rank := (percent / 100) * float64(len(c)-1) - // Check if the index is a whole number - if index == float64(int64(index)) { + // Convert float to int + ri := int(rank) - // Convert float to int - i := int(index) + // Check if the index is a whole number + if rank == float64(ri) { // Find the value at the index - percentile = c[i-1] + percentile = c[ri] - } else if index > 1 { + } else { - // Convert float to int via truncation - i := int(index) + // Calculate the fractional part of the rank + rf := rank - float64(ri) - // Find the average of the index and following values - percentile, _ = Mean(Float64Data{c[i-1], c[i]}) + // Interpolate + percentile = c[ri] + rf*(c[ri+1]-c[ri]) - } else { - return math.NaN(), BoundsErr } return percentile, nil diff --git a/percentile_test.go b/percentile_test.go index 75d0240..2d49a5a 100644 --- a/percentile_test.go +++ b/percentile_test.go @@ -1,12 +1,104 @@ package stats_test import ( + "math" "reflect" "testing" "github.com/montanaflynn/stats" ) +func round(value float64) float64 { + return math.Round(value*100) / 100 +} + +func TestPercentileOneToTen(t *testing.T) { + m, _ := stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 0) + if m != 1 { + t.Errorf("%.1f != %.1f", m, 1.0) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 5) + if m != 1.45 { + t.Errorf("%.2f != %.2f", m, 1.45) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 10) + if m != 1.9 { + t.Errorf("%.1f != %.1f", m, 1.9) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 15) + if round(m) != 2.35 { + t.Errorf("%.2f != %.2f", m, 2.35) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 20) + if m != 2.8 { + t.Errorf("%.1f != %.1f", m, 2.8) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 25) + if m != 3.25 { + t.Errorf("%.1f != %.1f", m, 3.25) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 30) + if round(m) != 3.7 { + t.Errorf("%.1f != %.1f", m, 3.7) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 35) + if m != 4.15 { + t.Errorf("%.1f != %.1f", m, 4.15) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 40) + if m != 4.6 { + t.Errorf("%.1f != %.1f", m, 4.6) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 45) + if m != 5.05 { + t.Errorf("%.1f != %.1f", m, 5.05) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 50) + if m != 5.5 { + t.Errorf("%.1f != %.1f", m, 5.5) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 55) + if m != 5.95 { + t.Errorf("%.1f != %.1f", m, 5.95) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 60) + if round(m) != 6.4 { + t.Errorf("%.1f != %.1f", m, 6.4) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 65) + if round(m) != 6.85 { + t.Errorf("%.1f != %.1f", m, 6.85) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 70) + if m != 7.3 { + t.Errorf("%.1f != %.1f", m, 7.3) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 75) + if m != 7.75 { + t.Errorf("%.1f != %.1f", m, 7.75) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 80) + if m != 8.2 { + t.Errorf("%.1f != %.1f", m, 8.2) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 85) + if round(m) != 8.65 { + t.Errorf("%.1f != %.1f", m, 8.65) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 90) + if m != 9.1 { + t.Errorf("%.1f != %.1f", m, 9.1) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 95) + if round(m) != 9.55 { + t.Errorf("%.1f != %.1f", m, 9.55) + } + m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 100) + if m != 10.0 { + t.Errorf("%.1f != %.1f", m, 10.0) + } +} + func TestPercentile(t *testing.T) { m, _ := stats.Percentile([]float64{43, 54, 56, 61, 62, 66}, 90) if m != 64.0 { @@ -17,12 +109,13 @@ func TestPercentile(t *testing.T) { t.Errorf("%.1f != %.1f", m, 43.0) } m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 50) - if m != 5.0 { - t.Errorf("%.1f != %.1f", m, 5.0) + median, _ := stats.Median([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + if m != median || m != 5.5 { + t.Errorf("%.1f != %.1f", m, 5.5) } m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 99.9) - if m != 9.5 { - t.Errorf("%.1f != %.1f", m, 9.5) + if round(m) != 9.99 { + t.Errorf("%.2f != %.2f", m, 9.91) } m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 100) if m != 10.0 { @@ -32,13 +125,13 @@ func TestPercentile(t *testing.T) { if err != stats.EmptyInputErr { t.Errorf("Empty slice didn't return expected error; got %v", err) } - _, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 0) - if err != stats.BoundsErr { - t.Errorf("Zero percent didn't return expected error; got %v", err) + m, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 0) + if m != 1.0 { + t.Errorf("%.1f != %.1f", m, 1.0) } - _, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 0.13) - if err != stats.BoundsErr { - t.Errorf("Too low percent didn't return expected error; got %v", err) + m, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 0.13) + if round(m) != 1.01 { + t.Errorf("%.2f != %.2f", m, 1.0) } _, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 101) if err != stats.BoundsErr {