From 09302078a6eb65ddeda117363dd150bcf5f43b22 Mon Sep 17 00:00:00 2001 From: Stefan Vodita <41467371+stefanvodita@users.noreply.github.com> Date: Wed, 29 May 2024 07:55:37 +0100 Subject: [PATCH] Allow users to retrieve counts from taxo association facets (#13414) Add a count field to LabelAndValue --- lucene/CHANGES.txt | 2 ++ .../demo/facet/AssociationsFacetsExample.java | 8 ++++++++ .../org/apache/lucene/facet/LabelAndValue.java | 13 ++++++++++++- .../lucene/facet/taxonomy/TaxonomyFacets.java | 8 ++++++-- .../org/apache/lucene/facet/FacetTestCase.java | 16 ++++++++++++++++ .../taxonomy/TestTaxonomyFacetAssociations.java | 2 ++ 6 files changed, 46 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2f42f3695846..2bfe78fafe95 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -44,6 +44,8 @@ New Features * GITHUB#13181: Add new VectorScorer interface to vector value iterators. This allows for vector codecs to supply simpler and more optimized vector scoring when iterating vector values directly. (Ben Trent) +* GITHUB#13414: Counts are always available in the result when using taxonomy facets. (Stefan Vodita) + Improvements --------------------- diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java index 22727a66d693..f3982d0b5176 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java @@ -26,6 +26,7 @@ import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.taxonomy.AssociationAggregationFunction; import org.apache.lucene.facet.taxonomy.FloatAssociationFacetField; import org.apache.lucene.facet.taxonomy.IntAssociationFacetField; @@ -165,5 +166,12 @@ public static void main(String[] args) throws Exception { List results = new AssociationsFacetsExample().runSumAssociations(); System.out.println("tags: " + results.get(0)); System.out.println("genre: " + results.get(1)); + System.out.println("-------------------------"); + System.out.println("Counts per label are also available:"); + for (FacetResult facetResult : results) { + for (LabelAndValue lv : facetResult.labelValues) { + System.out.println("\t" + lv.label + ": " + lv.count); + } + } } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/LabelAndValue.java b/lucene/facet/src/java/org/apache/lucene/facet/LabelAndValue.java index 29de9761120d..6557a06081e5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/LabelAndValue.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/LabelAndValue.java @@ -24,10 +24,21 @@ public final class LabelAndValue { /** Value associated with this label. */ public final Number value; - /** Sole constructor. */ + /** Number of occurrences for this label. */ + public final int count; + + /** Constructor with unspecified count, we assume the value is a count. */ public LabelAndValue(String label, Number value) { this.label = label; this.value = value; + this.count = value.intValue(); + } + + /** Constructor with value and count. */ + public LabelAndValue(String label, Number value, int count) { + this.label = label; + this.value = value; + this.count = count; } @Override diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java index 0d5d93c9512a..092f607f1fe5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java @@ -379,7 +379,9 @@ private FacetResult createFacetResult( // add 1 here to also account for the dim: int childComponentIdx = path.length + 1; for (int i = 0; i < labelValues.length; i++) { - labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]); + labelValues[i] = + new LabelAndValue( + bulkPath[i].components[childComponentIdx], values[i], getCount(ordinals[i])); } return new FacetResult( @@ -455,7 +457,9 @@ public FacetResult getAllChildren(String dim, String... path) throws IOException LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()]; for (int i = 0; i < ordValues.size(); i++) { - labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], ordValues.get(i)); + labelValues[i] = + new LabelAndValue( + bulkPath[i].components[cp.length], ordValues.get(i), getCount(ordinals.get(i))); } return new FacetResult(dim, path, aggregatedValue, labelValues, ordinals.size()); } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java index ebb0b7c3b265..e01fde20df67 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java @@ -303,4 +303,20 @@ protected void assertFacetResult( // assert children equal with no assumption of the children ordering assertTrue(Arrays.asList(result.labelValues).containsAll(Arrays.asList(expectedChildren))); } + + protected void assertFacetResult( + FacetResult result, + String expectedDim, + String[] expectedPath, + int expectedChildCount, + Number expectedValue, + Map countPerLabel, + LabelAndValue... expectedChildren) { + assertFacetResult( + result, expectedDim, expectedPath, expectedChildCount, expectedValue, expectedChildren); + assertEquals(result.labelValues.length, countPerLabel.size()); + for (LabelAndValue lv : result.labelValues) { + assertEquals(lv.count, (int) countPerLabel.get(lv.label)); + } + } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java index 3db906eb05c8..29e7daed6d73 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetAssociations.java @@ -225,6 +225,7 @@ public void testIntSumAssociation() throws Exception { new String[0], 2, -1, + Map.of("a", 100, "b", 50), new LabelAndValue[] { new LabelAndValue("a", 200), new LabelAndValue("b", 150), }); @@ -306,6 +307,7 @@ public void testFloatSumAssociation() throws Exception { new String[0], 2, -1f, + Map.of("a", 100, "b", 50), new LabelAndValue[] { new LabelAndValue("a", 50.0f), new LabelAndValue("b", 9.999995f), });