UI for similar datasets

amercader · Jan 22, 2024 · 2267945 · 2267945
1 parent 4c3413b
commit 2267945
Show file tree

Hide file tree

Showing 6 changed files with 51 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -38,7 +38,7 @@ datasets to a "Bathing Water Quality" one, besides other datasets explicitly men
 metadata you'll get others that might include things like "Wastewater Treatment", "Aquaculture Sites" or "Lakes".
 
 The plugin adds a `package_similar_show` action that will return the closest datasets to the one provided with
-the `id` parameter (id or name). 5 are returned by default, which can be changed using the `limit` paramater.
+the `id` parameter (id or name). 5 are returned by default, which can be changed using the `limit` parameter.
 
 
 #### 2. Semantic search
@@ -84,10 +84,6 @@ ckanapi action package_search q=boats extras='{"ext_vector_search":"true"}' | jq
 
 Remember that the Semantic Search will always return a fixed number of datasets (the default in this case is 10).
 
-
-
-
-
 ## Requirements
 
 Tested on CKAN 2.10/master
@@ -132,7 +128,7 @@ TODO
 ## Customizing
 
 You can choose the backend used to generate the embeddings by setting the `ckanext.embeddings.backend` config option.
-Right now the plugins includes two backends, one that runs locally using  [Sentence Transformers](https://www.sbert.net/)'s `all-MiniLM-L6-v2` model (`sentence_transformers`, the default one) and one that uses OpenAI's Embeddings API (`openai`). You will need
+Right now the plugin includes two backends, one that runs locally using  [Sentence Transformers](https://www.sbert.net/)'s `all-MiniLM-L6-v2` model (`sentence_transformers`, the default one) and one that uses OpenAI's Embeddings API (`openai`). You will need
 to provide an API key for this one, either via the `ckanext.embeddings.openai.api_key` config option or a `OPENAI_API_KEY` env var.
 
 Additionally, it's really easy to provide your own backends. You can write your own class that inherits from
@@ -206,7 +202,7 @@ USER solr
 
 ```
 
-and then in the ini file:
+And then in the ini file you can choose the Solr field used when indexing/querying:
 
 ```ini
 ckanext.embeddings.solr_vector_field_name = vector

diff --git a/ckanext/embeddings/actions.py b/ckanext/embeddings/actions.py
@@ -6,7 +6,7 @@
 @toolkit.side_effect_free
 def package_similar_show(context, data_dict):
     dataset_id = toolkit.get_or_bust(data_dict, "id")
-    limit = data_dict.get("limit", 5)
+    limit = data_dict.get("limit") or 5
     try:
         limit = int(limit)
     except ValueError:

diff --git a/ckanext/embeddings/assets/style.css b/ckanext/embeddings/assets/style.css
@@ -5,3 +5,11 @@
 .semantic-search-label:after {
   content: '';
 }
+
+.similar-datasets {
+  margin-top: 30px;
+}
+
+.similar-datasets > h3 {
+  margin-bottom: 20px;
+}
diff --git a/ckanext/embeddings/helpers.py b/ckanext/embeddings/helpers.py
@@ -0,0 +1,12 @@
+from ckan.plugins import toolkit
+
+
+def get_similar_datasets(dataset_id, limit=None):
+    try:
+        datasets = toolkit.get_action("package_similar_show")(
+            {}, {"id": dataset_id, "limit": limit}
+        )
+    except toolkit.ObjectNotFound:
+        datasets = []
+
+    return datasets
diff --git a/ckanext/embeddings/plugin.py b/ckanext/embeddings/plugin.py
@@ -6,7 +6,7 @@
 import ckan.plugins.toolkit as toolkit
 
 from ckanext.embeddings.model import DatasetEmbedding
-from ckanext.embeddings import cli
+from ckanext.embeddings import cli, helpers
 from ckanext.embeddings.actions import package_similar_show
 from ckanext.embeddings.auth import package_similar_show as package_similar_show_auth
 from ckanext.embeddings.backends import get_embeddings_backend
@@ -18,6 +18,7 @@ class EmbeddingPlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IClick)
     plugins.implements(plugins.IActions)
     plugins.implements(plugins.IAuthFunctions)
+    plugins.implements(plugins.ITemplateHelpers)
     plugins.implements(plugins.IPackageController, inherit=True)
 
     backend = None
@@ -46,6 +47,11 @@ def get_actions(self):
     def get_auth_functions(self):
         return {"package_similar_show": package_similar_show_auth}
 
+    # ITemplateHelpers
+
+    def get_helpers(self):
+        return {"embeddings_get_similar_datasets": helpers.get_similar_datasets}
+
     # IPackageController
 
     def before_dataset_index(self, dataset_dict):
@@ -70,7 +76,9 @@ def before_dataset_search(self, search_params):
             try:
                 extras = json.loads(extras)
             except ValueError:
-                raise toolkit.ValidationError({"extras": f"Wrong JSON object: {extras}"})
+                raise toolkit.ValidationError(
+                    {"extras": f"Wrong JSON object: {extras}"}
+                )
 
         if not toolkit.asbool(extras.get("ext_vector_search")):
             return search_params

diff --git a/ckanext/embeddings/templates/package/read.html b/ckanext/embeddings/templates/package/read.html
@@ -0,0 +1,17 @@
+{% ckan_extends %}
+
+{% block primary_content_inner %}
+  {{ super() }}
+
+  {% asset "ckanext-embeddings/css" %}
+
+  <section class="similar-datasets">
+    <h3>{{ _('Similar Datasets') }}</h3>
+
+    {% set similar_datasets = h.embeddings_get_similar_datasets(pkg.id) %}
+
+    {{ h.snippet('snippets/package_list.html', packages=similar_datasets) }}
+  </section>
+
+{% endblock %}
+