Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert IRI -> CURIE using bioregistry while parse-ing a KGCL command. #6

Merged
merged 9 commits into from
Aug 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
.pytest_cache
.idea
tmp/
tests/outputs/*
112 changes: 107 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ readme = "README.md"
python = "^3.8"
linkml-runtime = "^1.1.24"
lark = "^1.1.2"
bioregistry = "^0.5.49"

[tool.poetry.dev-dependencies]
linkml = "^1.2.15"
Expand Down
30 changes: 22 additions & 8 deletions src/kgcl_schema/grammar/parser.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
"""KGCL parser."""
import logging
import re
import sys
from pathlib import Path
from typing import List

import click
from kgcl_schema.utils import to_json, to_rdf, to_yaml
from lark import Lark, Token

from kgcl_schema.datamodel.kgcl import (ClassCreation, EdgeCreation, EdgeDeletion,
NewSynonym, NodeAnnotationChange, NodeCreation,
from bioregistry import parse_iri, get_preferred_prefix, curie_to_str
from kgcl_schema.datamodel.kgcl import (Change, ClassCreation, EdgeCreation,
EdgeDeletion, NewSynonym,
NodeAnnotationChange, NodeCreation,
NodeDeepening, NodeDeletion, NodeMove,
NodeObsoletion, NodeRename, NodeShallowing,
NodeUnobsoletion, PlaceUnder, PredicateChange,
RemovedNodeFromSubset, RemoveUnder, Change, Session)
NodeObsoletion, NodeRename,
NodeShallowing, NodeUnobsoletion,
PlaceUnder, PredicateChange,
RemovedNodeFromSubset, RemoveUnder,
Session)
from kgcl_schema.datamodel.ontology_model import Edge
from kgcl_schema.utils import to_json, to_rdf, to_yaml
from lark import Lark, Token


def id_generator():
Expand Down Expand Up @@ -54,6 +58,16 @@ def parse_statement(input: str) -> Change:

Return an instantiated dataclass object from model.kgcl_schema.
"""
regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
uri_list = re.findall(regex, input)
if uri_list:
# curie = curie_from_iri(uri[0].replace("<", "").replace(">",""))
for _, uri in enumerate(uri_list):
pref, i = parse_iri(uri)
pref = get_preferred_prefix(pref)
curie = curie_to_str(pref, i)
input = input.replace(uri, curie)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems very hacky to replace URIs with CURIEs just so the KGCL parser works. Why is that necessary?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When adding synonyms, apparently only CURIEs are expected, hence the conversion from URI => CURIE. This was just a judgement call; there could be a better way of handling this. Ideas?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know how that parser works, but if CURIEs are expected, wouldn't it be better to make sure the input is "valid kgcl" using a validator rather than trying to hack anything? The user can be forced to provide CURIEs if that is what the spec wants.

In any case, I would for sure factor this out into its own function, def replace_uris_with_curies_in_kgcl_command(input), document it, etc. I'm not convinced this is great, but if it's necessary for the input to be parseable, then so be it.

tree = kgcl_parser.parse(input)
id = "kgcl_change_id_" + str(next(id_gen))

Expand Down
4 changes: 2 additions & 2 deletions src/kgcl_schema/grammar/render_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ def render_entity(entity, rdf_type):
if rdf_type is None:
return entity
elif rdf_type == "uri":
return "<" + entity + ">"
return entity
elif rdf_type == "label":
if "'" in entity:
# TODO: replacing quotes with backticks
# is only a temporary workaround
entity = entity.replace("'", "`")
return "'" + entity + "'"
return entity
elif rdf_type == "literal":
# TODO: test this
if '"' not in entity:
Expand Down
22 changes: 11 additions & 11 deletions tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@

TODO_TOKEN = "TODO"
PART_OF = "BFO:0000050"
PART_OF_URI = "<http://purl.obolibrary.org/obo/BFO_0000050>"
PART_OF_URI = "http://purl.obolibrary.org/obo/BFO_0000050"
IS_A = "rdfs:subClassOf"
IS_A_URI = "<http://www.w3.org/2000/01/rdf-schema#subClassOf>"
IS_A_URI = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
NUCLEUS = "GO:0005634"
NUCLEUS_URI = "<http://purl.obolibrary.org/obo/GO_0005634>"
NUCLEUS_URI = "http://purl.obolibrary.org/obo/GO_0005634"
MITOCHONDRION = "GO:0005739"
MITOCHONDRION_URI = "<http://purl.obolibrary.org/obo/GO_0005739>"
MITOCHONDRION_URI = "http://purl.obolibrary.org/obo/GO_0005739"
IMBO = "GO:0043231"
IMBO_URI = "<http://purl.obolibrary.org/obo/GO_0043231>"
IMBO_URI = "http://purl.obolibrary.org/obo/GO_0043231"
NUCLEAR_ENVELOPE = "GO:0005635"
NUCLEAR_ENVELOPE_URI = "<http://purl.obolibrary.org/obo/GO_0005635>"
NUCLEAR_ENVELOPE_URI = "http://purl.obolibrary.org/obo/GO_0005635"
NEW_TERM = "GO:9999999"
NEW_TERM_URI = "<http://purl.obolibrary.org/obo/GO_9999999>"
NEW_TERM_URI = "http://purl.obolibrary.org/obo/GO_9999999"
RESPONSE_TO_UV = "GO:0009411"
RESPONSE_TO_UV_URI = "<http://purl.obolibrary.org/obo/GO_0009411>"
RESPONSE_TO_UV_URI = "http://purl.obolibrary.org/obo/GO_0009411"

UID = "CHANGE:001"
TERM = "GO:123"
Expand Down Expand Up @@ -124,10 +124,10 @@
#f"create node {NEW_TERM_URI} 'foo'",
TODO_TOKEN,
NodeCreation(id=UID,
node_id=NEW_TERM_URI, ## TODO: remove this
about_node=NEW_TERM_URI,
node_id=NEW_TERM, ## TODO: remove this
about_node=NEW_TERM,
name="'foo'",
about_node_representation='uri'),
about_node_representation='curie'),
None
),
(
Expand Down