Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prepare adding shacl import #148

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ Importing from alternative modeling frameworks
See :ref:`importers`

* OWL (but this only works for schema-style OWL)
* SHACL (in progress)
* JSON-Schema
* SQL DDL

In future other frameworks will be supported
In the future, other frameworks will be supported.

Annotating schemas
------------------
Expand Down
10 changes: 10 additions & 0 deletions docs/packages/importers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ Use robot to convert ahead of time:
robot convert -i schemaorg.ttl -o schemaorg.ofn
schemauto import-owl schemaorg.ofn

Importing from SHACL
--------------------

You can import from a SHACL shapes file.

.. code-block::

schemauto import-shacl tests/resources/test_shacl_simple.ttl


Importing from SQL
------------------

Expand Down
28 changes: 28 additions & 0 deletions schema_automator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,34 @@ def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
schema = sie.convert(rdfsfile, **args)
write_schema(schema, output)

@main.command()
@click.argument('shaclfile')
@output_option
@schema_name_option
@click.option('--input-type',
              default='turtle',
              help="Input format, eg. turtle")
@click.option('--identifier', '-I', help="Slot to use as identifier")
@click.option('--model-uri', help="Model URI prefix")
@click.option('--metamodel-mappings',
              help="Path to metamodel mappings YAML dictionary")
def import_shacl(shaclfile, output, metamodel_mappings, input_type, **args):
    """
    Import a SHACL profile to LinkML

    Example:

        schemauto import-shacl mymodel.shacl.ttl -o mymodel.yaml
    """
    # Option notes:
    # * `-I` used to be declared on both --input-type and --identifier; a short
    #   flag can only belong to one option, so it is kept on --identifier only.
    # * --output is not redeclared here.
    #   NOTE(review): this assumes `output_option` already provides
    #   `--output`/`-o` (it decorates the `output` parameter) -- confirm.
    mappings_obj = None
    if metamodel_mappings:
        with open(metamodel_mappings) as f:
            mappings_obj = yaml.safe_load(f)
    sie = ShaclImportEngine(initial_metamodel_mappings=mappings_obj)
    # The engine's parameter is called `format`; forwarding --input-type under
    # its click name (`input_type`) would land in **kwargs and be ignored.
    # NOTE(review): the value contributed by `schema_name_option` is still
    # forwarded untouched in **args; check that its name matches a parameter
    # of `convert`.
    schema = sie.convert(shaclfile, format=input_type, **args)
    write_schema(schema, output)

@main.command()
@click.argument('rdffile')
@output_option
Expand Down
1 change: 1 addition & 0 deletions schema_automator/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from schema_automator.importers.dosdp_import_engine import DOSDPImportEngine
from schema_automator.importers.frictionless_import_engine import FrictionlessImportEngine
from schema_automator.importers.cadsr_import_engine import CADSRImportEngine
from schema_automator.importers.shacl_import_engine import ShaclImportEngine
227 changes: 227 additions & 0 deletions schema_automator/importers/shacl_import_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import logging

from linkml.utils.schema_builder import SchemaBuilder
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import (
SchemaDefinition,
SlotDefinition,
ClassDefinition,
)


# schema.org namespace spelled with the plain-http scheme; used side by side
# with rdflib's `SDO` so that both spellings of the vocabulary are recognised.
# NOTE(review): presumably `SDO` is the https form -- confirm against rdflib.
HTTP_SDO = Namespace("http://schema.org/")

# RDF types whose instances are treated as classes.
_CLASS_METACLASS_IRIS = [RDFS.Class, OWL.Class, SKOS.Concept]

# RDF types whose instances are treated as slots.
_SLOT_METACLASS_IRIS = [
    RDF.Property,
    OWL.ObjectProperty,
    OWL.DatatypeProperty,
    OWL.AnnotationProperty,
]

# Default mapping from a LinkML metamodel element name (or definition class
# name) to the RDF terms that express it. Key order is preserved because the
# import engine iterates this dict to build its reverse lookup.
DEFAULT_METAMODEL_MAPPINGS = {
    "is_a": [RDFS.subClassOf, SKOS.broader],
    "domain_of": [HTTP_SDO.domainIncludes, SDO.domainIncludes],
    "rangeIncludes": [HTTP_SDO.rangeIncludes, SDO.rangeIncludes],
    "exact_mappings": [OWL.sameAs, HTTP_SDO.sameAs],
    ClassDefinition.__name__: _CLASS_METACLASS_IRIS,
    SlotDefinition.__name__: _SLOT_METACLASS_IRIS,
}


@dataclass
class ShaclImportEngine(ImportEngine):
    """
    An ImportEngine that takes SHACL and converts it to a LinkML schema

    NOTE(review): the conversion below is driven by the RDFS/OWL/SKOS/schema.org
    terms in ``DEFAULT_METAMODEL_MAPPINGS``; no ``sh:`` vocabulary is handled in
    this class yet.
    """

    # Name -> original IRI for every name produced by `iri_to_name`; reset at
    # the start of each `convert` call.
    mappings: dict = None
    # Optional caller-supplied additions to the default mappings
    # (metamodel element name -> IRI or list of IRIs).
    initial_metamodel_mappings: Dict[str, List[URIRef]] = None
    # Forward lookup: metamodel element name -> RDF terms. Built in __post_init__.
    metamodel_mappings: Dict[str, List[URIRef]] = None
    # Reverse lookup: RDF term -> metamodel element names. Built in __post_init__.
    reverse_metamodel_mappings: Dict[URIRef, List[str]] = None
    # Un-annotated on purpose: these two are plain class attributes, not
    # dataclass fields, so they do not take part in the constructor signature.
    include_unmapped_annotations = False
    metamodel = None
    metamodel_schemaview: SchemaView = None
    # Names of the slots a ClassDefinition may carry; used to filter triples.
    classdef_slots: List[str] = None

    def __post_init__(self):
        """
        Builds the forward and reverse metamodel lookups.

        Sources, in order: ``DEFAULT_METAMODEL_MAPPINGS``, then
        ``initial_metamodel_mappings`` (if given), then the mappings declared
        on the LinkML metamodel elements themselves.
        """
        sv = package_schemaview("linkml_runtime.linkml_model.meta")
        self.metamodel_schemaview = sv
        self.metamodel = sv
        self.metamodel_mappings = defaultdict(list)
        self.reverse_metamodel_mappings = defaultdict(list)
        for k, vs in DEFAULT_METAMODEL_MAPPINGS.items():
            self.metamodel_mappings[k].extend(vs)
            for v in vs:
                self.reverse_metamodel_mappings[v].append(k)
        if self.initial_metamodel_mappings:
            for k, vs in self.initial_metamodel_mappings.items():
                if not isinstance(vs, list):
                    vs = [vs]
                # Values loaded from YAML are plain strings. rdflib terms only
                # compare equal to terms of the same type, so both lookups
                # must hold URIRef objects.
                uris = [URIRef(v) for v in vs]
                self.metamodel_mappings[k].extend(uris)
                for v in uris:
                    self.reverse_metamodel_mappings[v].append(k)
                    logging.info(f"Adding mapping {k} -> {v}")
        for e in sv.all_elements().values():
            mappings = []
            for ms in sv.get_mappings(e.name, expand=True).values():
                for m in ms:
                    uri = URIRef(m)
                    mappings.append(uri)
                    self.reverse_metamodel_mappings[uri].append(e.name)
            # Extend rather than assign: assigning discarded the default and
            # user-supplied entries for element names such as "is_a" or
            # "domain_of" that had been added above.
            self.metamodel_mappings[e.name].extend(mappings)
        self.classdef_slots = [
            s.name for s in sv.class_induced_slots(ClassDefinition.class_name)
        ]
        # Earlier code stored this list under the misspelt attribute name only;
        # keep it as an alias so existing readers of that attribute still work.
        self.defclass_slots = self.classdef_slots

    def convert(
        self,
        file: str,
        name: str = None,
        format="turtle",
        default_prefix: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs,
    ) -> SchemaDefinition:
        """
        Converts a SHACL file to a LinkML schema

        :param file: path (or other source accepted by rdflib) of the RDF input
        :param name: schema name; falls back to default_prefix, then "example"
        :param format: rdflib parser format of the input
        :param default_prefix: default prefix of the schema; falls back to name
        :param model_uri: URI bound to default_prefix when the input does not
            already declare that prefix
        :param identifier: if given, name of an identifier slot that is added to
            every class that has neither is_a nor mixins
        :param kwargs: accepted and ignored
        :return: the generated schema
        """
        self.mappings = {}
        g = Graph()
        g.parse(file, format=format)
        if name is not None and default_prefix is None:
            default_prefix = name
        if name is None:
            name = default_prefix
        if name is None:
            name = "example"
        sb = SchemaBuilder(name=name)
        sb.add_defaults()
        schema = sb.schema
        for k, v in g.namespaces():
            # Compare as str: an rdflib URIRef never compares equal to a plain
            # string, so the un-converted test skipped every "schema" binding,
            # including the http://schema.org/ one it meant to keep.
            if k == "schema" and str(v) != "http://schema.org/":
                continue
            sb.add_prefix(k, v, replace_if_present=True)
        if default_prefix is not None:
            schema.default_prefix = default_prefix
            if default_prefix not in schema.prefixes:
                sb.add_prefix(default_prefix, model_uri, replace_if_present=True)
            schema.id = schema.prefixes[default_prefix].prefix_reference
        cls_slots = defaultdict(list)
        props = []
        for rdfs_property_metaclass in self._rdfs_metamodel_iri(
            SlotDefinition.__name__
        ):
            for p in g.subjects(RDF.type, rdfs_property_metaclass):
                props.append(p)
        # implicit properties: anything that is the subject of a domain/range
        # statement. This needs the forward lookup (name -> IRIs); the reverse
        # lookup is keyed by IRI and yields nothing for these names.
        for metap in (
            self.metamodel_mappings["domain_of"]
            + self.metamodel_mappings["rangeIncludes"]
        ):
            for p, _, _o in g.triples((None, metap, None)):
                props.append(p)
        for p in set(props):
            sn = self.iri_to_name(p)
            init_dict = self._dict_for_subject(g, p)
            if "domain_of" in init_dict:
                for x in init_dict["domain_of"]:
                    cls_slots[x].append(sn)
                del init_dict["domain_of"]
            if "rangeIncludes" in init_dict:
                init_dict["any_of"] = [{"range": x} for x in init_dict["rangeIncludes"]]
                del init_dict["rangeIncludes"]
            slot = SlotDefinition(sn, **init_dict)
            slot.slot_uri = str(p.n3(g.namespace_manager))
            sb.add_slot(slot)
        rdfs_classes = []
        for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__):
            for s in g.subjects(RDF.type, rdfs_class_metaclass):
                rdfs_classes.append(s)
        # implicit classes: both ends of every subClassOf statement
        for metap in [RDFS.subClassOf]:
            for s, _, o in g.triples((None, metap, None)):
                rdfs_classes.append(s)
                rdfs_classes.append(o)
        for s in set(rdfs_classes):
            cn = self.iri_to_name(s)
            init_dict = self._dict_for_subject(g, s)
            c = ClassDefinition(cn, **init_dict)
            c.slots = cls_slots.get(cn, [])
            c.class_uri = str(s.n3(g.namespace_manager))
            sb.add_class(c)
        if identifier is not None:
            id_slot = SlotDefinition(identifier, identifier=True, range="uriorcurie")
            schema.slots[identifier] = id_slot
            for c in schema.classes.values():
                # Only root classes get the identifier slot directly.
                if not c.is_a and not c.mixins:
                    if identifier not in c.slots:
                        c.slots.append(identifier)
        return schema

    def _dict_for_subject(self, g: Graph, s: URIRef) -> Dict[str, Any]:
        """
        Looks up triples for a subject and converts to dict using linkml keys.

        Predicates that have no metamodel mapping, or whose mapped name is not
        one of ``classdef_slots``, are dropped.

        :param g: graph to read from
        :param s: subject whose outgoing triples are converted
        :return: keyword arguments for a ClassDefinition / SlotDefinition
        """
        init_dict = {}
        for pp, obj in g.predicate_objects(s):
            if pp == RDF.type:
                continue
            metaslot_name = self._element_from_iri(pp)
            logging.debug(f"Mapping {pp} -> {metaslot_name}")
            # Test for "no mapping" first; placed after the membership filter
            # this branch (and its warning) could never be reached.
            if metaslot_name is None:
                logging.warning(f"Not mapping {pp}")
                continue
            if metaslot_name not in self.classdef_slots:
                continue
            if metaslot_name == "name":
                # "name" is the positional identifier of the definition itself,
                # so a mapped name value is stored as the title instead.
                metaslot_name = "title"
            metaslot = self.metamodel.get_slot(metaslot_name)
            v = self._object_to_value(obj, metaslot=metaslot)
            metaslot_name_safe = underscore(metaslot_name)
            if not metaslot or metaslot.multivalued:
                if metaslot_name_safe not in init_dict:
                    init_dict[metaslot_name_safe] = []
                init_dict[metaslot_name_safe].append(v)
            else:
                init_dict[metaslot_name_safe] = v
        return init_dict

    def _rdfs_metamodel_iri(self, name: str) -> List[URIRef]:
        """Returns the RDF terms mapped to a metamodel element name (may be empty)."""
        return self.metamodel_mappings.get(name, [])

    def _element_from_iri(self, iri: URIRef) -> str:
        """
        Returns the first metamodel element name mapped to an IRI.

        :param iri: predicate or type IRI to look up
        :return: element name, or None when the IRI has no mapping
        """
        r = self.reverse_metamodel_mappings.get(iri, [])
        if len(r) > 0:
            if len(r) > 1:
                logging.debug(f"Multiple mappings for {iri}: {r}")
            return r[0]
        return None

    def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any:
        """
        Converts an RDF object term to the value stored in the schema.

        :param obj: object of a triple
        :param metaslot: metamodel slot the value is destined for, if known
        :return: full IRI string for URI-ranged slots, short name for other
            IRIs, Python value for literals, otherwise the term unchanged
        """
        if isinstance(obj, URIRef):
            # metaslot is optional; without the guard a missing metaslot raised
            # AttributeError on `.range`.
            if metaslot is not None and metaslot.range in ("uriorcurie", "uri"):
                return str(obj)
            return self.iri_to_name(obj)
        if isinstance(obj, Literal):
            return obj.value
        return obj

    def iri_to_name(self, v: URIRef) -> str:
        """
        Shortens an IRI to a name and records the name -> IRI pair in ``mappings``.

        :param v: IRI to shorten
        :return: the short name
        """
        n = self._as_name(v)
        # Compare as str: a URIRef never equals a plain string, which made the
        # "did the name change" test always true.
        if n != str(v):
            self.mappings[n] = v
        return n

    def _as_name(self, v: URIRef):
        """Returns the part of the IRI after the last "#", else "/", else ":"."""
        v = str(v)
        # Separators are tried in priority order, not by position in the string.
        for sep in ["#", "/", ":"]:
            if sep in v:
                return v.split(sep)[-1]
        return v
1 change: 1 addition & 0 deletions tests/resources/test_shacl_simple.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# tbw
41 changes: 41 additions & 0 deletions tests/test_importers/test_shacl_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
import pytest

from linkml_runtime import SchemaView

from schema_automator.importers.shacl_import_engine import ShaclImportEngine
from linkml.generators.yamlgen import YAMLGenerator

from schema_automator.utils.schemautils import write_schema
from tests import INPUT_DIR, OUTPUT_DIR

# TODO - Write tests (this is a copy of test_rdfs_importer)

# Input fixture and output path; both are inherited from the RDFS importer test
# this module was copied from.
# NOTE(review): presumably these should point at a SHACL shapes file once one
# exists -- confirm.
REPRO = os.path.join(INPUT_DIR, 'reproschema.ttl')
OUTSCHEMA = os.path.join(OUTPUT_DIR, 'reproschema-from-ttl.yaml')



def test_from_shacl():
    """Test Shacl conversion."""
    # Constructing the engine is the only part exercised for now; it builds the
    # metamodel lookups and so acts as a smoke test.
    oie = ShaclImportEngine()

    # Report the unfinished part as skipped. A bare `return` here made the test
    # count as passed while none of the assertions below ever ran.
    pytest.skip("SHACL conversion assertions not written yet (copied from test_rdfs_importer)")
    schema = oie.convert(REPRO, default_prefix='reproschema', identifier='id')
    write_schema(schema, OUTSCHEMA)
    # roundtrip
    s = YAMLGenerator(OUTSCHEMA).serialize()
    print(s[0:100])
    sv = SchemaView(OUTSCHEMA)
    activity = sv.get_class("Activity")
    assert activity
    assert activity.name == "Activity"
    assert activity.is_a == "CreativeWork"
    slots = sv.class_induced_slots(activity.name)
    assert len(slots) == 1
    slot = slots[0]
    assert slot.name == "id"




Loading