diff --git a/.gitignore b/.gitignore index 8e0b33eff0..6451bd59f9 100644 --- a/.gitignore +++ b/.gitignore @@ -197,7 +197,9 @@ composite-metazoan.* composite-metazoan-basic.* composite-vertebrate.* composite-vertebrate-basic.* +composite-lifestages.* collected-metazoan.* +collected-lifestages.* ext.json ext.obo ext.owl diff --git a/src/ontology/Makefile b/src/ontology/Makefile index 1da07760ef..dce85f2588 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -1,7 +1,7 @@ # ---------------------------------------- # Makefile for uberon # Generated using ontology-development-kit -# ODK Version: v1.5.2 +# ODK Version: v1.5.4 # ---------------------------------------- # IMPORTANT: DO NOT EDIT THIS FILE. To override default make goals, use uberon.Makefile instead @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= 60e47a069bd32a71a48c480dcf88b75d116c5eef7d1f9a55073500ff79968b28 +CONFIG_HASH= 2280340b1810b2a972296e0c511d5e87711793855300136263ac0b8801de9cf3 # ---------------------------------------- @@ -47,7 +47,7 @@ REPORT_PROFILE_OPTS = --profile $(ROBOT_PROFILE) OBO_FORMAT_OPTIONS = SPARQL_VALIDATION_CHECKS = equivalent-classes owldef-self-reference illegal-annotation-property taxon-range orcid-contributor obsolete-replaced_by xrefs-mesh-pattern label-synonym-polysemy id-format SPARQL_EXPORTS = basic-report -ODK_VERSION_MAKEFILE = v1.5.2 +ODK_VERSION_MAKEFILE = v1.5.4 TODAY ?= $(shell date +%Y-%m-%d) OBODATE ?= $(shell date +'%d:%m:%Y %H:%M') @@ -65,13 +65,13 @@ PATTERN_RELEASE_FILES= $(PATTERNDIR)/definitions.owl $(PATTERNDIR)/pattern. 
MAPPINGDIR= ../mappings MAPPING_TESTER= sssom validate SSSOMPY= sssom -MAPPINGS= fbbt cl biomappings uberon-local uberon import-corrections +MAPPINGS= fbbt cl sslso biomappings uberon-local uberon import-corrections MAPPING_RELEASE_FILES= $(foreach n,$(MAPPINGS), $(MAPPINGDIR)/$(n).sssom.tsv) FORMATS = $(sort owl obo json owl) FORMATS_INCL_TSV = $(sort $(FORMATS) tsv) -RELEASE_ARTEFACTS = $(sort $(ONT)-base $(ONT)-full $(ONT)-simple $(ONT)-basic collected-metazoan composite-metazoan composite-metazoan-basic composite-vertebrate composite-vertebrate-basic common-anatomy ) +RELEASE_ARTEFACTS = $(sort $(ONT)-base $(ONT)-full $(ONT)-simple $(ONT)-basic collected-metazoan composite-metazoan composite-metazoan-basic composite-vertebrate composite-vertebrate-basic collected-lifestages composite-lifestages common-anatomy ) ifeq ($(ODK_DEBUG),yes) ODK_DEBUG_FILE = debug.log @@ -154,7 +154,7 @@ $(ROBOT_PLUGINS_DIRECTORY)/%.jar: # Specific rules for supplementary plugins defined in configuration $(ROBOT_PLUGINS_DIRECTORY)/uberon.jar: - curl -L -o $@ https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.1/uberon.jar + curl -L -o $@ https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.2/uberon.jar # ---------------------------------------- @@ -202,7 +202,7 @@ all_subsets: $(SUBSET_FILES) # ---------------------------------------- -MAPPINGS = fbbt cl biomappings uberon-local uberon import-corrections +MAPPINGS = fbbt cl sslso biomappings uberon-local uberon import-corrections MAPPING_FILES = $(patsubst %, $(MAPPINGDIR)/%.sssom.tsv, $(MAPPINGS)) @@ -756,6 +756,10 @@ $(MAPPINGDIR)/fbbt.sssom.tsv: $(MAPPINGDIR)/cl.sssom.tsv: test -f $@ +# This mappingset is manually curated, so we only check that the file actually exists. +$(MAPPINGDIR)/sslso.sssom.tsv: + test -f $@ + # This mappingset is manually curated, so we only check that the file actually exists. 
$(MAPPINGDIR)/biomappings.sssom.tsv: test -f $@ @@ -836,6 +840,18 @@ composite-vertebrate-basic.json: composite-vertebrate-basic.owl $(ROBOT) annotate --input $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \ convert --check false -f json -o $@.tmp.json &&\ mv $@.tmp.json $@ +collected-lifestages.obo: collected-lifestages.owl + $(ROBOT) convert --input $< --check false -f obo $(OBO_FORMAT_OPTIONS) -o $@.tmp.obo && grep -v ^owl-axioms $@.tmp.obo > $@ && rm $@.tmp.obo +collected-lifestages.json: collected-lifestages.owl + $(ROBOT) annotate --input $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \ + convert --check false -f json -o $@.tmp.json &&\ + mv $@.tmp.json $@ +composite-lifestages.obo: composite-lifestages.owl + $(ROBOT) convert --input $< --check false -f obo $(OBO_FORMAT_OPTIONS) -o $@.tmp.obo && grep -v ^owl-axioms $@.tmp.obo > $@ && rm $@.tmp.obo +composite-lifestages.json: composite-lifestages.owl + $(ROBOT) annotate --input $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \ + convert --check false -f json -o $@.tmp.json &&\ + mv $@.tmp.json $@ common-anatomy.obo: common-anatomy.owl $(ROBOT) convert --input $< --check false -f obo $(OBO_FORMAT_OPTIONS) -o $@.tmp.obo && grep -v ^owl-axioms $@.tmp.obo > $@ && rm $@.tmp.obo common-anatomy.json: common-anatomy.owl @@ -932,6 +948,12 @@ composite-vertebrate.owl: composite-vertebrate-basic.owl: echo "ERROR: You have configured a custom release artefact ($@); this release artefact needs to be define in uberon.Makefile!" && false +collected-lifestages.owl: + echo "ERROR: You have configured a custom release artefact ($@); this release artefact needs to be define in uberon.Makefile!" && false + +composite-lifestages.owl: + echo "ERROR: You have configured a custom release artefact ($@); this release artefact needs to be define in uberon.Makefile!" 
&& false + common-anatomy.owl: echo "ERROR: You have configured a custom release artefact ($@); this release artefact needs to be define in uberon.Makefile!" && false # ---------------------------------------- diff --git a/src/ontology/bridge/bridges.dispatch b/src/ontology/bridge/bridges.dispatch index 580768722d..0514d7562d 100644 --- a/src/ontology/bridge/bridges.dispatch +++ b/src/ontology/bridge/bridges.dispatch @@ -253,3 +253,6 @@ dc-contributor: Melissa Haendel [zfs-uberon] file: uberon-bridge-to-zfs.owl + +[sslso-uberon] +file: uberon-bridge-to-sslso.owl diff --git a/src/ontology/bridge/bridges.rules b/src/ontology/bridge/bridges.rules new file mode 100644 index 0000000000..7b7a41cc9d --- /dev/null +++ b/src/ontology/bridge/bridges.rules @@ -0,0 +1,125 @@ +# SSSOM is OBO-agnostic and doesn't know about the implicit OBO prefix +# rule (PFX -> http://purl.obolibrary.org/obo/PFX_); *all* prefixes must +# be explicitly declared. +prefix AEO: +prefix BFO: +prefix BSPO: +prefix CARO: +prefix CL: +prefix GO: +prefix IAO: +prefix NCBITaxon: +prefix NCIT: +prefix OG: +prefix OGES: +prefix OIO: +prefix RO: +prefix SCTID: +prefix UBERON: +%INSERT-TAX-SPECIFIC-PREFIXES + +# Uberon may not contain declarations for all the taxa we bridge to, so +# to be on the safe side we declare all of them preventively. +%INSERT-TAX-SPECIFIC-DIRECTIVES + +# The relation to use in the existential restrictions between Uberon/CL +# terms and their taxon-specific equivalents: part_of (BFO:0000050) for +# most cases, except for life stage terms where we use occurs_in +# (BFO:0000066) instead. +set_var("TAXREL", BFO:0000050); +is_a(%{object_id}, UBERON:0000104) -> set_var("TAXREL", BFO:0000066); +is_a(%{object_id}, UBERON:0000105) -> set_var("TAXREL", BFO:0000066); + +# Make sure UBERON and CL classes are on the object side. +subject==UBERON:* || subject==CL:* -> invert(); + +# Ignore any mapping to something else than UBERON or CL. 
+!(object==UBERON:* || object==CL:*) -> stop();
+
+# Ignore any mapping to an inexistent or obsolete UBERON/CL class.
+!exists(%{object_id}) -> stop();
+
+# Do not allow a same foreign term to be mapped with more than one UBERON/CL class.
+!cardinality==*:1 -> stop();
+
+# Ignore any mapping with a predicate other than those four.
+!(predicate==skos:exactMatch
+    || predicate==skos:narrowMatch
+    || predicate==skos:broadMatch
+    || predicate==semapv:crossSpeciesExactMatch) -> stop();
+
+# Generate cross-reference annotations (for backward compatibility).
+[xrefs] predicate==* -> annotate(%{object_id}, OIO:hasDbXref, "%{subject_id|short}");
+
+# Bridging axiom-generating rules.
+# The highly repetitive nature of those rules stems from the fact that
+# we need separate rules for each foreign ontology and for both Uberon
+# and CL, if we want to be able to send the corresponding axioms to
+# separate bridge files.
+
+# Taxon-neutral bridges.
+[aeo-uberon] subject==AEO:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (AEO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+[aeo-cl] subject==AEO:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (AEO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+
+[bfo-uberon] subject==BFO:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (BFO)");
+    predicate==skos:narrowMatch -> create_axiom("%object_id SubClassOf: %subject_id");
+}
+[bfo-cl] subject==BFO:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (BFO)");
+    predicate==skos:narrowMatch -> create_axiom("%object_id SubClassOf: %subject_id");
+}
+
+[caro-uberon] subject==CARO:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (CARO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+[caro-cl] subject==CARO:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (CARO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+
+[go-uberon] subject==GO:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (GO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+[go-cl] subject==GO:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (GO)");
+    predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id");
+}
+
+[ncit-uberon] subject==NCIT:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (NCIT)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+[ncit-cl] subject==NCIT:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (NCIT)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+
+[oges-uberon] subject==OGES:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (OGES)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+[oges-cl] subject==OGES:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (OGES)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+
+[sctid-uberon] subject==SCTID:* object==UBERON:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (SCTID)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+[sctid-cl] subject==SCTID:* object==CL:* {
+    predicate==* -> annotate(%{subject_id}, IAO:0000589, "%{object_label} (SCTID)");
+    predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id");
+}
+
+# Taxon-specific bridges.
+%INSERT-TAX-SPECIFIC-BRIDGES diff --git a/src/ontology/bridge/bridges.rules.m4 b/src/ontology/bridge/bridges.rules.m4 deleted file mode 100644 index 8fbdbdbdd7..0000000000 --- a/src/ontology/bridge/bridges.rules.m4 +++ /dev/null @@ -1,112 +0,0 @@ -# SSSOM is OBO-agnostic and doesn't know about the implicit OBO prefix -# rule (PFX -> http://purl.obolibrary.org/obo/PFX_); *all* prefixes must -# be explicitly declared. -prefix AEO: -prefix BFO: -prefix BSPO: -prefix CARO: -prefix CL: -prefix DHBA: -prefix EHDAA2: -prefix EMAPA: -prefix FBbt: -prefix FBdv: -prefix FMA: -prefix GO: -prefix HAO: -prefix HBA: -prefix HsapDv: -prefix IAO: -prefix KUPO: -prefix MA: -prefix MmusDv: -prefix NCBITaxon: -prefix NCIT: -prefix OG: -prefix OGES: -prefix OIO: -prefix PBA: -prefix RO: -prefix SCTID: -prefix SPD: -prefix TADS: -prefix TGMA: -prefix UBERON: -prefix WBbt: -prefix WBls: -prefix XAO: -prefix ZFA: -prefix ZFS: - -# The relation to use in the existential restrictions between Uberon/CL -# terms and their taxon-specific equivalents: part_of (BFO:0000050) for -# most cases, except for life stage terms where we use occurs_in -# (BFO:0000066) instead. -set_var("TAXREL", BFO:0000050); -set_var("TAXREL", BFO:0000066, "%object_id is_a UBERON:0000104"); -set_var("TAXREL", BFO:0000066, "%object_id is_a UBERON:0000105"); - -# Those are not present in Uberon, we must declare them before we can -# use them in bridges. -declare_class(NCBITaxon:6893, NCBITaxon:6939, NCBITaxon:44484); - -# Make sure UBERON and CL classes are on the object side. -subject==UBERON:* || subject==CL:* -> invert(); - -# Ignore any mapping to something else than UBERON or CL. -!(object==UBERON:* || object==CL:*) -> stop(); - -# Ignore any mapping to an inexistent or obsolete UBERON/CL class. -predicate==* -> check_object_existence(); - -# Do not allow a same foreign term to be mapped with more than one UBERON/CL class. -!cardinality==*:1 -> stop(); - -# Ignore any mapping with a predicate other than those four. 
-!(predicate==skos:exactMatch - || predicate==skos:narrowMatch - || predicate==skos:broadMatch - || predicate==semapv:crossSpeciesExactMatch) -> stop(); - -# Generate cross-reference annotations (for backward compatibility). -[xrefs] predicate==* -> annotate_object(OIO:hasDbXref, "%subject_curie"); - -# Bridging axiom-generating rules. -# The highly repetitive nature of those rules stems from the fact that -# we need separate rules for each foreign ontology and for both Uberon -# and CL, if we want to be able to send the corresponding axioms to -# separate bridge files. - -# Taxon-neutral bridges. -BRIDGE(AEO) -BRIDGE(BFO) -BRIDGE(BSPO) -BRIDGE(CARO) -BRIDGE(GO) -BRIDGE(NCIT) -BRIDGE(OG) -BRIDGE(OGES) -BRIDGE(SCTID) - -# Taxon-specific bridges. -BRIDGE(DHBA, NCBITaxon:9606, human) -BRIDGE(EHDAA2, NCBITaxon:9606, embryonic human) -BRIDGE(EMAPA, NCBITaxon:10090, embryonic mouse) -BRIDGE(FBbt, NCBITaxon:7227, drosophila) -BRIDGE(FBdv, NCBITaxon:7227, drosophila) -BRIDGE(FMA, NCBITaxon:9606, post-embryonic human) -BRIDGE(HAO, NCBITaxon:7399, hymenoptera) -BRIDGE(HBA, NCBITaxon:9606, human) -BRIDGE(HsapDv, NCBITaxon:9606, human) -BRIDGE(KUPO, NCBITaxon:9606, human) -BRIDGE(MA, NCBITaxon:10090, post-embryonic mouse) -BRIDGE(MmusDv, NCBITaxon:10090, mouse) -BRIDGE(PBA, NCBITaxon:9443, primates) -BRIDGE(SPD, NCBITaxon:6893, spider) -BRIDGE(TADS, NCBITaxon:6939, hardbacked ticks) -BRIDGE(TGMA, NCBITaxon:44484, mosquitos) -BRIDGE(WBbt, NCBITaxon:6237, worm) -BRIDGE(WBls, NCBITaxon:6237, worm) -BRIDGE(XAO, NCBITaxon:8353, xenopus) -BRIDGE(ZFA, NCBITaxon:7954, zebrafish) -BRIDGE(ZFS, NCBITaxon:7954, zebrafish) diff --git a/src/ontology/bridge/collected-lifestages-hdr.owl b/src/ontology/bridge/collected-lifestages-hdr.owl new file mode 100644 index 0000000000..8e754edc3e --- /dev/null +++ b/src/ontology/bridge/collected-lifestages-hdr.owl @@ -0,0 +1,51 @@ + + + + Uberon editors + This is an ontology that collects all of Uberon "life stages" terms, plus the terms from all the 
taxon-specific life stages ontologies.. + Collected life stages ontology for metazoans + Uberon ontologies with the prefix ‘collected’ are ontologies that import the core Uberon ontology together with one or more external, typically species-centric, ontologies plus bridging axioms that connect Uberon to those external ontologies. Uberon ontologies with the prefix ‘composite’ are variations of the corresponding ‘collected’ ontologies with additional reasoning performed. + http://genomebiology.com/2012/13/1/R5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/ontology/config/taxa.yaml b/src/ontology/config/taxa.yaml new file mode 100644 index 0000000000..b628b1e756 --- /dev/null +++ b/src/ontology/config/taxa.yaml @@ -0,0 +1,225 @@ +defaults: + compositing: + unfold_over: + - BFO:0000050 + - BFO:0000066 +species: + - taxon_id: NCBITaxon:9606 + label: human + compositing: + preserve: + - RO:0002202 + - RO:0002496 + - RO:0002497 + - BFO:0000051 + bridging: + - prefix: DHBA + namespace: https://purl.brain-bican.org/ontology/dhbao/DHBA_ + - prefix: HBA + namespace: https://purl.brain-bican.org/ontology/hbao/HBA_ + - prefix: EHDAA2 + label: embryonic human + - prefix: HsapDv + - prefix: FMA + namespace: http://purl.org/sig/ont/fma/fma + label: post-embryonic human + - prefix: KUPO + - taxon_id: NCBITaxon:10090 + label: mouse + compositing: + preserve: + - RO:0002202 + - RO:0002496 + - RO:0002497 + - BFO:0000051 + bridging: + - prefix: EMAPA + label: embryonic mouse + - prefix: MA + label: post-embryonic mouse + - prefix: MmusDv + - taxon_id: NCBITaxon:7227 + label: D melanogaster + bridging: + - prefix: FBbt + - prefix: FBdv + - taxon_id: NCBITaxon:7399 + label: hymenoptera + bridging: + - prefix: HAO + - taxon_id: NCBITaxon:9443 + label: primate + bridging: + - prefix: PBA + namespace: https://purl.brain-bican.org/ontology/pbao/PBA_ + - taxon_id: NCBITaxon:6893 + label: spider + bridging: + - prefix: SPD + - taxon_id: NCBITaxon:6939 + label: 
hardbacked ticks + bridging: + - prefix: TADS + - taxon_id: NCBITaxon:44484 + label: mosquitos + bridging: + - prefix: TGMA + - taxon_id: NCBITaxon:6237 + label: worm + bridging: + - prefix: WBbt + - prefix: WBls + - taxon_id: NCBITaxon:8353 + label: xenopus + bridging: + - prefix: XAO + - taxon_id: NCBITaxon:7954 + label: zebrafish + bridging: + - prefix: ZFA + - prefix: ZFS + - taxon_id: NCBITaxon:28377 + label: anolis + bridging: + - prefix: AcarDv + name: sslso + - taxon_id: NCBITaxon:9913 + label: cow + bridging: + - prefix: BtauDv + name: sslso + - taxon_id: NCBITaxon:9615 + label: dog + bridging: + - prefix: CfamDv + name: sslso + - taxon_id: NCBITaxon:10141 + label: cavy + bridging: + - prefix: CporDv + name: sslso + - taxon_id: NCBITaxon:7217 + label: D ananassae + bridging: + - prefix: DanaDv + name: sslso + - taxon_id: NCBITaxon:7230 + label: D mojavensis + bridging: + - prefix: DmojDv + name: sslso + - taxon_id: NCBITaxon:7237 + label: D pseudoobscura + bridging: + - prefix: DpseDv + name: sslso + - taxon_id: NCBITaxon:7240 + label: D simulans + bridging: + - prefix: DsimDv + name: sslso + - taxon_id: NCBITaxon:7244 + label: D virilis + bridging: + - prefix: DvirDv + name: sslso + - taxon_id: NCBITaxon:7245 + label: D yakuba + bridging: + - prefix: DyakDv + name: sslso + - taxon_id: NCBITaxon:9796 + label: horse + bridging: + - prefix: EcabDv + name: sslso + - taxon_id: NCBITaxon:9365 + label: hedgehog + bridging: + - prefix: EeurDv + name: sslso + - taxon_id: NCBITaxon:9685 + label: cat + bridging: + - prefix: FcatDv + name: sslso + - taxon_id: NCBITaxon:9031 + label: chicken + bridging: + - prefix: GgalDv + name: sslso + - taxon_id: NCBITaxon:9593 + label: gorilla + bridging: + - prefix: GgorDv + name: sslso + - taxon_id: NCBITaxon:13616 + label: opossum + bridging: + - prefix: MdomDv + name: sslso + - taxon_id: NCBITaxon:9544 + label: macaque + bridging: + - prefix: MmulDv + name: sslso + - taxon_id: NCBITaxon:9258 + label: platypus + bridging: + - 
prefix: OanaDv + name: sslso + - taxon_id: NCBITaxon:9940 + label: sheep + bridging: + - prefix: OariDv + name: sslso + - taxon_id: NCBITaxon:9986 + label: rabbit + bridging: + - prefix: OcunDv + name: sslso + - taxon_id: NCBITaxon:8090 + label: medaka + bridging: + - prefix: OlatDv + name: sslso + - taxon_id: NCBITaxon:6358 + label: Platynereis + bridging: + - prefix: PdumDv + name: sslso + - taxon_id: NCBITaxon:9597 + label: bonobo + bridging: + - prefix: PpanDv + name: sslso + - taxon_id: NCBITaxon:9600 + label: orangutan + bridging: + - prefix: PpygDv + name: sslso + - taxon_id: NCBITaxon:9598 + label: chimpanzee + bridging: + - prefix: PtroDv + name: sslso + - taxon_id: NCBITaxon:10116 + label: rat + bridging: + - prefix: RnorDv + name: sslso + - taxon_id: NCBITaxon:8030 + label: salmon + bridging: + - prefix: SsalDv + name: sslso + - taxon_id: NCBITaxon:9823 + label: pig + bridging: + - prefix: SscrDv + name: sslso + - taxon_id: NCBITaxon:99883 + label: tetraodon + bridging: + - prefix: TnigDv + name: sslso diff --git a/src/ontology/uberon-odk.yaml b/src/ontology/uberon-odk.yaml index 8fe7a465fc..db5deba0a1 100644 --- a/src/ontology/uberon-odk.yaml +++ b/src/ontology/uberon-odk.yaml @@ -26,6 +26,8 @@ release_artefacts: - custom-composite-metazoan-basic - custom-composite-vertebrate - custom-composite-vertebrate-basic + - custom-collected-lifestages + - custom-composite-lifestages - custom-common-anatomy edit_format: obo import_group: @@ -113,6 +115,8 @@ sssom_mappingset_group: maintenance: manual - id: cl maintenance: manual + - id: sslso + maintenance: manual - id: biomappings maintenance: manual - id: uberon-local @@ -125,7 +129,7 @@ robot_java_args: '-Xmx20G' robot_plugins: plugins: - name: uberon - mirror_from: https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.1/uberon.jar + mirror_from: https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.2/uberon.jar robot_report: 
release_reports: False fail_on: ERROR
diff --git a/src/ontology/uberon.Makefile b/src/ontology/uberon.Makefile
index 96c3fb83f4..55b0b90458 100644
--- a/src/ontology/uberon.Makefile
+++ b/src/ontology/uberon.Makefile
@@ -291,12 +291,12 @@ mirror-caro: | $(TMPDIR)
 		$(ROBOT) convert -i $(MIRRORDIR)/caro-download.owl -o $@.tmp.owl &&\
 		mv $@.tmp.owl $(TMPDIR)/$@.owl; fi
 
-## ONTOLOGY: ssso
-.PHONY: mirror-ssso
-.PRECIOUS: $(MIRRORDIR)/ssso.owl
-mirror-ssso: | $(TMPDIR)
-	if [ $(MIR) = true ] && [ $(IMP) = true ]; then curl -L https://github.com/obophenotype/developmental-stage-ontologies/releases/latest/download/ssso-merged-uberon.obo --create-dirs -o $(TMPDIR)/mirror-ssso.obo --retry 4 --max-time 200 && \
-		$(ROBOT) convert -i $(TMPDIR)/mirror-ssso.obo -o $(TMPDIR)/$@.owl; fi
+## ONTOLOGY: sslso
+.PHONY: mirror-sslso
+.PRECIOUS: $(MIRRORDIR)/life-sslso.owl
+mirror-sslso: | $(TMPDIR)
+	if [ $(MIR) = true ] && [ $(IMP) = true ]; then curl -L https://github.com/obophenotype/developmental-stage-ontologies/releases/latest/download/life-stages-base.owl --create-dirs -o $(TMPDIR)/life-stages-download.owl --retry 4 --max-time 200 && \
+		$(ROBOT) convert -i $(TMPDIR)/life-stages-download.owl -o $(TMPDIR)/$@.owl; fi
 
 ## ONTOLOGY: mmusdv
 .PHONY: mirror-mmusdv
@@ -370,16 +370,12 @@ imports/local-allen-%.owl: mirror/allen-%.owl
 		--axioms external --preserve-structure false --trim false \
 		convert -f ofn -o $@
 
-# For the life stages ontology, by construction it includes classes from
-# all over the place, and we need to preserve most of them. We do not
-# preserve the FBdv and WBls classes however, since they are provided
-# by local-fbdv and local-wbls respectively.
-SSSO_PREFIXES = BtauDv DpseDv DsimDv GgalDv GgorDv HsapDv MdomDv MmulDv MmusDv OariDv PpanDv PtroDv RnorDv SscrDv ZFS -imports/local-ssso.owl: mirror/ssso.owl - $(ROBOT) remove -i $< \ - $(foreach pfx,$(SSSO_PREFIXES),--base-iri $(URIBASE)/$(pfx)) \ - --axioms external --preserve-structure false --trim false \ - convert -f ofn -o $@ +# For the life stages ontology, we already got a base from upstream, but +# we can't use the generic rule above as the ontology contains (by +# construction) classes from all over the place. So we just convert it +# to OFN without removing anything. +imports/local-sslso.owl: mirror/life-sslso.owl + $(ROBOT) convert -i $< -f ofn -o $@ # For the following ontologies, in addition to removing axioms about # external entities we also need to replace some old-style properties. @@ -1101,11 +1097,24 @@ COLLECTED_vertebrate_SOURCES = $(COLLECTED_tetrapod_SOURCES) \ COLLECTED_metazoan_SOURCES = $(COLLECTED_vertebrate_SOURCES) \ $(COLLECTED_drosophila_SOURCES) \ $(COLLECTED_worm_SOURCES) \ - $(IMPORTDIR)/local-ssso.owl \ + $(IMPORTDIR)/local-sslso.owl \ + $(BRIDGEDIR)/uberon-bridge-to-sslso.owl \ $(IMPORTDIR)/local-ceph.owl \ $(IMPORTDIR)/local-cteno.owl \ $(IMPORTDIR)/local-poro.owl +COLLECTED_lifestages_SOURCES = $(SUBSETDIR)/life-stages-minimal.owl \ + $(IMPORTDIR)/local-fbdv.owl \ + $(IMPORTDIR)/local-wbls.owl \ + $(IMPORTDIR)/local-mmusdv.owl \ + $(IMPORTDIR)/local-hsapdv.owl \ + $(IMPORTDIR)/local-sslso.owl \ + $(BRIDGEDIR)/uberon-bridge-to-fbdv.owl \ + $(BRIDGEDIR)/uberon-bridge-to-wbls.owl \ + $(BRIDGEDIR)/uberon-bridge-to-mmusdv.owl \ + $(BRIDGEDIR)/uberon-bridge-to-hsapdv.owl \ + $(BRIDGEDIR)/uberon-bridge-to-sslso.owl + # Composite pipeline proper # ---------------------------------------- @@ -1132,22 +1141,27 @@ collected-metazoan.owl: $(TMPDIR)/collected-metazoan.owl $(ROBOT) merge -i $< $(COMPOSITE_STRIPPING_COMMAND) \ annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ +# Step 1c: collected-lifestages is 
special in that it should not +# include Uberon and CL (only the life-stages-minimal subset). +$(TMPDIR)/collected-lifestages.owl: $(COLLECTED_lifestages_SOURCES) + $(ROBOT) merge $(foreach src,$^,-i $(src)) -o $@ + +# Step 1d: And it is also a released artefact. +collected-lifestages.owl: $(TMPDIR)/collected-lifestages.owl + $(ROBOT) merge -i $< $(COMPOSITE_STRIPPING_COMMAND) \ + annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ + # Step 2: Create a "composite" ontology. This is the core of the # composite pipeline. It heavily relies on the Uberon plugin for ROBOT, # which provides the 'merge-species' and 'merge-equivalent-sets' # commands. -TAXON_GCI_RELS = RO:0002202 RO:0002496 RO:0002497 BFO:0000051 -MERGESPECIES_OPTS = --remove-declarations --extended-translation --translate-gcas .PRECIOUS: $(TMPDIR)/composite-%.owl -$(TMPDIR)/composite-%.owl: $(TMPDIR)/collected-%.owl | all_robot_plugins +$(TMPDIR)/composite-%.owl: $(TMPDIR)/collected-%.owl $(TMPDIR)/tax-merges.tsv | all_robot_plugins $(ROBOT) merge -i $< $(COMPOSITE_STRIPPING_COMMAND) \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'mouse' -t NCBITaxon:10090 $(foreach rel,$(TAXON_GCI_RELS),-q $(rel)) \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'human' -t NCBITaxon:9606 $(foreach rel,$(TAXON_GCI_RELS),-q $(rel)) \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'primate' -t NCBITaxon:9443 \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'Xenopus' -t NCBITaxon:8353 \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'Danio' -t NCBITaxon:7954 \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'Drosophila' -t NCBITaxon:7227 \ - uberon:merge-species $(MERGESPECIES_OPTS) -s 'C elegans' -t NCBITaxon:6237 \ + uberon:merge-species --remove-declarations \ + --extended-translation \ + --translate-gcas \ + --batch-file $(TMPDIR)/tax-merges.tsv \ uberon:merge-equivalent-sets -s UBERON=10 -s CL=9 -s CARO=5 \ -l UBERON=10 -l CL=9 \ -d UBERON=10 -d CL=9 \ @@ -1155,16 +1169,23 @@ 
$(TMPDIR)/composite-%.owl: $(TMPDIR)/collected-%.owl | all_robot_plugins relax \ reduce -r ELK -o $@ +# Step 2a: The "tax-merges.tsv" file used in the rule above is automatically +# derived from the list of species in config/taxa.yaml. +$(TMPDIR)/tax-merges.tsv: $(SCRIPTSDIR)/taxa.py config/taxa.yaml + python3 $(SCRIPTSDIR)/taxa.py make-merge-table > $@ + # Step 3: Annotate the result of step 2. This is a separate step only so -# that we can have explicit rules for composite-metazoan and -# composite-vertebrate, because the ODK-generated Makefile already -# defines a non-implicit rules with those targets. +# that we can have explicit rules for composite-metazoan, -vertebrate, +# and -lifestages, because the ODK-generated Makefile already defines a +# non-implicit rules with those targets. composite-%.owl: $(TMPDIR)/composite-%.owl $(ROBOT) annotate -i $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ composite-metazoan.owl: $(TMPDIR)/composite-metazoan.owl $(ROBOT) annotate -i $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ composite-vertebrate.owl: $(TMPDIR)/composite-vertebrate.owl $(ROBOT) annotate -i $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ +composite-lifestages.owl: $(TMPDIR)/composite-lifestages.owl + $(ROBOT) annotate -i $< --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) -o $@ # Some special products derived from the products generated above @@ -1231,9 +1252,10 @@ $(MAPPINGDIR)/uberon-local.sssom.tsv: $(SRC) | all_robot_plugins # Uberon's "meta" mapping set, containing all mappings between Uberon # and foreign ontologies, regardless of where they are maintained. Made -# by compiling the "local" set above with the FBbt set obtained below. +# by compiling the "local" set above with the remote sets obtained below. 
$(MAPPINGDIR)/uberon.sssom.tsv: $(MAPPINGDIR)/uberon-local.sssom.tsv \
-	$(MAPPINGDIR)/fbbt.sssom.tsv
+	$(MAPPINGDIR)/fbbt.sssom.tsv \
+	$(MAPPINGDIR)/sslso.sssom.tsv
 	sssom-cli $(foreach src, $^, -i $(src)) \
 		--prefix-map-from-input \
 		--rule 'object==UBERON:* -> invert()' \
@@ -1244,7 +1266,7 @@ $(MAPPINGDIR)/uberon.sssom.tsv: $(MAPPINGDIR)/uberon-local.sssom.tsv \
 # -------------------
 
 # The following providers publish their own mapping sets.
-EXTERNAL_SSSOM_PROVIDERS = fbbt cl biomappings
+EXTERNAL_SSSOM_PROVIDERS = fbbt cl sslso biomappings
 
 # All the sets coming from the above ontologies.
 EXTERNAL_SSSOM_SETS = $(foreach provider, $(EXTERNAL_SSSOM_PROVIDERS), $(MAPPINGDIR)/$(provider).sssom.tsv)
@@ -1266,6 +1288,10 @@ $(MAPPINGDIR)/fbbt.sssom.tsv: .FORCE
 $(MAPPINGDIR)/cl.sssom.tsv: .FORCE
 	wget "http://purl.obolibrary.org/obo/cl/cl.sssom.tsv" -O $@
 
+# SSLSO (life stages) mapping set. We simply fetch it as it is.
+$(MAPPINGDIR)/sslso.sssom.tsv: .FORCE
+	wget "https://github.com/obophenotype/developmental-stage-ontologies/releases/latest/download/life-stages.sssom.tsv" -O $@
+
 # Biomappings mapping set. Nominally a simple mirror, but we need a
 # custom rule for two reasons:
 # - the set is provided with the metadata in a separate file, which the
@@ -1300,10 +1326,10 @@ EXTERN_BRIDGES = $(BRIDGEDIR)/uberon-bridge-to-mba.owl \
 		 $(BRIDGEDIR)/uberon-bridge-to-dmba.owl
 
 # 1. Prepare the ruleset file.
-# The ruleset file is maintained with M4 macros to make it more easily
-# editable, so we need to expand the macros first.
-$(TMPDIR)/bridges.rules: $(SCRIPTSDIR)/sssomt.m4 $(BRIDGEDIR)/bridges.rules.m4
-	m4 $^ > $@
+# The ruleset template is completed by the taxa.py script, which inserts
+# the taxon-specific rules it derives from config/taxa.yaml.
+$(TMPDIR)/bridges.rules: $(SCRIPTSDIR)/taxa.py $(BRIDGEDIR)/bridges.rules config/taxa.yaml
+	python3 $(SCRIPTSDIR)/taxa.py make-rules $(BRIDGEDIR)/bridges.rules > $@
 
 # 2. Generate the bridges from the "meta" mapping set and the CL set.
# Note that merging CL here is not strictly necessary, but doing so diff --git a/src/scripts/sssomt.m4 b/src/scripts/sssomt.m4 deleted file mode 100644 index d43eb3b140..0000000000 --- a/src/scripts/sssomt.m4 +++ /dev/null @@ -1,40 +0,0 @@ -dnl This file contains M4 macros intended to make it easier to write -dnl the SSSOM/Transform ruleset needed to generate the bridge files. - -dnl Expand to instructions to create all possible bridging axioms -dnl $1 = suffix to append to the original label -dnl $2 = taxon ID (empty for a taxon-neutral bridge) -define(STD_BRIDGE, `predicate==* -> annotate_subject(IAO:0000589, "%object_label ($1)"); -ifelse($2, , `dnl - predicate==skos:exactMatch -> create_axiom("%subject_id EquivalentTo: %object_id"); - predicate==skos:broadMatch -> create_axiom("%subject_id SubClassOf: %object_id"); - predicate==skos:narrowMatch -> create_axiom("%object_id SubClassOf: %subject_id");',`dnl - predicate==semapv:crossSpeciesExactMatch -> create_axiom("%subject_id EquivalentTo: %object_id and (%TAXREL some $2)");')') - -dnl Expand to instructions to create all possible bridging axioms -dnl both for Uberon and for CL -dnl $1 = lowercase tag identifying the bridge -dnl $2 = foreign ontology prefix -dnl $3 = taxon ID (empty for a taxon-neutral bridge) -dnl 4$ = suffix to append to the original label -define(DO_BRIDGE, `[$1-uberon] subject==$2:* object==UBERON:* { - STD_BRIDGE($4, $3) -} -[$1-cl] subject==$2:* object==CL:* { - STD_BRIDGE($4, $3) -} -') - -dnl This is the macro that should be called from the ruleset -dnl template file -dnl $1 = foreign ontology prefix -dnl $2 = taxon ID -dnl $3 = suffix to append to the original label -dnl -dnl Example for a taxon-neutral bridge: -dnl BRIDGE(EFO) -dnl -dnl Example for a taxon-specific bridge: -dnl BRIDGE(FBbt, NCBITaxon:7227, Drosophila) -define(BRIDGE, `DO_BRIDGE(`translit(`$1', `A-Z', `a-z')', `$1', `$2', - `ifelse(`$3', , `$1', `$3')')') diff --git a/src/scripts/taxa.py b/src/scripts/taxa.py new file 
mode 100644 index 0000000000..732425b1a6 --- /dev/null +++ b/src/scripts/taxa.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +# HELPER SCRIPT TO MANAGE TAXA +# +# Uberon bridging and compositing processes require a bit of information +# about the taxa that are involved. To make it easier to manage those +# informations, they are centralised in the config/taxa.yaml file. +# +# The main contents of the config/taxa.yaml file is a list of taxa, +# where each taxon is represented by a structure that should look like +# this: +# +# ---8<----- +# taxon_id: NCBITaxon:1234 +# label: +# compositing: +# unfold_over: +# preserve: +# bridging: +# - prefix: +# namespace: +# name: 8----- +# +# Currently, this file is used for two things: +# +# (1) automatically generate the SSSOM/T-OWL rules needed to generate +# the cross-species bridge files (taxa.py make-rules); +# +# (2) automatically generate the batch file used by the +# uberon:merge-species command to create the composite ontologies. + +import sys +import yaml + +with open('config/taxa.yaml', 'r') as f: + taxa = yaml.load(f, yaml.CLoader) + + +def generate_prefix_declarations(f): + """Insert SSSOM/T-OWL prefix declarations for all species.""" + for taxon in taxa['species']: + for bridge in taxon['bridging']: + prefix = bridge['prefix'] + namespace = bridge.get('namespace', 'http://purl.obolibrary.org/obo/' + prefix + '_') + + f.write(f"prefix {prefix}: <{namespace}>\n") + + +def generate_directives(f): + """Insert SSSOM/T-OWL taxon declarations for all species.""" + for taxon_id in [t['taxon_id'] for t in taxa['species']]: + f.write(f"declare({taxon_id});\n") + + +def generate_bridging_rules(f): + """Insert SSSOM/T-OWL bridging rules for all species.""" + for taxon in taxa['species']: + taxon_id = taxon['taxon_id'] + def_label = taxon['label'] + for bridge in taxon['bridging']: + prefix = bridge['prefix'] + name = bridge.get('name', prefix.lower()) + label = bridge.get('label', def_label) + + f.write(f""" +[{name}-uberon] 
subject=={prefix}:* object==UBERON:* {{ + predicate==* -> annotate(%{{subject_id}}, IAO:0000589, "%{{object_label}} ({label})"); + predicate==semapv:crossSpeciesExactMatch -> create_axiom("%subject_id EquivalentTo: %object_id and (%TAXREL some {taxon_id})"); +}} +[{name}-cl] subject=={prefix}:* object==CL:* {{ + predicate==* -> annotate(%{{subject_id}}, IAO:0000589, "%{{object_label}} ({label})"); + predicate==semapv:crossSpeciesExactMatch -> create_axiom("%subject_id EquivalentTo: %object_id and (%TAXREL some {taxon_id})"); +}} +""") + + +def process_rule_file(filein, fileout): + """Insert all SSSOM/T-OWL instructions required to generate + cross-species bridges. + """ + + for line in filein: + if line.strip() == '%INSERT-TAX-SPECIFIC-PREFIXES': + generate_prefix_declarations(fileout) + elif line.strip() == '%INSERT-TAX-SPECIFIC-DIRECTIVES': + generate_directives(fileout) + elif line.strip() == '%INSERT-TAX-SPECIFIC-BRIDGES': + generate_bridging_rules(fileout) + else: + fileout.write(line) + + +def generate_merge_table(fileout): + """Generates a batch-file for uberon:merge-species.""" + def_link_properties = taxa.get('defaults', {}).get('compositing', {}).get('unfold_over', ['BFO:0000050', 'BFO:0000066']) + def_preserved_properties = taxa.get('defaults', {}).get('compositing', {}).get('preserve', []) + + for taxon in taxa['species']: + taxon_id = taxon['taxon_id'] + label = taxon['label'] + link_properties = ','.join(taxon.get('compositing', {}).get('unfold_over', def_link_properties)) + preserved_properties = ','.join(taxon.get('compositing', {}).get('preserve', def_preserved_properties)) + + fileout.write(f"{taxon_id}\t{label}\t{link_properties}\t{preserved_properties}\n") + + +usage = f"Usage: {sys.argv[0]} | " +if len(sys.argv) < 2: + sys.exit(usage) + +cmd = sys.argv[1] +if cmd == 'make-rules': + if len(sys.argv) != 3: + sys.exit(usage) + with open(sys.argv[2], 'r') as fin: + process_rule_file(fin, sys.stdout) +elif cmd == 'make-merge-table': + 
generate_merge_table(sys.stdout) +else: + sys.exit(usage)