Skip to content

Commit

Permalink
Update to work with new array schema and LoL pydantic generator (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Sep 19, 2024
1 parent 2659925 commit bacaf30
Show file tree
Hide file tree
Showing 71 changed files with 2,042 additions and 3,806 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,5 +129,6 @@ dmypy.json
.pyre/

.DS_Store
/my_temperature.zarr
/my_temperature.h5
/out/*
/my_container.h5
/my_container.zarr
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ Support for loading and dumping N-dimensional arrays in LinkML.
# Quick reference for common commands

```bash
cd linkml-model
poetry run gen-json-schema tests/input/examples/schema_definition-array-2.yaml
poetry run gen-pydantic tests/input/examples/schema_definition-array-2.yaml
poetry run gen-pydantic tests/input/temperature_schema.yaml > tests/array_classes_lol.py
```

# Acknowledgements
Expand Down
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os
from datetime import date

from linkml_arrays import __version__

# -- Project information -----------------------------------------------------
Expand All @@ -24,7 +25,7 @@
"sphinx_rtd_theme",
"sphinx_click",
# "sphinx_autodoc_typehints",
"myst_parser"
"myst_parser",
]

# generate autosummary pages
Expand Down
Binary file removed my_temperature.DaySeries.values.h5
Binary file not shown.
Binary file removed my_temperature.DaySeries.values.npy
Binary file not shown.
Binary file removed my_temperature.LatitudeSeries.values.h5
Binary file not shown.
Binary file removed my_temperature.LatitudeSeries.values.npy
Binary file not shown.
Binary file removed my_temperature.LongitudeSeries.values.h5
Binary file not shown.
Binary file removed my_temperature.LongitudeSeries.values.npy
Binary file not shown.
Binary file removed my_temperature.TemperatureMatrix.values.h5
Binary file not shown.
Binary file removed my_temperature.TemperatureMatrix.values.npy
Binary file not shown.
3,627 changes: 780 additions & 2,847 deletions poetry.lock

Large diffs are not rendered by default.

62 changes: 31 additions & 31 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
[tool.poetry]
name = "linkml-arrays"
version = "0.0.0"
version = "0.1.0"
description = "linkml-arrays"
authors = ["Ryan Ly <[email protected]>"]
authors = [
"Ryan Ly <[email protected]>",
"Chris Mungall <[email protected]>",
]
license = "BSD-3"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.9"
linkml-runtime = ">=1.7.0"
linkml-runtime = ">=1.8.0"
numpy = ">=1.24.3"
h5py = ">=3.9.0"
zarr = ">=2.16.1"
nptyping = ">=2.5.0"
xarray = "^2024.1.1"
tox = "^3.25.1" # TODO move out of main deps
ruamel-yaml = "^0.18.6"
importlib_metadata = "*"

[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
sphinx = {version = "^5.3.0", extras = ["docs"]}
sphinx-rtd-theme = {version = "^1.0.0", extras = ["docs"]}
# sphinx-autodoc-typehints = {version = "^1.19.4", extras = ["docs"]}
sphinx-click = {version = "^4.3.0", extras = ["docs"]}
myst-parser = {version = "^0.18.1", extras = ["docs"]}
jupyter = {version = "*", extras = ["jupyter"]}

[tool.poetry.scripts]
linkml-arrays = "linkml_arrays.cli:main"
pytest = "*"
tox = "*"
# sphinx = {version = "*", extras = ["docs"]}
# sphinx-rtd-theme = {version = "^1.0.0", extras = ["docs"]}
# # sphinx-autodoc-typehints = {version = "^1.19.4", extras = ["docs"]}
# sphinx-click = {version = "^4.3.0", extras = ["docs"]}
# myst-parser = {version = "*", extras = ["docs"]}
# jupyter = {version = "*", extras = ["jupyter"]}

[tool.poetry.extras]
docs = [
"sphinx",
"sphinx-rtd-theme",
# "sphinx-autodoc-typehints",
"sphinx-click",
"myst-parser"
]
jupyter = [
"jupyter"
]
# [tool.poetry.extras]
# docs = [
# "sphinx",
# "sphinx-rtd-theme",
# "sphinx-autodoc-typehints",
# "sphinx-click",
# "myst-parser"
# ]
# jupyter = [
# "jupyter"
# ]

[tool.poetry.group.dev.dependencies]
black = "^24.1.1"
pytest = "^7.1.2"
mypy = "^1.8.0"
# [tool.poetry.group.dev.dependencies]
# black = "^24.1.1"
# pytest = "^7.1.2"
# mypy = "^1.8.0"

[tool.poetry-dynamic-versioning]
enable = true
Expand All @@ -52,7 +53,6 @@ style = "pep440"

[tool.black]
line-length = 100
target-version = ["py38", "py39", "py310"]

[tool.isort]
profile = "black"
Expand Down
44 changes: 0 additions & 44 deletions src/linkml_arrays/cli.py

This file was deleted.

24 changes: 11 additions & 13 deletions src/linkml_arrays/dumpers/hdf5_dumper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Class for dumping a LinkML model to an HDF5 file."""

from pathlib import Path
from typing import Union

import h5py
Expand All @@ -14,15 +15,15 @@ def _iterate_element(
):
"""Recursively iterate through the elements of a LinkML model and save them.
Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
Write Pydantic BaseModel objects as groups, slots with the "array" element
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

for k, v in vars(element).items():
found_slot = schemaview.induced_slot(k, element_type)
if "linkml:elements" in found_slot.implements:
if found_slot.array:
# save the numpy array to an hdf5 dataset
group.create_dataset(found_slot.name, data=v)
else:
Expand All @@ -39,16 +40,13 @@ class Hdf5Dumper(Dumper):
"""Dumper class for LinkML models to HDF5 files."""

# TODO is this the right method to overwrite? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to an HDF5 file.
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.h5"
def dumps(
self,
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
output_file_path: Union[str, Path],
**kwargs,
):
"""Dump the element to an HDF5 file."""
with h5py.File(output_file_path, "w") as f:
_iterate_element(element, schemaview, f)
123 changes: 123 additions & 0 deletions src/linkml_arrays/dumpers/yaml_array_file_dumper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Base class for dumping a LinkML model to YAML with paths to files containing arrays."""

import os
from abc import ABCMeta, abstractmethod
from collections.abc import Callable
from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import yaml
from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from pydantic import BaseModel


def _iterate_element(
    element: Union[YAMLRoot, BaseModel],
    schemaview: SchemaView,
    output_dir: Path,
    write_array: Callable,
    format: str,  # NOTE: shadows the builtin `format`; kept for interface compatibility
    parent_identifier=None,
    inlined_name=None,
):
    """Recursively iterate through the elements of a LinkML model and save them.

    Return a dictionary with the same structure as the input element, but where the slots
    with the "array" element are written to an array file and the paths to these
    files are returned in the dictionary. The paths are relative to the output directory.

    Args:
        element: The model object (or nested sub-object) to serialize.
        schemaview: SchemaView for the schema that declares ``element``'s class.
        output_dir: Directory that array files are written into (paths in the
            returned dict are relative to it).
        write_array: Callback ``(value, path_without_suffix) -> written_path``
            supplied by the concrete dumper subclass.
        format: Format label recorded in each array's "source" entry.
        parent_identifier: Identifier value of the enclosing object, used to name
            array files when this object has no identifier slot of its own.
        inlined_name: Slot name under which this object is inlined in its parent,
            used as an extra filename component.

    Raises:
        ValueError: If the class requires an identifier and it is not provided.
    """
    # get the type of the element
    element_type = type(element).__name__

    # ask schemaview whether it has a class by this name
    found_class = schemaview.get_class(element_type)

    id_slot = schemaview.get_identifier_slot(found_class.name)
    if id_slot is not None:
        id_value = getattr(element, id_slot.name)
    else:
        id_value = None

    ret_dict = dict()
    for k, v in vars(element).items():
        found_slot = schemaview.induced_slot(k, element_type)
        if found_slot.array:
            # Array slots must be traceable to an identifier so the file name is unique.
            if id_slot is None and parent_identifier is None:
                raise ValueError("The class requires an identifier.")

            # determine the output file name without the suffix
            # Precedence: own id > parent id + inlined slot name > parent id > slot name only.
            if id_slot is not None:
                output_file_name = f"{id_value}.{found_slot.name}"
            elif inlined_name is not None:
                output_file_name = f"{parent_identifier}.{inlined_name}.{found_slot.name}"
            elif parent_identifier is not None:
                output_file_name = f"{parent_identifier}.{found_slot.name}"
            else:
                output_file_name = f"{found_slot.name}"

            # if output_dir is absolute, make it relative to current working directory
            # and create the directory if it does not exist
            # NOTE(review): rebinding output_dir here also affects later loop iterations
            # and the recursive calls below — presumably intentional; confirm.
            if output_dir.is_absolute():
                output_dir = Path(os.path.relpath(output_dir, start=os.getcwd()))
            # NOTE(review): no parents=True — a nested output_dir that does not yet
            # exist would raise FileNotFoundError; confirm single-level dirs suffice.
            output_dir.mkdir(exist_ok=True)
            output_file_path_no_suffix = output_dir / output_file_name

            # save the numpy array to file and write the file path to the dictionary
            output_file_path = write_array(v, output_file_path_no_suffix)
            # Emit the array-file reference structure expected by the loader side.
            ret_dict[k] = {
                "source": [
                    {
                        "file": f"./{output_file_path}",
                        "format": format,
                    }
                ]
            }
        else:
            if isinstance(v, BaseModel):
                # Recurse into nested Pydantic objects, passing our identifier down
                # so their array files can be named after this parent.
                v2 = _iterate_element(
                    v,
                    schemaview,
                    output_dir,
                    write_array,
                    format,
                    id_value,
                    inlined_name=found_slot.name,
                )
                ret_dict[k] = v2
            else:
                # Scalar / non-array slot: copy through unchanged.
                ret_dict[k] = v
    return ret_dict


class YamlArrayFileDumper(Dumper, metaclass=ABCMeta):
    """Base dumper class for LinkML models to YAML files with paths to array files.

    Subclasses must define a ``FORMAT`` class attribute (the format label written
    into each array's "source" entry) and implement :meth:`write_array` to persist
    a single array slot value and return the path it was written to.
    """

    # FORMAT is a class attribute that must be set by subclasses

    def dumps(
        self,
        element: Union[YAMLRoot, BaseModel],
        schemaview: SchemaView,
        output_dir: Optional[Union[str, Path]] = None,
        **kwargs,
    ) -> str:
        """Return element formatted as a YAML string.

        Array slots are written to files under ``output_dir`` (default: current
        directory) via the subclass's ``write_array``; the YAML contains relative
        paths to those files instead of the array data.
        """
        if output_dir is None:
            output_dir = "."
        # Renamed from `input` to avoid shadowing the builtin.
        serialized = _iterate_element(
            element, schemaview, Path(output_dir), self.write_array, self.FORMAT
        )

        return yaml.dump(serialized)

    @classmethod
    @abstractmethod
    def write_array(cls, array: Union[List, np.ndarray], output_file_path: Union[str, Path]):
        """Write an array to a file and return the path of the written file."""
        raise NotImplementedError("Subclasses must implement this method.")
11 changes: 6 additions & 5 deletions src/linkml_arrays/dumpers/yaml_dumper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Class for dumpling a LinkML model to a YAML file."""
"""Class for dumping a LinkML model to YAML."""

from typing import Union

Expand All @@ -14,8 +14,8 @@ def _iterate_element(
):
"""Recursively iterate through the elements of a LinkML model and save them.
Returns a dictionary with the same structure as the input element, but with the slots
that implement "linkml:elements" (arrays) are written as lists or lists of lists.
Returns a dictionary with the same structure as the input element, but where the slots
with the "array" element are written as lists of lists in YAML.
Raises:
ValueError: If the class requires an identifier and it is not provided.
Expand All @@ -35,10 +35,11 @@ def _iterate_element(
ret_dict = dict()
for k, v in vars(element).items():
found_slot = schemaview.induced_slot(k, element_type)
if "linkml:elements" in found_slot.implements:
if found_slot.array:
if id_slot is None and parent_identifier is None:
raise ValueError("The class requires an identifier.")
ret_dict[k] = v.tolist()
assert isinstance(v, list)
ret_dict[k] = v
else:
if isinstance(v, BaseModel):
v2 = _iterate_element(v, schemaview, id_value)
Expand Down
Loading

0 comments on commit bacaf30

Please sign in to comment.