-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2239 from ytausch/pydantic
Add Pydantic Data Model to Document Node Attributes Data Structure
- Loading branch information
Showing
12 changed files
with
927 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ omit = | |
.*version.* | ||
*_version.py | ||
*.yaml | ||
conda_forge_tick/models/* | ||
|
||
exclude_lines = | ||
if __name__ == '__main__': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<!-- Put your changes here --> | ||
|
||
<!-- | ||
Thanks for contributing to cf-scripts! | ||
We are currently transitioning to a Pydantic-based model documenting the format of the conda-forge dependency graph | ||
data that this bot internally uses (see README). | ||
Please make sure that your changes either do not change the implicit data model or adjust the model in | ||
conda_forge_tick/models appropriately and document any new fields or files. Tick the checkbox below to confirm. | ||
Note that the model exists alongside, and independently of, the actual production code. | ||
--> | ||
|
||
- [ ] Pydantic model updated or no update needed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Scheduled workflow that validates the Pydantic data model against a fresh
# shallow clone of the cf-graph-countyfair repository.
name: test-model

on:
  workflow_dispatch:
  schedule:
    - cron: "10 * * * *"  # every hour, at minute 10

env:
  PY_COLORS: "1"

jobs:
  tests:
    name: tests
    runs-on: "ubuntu-latest"
    defaults:
      run:
        # login shell so that the conda environment is activated in each step
        shell: bash -l {0}

    steps:
      - uses: actions/checkout@v3

      - uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-version: latest
          environment-file: environment.yml
          python-version: "${{ vars.PYVER }}"
          activate-environment: cf-scripts
          condarc-file: autotick-bot/condarc

      - name: configure conda, install code, and clone cf-graph
        run: |
          conda install --file requirements-dev.txt --yes
          pip install --no-deps --no-build-isolation -e .
          git clone --depth=1 https://github.com/regro/cf-graph-countyfair.git cf-graph
      - name: conda info and env
        run: |
          echo "=================================================================="
          echo "=================================================================="
          conda info
          echo ""
          echo "=================================================================="
          echo "=================================================================="
          conda list
      - name: run pytest (model)
        # the model tests are run from inside the cloned graph repository
        run: |
          cd cf-graph
          pytest \
            -v \
            --durations 10 \
            ../tests/model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# The cf-graph Data Model | ||
|
||
Refer to the [main README](../../README.md) for an explanation of what this is about. | ||
|
||
## Directory Structure | ||
|
||
The most important parts of the graph repository look like this: | ||
``` | ||
cf-graph-countyfair | ||
├── import_to_pkg_maps | ||
│ └── ... | ||
├── mappings/pypi | ||
│ └── ... | ||
├── node_attrs | ||
│ ├── somepackage.json | ||
│ └── ... | ||
├── pr_info | ||
│ ├── somepackage.json | ||
│ └── ... | ||
├── pr_json | ||
│ ├── 123456789.json | ||
│ └── ... | ||
├── version_pr_info | ||
│ ├── somepackage.json | ||
│ └── ... | ||
├── versions | ||
│ ├── somepackage.json | ||
│ └── ... | ||
├── graph.json | ||
└── ranked_hubs_authorities.json | ||
``` | ||
|
||
For efficiency reasons, all subdirectories use sharded paths. For example, the path | ||
`node_attrs/pytest.json` shown above is shorthand; the actual path in the repository is | ||
`node_attrs/d/9/a/8/c/pytest.json`. Sharding avoids having too many files in a single directory, allowing | ||
git to efficiently manage the repository. | ||
|
||
## File and Directory Descriptions | ||
|
||
### `import_to_pkg_maps` | ||
Undocumented. | ||
|
||
### `mappings/pypi` | ||
Undocumented. | ||
|
||
### `node_attrs` | ||
One file per conda-forge package containing metadata about the package. | ||
Pydantic Model: `NodeAttributes` in [node_attributes.py](node_attributes.py). | ||
|
||
### `pr_info` | ||
Undocumented. | ||
|
||
### `pr_json` | ||
Undocumented. | ||
|
||
### `version_pr_info` | ||
Undocumented. | ||
|
||
### `versions` | ||
One file per conda-forge package containing upstream version update information about the package. | ||
|
||
### `graph.json` | ||
The JSON representation of a [networkx](https://networkx.org/) graph. The graph is a directed graph, where the nodes | ||
are package names and the edges are dependencies. The node list of this graph is treated as the set of all packages in | ||
the conda-forge ecosystem. The edges are directed from the dependency package to the dependent package. | ||
|
||
The nodes have attributes which reference JSON files in the `node_attrs` directory. | ||
|
||
### `ranked_hubs_authorities.json` | ||
Undocumented. |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
from typing import Annotated, Any, Generic, Literal, Never, TypeVar | ||
|
||
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, UrlConstraints | ||
from pydantic_core import Url | ||
|
||
# Generic placeholders used by the helper types in this module:
# T for element/value types, K and V for dictionary keys and values.
T = TypeVar("T")

K = TypeVar("K")
V = TypeVar("V")
|
||
|
||
class StrictBaseModel(BaseModel):
    """
    Base model that validates values on assignment and forbids extra fields.
    """

    model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
||
|
||
class ValidatedBaseModel(BaseModel):
    """
    Base model that validates values on assignment and allows extra fields.
    """

    model_config = ConfigDict(validate_assignment=True, extra="allow")
|
||
|
||
class Set(StrictBaseModel, Generic[T]):
    """
    A custom set type. It contains a special set marker `__set__`, allowing dynamic instantiation of the set type.
    This is considered legacy and should be removed if a proper data model is used for validation.
    """

    # Required marker; populated from the `__set__` key and only valid when it is `True`.
    magic_set_marker: Literal[True] = Field(..., alias="__set__")
    # The members of the set.
    elements: set[T]
|
||
|
||
def none_to_empty_list(value: T | None) -> T | list[Never]: | ||
""" | ||
Convert `None` to an empty list. Everything else is kept as is. | ||
""" | ||
if value is None: | ||
return [] | ||
return value | ||
|
||
|
||
NoneIsEmptyList = Annotated[list[T], BeforeValidator(none_to_empty_list)]
"""
A generic list type that converts `None` to an empty list.
This should not be needed if a proper data model is used in production.
Defining this type is already the first step to remove it.
"""
|
||
|
||
def convert_to_list(value: T) -> list[T]:
    """
    Convert a single value to a list.

    The value is always wrapped, even if it already is a list.

    :param value: the value to wrap
    :return: a new one-element list containing `value`
    """
    return [value]
|
||
|
||
SingleElementToList = Annotated[list[T], BeforeValidator(convert_to_list)]
"""
A generic list type that converts a single value to a list. Union with list[T] to allow multiple values.
"""
|
||
|
||
def empty_string_to_none(value: Any) -> None:
    """
    Convert an empty string to `None`. None is kept as is.

    :param value: the raw value to normalize
    :return: always `None`
    :raises ValueError: if `value` is neither `None` nor equal to the empty string
    """
    if value is None or value == "":
        return None
    raise ValueError("value must be an empty string or None")
|
||
|
||
EmptyStringIsNone = Annotated[None, BeforeValidator(empty_string_to_none)]
"""
A type that can only receive an empty string and converts it to `None`.
Can also hold `None` as is.
This should not be needed if a proper data model is used in production.
"""
|
||
|
||
def split_string_newline(value: Any) -> list[str]:
    """
    Split a string by newlines.

    Only `"\\n"` is used as separator, so an empty string yields `[""]` and a
    trailing newline yields a trailing empty element.

    :param value: the string to split
    :return: the parts of `value` between newline characters
    :raises ValueError: if `value` is not a string
    """
    if not isinstance(value, str):
        raise ValueError("value must be a string")
    return value.split("\n")
|
||
|
||
SplitStringNewlineBefore = Annotated[list[str], BeforeValidator(split_string_newline)]
"""
A list-of-strings type that splits a string at newlines before validation.
"""
|
||
|
||
def false_to_none(value: Any) -> None:
    """
    Convert `False` to `None`. Keep `None` as is.

    The check is by identity, so other falsy values such as `0` or `""` are rejected.

    :param value: the raw value to normalize
    :return: always `None`
    :raises ValueError: if `value` is neither `False` nor `None`
    """
    if value is False or value is None:
        return None
    raise ValueError("value must be False or None")
|
||
|
||
FalseIsNone = Annotated[None, BeforeValidator(false_to_none)]
"""
A type that can only receive `False` or `None` and converts it to `None`.
"""
|
||
|
||
def none_to_empty_dict(value: T | None) -> T | dict[Never, Never]: | ||
""" | ||
Convert `None` to an empty dictionary, otherwise keep the value as is. | ||
""" | ||
if value is None: | ||
return {} | ||
return value | ||
|
||
|
||
NoneIsEmptyDict = Annotated[dict[K, V], BeforeValidator(none_to_empty_dict)]
"""
A generic dict type that converts `None` to an empty dict.
"""
|
||
|
||
# A URL type whose scheme is restricted to `git`.
GitUrl = Annotated[Url, UrlConstraints(allowed_schemes=["git"])]
|
||
|
||
class LazyJsonReference(StrictBaseModel):
    """
    A lazy reference to a JSON object.
    """

    # TODO: There should be an elegant pydantic way to resolve LazyJSON references.

    # Populated from the `__lazy_json__` key; the value must end in `.json`.
    json_reference: str = Field(pattern=r".*\.json$", alias="__lazy_json__")
    """
    The JSON file reference.
    """
Oops, something went wrong.