From 3f2e89108220b26c5e9fb18cf3e63a79055e9c8f Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Tue, 15 Oct 2024 21:33:26 -0700 Subject: [PATCH 01/17] start of model class --- src/aind_data_schema/core/model.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 src/aind_data_schema/core/model.py diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py new file mode 100644 index 000000000..16c6b033b --- /dev/null +++ b/src/aind_data_schema/core/model.py @@ -0,0 +1,72 @@ +""" schema describing an analysis model """ + +from decimal import Decimal +from enum import Enum +from typing import List, Literal, Optional + +from pydantic import Field + +from aind_data_schema_models.modalities import Modality + +from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault +from aind_data_schema.components.devices import Software + + +class Backbone(str, Enum): + """Types of network backbones""" + + ALEXNET = "AlexNet" + RESNET = "ResNet" + VGGNET = "VGGNet" + + +class ModelArchitecture(AindModel): + """Description of model architecture""" + + backbone: Backbone = Field(..., title="Backbone") + layers: int = Field(..., title="Layers") + parameters: AindGenericType = Field(..., title="Parameters") + + +class ModelPerformance(AindModel): + """Description of performance metrics""" + + precision: Decimal = Field(..., title="Precision") + recall: Decimal = Field(..., title="Recall") + f1_score: Decimal = Field(..., title="F1 score") + + +class ModelTraining(AindModel): + """Description of model training""" + + training_data: str = Field(..., title="Path to training data") + training_data_description: Optional[str] = Field(default=None, title="Description of training data") + validation_folds: int = Field(..., title="Validation folds") #is the validation methods x-fold? or are there other validations? Enum? + performance: ModelPerformance = Field(..., title="Training performance") + + +class ModelEvaluation(AindModel): + """Description of model evaluation""" + + + +class Model(AindCoreModel): + """Description of an analysis model""" + + _DESCRIBED_BY_URL = AindCoreModel._DESCRIBED_BY_BASE_URL.default + "aind_data_schema/core/model.py" + describedBy: str = Field(_DESCRIBED_BY_URL, json_schema_extra={"const": _DESCRIBED_BY_URL}) + schema_version: Literal["1.0.1"] = Field("0.0.1") + + name: str = Field(..., title="Name") + developer_full_name: str = Field(..., title="Name of developer") + #developer_institution: + #shared by + modality: Modality.ONE_OF = Field(..., title="Modality") + date_trained: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime + model_architecture: ModelArchitecture = Field(..., title="Model architecture") + software: Software = Field(..., title="software") + direct_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") + limitations: Optional[str] = Field(default=None, title="Model limitations") + training: List[ModelTraining] = Field(..., title="Training") + + From 9d6932920a79ca8abd936dd9e42d29f5b78f7755 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Wed, 16 Oct 2024 16:01:12 -0700 Subject: [PATCH 02/17] added more to model --- src/aind_data_schema/core/model.py | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 16c6b033b..9f73d4ca4 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -2,11 +2,12 @@ from decimal import Decimal from enum import Enum -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Union from pydantic import Field from aind_data_schema_models.modalities import Modality +from aind_data_schema_models.organizations import Organization from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault from aind_data_schema.components.devices import Software @@ -26,14 +27,22 @@ class ModelArchitecture(AindModel): backbone: Backbone = Field(..., title="Backbone") layers: int = Field(..., title="Layers") parameters: AindGenericType = Field(..., title="Parameters") + #notes? -class ModelPerformance(AindModel): +class ScoreStatistics(AindModel): + """Statistics for x-fold validation scores""" + + mean: Decimal = Field(..., title="Mean") + std: Decimal = Field(..., title="Standard deviation") + + +class PerformanceScore(AindModel): """Description of performance metrics""" - precision: Decimal = Field(..., title="Precision") - recall: Decimal = Field(..., title="Recall") - f1_score: Decimal = Field(..., title="F1 score") + precision: Union[Decimal, ScoreStatistics] = Field(..., title="Precision") + recall: Union[Decimal, ScoreStatistics] = Field(..., title="Recall") + f1_score: Union[Decimal, ScoreStatistics] = Field(..., title="F1 score") class ModelTraining(AindModel): @@ -41,13 +50,20 @@ class ModelTraining(AindModel): training_data: str = Field(..., title="Path to training data") training_data_description: Optional[str] = Field(default=None, title="Description of training data") + training_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime validation_folds: int = Field(..., title="Validation folds") #is the validation methods x-fold? or are there other validations? Enum? - performance: ModelPerformance = Field(..., title="Training performance") + performance: PerformanceScore = Field(..., title="Training performance") + notes: Optional[str] = Field(default=None, title="Notes") class ModelEvaluation(AindModel): """Description of model evaluation""" + evaluation_data: str = Field(..., title="Path to evaluation data") + evaluation_data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + evaluation_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime + performance: PerformanceScore = Field(..., title="Evaluation performance") + notes: Optional[str] = Field(default=None, title="Notes") class Model(AindCoreModel): @@ -58,15 +74,13 @@ class Model(AindCoreModel): schema_version: Literal["1.0.1"] = Field("0.0.1") name: str = Field(..., title="Name") - developer_full_name: str = Field(..., title="Name of developer") - #developer_institution: - #shared by + developer_full_name: Optional[str] = Field(default=None, title="Name of developer") + developer_institution: Organization.ONE_OF = Field(default=None, title="Institute where developed") modality: Modality.ONE_OF = Field(..., title="Modality") - date_trained: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime model_architecture: ModelArchitecture = Field(..., title="Model architecture") software: Software = Field(..., title="software") direct_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") limitations: Optional[str] = Field(default=None, title="Model limitations") training: List[ModelTraining] = Field(..., title="Training") - - + evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") + notes: Optional[str] = Field(default=None, title="Notes") From 90357d5c4802fcecd0e9a18228e3740129a48d42 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Wed, 16 Oct 2024 16:02:14 -0700 Subject: [PATCH 03/17] fixed literal --- src/aind_data_schema/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 9f73d4ca4..714bda435 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -71,7 +71,7 @@ class Model(AindCoreModel): _DESCRIBED_BY_URL = AindCoreModel._DESCRIBED_BY_BASE_URL.default + "aind_data_schema/core/model.py" describedBy: str = Field(_DESCRIBED_BY_URL, json_schema_extra={"const": _DESCRIBED_BY_URL}) - schema_version: Literal["1.0.1"] = Field("0.0.1") + schema_version: Literal["0.0.1"] = Field("0.0.1") name: str = Field(..., title="Name") developer_full_name: Optional[str] = Field(default=None, title="Name of developer") From 6776d47a7b0e38d2dcf7bbca122c384460f910cc Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Wed, 16 Oct 2024 16:06:02 -0700 Subject: [PATCH 04/17] added cumulative evaluation --- src/aind_data_schema/core/model.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 714bda435..b23cd7f84 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -66,6 +66,14 @@ class ModelEvaluation(AindModel): notes: Optional[str] = Field(default=None, title="Notes") +class CumulativeEvaluation(AindModel): + """Description of cumulative evaluation performances""" + + evaluation_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime + performance: PerformanceScore = Field(..., title="Evaluation performance") + notes: Optional[str] = Field(default=None, title="Notes") + + class Model(AindCoreModel): """Description of an analysis model""" @@ -75,7 +83,7 @@ class Model(AindCoreModel): name: str = Field(..., title="Name") developer_full_name: Optional[str] = Field(default=None, title="Name of developer") - developer_institution: Organization.ONE_OF = Field(default=None, title="Institute where developed") + developer_institution: Optional[Organization.ONE_OF] = Field(default=None, title="Institute where developed") modality: Modality.ONE_OF = Field(..., title="Modality") model_architecture: ModelArchitecture = Field(..., title="Model architecture") software: Software = Field(..., title="software") @@ -83,4 +91,5 @@ class Model(AindCoreModel): limitations: Optional[str] = Field(default=None, title="Model limitations") training: List[ModelTraining] = Field(..., title="Training") evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") + cumulative_performance: Optional[List[CumulativeEvaluation]] = Field(default=[], title="Cumulative performance") notes: Optional[str] = Field(default=None, title="Notes") From 05074660f8b101ed67d9481ace82b99f181a8ee1 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Thu, 17 Oct 2024 11:58:02 -0700 Subject: [PATCH 05/17] line length --- src/aind_data_schema/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index b23cd7f84..0f1b17727 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -51,7 +51,7 @@ class ModelTraining(AindModel): training_data: str = Field(..., title="Path to training data") training_data_description: Optional[str] = Field(default=None, title="Description of training data") training_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime - validation_folds: int = Field(..., title="Validation folds") #is the validation methods x-fold? or are there other validations? Enum? + validation_folds: int = Field(..., title="Validation folds") #is the validation methods always x-fold? or Enum? performance: PerformanceScore = Field(..., title="Training performance") notes: Optional[str] = Field(default=None, title="Notes") From b2a68e9b5723446d47f1e26edef6708b72ad4f83 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Fri, 18 Oct 2024 10:42:59 -0700 Subject: [PATCH 06/17] cleanup based on feedback from team --- src/aind_data_schema/core/model.py | 69 ++++++++---------------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 0f1b17727..4340639ee 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -2,75 +2,42 @@ from decimal import Decimal from enum import Enum -from typing import List, Literal, Optional, Union +from typing import Any, Optional, List, Literal from pydantic import Field from aind_data_schema_models.modalities import Modality from aind_data_schema_models.organizations import Organization +from aind_data_schema_models.system_architecture import ModelBackbone from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault from aind_data_schema.components.devices import Software -class Backbone(str, Enum): - """Types of network backbones""" - - ALEXNET = "AlexNet" - RESNET = "ResNet" - VGGNET = "VGGNet" - - class ModelArchitecture(AindModel): """Description of model architecture""" - backbone: Backbone = Field(..., title="Backbone") + backbone: ModelBackbone = Field(..., title="Backbone") layers: int = Field(..., title="Layers") parameters: AindGenericType = Field(..., title="Parameters") - #notes? - - -class ScoreStatistics(AindModel): - """Statistics for x-fold validation scores""" - - mean: Decimal = Field(..., title="Mean") - std: Decimal = Field(..., title="Standard deviation") - - -class PerformanceScore(AindModel): - """Description of performance metrics""" - - precision: Union[Decimal, ScoreStatistics] = Field(..., title="Precision") - recall: Union[Decimal, ScoreStatistics] = Field(..., title="Recall") - f1_score: Union[Decimal, ScoreStatistics] = Field(..., title="F1 score") - - -class ModelTraining(AindModel): - """Description of model training""" - - training_data: str = Field(..., title="Path to training data") - training_data_description: Optional[str] = Field(default=None, title="Description of training data") - training_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime - validation_folds: int = Field(..., title="Validation folds") #is the validation methods always x-fold? or Enum? - performance: PerformanceScore = Field(..., title="Training performance") notes: Optional[str] = Field(default=None, title="Notes") -class ModelEvaluation(AindModel): - """Description of model evaluation""" +class PerformanceMetric(AindModel): + """Description of a performance metric""" - evaluation_data: str = Field(..., title="Path to evaluation data") - evaluation_data_description: Optional[str] = Field(default=None, title="Description of evaluation data") - evaluation_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime - performance: PerformanceScore = Field(..., title="Evaluation performance") - notes: Optional[str] = Field(default=None, title="Notes") + name: str = Field(..., title="Metric name") + value: Any = Field(..., title="Metric value") -class CumulativeEvaluation(AindModel): - """Description of cumulative evaluation performances""" +class ModelEvaluation(AindModel): + """Description of model training""" - evaluation_date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime - performance: PerformanceScore = Field(..., title="Evaluation performance") + data: str = Field(..., title="Path to training data") + data_description: Optional[str] = Field(default=None, title="Description of training data") + date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime + validation_folds: Optional[int] = Field(default=None, title="Validation folds") + performance: List[PerformanceMetric] = Field(..., title="Training performance") notes: Optional[str] = Field(default=None, title="Notes") @@ -82,14 +49,14 @@ class Model(AindCoreModel): schema_version: Literal["0.0.1"] = Field("0.0.1") name: str = Field(..., title="Name") - developer_full_name: Optional[str] = Field(default=None, title="Name of developer") + license: str = Field(..., title="License") + developer_full_name: Optional[List[str]] = Field(default=None, title="Name of developer") developer_institution: Optional[Organization.ONE_OF] = Field(default=None, title="Institute where developed") modality: Modality.ONE_OF = Field(..., title="Modality") model_architecture: ModelArchitecture = Field(..., title="Model architecture") - software: Software = Field(..., title="software") + software: List[Software] = Field(..., title="software") direct_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") limitations: Optional[str] = Field(default=None, title="Model limitations") - training: List[ModelTraining] = Field(..., title="Training") + training: List[ModelEvaluation] = Field(..., title="Training") evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") - cumulative_performance: Optional[List[CumulativeEvaluation]] = Field(default=[], title="Cumulative performance") notes: Optional[str] = Field(default=None, title="Notes") From d38cf13244a81883a9492c23d42e80edec75d9f5 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Wed, 6 Nov 2024 16:37:08 -0800 Subject: [PATCH 07/17] small edits --- src/aind_data_schema/core/model.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 4340639ee..b97b51d20 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -12,13 +12,14 @@ from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault from aind_data_schema.components.devices import Software +from aind_data_schema.core.processing import DataProcess class ModelArchitecture(AindModel): """Description of model architecture""" backbone: ModelBackbone = Field(..., title="Backbone") - layers: int = Field(..., title="Layers") + layers: Optional[int] = Field(default=None, title="Layers") parameters: AindGenericType = Field(..., title="Parameters") notes: Optional[str] = Field(default=None, title="Notes") @@ -33,11 +34,11 @@ class PerformanceMetric(AindModel): class ModelEvaluation(AindModel): """Description of model training""" - data: str = Field(..., title="Path to training data") - data_description: Optional[str] = Field(default=None, title="Description of training data") - date: AwareDatetimeWithDefault = Field(..., title="Date trained") #not sure we need datetime + data: str = Field(..., title="Path to evaluation data") + data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + date: AwareDatetimeWithDefault = Field(..., title="Date") #not sure we need datetime validation_folds: Optional[int] = Field(default=None, title="Validation folds") - performance: List[PerformanceMetric] = Field(..., title="Training performance") + performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") notes: Optional[str] = Field(default=None, title="Notes") @@ -54,9 +55,9 @@ class Model(AindCoreModel): developer_institution: Optional[Organization.ONE_OF] = Field(default=None, title="Institute where developed") modality: Modality.ONE_OF = Field(..., title="Modality") model_architecture: ModelArchitecture = Field(..., title="Model architecture") - software: List[Software] = Field(..., title="software") + software: List[Software] = Field(..., title="Software") direct_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") limitations: Optional[str] = Field(default=None, title="Model limitations") - training: List[ModelEvaluation] = Field(..., title="Training") + training: Optional[List[ModelEvaluation]] = Field(default=[], title="Training") evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") notes: Optional[str] = Field(default=None, title="Notes") From a0ef7f3bfda167bf998e449e21ae46d006ffc407 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 19:49:17 -0800 Subject: [PATCH 08/17] model validation --- src/aind_data_schema/core/model.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index b97b51d20..f8a97d53a 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -32,16 +32,21 @@ class PerformanceMetric(AindModel): class ModelEvaluation(AindModel): - """Description of model training""" + """Description of model evaluation""" - data: str = Field(..., title="Path to evaluation data") + data: Optional[str] = Field(default=None, title="Path to evaluation data") data_description: Optional[str] = Field(default=None, title="Description of evaluation data") - date: AwareDatetimeWithDefault = Field(..., title="Date") #not sure we need datetime - validation_folds: Optional[int] = Field(default=None, title="Validation folds") + date: AwareDatetimeWithDefault = Field(..., title="Date") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") notes: Optional[str] = Field(default=None, title="Notes") +class ModelTraining(ModelEvaluation): + """Description of model training""" + + cross_validation_method: str = Field(..., title="Cross validation method") + + class Model(AindCoreModel): """Description of an analysis model""" @@ -56,8 +61,8 @@ class Model(AindCoreModel): modality: Modality.ONE_OF = Field(..., title="Modality") model_architecture: ModelArchitecture = Field(..., title="Model architecture") software: List[Software] = Field(..., title="Software") - direct_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") + intended_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") limitations: Optional[str] = Field(default=None, title="Model limitations") - training: Optional[List[ModelEvaluation]] = Field(default=[], title="Training") + training: Optional[List[ModelTraining]] = Field(default=[], title="Training") evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") notes: Optional[str] = Field(default=None, title="Notes") From 029efefab79739015822bbb198237220996d4b17 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 19:51:37 -0800 Subject: [PATCH 09/17] using dataprocess --- src/aind_data_schema/core/model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index f8a97d53a..30c1ed2e0 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -31,14 +31,11 @@ class PerformanceMetric(AindModel): value: Any = Field(..., title="Metric value") -class ModelEvaluation(AindModel): +class ModelEvaluation(DataProcess): """Description of model evaluation""" - data: Optional[str] = Field(default=None, title="Path to evaluation data") data_description: Optional[str] = Field(default=None, title="Description of evaluation data") - date: AwareDatetimeWithDefault = Field(..., title="Date") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") - notes: Optional[str] = Field(default=None, title="Notes") class ModelTraining(ModelEvaluation): From 28072506233ead82ccfff0a541eb8d90e703d068 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 19:53:08 -0800 Subject: [PATCH 10/17] linting --- src/aind_data_schema/core/model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 30c1ed2e0..4c44e9e5c 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -1,7 +1,5 @@ """ schema describing an analysis model """ -from decimal import Decimal -from enum import Enum from typing import Any, Optional, List, Literal from pydantic import Field @@ -10,7 +8,7 @@ from aind_data_schema_models.organizations import Organization from aind_data_schema_models.system_architecture import ModelBackbone -from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault +from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel from aind_data_schema.components.devices import Software from aind_data_schema.core.processing import DataProcess @@ -34,7 +32,7 @@ class PerformanceMetric(AindModel): class ModelEvaluation(DataProcess): """Description of model evaluation""" - data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + data_description: Optional[str] = Field(default=None, title="Description of evaluation data") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") From 47cfc63476007e35eeabdebef562104cb21d5b12 Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 21:02:36 -0800 Subject: [PATCH 11/17] remove dataprocess and add test --- src/aind_data_schema/core/model.py | 10 ++-- tests/test_model.py | 96 ++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 tests/test_model.py diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 4c44e9e5c..3ab05eb68 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -8,9 +8,8 @@ from aind_data_schema_models.organizations import Organization from aind_data_schema_models.system_architecture import ModelBackbone -from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel +from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault from aind_data_schema.components.devices import Software -from aind_data_schema.core.processing import DataProcess class ModelArchitecture(AindModel): @@ -29,11 +28,14 @@ class PerformanceMetric(AindModel): value: Any = Field(..., title="Metric value") -class ModelEvaluation(DataProcess): +class ModelEvaluation(AindModel): """Description of model evaluation""" - data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + data: Optional[str] = Field(default=None, title="Path to evaluation data") + data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + date: AwareDatetimeWithDefault = Field(..., title="Date") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") + notes: Optional[str] = Field(default=None, title="Notes") class ModelTraining(ModelEvaluation): diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 000000000..999fd981e --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,96 @@ +""" tests for Model """ + +import datetime +import unittest + +import pydantic + +from aind_data_schema_models.modalities import Modality +from aind_data_schema_models.organizations import Organization +from aind_data_schema_models.process_names import ProcessName +from aind_data_schema_models.system_architecture import ModelBackbone + +from aind_data_schema.core.model import Model, ModelArchitecture, ModelEvaluation, ModelTraining, PerformanceMetric +from aind_data_schema.components.devices import Software + +class ModelTests(unittest.TestCase): + """tests for model""" + + def test_constructors(self): + """try building model""" + + with self.assertRaises(pydantic.ValidationError): + Model() + + now = datetime.datetime.now() + + m = Model( + name="2024_01_01_ResNet18_SmartSPIM.h5", + license="CC-BY-4.0", + developer_full_name="Joe Schmoe", + developer_institution=Organization.AIND, + modality=Modality.SPIM, + model_architecture=ModelArchitecture( + backbone=ModelBackbone.RESNET, + layers=18, + parameters={ + "downsample": 1, + "input_shape": [ + 14, + 14, + 26 + ], + "learning_rate": 0.0001, + "train_test_split": 0.8, + "batch_size": 32, + "augmentation": True, + "finetuning": True + }, + ), + software=[Software( + name="tensorflow", + version="2.11.0", + ) + ], + intended_use="Cell counting for 488 channel of SmartSPIM data", + limitations="Only trained on 488 channel", + training=[ + ModelTraining( + data="path to training set", + data_description="description of training set", + date=now, + performance=[ + PerformanceMetric( + name="precision", + value=0.9 + ), + PerformanceMetric( + name="recall", + value=0.85 + ) + ], + cross_validation_method="5-fold" + ) + ], + evaluations=[ + ModelEvaluation( + data="path to evaluation data", + data_description="description of evaluation set", + date=now, + performance=[ + PerformanceMetric( + name="precision", + value="0.8" + ) + ] + ) + ] + ) + + Model.model_validate_json(m.model_dump_json()) + + self.assertIsNotNone(m) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From b6d07b28feac0c3ba4dfa2b5b89e515c0820a33f Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 21:05:24 -0800 Subject: [PATCH 12/17] linting --- src/aind_data_schema/core/model.py | 2 +- tests/test_model.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 3ab05eb68..af696f66d 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -32,7 +32,7 @@ class ModelEvaluation(AindModel): """Description of model evaluation""" data: Optional[str] = Field(default=None, title="Path to evaluation data") - data_description: Optional[str] = Field(default=None, title="Description of evaluation data") + data_description: Optional[str] = Field(default=None, title="Description of evaluation data") date: AwareDatetimeWithDefault = Field(..., title="Date") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") notes: Optional[str] = Field(default=None, title="Notes") diff --git a/tests/test_model.py b/tests/test_model.py index 999fd981e..f1af9ece5 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -7,12 +7,12 @@ from aind_data_schema_models.modalities import Modality from aind_data_schema_models.organizations import Organization -from aind_data_schema_models.process_names import ProcessName from aind_data_schema_models.system_architecture import ModelBackbone from aind_data_schema.core.model import Model, ModelArchitecture, ModelEvaluation, ModelTraining, PerformanceMetric from aind_data_schema.components.devices import Software + class ModelTests(unittest.TestCase): """tests for model""" @@ -36,9 +36,9 @@ def test_constructors(self): parameters={ "downsample": 1, "input_shape": [ - 14, - 14, - 26 + 14, + 14, + 26 ], "learning_rate": 0.0001, "train_test_split": 0.8, @@ -86,11 +86,12 @@ def test_constructors(self): ) ] ) - + Model.model_validate_json(m.model_dump_json()) self.assertIsNotNone(m) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file From 3343a9622af3913ffcbc8c0ded38dc7e02ea11eb Mon Sep 17 00:00:00 2001 From: Saskia de Vries Date: Sat, 16 Nov 2024 21:06:26 -0800 Subject: [PATCH 13/17] more lint --- tests/test_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_model.py b/tests/test_model.py index f1af9ece5..bf6c7d6c2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -94,4 +94,3 @@ def test_constructors(self): if __name__ == "__main__": unittest.main() - \ No newline at end of file From d4c13bb5bcff35c5c9c90e3747dfcf17f6aa8051 Mon Sep 17 00:00:00 2001 From: Tom Chartrand Date: Mon, 25 Nov 2024 13:50:45 -0800 Subject: [PATCH 14/17] fix: backward compatible tweaks to processing for broader usage makes software_version, parameters optional, allows list for input_location --- src/aind_data_schema/core/processing.py | 11 ++++++----- tests/test_processing.py | 8 +------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/aind_data_schema/core/processing.py b/src/aind_data_schema/core/processing.py index 9a1e8edd3..0c0107107 100644 --- a/src/aind_data_schema/core/processing.py +++ b/src/aind_data_schema/core/processing.py @@ -1,7 +1,7 @@ """ schema for processing """ from enum import Enum -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Union from aind_data_schema_models.process_names import ProcessName from aind_data_schema_models.units import MemoryUnit, UnitlessUnit @@ -57,15 +57,16 @@ class DataProcess(AindModel): """Description of a single processing step""" name: ProcessName = Field(..., title="Name") - software_version: str = Field(..., description="Version of the software used", title="Version") + software_version: Optional[str] = Field(default=None, description="Version of the software used", title="Version") start_date_time: AwareDatetimeWithDefault = Field(..., title="Start date time") end_date_time: AwareDatetimeWithDefault = Field(..., title="End date time") - input_location: str = Field(..., description="Path to data inputs", title="Input location") + # allowing multiple input locations, to be replaced by CompositeData object in future + input_location: Union[str, List[str]] = Field(..., description="Path(s) to data inputs", title="Input location") output_location: str = Field(..., description="Path to data outputs", title="Output location") code_url: str = Field(..., description="Path to code repository", title="Code URL") code_version: Optional[str] = Field(default=None, description="Version of the code", title="Code version") - parameters: AindGenericType = Field(..., title="Parameters") - outputs: AindGenericType = Field(AindGeneric(), description="Output parameters", title="Outputs") + parameters: AindGenericType = Field(default=AindGeneric(), title="Parameters") + outputs: AindGenericType = Field(default=AindGeneric(), description="Output parameters", title="Outputs") notes: Optional[str] = Field(default=None, title="Notes", validate_default=True) resources: Optional[ResourceUsage] = Field(default=None, title="Process resource usage") diff --git a/tests/test_processing.py b/tests/test_processing.py index 81d7d9d8e..928ddff6f 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -37,10 +37,7 @@ def test_constructors(self): DataProcess(name="Other", notes="") expected_exception = ( - "8 validation errors for DataProcess\n" - "software_version\n" - " Field required [type=missing, input_value={'name': 'Other', 'notes': ''}, input_type=dict]\n" - f" For further information visit https://errors.pydantic.dev/{PYD_VERSION}/v/missing\n" + "6 validation errors for DataProcess\n" "start_date_time\n" " Field required [type=missing, input_value={'name': 'Other', 'notes': ''}, input_type=dict]\n" f" For further information visit https://errors.pydantic.dev/{PYD_VERSION}/v/missing\n" @@ -56,9 +53,6 @@ def test_constructors(self): "code_url\n" " Field required [type=missing, input_value={'name': 'Other', 'notes': ''}, input_type=dict]\n" f" For further information visit https://errors.pydantic.dev/{PYD_VERSION}/v/missing\n" - "parameters\n" - " Field required [type=missing, input_value={'name': 'Other', 'notes': ''}, input_type=dict]\n" - f" For further information visit https://errors.pydantic.dev/{PYD_VERSION}/v/missing\n" "notes\n" " Value error, Notes cannot be empty if 'name' is Other. Describe the process name in the notes field." " [type=value_error, input_value='', input_type=str]\n" From 91a3785c186de99f5cfa97ca00fe3794b5d43306 Mon Sep 17 00:00:00 2001 From: Tom Chartrand Date: Mon, 25 Nov 2024 14:56:38 -0800 Subject: [PATCH 15/17] fix: use DataProcess in model training/eval --- src/aind_data_schema/core/model.py | 30 +++++++++------- tests/test_model.py | 58 +++++++++++++++++++----------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index af696f66d..9cea2b544 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -8,16 +8,18 @@ from aind_data_schema_models.organizations import Organization from aind_data_schema_models.system_architecture import ModelBackbone -from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AwareDatetimeWithDefault +from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AindGeneric from aind_data_schema.components.devices import Software +from aind_data_schema.core.processing import DataProcess, ProcessName class ModelArchitecture(AindModel): """Description of model architecture""" - backbone: ModelBackbone = Field(..., title="Backbone") + backbone: ModelBackbone = Field(..., title="Backbone", description="Core network architecture") + software: List[Software] = Field(default=[], title="Software frameworks") layers: Optional[int] = Field(default=None, title="Layers") - parameters: AindGenericType = Field(..., title="Parameters") + parameters: AindGenericType = Field(default=AindGeneric(), title="Parameters") notes: Optional[str] = Field(default=None, title="Notes") @@ -28,20 +30,24 @@ class PerformanceMetric(AindModel): value: Any = Field(..., title="Metric value") -class ModelEvaluation(AindModel): +class ModelEvaluation(DataProcess): """Description of model evaluation""" - data: Optional[str] = Field(default=None, title="Path to evaluation data") - data_description: Optional[str] = Field(default=None, title="Description of evaluation data") - date: AwareDatetimeWithDefault = Field(..., title="Date") + name: ProcessName = Field(ProcessName.MODEL_EVALUATION, title="Process name") performance: List[PerformanceMetric] = Field(..., title="Evaluation performance") - notes: Optional[str] = Field(default=None, title="Notes") -class ModelTraining(ModelEvaluation): +class ModelTraining(DataProcess): """Description of model training""" - cross_validation_method: str = Field(..., title="Cross validation method") + name: ProcessName = Field(ProcessName.MODEL_TRAINING, title="Process name") + train_performance: List[PerformanceMetric] = Field(..., title="Training performance", description="Performance on training set") + test_performance: Optional[List[PerformanceMetric]] = Field( + default=None, title="Test performance", description="Performance on untrained data, evaluated during training" + ) + test_data: Optional[str] = Field( + default=None, title="Test data", description="Path or cross-validation/split approach" + ) class Model(AindCoreModel): @@ -56,10 +62,10 @@ class Model(AindCoreModel): developer_full_name: Optional[List[str]] = Field(default=None, title="Name of developer") developer_institution: Optional[Organization.ONE_OF] = Field(default=None, title="Institute where developed") modality: Modality.ONE_OF = Field(..., title="Modality") - model_architecture: ModelArchitecture = Field(..., title="Model architecture") - software: List[Software] = Field(..., title="Software") + architecture: ModelArchitecture = Field(..., title="Model architecture") intended_use: str = Field(..., title="Intended model use", description="Semantic description of intended use") limitations: Optional[str] = Field(default=None, title="Model limitations") + pretrained_source_url: Optional[str] = Field(default=None, title="Pretrained source URL") training: Optional[List[ModelTraining]] = Field(default=[], title="Training") evaluations: Optional[List[ModelEvaluation]] = Field(default=[], title="Evaluations") notes: Optional[str] = Field(default=None, title="Notes") diff --git a/tests/test_model.py b/tests/test_model.py index bf6c7d6c2..d9dd0fba7 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -27,10 +27,11 @@ def test_constructors(self): m = Model( name="2024_01_01_ResNet18_SmartSPIM.h5", license="CC-BY-4.0", - developer_full_name="Joe Schmoe", + developer_full_name=["Joe Schmoe"], developer_institution=Organization.AIND, modality=Modality.SPIM, - model_architecture=ModelArchitecture( + pretrained_source_url="url pretrained weights are from", + architecture=ModelArchitecture( backbone=ModelBackbone.RESNET, layers=18, parameters={ @@ -40,26 +41,23 @@ def test_constructors(self): 14, 26 ], - "learning_rate": 0.0001, - "train_test_split": 0.8, - "batch_size": 32, - "augmentation": True, - "finetuning": True }, + software=[Software( + name="tensorflow", + version="2.11.0", + ) + ], ), - software=[Software( - name="tensorflow", - version="2.11.0", - ) - ], intended_use="Cell counting for 488 channel of SmartSPIM data", limitations="Only trained on 488 channel", training=[ ModelTraining( - data="path to training set", - data_description="description of training set", - date=now, - performance=[ + input_location=["s3 path to eval 1", "s3 path to eval 2"], + output_location="s3 path to trained model asset", + code_url="url for training code repo", + start_date_time=now, + end_date_time=now, + train_performance=[ PerformanceMetric( name="precision", value=0.9 @@ -69,18 +67,36 @@ def test_constructors(self): value=0.85 ) ], - cross_validation_method="5-fold" + test_performance=[ + PerformanceMetric( + name="precision", + value=0.8 + ), + PerformanceMetric( + name="recall", + value=0.8 + ) + ], + test_data="4:1 train/test split", + parameters={ + "learning_rate": 0.0001, + "batch_size": 32, + "augmentation": True, + }, + notes="note on training data selection" ) ], evaluations=[ ModelEvaluation( - data="path to evaluation data", - data_description="description of evaluation set", - date=now, + input_location=["s3 path to eval 1", "s3 path to eval 2"], + output_location="s3 path (output asset or trained model asset if no output)", + code_url="url for evaluation code repo (or capsule?)", + start_date_time=now, + end_date_time=now, performance=[ PerformanceMetric( name="precision", - value="0.8" + value=0.8 ) ] ) From 0a8e986eb0c9679fb935097b752244d6fdd295eb Mon Sep 17 00:00:00 2001 From: Tom Chartrand Date: Mon, 25 Nov 2024 15:57:42 -0800 Subject: [PATCH 16/17] linting --- src/aind_data_schema/core/model.py | 11 +++---- tests/test_model.py | 47 +++++++++--------------------- 2 files changed, 19 insertions(+), 39 deletions(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index 9cea2b544..a215e8924 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -1,14 +1,13 @@ """ schema describing an analysis model """ -from typing import Any, Optional, List, Literal - -from pydantic import Field +from typing import Any, List, Literal, Optional from aind_data_schema_models.modalities import Modality from aind_data_schema_models.organizations import Organization from aind_data_schema_models.system_architecture import ModelBackbone +from pydantic import Field -from aind_data_schema.base import AindCoreModel, AindGenericType, AindModel, AindGeneric +from aind_data_schema.base import AindCoreModel, AindGeneric, AindGenericType, AindModel from aind_data_schema.components.devices import Software from aind_data_schema.core.processing import DataProcess, ProcessName @@ -41,7 +40,9 @@ class ModelTraining(DataProcess): """Description of model training""" name: ProcessName = Field(ProcessName.MODEL_TRAINING, title="Process name") - train_performance: List[PerformanceMetric] = Field(..., title="Training performance", description="Performance on training set") + train_performance: List[PerformanceMetric] = Field( + ..., title="Training performance", description="Performance on training set" + ) test_performance: Optional[List[PerformanceMetric]] = Field( default=None, title="Test performance", description="Performance on untrained data, evaluated during training" ) diff --git a/tests/test_model.py b/tests/test_model.py index d9dd0fba7..d57b2cf3a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -4,13 +4,12 @@ import unittest import pydantic - from aind_data_schema_models.modalities import Modality from aind_data_schema_models.organizations import Organization from aind_data_schema_models.system_architecture import ModelBackbone -from aind_data_schema.core.model import Model, ModelArchitecture, ModelEvaluation, ModelTraining, PerformanceMetric from aind_data_schema.components.devices import Software +from aind_data_schema.core.model import Model, ModelArchitecture, ModelEvaluation, ModelTraining, PerformanceMetric class ModelTests(unittest.TestCase): @@ -36,15 +35,12 @@ def test_constructors(self): layers=18, parameters={ "downsample": 1, - "input_shape": [ - 14, - 14, - 26 - ], + "input_shape": [14, 14, 26], }, - software=[Software( - name="tensorflow", - version="2.11.0", + software=[ + Software( + name="tensorflow", + version="2.11.0", ) ], ), @@ -58,24 +54,12 @@ def test_constructors(self): start_date_time=now, end_date_time=now, train_performance=[ - PerformanceMetric( - name="precision", - value=0.9 - ), - PerformanceMetric( - name="recall", - value=0.85 - ) + PerformanceMetric(name="precision", value=0.9), + PerformanceMetric(name="recall", value=0.85), ], test_performance=[ - PerformanceMetric( - name="precision", - value=0.8 - ), - PerformanceMetric( - name="recall", - value=0.8 - ) + PerformanceMetric(name="precision", value=0.8), + PerformanceMetric(name="recall", value=0.8), ], test_data="4:1 train/test split", parameters={ @@ -83,7 +67,7 @@ def test_constructors(self): "batch_size": 32, "augmentation": True, }, - notes="note on training data selection" + notes="note on training data selection", ) ], evaluations=[ @@ -93,14 +77,9 @@ def test_constructors(self): code_url="url for evaluation code repo (or capsule?)", start_date_time=now, end_date_time=now, - performance=[ - PerformanceMetric( - name="precision", - value=0.8 - ) - ] + performance=[PerformanceMetric(name="precision", value=0.8)], ) - ] + ], ) Model.model_validate_json(m.model_dump_json()) From e59a2918bb59aad4d3ae3d988a1ed7641b86aeb1 Mon Sep 17 00:00:00 2001 From: Tom Chartrand Date: Mon, 9 Dec 2024 12:53:11 -0800 Subject: [PATCH 17/17] wording change --- src/aind_data_schema/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_data_schema/core/model.py b/src/aind_data_schema/core/model.py index a215e8924..5de0259cd 100644 --- a/src/aind_data_schema/core/model.py +++ b/src/aind_data_schema/core/model.py @@ -44,7 +44,7 @@ class ModelTraining(DataProcess): ..., title="Training performance", description="Performance on training set" ) test_performance: Optional[List[PerformanceMetric]] = Field( - default=None, title="Test performance", description="Performance on untrained data, evaluated during training" + default=None, title="Test performance", description="Performance on test data, evaluated during training" ) test_data: Optional[str] = Field( default=None, title="Test data", description="Path or cross-validation/split approach"