From 9e5f2ca20157d22f5977cd69e5c730135d213fdd Mon Sep 17 00:00:00 2001 From: cranberrydeveloper <153510230+cranberrydeveloper@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:16:34 +0000 Subject: [PATCH 1/2] Adding Indian Women Menstrual Health Chatbot Eval --- .../data/indian_women_menstrual_health/samples.jsonl | 3 +++ evals/registry/evals/indian_women_menstrual_health.yaml | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 evals/registry/data/indian_women_menstrual_health/samples.jsonl create mode 100644 evals/registry/evals/indian_women_menstrual_health.yaml diff --git a/evals/registry/data/indian_women_menstrual_health/samples.jsonl b/evals/registry/data/indian_women_menstrual_health/samples.jsonl new file mode 100644 index 0000000000..14f4434345 --- /dev/null +++ b/evals/registry/data/indian_women_menstrual_health/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:408624e768984787105e5c32dd522a51098e14ba5f201d73bb11d2809841e2e7 +size 251488 diff --git a/evals/registry/evals/indian_women_menstrual_health.yaml b/evals/registry/evals/indian_women_menstrual_health.yaml new file mode 100644 index 0000000000..f3829b0065 --- /dev/null +++ b/evals/registry/evals/indian_women_menstrual_health.yaml @@ -0,0 +1,9 @@ +indian_women_menstrual_health: + id: indian_women_menstrual_health.dev.v0 + description: Questions and answers related to menstrual health verified by health researchers and contextualized for India. + metrics: [accuracy] + +indian_women_menstrual_health.dev.v0: + class: evals.elsuite.basic.match:Match + args: + samples_jsonl: indian_women_menstrual_health/samples.jsonl \ No newline at end of file From 98d6d253bfad145aeb4bf2fb315d711e3f0481fd Mon Sep 17 00:00:00 2001 From: phalgunagopal <59923496+phalgunagopal@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:09:13 +0530 Subject: [PATCH 2/2] Closed QA ModelGraded Eval for Indian Menstrual Health ChatBot. Eval Suggests Indian Context not maintained even after mentioning in System Prompt. --- .../closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl | 3 +++ .../data/indian_women_menstrual_health/samples.jsonl | 3 --- .../evals/Indian_Menstrual_Health_ChatBot-closedqa.yaml | 9 +++++++++ evals/registry/evals/indian_women_menstrual_health.yaml | 9 --------- 4 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 evals/registry/data/Indian_Menstrual_Health_ChatBot_closedqa/closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl delete mode 100644 evals/registry/data/indian_women_menstrual_health/samples.jsonl create mode 100644 evals/registry/evals/Indian_Menstrual_Health_ChatBot-closedqa.yaml delete mode 100644 evals/registry/evals/indian_women_menstrual_health.yaml diff --git a/evals/registry/data/Indian_Menstrual_Health_ChatBot_closedqa/closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl b/evals/registry/data/Indian_Menstrual_Health_ChatBot_closedqa/closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl new file mode 100644 index 0000000000..26d2fc70fd --- /dev/null +++ b/evals/registry/data/Indian_Menstrual_Health_ChatBot_closedqa/closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6309f9af460f1f3e06a1edfd0171dfb097fdaf0533ea635f9a15232787498c90 +size 39009 diff --git a/evals/registry/data/indian_women_menstrual_health/samples.jsonl b/evals/registry/data/indian_women_menstrual_health/samples.jsonl deleted file mode 100644 index 14f4434345..0000000000 --- a/evals/registry/data/indian_women_menstrual_health/samples.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:408624e768984787105e5c32dd522a51098e14ba5f201d73bb11d2809841e2e7 -size 251488 diff --git a/evals/registry/evals/Indian_Menstrual_Health_ChatBot-closedqa.yaml b/evals/registry/evals/Indian_Menstrual_Health_ChatBot-closedqa.yaml new file mode 100644 index 0000000000..97a2e44c6e --- /dev/null +++ b/evals/registry/evals/Indian_Menstrual_Health_ChatBot-closedqa.yaml @@ -0,0 +1,9 @@ +Indian_Menstrual_Health_ChatBot-closedqa: + id: Indian_Menstrual_Health_ChatBot-closedqa.dev.v0 + metrics: [accuracy] +Indian_Menstrual_Health_ChatBot-closedqa.dev.v0: + class: evals.elsuite.modelgraded.classify:ModelBasedClassify + args: + samples_jsonl: Indian_Menstrual_Health_ChatBot_closedqa/closedqa/Indian_Menstrual_Bot_ClosedQA.jsonl + eval_type: cot_classify + modelgraded_spec: closedqa \ No newline at end of file diff --git a/evals/registry/evals/indian_women_menstrual_health.yaml b/evals/registry/evals/indian_women_menstrual_health.yaml deleted file mode 100644 index f3829b0065..0000000000 --- a/evals/registry/evals/indian_women_menstrual_health.yaml +++ /dev/null @@ -1,9 +0,0 @@ -indian_women_menstrual_health: - id: indian_women_menstrual_health.dev.v0 - description: Questions and answers related to menstrual health verified by health researchers and contextualized for India. - metrics: [accuracy] - -indian_women_menstrual_health.dev.v0: - class: evals.elsuite.basic.match:Match - args: - samples_jsonl: indian_women_menstrual_health/samples.jsonl \ No newline at end of file