From d839fc48cc9ee6360badaa76936988ff505839ab Mon Sep 17 00:00:00 2001
From: Peter Ciccolo <139496666+ciccolo-neon@users.noreply.github.com>
Date: Tue, 27 Aug 2024 13:56:18 -0700
Subject: [PATCH] fix typos re: divergences and grond (#4405)

---
 .../running-pre-tested-evals/ai-vs-human-groundtruth.md       | 2 +-
 packages/phoenix-evals/src/phoenix/evals/default_templates.py | 4 ++--
 tutorials/evals/evaluate_human_vs_ai_classifications.ipynb    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/evaluation/how-to-evals/running-pre-tested-evals/ai-vs-human-groundtruth.md b/docs/evaluation/how-to-evals/running-pre-tested-evals/ai-vs-human-groundtruth.md
index f176b9fcad..b64e3bf5d8 100644
--- a/docs/evaluation/how-to-evals/running-pre-tested-evals/ai-vs-human-groundtruth.md
+++ b/docs/evaluation/how-to-evals/running-pre-tested-evals/ai-vs-human-groundtruth.md
@@ -52,7 +52,7 @@ Your goal is to determine if the AI answer correctly matches, in substance, the
     [END DATA]
 Compare the AI answer to the human ground truth answer, if the AI correctly answers the question,
 then the AI answer is "correct". If the AI answer is longer but contains the main idea of the
-Human answer please answer "correct". If the AI answer divergences or does not contain the main
+Human answer please answer "correct". If the AI answer diverges or does not contain the main
 idea of the human answer, please answer "incorrect".
 ```
 
diff --git a/packages/phoenix-evals/src/phoenix/evals/default_templates.py b/packages/phoenix-evals/src/phoenix/evals/default_templates.py
index 6f5e8e0d5e..ea3a1b6ee6 100644
--- a/packages/phoenix-evals/src/phoenix/evals/default_templates.py
+++ b/packages/phoenix-evals/src/phoenix/evals/default_templates.py
@@ -444,7 +444,7 @@
     [END DATA]
 Compare the AI answer to the human ground truth answer, if the AI correctly answers the question,
 then the AI answer is "correct". If the AI answer is longer but contains the main idea of the
-Human answer please answer "correct". If the AI answer divergences or does not contain the main
+Human answer please answer "correct". If the AI answer diverges or does not contain the main
 idea of the human answer, please answer "incorrect".
 """
 
@@ -469,7 +469,7 @@
 If the AI correctly answers the question as compared to the human answer, then the
 AI answer LABEL is "correct". If the AI answer is longer but contains the main
 idea of the Human answer please answer LABEL "correct". If the AI answer
-divergences or does not contain the main idea of the human answer, please answer
+diverges or does not contain the main idea of the human answer, please answer
 LABEL "incorrect".
 
 Example response:
diff --git a/tutorials/evals/evaluate_human_vs_ai_classifications.ipynb b/tutorials/evals/evaluate_human_vs_ai_classifications.ipynb
index 9c334c74ae..9024a69d40 100644
--- a/tutorials/evals/evaluate_human_vs_ai_classifications.ipynb
+++ b/tutorials/evals/evaluate_human_vs_ai_classifications.ipynb
@@ -15,7 +15,7 @@
     "        Community\n",
     "    </p>\n",
     "</center>\n",
-    "<h1 align=\"center\">Human/GrondTruth Versus AI Evals</h1>\n",
+    "<h1 align=\"center\">Human/GroundTruth Versus AI Evals</h1>\n",
     "\n",
     "Arize provides tooling to evaluate LLM applications, including tools to determine whether AI answers match Human Groundtruth answers. In many Q&A systems its important to test the AI answer results as compared to Human answers prior to deployment. These help assess how often the answers are correctly generated by the AI system. \n",
     "\n",
@@ -362,7 +362,7 @@
     "    [END DATA]\n",
     "Compare the AI answer to the human ground truth answer, if the AI correctly answers the question,\n",
     "then the AI answer is \"correct\". If the AI answer is longer but contains the main idea of the\n",
-    "Human answer please answer \"correct\". If the AI answer divergences or does not contain the main\n",
+    "Human answer please answer \"correct\". If the AI answer diverges or does not contain the main\n",
     "idea of the human answer, please answer \"incorrect\".\n",
     "\n"
    ]
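
For anyone who wants to exercise the corrected wording end to end: the template strings touched above back Phoenix's pre-tested human-vs-AI eval, which is normally run through `llm_classify`. Below is a minimal sketch based on the usage shown in `ai-vs-human-groundtruth.md`; the `gpt-4` model choice, the `OPENAI_API_KEY` requirement, and the one-row dataframe are illustrative assumptions, not part of this patch.

```python
# Minimal sketch: run the HUMAN_VS_AI eval whose template text this patch
# corrects. Assumes phoenix.evals is installed and OPENAI_API_KEY is set;
# the dataframe below is made-up example data, not from the repo.
import pandas as pd

from phoenix.evals import (
    HUMAN_VS_AI_PROMPT_RAILS_MAP,
    HUMAN_VS_AI_PROMPT_TEMPLATE,
    OpenAIModel,
    llm_classify,
)

# Column names match the template variables {question}, {correct_answer},
# and {ai_generated_answer}.
df = pd.DataFrame(
    {
        "question": ["What is the capital of France?"],
        "correct_answer": ["Paris"],
        "ai_generated_answer": ["The capital of France is Paris."],
    }
)

model = OpenAIModel(model="gpt-4", temperature=0.0)

# The rails map holds the allowed output labels: "correct" / "incorrect".
rails = list(HUMAN_VS_AI_PROMPT_RAILS_MAP.values())

results = llm_classify(
    dataframe=df,
    template=HUMAN_VS_AI_PROMPT_TEMPLATE,
    model=model,
    rails=rails,
    provide_explanation=True,  # adds an "explanation" column next to "label"
)
print(results["label"].tolist())
```

`llm_classify` snaps the model's free-form output onto the rails, so the `label` column should only ever contain `correct`, `incorrect`, or `NOT_PARSABLE` when the output matches neither rail.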