From abf6c03adc1516611f40c995a15e8d959117d681 Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Tue, 17 Dec 2024 11:39:26 -0800 Subject: [PATCH 1/7] add tagging --- .../aime_templates/Template_tag1.jinja | 19 ++++++++++++++++++ eureka_ml_insights/user_configs/__init__.py | 1 + eureka_ml_insights/user_configs/aime.py | 20 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja diff --git a/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja b/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja new file mode 100644 index 0000000..056c1ac --- /dev/null +++ b/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja @@ -0,0 +1,19 @@ +You are a genius math expert in understanding math questions. Please read the following math question, and then decide which math category it falls into. + +Your judgment should be one of the following: + +arithmetic +algebra +counting +geometry +number theory +probability +other topics + +Do not generate any other texts except one of the above topics. + +---------- +Original question: +{{prompt}} +---------- +Your judgment: diff --git a/eureka_ml_insights/user_configs/__init__.py b/eureka_ml_insights/user_configs/__init__.py index 4061a27..e1a48f9 100644 --- a/eureka_ml_insights/user_configs/__init__.py +++ b/eureka_ml_insights/user_configs/__init__.py @@ -7,6 +7,7 @@ AIME_PIPELINE256Run, AIME_PIPELINE512Run, AIME_PIPELINE1024Run, + AIME_PIPELINETag, ) from .dna import DNA_PIPELINE from .drop import Drop_Experiment_Pipeline diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index 5b85b73..40669e1 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -23,6 +23,7 @@ MajorityVoteTransform, MultiplyTransform, SequenceTransform, + SamplerTransform, ) from eureka_ml_insights.data_utils.aime_utils import AIMEExtractAnswer from eureka_ml_insights.data_utils.data import DataLoader @@ -312,3 +313,22 @@ def configure_pipeline( MultiplyTransform(n_repeats=1024) ) return pipeline + + +class AIME_PIPELINETag(AIME_PIPELINE): + """This class specifies the config for running AIME benchmark 5 repeated times""" + + def configure_pipeline( + self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any] + ) -> PipelineConfig: + pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from) + self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append( + SamplerTransform(random_seed=0, + sample_count=10, + ) + ) + # data preprocessing + self.data_processing_comp.prompt_template_path=os.path.join( + os.path.dirname(__file__), "../prompt_templates/aime_templates/Template_tag1.jinja" + ) + return pipeline \ No newline at end of file From 77d1103a318761f04eeb2886310fcae72ab3d0ec Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Wed, 18 Dec 2024 08:27:07 -0800 Subject: [PATCH 2/7] add new metric --- eureka_ml_insights/configs/model_configs.py | 236 +++++++++++++++++++- eureka_ml_insights/metrics/__init__.py | 2 + eureka_ml_insights/metrics/aime_metrics.py | 20 ++ eureka_ml_insights/user_configs/aime.py | 8 +- 4 files changed, 259 insertions(+), 7 deletions(-) create mode 100644 eureka_ml_insights/metrics/aime_metrics.py diff --git a/eureka_ml_insights/configs/model_configs.py b/eureka_ml_insights/configs/model_configs.py index 1b83a7b..8f1e023 100644 --- a/eureka_ml_insights/configs/model_configs.py +++ b/eureka_ml_insights/configs/model_configs.py @@ -4,6 +4,7 @@ from eureka_ml_insights.models import ( AzureOpenAIO1Model, + AzureOpenAIModel, ClaudeModel, DirectOpenAIModel, DirectOpenAIO1Model, @@ -13,7 +14,7 @@ LLaVAModel, MistralServerlessAzureRestEndpointModel, RestEndpointModel, - TestModel, + #TnRModels, ) from .config import ModelConfig @@ -22,17 +23,22 @@ # in the secret_key_params dictionary. OR you can provide the key name and key vault URL to fetch the key from Azure Key Vault. # You don't need to provide both the key_vault_url and local_keys_path. You can provide one of them based on your setup. - -# Test model -TEST_MODEL_CONFIG = ModelConfig(TestModel, {}) - # OpenAI models +''' OPENAI_SECRET_KEY_PARAMS = { "key_name": "your_openai_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } +''' + +OPENAI_SECRET_KEY_PARAMS = { + "key_name": "openai", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", +} + OAI_O1_PREVIEW_CONFIG = ModelConfig( DirectOpenAIO1Model, @@ -42,6 +48,14 @@ }, ) +OAI_O1_MINI_CONFIG = ModelConfig( + DirectOpenAIO1Model, + { + "model_name": "o1-mini-2024-09-12", + "secret_key_params": OPENAI_SECRET_KEY_PARAMS, + }, +) + OAI_O1_PREVIEW_AUZRE_CONFIG = ModelConfig( AzureOpenAIO1Model, { @@ -91,12 +105,110 @@ }, ) +# Azure OAI models +## Azure OAI models -- TNR Models + +TNR_SECRET_KEY_PARAMS = { + "key_name": "tnrllmproxy", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", +} + +GCRAOAI8SW1_AZURE_OAI_O1_PREVIEW_CONFIG = ModelConfig( + AzureOpenAIO1Model, + { + "url": "https://gcraoai8sw1.openai.azure.com/", + "model_name": "o1-preview", + "api_version": "2024-08-01-preview", + } +) + +GCRAOAI8SW1_AZURE_OAI_O1_MINI_CONFIG = ModelConfig( + AzureOpenAIO1Model, + { + "url": "https://gcraoai8sw1.openai.azure.com/", + "model_name": "o1-mini", + "api_version": "2024-08-01-preview", + } +) + +GCRAOAI8SW1_AZURE_OAI_GPT4O_CONFIG = ModelConfig( + AzureOpenAIO1Model, + { + "url": "https://gcraoai8sw1.openai.azure.com/", + "model_name": "gpt-4o", + "api_version": "2024-08-01-preview", + "temperature": 1.0, + + } +) + + +GCRAOAI8SW1_AZURE_OAI_GPT4_T1_CONFIG = ModelConfig( + AzureOpenAIO1Model, + { + "url": "https://gcraoai8sw1.openai.azure.com/", + "model_name": "gpt-4", + "api_version": "2024-08-01-preview", + "temperature": 1.0, + + } +) + +AzureOpenAIModel + +""" +TNR_GPT4_1106_PREVIEW_CONFIG = ModelConfig( + TnRModels, + { + "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", + "secret_key_params": TNR_SECRET_KEY_PARAMS, + "model_name": "gpt-4", + }, +) + +TNR_GPT4_VISION_PREVIEW_CONFIG = ModelConfig( + TnRModels, + { + "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", + "secret_key_params": TNR_SECRET_KEY_PARAMS, + "model_name": "gpt-4-turbo-v", + }, +) + +TNR_GPT4V_TURBO_2024_04_09_CONFIG = ModelConfig( + TnRModels, + { + "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", + "secret_key_params": TNR_SECRET_KEY_PARAMS, + "model_name": "gpt-4-turbo", + }, +) + +TNR_GPT4O_2024_05_13_CONFIG = ModelConfig( + TnRModels, + { + "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", + "secret_key_params": TNR_SECRET_KEY_PARAMS, + "model_name": "gpt-4o", + }, +) +""" + # Gemini models +''' GEMINI_SECRET_KEY_PARAMS = { "key_name": "your_gemini_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } +''' + +GEMINI_SECRET_KEY_PARAMS = { + "key_name": "aif-eval-gemini-firstproject", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", +} GEMINI_V15_PRO_CONFIG = ModelConfig( GeminiModel, @@ -106,6 +218,37 @@ }, ) + +GEMINI_V15_PRO_T1_CONFIG = ModelConfig( + GeminiModel, + { + "model_name": "gemini-1.5-pro", + "secret_key_params": GEMINI_SECRET_KEY_PARAMS, + "temperature":1.0, + }, +) + +GEMINI_EXP_1206_T1_CONFIG = ModelConfig( + GeminiModel, + { + "model_name": "gemini-exp-1206", + "secret_key_params": GEMINI_SECRET_KEY_PARAMS, + "temperature":1.0, + }, +) + + +GEMINI_EXP_1121_T1_CONFIG = ModelConfig( + GeminiModel, + { + "model_name": "gemini-exp-1121", + "secret_key_params": GEMINI_SECRET_KEY_PARAMS, + "temperature":1.0, + }, +) + + + GEMINI_V1_PRO_CONFIG = ModelConfig( GeminiModel, { @@ -115,11 +258,20 @@ ) # Claude models +''' CLAUDE_SECRET_KEY_PARAMS = { "key_name": "your_claude_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } +''' + +CLAUDE_SECRET_KEY_PARAMS = { + "key_name": "aif-eval-claude", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", +} + CLAUDE_3_OPUS_CONFIG = ModelConfig( ClaudeModel, @@ -137,6 +289,32 @@ }, ) +CLAUDE_3_5_SONNET_T1_CONFIG = ModelConfig( + ClaudeModel, + { + "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, + "model_name": "claude-3-5-sonnet-20240620", + "temperature":1.0, + }, +) + +CLAUDE_3_5_SONNET_SEARCH_T1_CONFIG = ModelConfig( + ClaudeModel, + { + "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, + "model_name": "claude-3-5-sonnet-20241022", + "temperature": 1.0, + }, +) + +CLAUDE_3_5_SONNET_SEARCH_CONFIG = ModelConfig( + ClaudeModel, + { + "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, + "model_name": "claude-3-5-sonnet-20241022", + }, +) + # LLAVA models LLAVAHF_V16_34B_CONFIG = ModelConfig( LLaVAHuggingFaceModel, @@ -199,3 +377,51 @@ "model_name": "Mistral-large-2407", }, ) + + + +AIF_NT_MISTRAL_LARGE_2_2407_T1_CONFIG = ModelConfig( + MistralServerlessAzureRestEndpointModel, + { + "url": "https://Mistral-large-2407-aifeval.eastus.models.ai.azure.com/v1/chat/completions", + "secret_key_params": { + "key_name": "aif-nt-mistral-large-2-2407", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", + }, + "model_name": "Mistral-large-2407-aifeval", + "temperature": 1.0, + + }, +) + + +GCR_LLAMA3_1_70B_INSTRUCT_CONFIG = ModelConfig( + RestEndpointModel, + { + "url": "https://gcr-llama31-70b-instruct.westus3.inference.ml.azure.com/score", + "secret_key_params": { + "key_name": "meta-llama-3-1-70b-instruct-1", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", + }, + "model_name": "meta-llama-3-1-70b-instruct-1", + "temperature": 1.0, + + }, +) + +AIF_NT_LLAMA3_1_405B_INSTRUCT_CONFIG = ModelConfig( + LlamaServerlessAzureRestEndpointModel, + { + "url": "https://Meta-Llama-3-1-405B-Instruct-aif.eastus.models.ai.azure.com/v1/chat/completions", + "secret_key_params": { + "key_name": "aif-nt-meta-llama-3-1-405b-instruct-1", + "local_keys_path": "keys/aifeval-vault-azure-net.json", + "key_vault_url": "https://aifeval.vault.azure.net", + }, + "model_name": "Meta-Llama-3-1-405B-Instruct-aif", + "temperature": 1.0, + + }, +) diff --git a/eureka_ml_insights/metrics/__init__.py b/eureka_ml_insights/metrics/__init__.py index 2b19ec1..e31b08a 100644 --- a/eureka_ml_insights/metrics/__init__.py +++ b/eureka_ml_insights/metrics/__init__.py @@ -28,6 +28,7 @@ SpatialAndLayoutReasoningMetric, ) +from .aime_metrics import NumericMatch __all__ = [ Metric, ClassicMetric, @@ -52,4 +53,5 @@ SumAggregator, MMMUMetric, MaxTokenF1ScoreMetric, + NumericMatch, ] diff --git a/eureka_ml_insights/metrics/aime_metrics.py b/eureka_ml_insights/metrics/aime_metrics.py new file mode 100644 index 0000000..8106bb6 --- /dev/null +++ b/eureka_ml_insights/metrics/aime_metrics.py @@ -0,0 +1,20 @@ +from tqdm.auto import tqdm + +from eureka_ml_insights.metrics.metrics_base import ClassicMetric + +import numpy as np + +class NumericMatch(ClassicMetric): + """This class checks for a numeric match.""" + eps = 1e-6 + def __evaluate__(self, answer_text, target_text, is_valid): + if not is_valid: + return "none" + try: + diff = np.abs(float(target_text)-float(answer_text)) + except: + return "none" + if diff PipelineConfig: pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from) + ''' self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append( SamplerTransform(random_seed=0, sample_count=10, ) ) + ''' # data preprocessing self.data_processing_comp.prompt_template_path=os.path.join( os.path.dirname(__file__), "../prompt_templates/aime_templates/Template_tag1.jinja" From 2fe709537bfd85cf365ab76b91adb6d5ee30448f Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Wed, 18 Dec 2024 09:10:37 -0800 Subject: [PATCH 3/7] remove sampling --- eureka_ml_insights/user_configs/aime.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index 23366e8..cd7b442 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -123,10 +123,10 @@ def configure_pipeline( CountAggregator, { "column_names": [ - "ExactMatch_result", + "NumericMatch_result", ], "group_by": "Year", - "filename_base": "ExactMatch_GroupBy", + "filename_base": "NumericMatch_GroupBy", }, ), ], @@ -180,7 +180,7 @@ def configure_pipeline( BiLevelCountAggregator, { "column_names": [ - "ExactMatch_result", + "NumericMatch_result", ], "first_groupby": "ID", "filename_base": "MajorityVote", From bd721690d5d07ccd691b0e5c6192ab0c05140f42 Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Wed, 18 Dec 2024 09:16:03 -0800 Subject: [PATCH 4/7] back to original model configs --- eureka_ml_insights/configs/model_configs.py | 236 +------------------- eureka_ml_insights/user_configs/aime.py | 1 - 2 files changed, 5 insertions(+), 232 deletions(-) diff --git a/eureka_ml_insights/configs/model_configs.py b/eureka_ml_insights/configs/model_configs.py index 8f1e023..1b83a7b 100644 --- a/eureka_ml_insights/configs/model_configs.py +++ b/eureka_ml_insights/configs/model_configs.py @@ -4,7 +4,6 @@ from eureka_ml_insights.models import ( AzureOpenAIO1Model, - AzureOpenAIModel, ClaudeModel, DirectOpenAIModel, DirectOpenAIO1Model, @@ -14,7 +13,7 @@ LLaVAModel, MistralServerlessAzureRestEndpointModel, RestEndpointModel, - #TnRModels, + TestModel, ) from .config import ModelConfig @@ -23,22 +22,17 @@ # in the secret_key_params dictionary. OR you can provide the key name and key vault URL to fetch the key from Azure Key Vault. # You don't need to provide both the key_vault_url and local_keys_path. You can provide one of them based on your setup. + +# Test model +TEST_MODEL_CONFIG = ModelConfig(TestModel, {}) + # OpenAI models -''' OPENAI_SECRET_KEY_PARAMS = { "key_name": "your_openai_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } -''' - -OPENAI_SECRET_KEY_PARAMS = { - "key_name": "openai", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", -} - OAI_O1_PREVIEW_CONFIG = ModelConfig( DirectOpenAIO1Model, @@ -48,14 +42,6 @@ }, ) -OAI_O1_MINI_CONFIG = ModelConfig( - DirectOpenAIO1Model, - { - "model_name": "o1-mini-2024-09-12", - "secret_key_params": OPENAI_SECRET_KEY_PARAMS, - }, -) - OAI_O1_PREVIEW_AUZRE_CONFIG = ModelConfig( AzureOpenAIO1Model, { @@ -105,110 +91,12 @@ }, ) -# Azure OAI models -## Azure OAI models -- TNR Models - -TNR_SECRET_KEY_PARAMS = { - "key_name": "tnrllmproxy", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", -} - -GCRAOAI8SW1_AZURE_OAI_O1_PREVIEW_CONFIG = ModelConfig( - AzureOpenAIO1Model, - { - "url": "https://gcraoai8sw1.openai.azure.com/", - "model_name": "o1-preview", - "api_version": "2024-08-01-preview", - } -) - -GCRAOAI8SW1_AZURE_OAI_O1_MINI_CONFIG = ModelConfig( - AzureOpenAIO1Model, - { - "url": "https://gcraoai8sw1.openai.azure.com/", - "model_name": "o1-mini", - "api_version": "2024-08-01-preview", - } -) - -GCRAOAI8SW1_AZURE_OAI_GPT4O_CONFIG = ModelConfig( - AzureOpenAIO1Model, - { - "url": "https://gcraoai8sw1.openai.azure.com/", - "model_name": "gpt-4o", - "api_version": "2024-08-01-preview", - "temperature": 1.0, - - } -) - - -GCRAOAI8SW1_AZURE_OAI_GPT4_T1_CONFIG = ModelConfig( - AzureOpenAIO1Model, - { - "url": "https://gcraoai8sw1.openai.azure.com/", - "model_name": "gpt-4", - "api_version": "2024-08-01-preview", - "temperature": 1.0, - - } -) - -AzureOpenAIModel - -""" -TNR_GPT4_1106_PREVIEW_CONFIG = ModelConfig( - TnRModels, - { - "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", - "secret_key_params": TNR_SECRET_KEY_PARAMS, - "model_name": "gpt-4", - }, -) - -TNR_GPT4_VISION_PREVIEW_CONFIG = ModelConfig( - TnRModels, - { - "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", - "secret_key_params": TNR_SECRET_KEY_PARAMS, - "model_name": "gpt-4-turbo-v", - }, -) - -TNR_GPT4V_TURBO_2024_04_09_CONFIG = ModelConfig( - TnRModels, - { - "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", - "secret_key_params": TNR_SECRET_KEY_PARAMS, - "model_name": "gpt-4-turbo", - }, -) - -TNR_GPT4O_2024_05_13_CONFIG = ModelConfig( - TnRModels, - { - "url": "https://trapi.research.microsoft.com/gcr/shared/nj/", - "secret_key_params": TNR_SECRET_KEY_PARAMS, - "model_name": "gpt-4o", - }, -) -""" - # Gemini models -''' GEMINI_SECRET_KEY_PARAMS = { "key_name": "your_gemini_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } -''' - -GEMINI_SECRET_KEY_PARAMS = { - "key_name": "aif-eval-gemini-firstproject", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", -} GEMINI_V15_PRO_CONFIG = ModelConfig( GeminiModel, @@ -218,37 +106,6 @@ }, ) - -GEMINI_V15_PRO_T1_CONFIG = ModelConfig( - GeminiModel, - { - "model_name": "gemini-1.5-pro", - "secret_key_params": GEMINI_SECRET_KEY_PARAMS, - "temperature":1.0, - }, -) - -GEMINI_EXP_1206_T1_CONFIG = ModelConfig( - GeminiModel, - { - "model_name": "gemini-exp-1206", - "secret_key_params": GEMINI_SECRET_KEY_PARAMS, - "temperature":1.0, - }, -) - - -GEMINI_EXP_1121_T1_CONFIG = ModelConfig( - GeminiModel, - { - "model_name": "gemini-exp-1121", - "secret_key_params": GEMINI_SECRET_KEY_PARAMS, - "temperature":1.0, - }, -) - - - GEMINI_V1_PRO_CONFIG = ModelConfig( GeminiModel, { @@ -258,20 +115,11 @@ ) # Claude models -''' CLAUDE_SECRET_KEY_PARAMS = { "key_name": "your_claude_secret_key_name", "local_keys_path": "keys/keys.json", "key_vault_url": None, } -''' - -CLAUDE_SECRET_KEY_PARAMS = { - "key_name": "aif-eval-claude", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", -} - CLAUDE_3_OPUS_CONFIG = ModelConfig( ClaudeModel, @@ -289,32 +137,6 @@ }, ) -CLAUDE_3_5_SONNET_T1_CONFIG = ModelConfig( - ClaudeModel, - { - "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, - "model_name": "claude-3-5-sonnet-20240620", - "temperature":1.0, - }, -) - -CLAUDE_3_5_SONNET_SEARCH_T1_CONFIG = ModelConfig( - ClaudeModel, - { - "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, - "model_name": "claude-3-5-sonnet-20241022", - "temperature": 1.0, - }, -) - -CLAUDE_3_5_SONNET_SEARCH_CONFIG = ModelConfig( - ClaudeModel, - { - "secret_key_params": CLAUDE_SECRET_KEY_PARAMS, - "model_name": "claude-3-5-sonnet-20241022", - }, -) - # LLAVA models LLAVAHF_V16_34B_CONFIG = ModelConfig( LLaVAHuggingFaceModel, @@ -377,51 +199,3 @@ "model_name": "Mistral-large-2407", }, ) - - - -AIF_NT_MISTRAL_LARGE_2_2407_T1_CONFIG = ModelConfig( - MistralServerlessAzureRestEndpointModel, - { - "url": "https://Mistral-large-2407-aifeval.eastus.models.ai.azure.com/v1/chat/completions", - "secret_key_params": { - "key_name": "aif-nt-mistral-large-2-2407", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", - }, - "model_name": "Mistral-large-2407-aifeval", - "temperature": 1.0, - - }, -) - - -GCR_LLAMA3_1_70B_INSTRUCT_CONFIG = ModelConfig( - RestEndpointModel, - { - "url": "https://gcr-llama31-70b-instruct.westus3.inference.ml.azure.com/score", - "secret_key_params": { - "key_name": "meta-llama-3-1-70b-instruct-1", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", - }, - "model_name": "meta-llama-3-1-70b-instruct-1", - "temperature": 1.0, - - }, -) - -AIF_NT_LLAMA3_1_405B_INSTRUCT_CONFIG = ModelConfig( - LlamaServerlessAzureRestEndpointModel, - { - "url": "https://Meta-Llama-3-1-405B-Instruct-aif.eastus.models.ai.azure.com/v1/chat/completions", - "secret_key_params": { - "key_name": "aif-nt-meta-llama-3-1-405b-instruct-1", - "local_keys_path": "keys/aifeval-vault-azure-net.json", - "key_vault_url": "https://aifeval.vault.azure.net", - }, - "model_name": "Meta-Llama-3-1-405B-Instruct-aif", - "temperature": 1.0, - - }, -) diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index cd7b442..2d7eefb 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -23,7 +23,6 @@ MajorityVoteTransform, MultiplyTransform, SequenceTransform, - SamplerTransform, ) from eureka_ml_insights.data_utils.aime_utils import AIMEExtractAnswer from eureka_ml_insights.data_utils.data import DataLoader From e4cb82025f2e623f80afb0f75472ffa7363d32ca Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Wed, 18 Dec 2024 17:07:47 -0800 Subject: [PATCH 5/7] update the tagging prompt --- .../aime_templates/Template_tag1.jinja | 6 +++--- eureka_ml_insights/user_configs/aime.py | 20 +++++++------------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja b/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja index 056c1ac..325e8c1 100644 --- a/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja +++ b/eureka_ml_insights/prompt_templates/aime_templates/Template_tag1.jinja @@ -1,6 +1,6 @@ -You are a genius math expert in understanding math questions. Please read the following math question, and then decide which math category it falls into. +You are a genius math expert in understanding math questions. Please read the following math question, and then decide which math categories it falls into. -Your judgment should be one of the following: +Your judgment should be one or more of the following: arithmetic algebra @@ -10,7 +10,7 @@ number theory probability other topics -Do not generate any other texts except one of the above topics. +Do not generate any other texts except one or more of the above topics. For multiple topics, seperate them by commas. ---------- Original question: diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index 2d7eefb..4f314e4 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -26,9 +26,7 @@ ) from eureka_ml_insights.data_utils.aime_utils import AIMEExtractAnswer from eureka_ml_insights.data_utils.data import DataLoader -from eureka_ml_insights.metrics.metrics_base import ExactMatch from eureka_ml_insights.metrics.aime_metrics import NumericMatch - from eureka_ml_insights.metrics.reports import ( BiLevelCountAggregator, CountAggregator, @@ -323,15 +321,11 @@ def configure_pipeline( self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any] ) -> PipelineConfig: pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from) - ''' - self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append( - SamplerTransform(random_seed=0, - sample_count=10, - ) - ) - ''' # data preprocessing - self.data_processing_comp.prompt_template_path=os.path.join( - os.path.dirname(__file__), "../prompt_templates/aime_templates/Template_tag1.jinja" - ) - return pipeline \ No newline at end of file + self.data_processing_comp.prompt_template_path = os.path.join( + os.path.dirname(__file__), "../prompt_templates/aime_templates/Template_tag1.jinja" + ) + # Each query is tagged with one or more topics from arithmetic, algebra, counting, geometry, number theory, and probability and other. + # These topics follow the description on the official website: https://artofproblemsolving.com/wiki/index.php/American_Invitational_Mathematics_Examination?srsltid=AfmBOooSIQ8ua5aJX00ZtYCKDuOAB4I4c-YE9zr1xYZ86fq8x5RL2sEg. + # In their own words, "The AIME tests mathematical problem solving with arithmetic, algebra, counting, geometry, number theory, and probability and other secondary school math topics" + return pipeline From 551341ab4902ccfcf5c9c4e293b62654c17c1533 Mon Sep 17 00:00:00 2001 From: lchen001 Date: Fri, 17 Jan 2025 11:02:52 -0800 Subject: [PATCH 6/7] add direct run prompt --- .../aime_templates/Template_1direct.jinja | 5 +++++ eureka_ml_insights/user_configs/__init__.py | 3 +++ eureka_ml_insights/user_configs/aime.py | 16 +++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 eureka_ml_insights/prompt_templates/aime_templates/Template_1direct.jinja diff --git a/eureka_ml_insights/prompt_templates/aime_templates/Template_1direct.jinja b/eureka_ml_insights/prompt_templates/aime_templates/Template_1direct.jinja new file mode 100644 index 0000000..7804716 --- /dev/null +++ b/eureka_ml_insights/prompt_templates/aime_templates/Template_1direct.jinja @@ -0,0 +1,5 @@ +You are a genius math graduate student solving math problems from the AIME competition. + +Provide your final answer in the format: 'Final Answer: [numeric value]'. Dont box it, just provide the answer directly at the end. + +{{prompt}} diff --git a/eureka_ml_insights/user_configs/__init__.py b/eureka_ml_insights/user_configs/__init__.py index e1a48f9..36545ac 100644 --- a/eureka_ml_insights/user_configs/__init__.py +++ b/eureka_ml_insights/user_configs/__init__.py @@ -4,9 +4,12 @@ AIME_PIPELINE16Run, AIME_PIPELINE32Run, AIME_PIPELINE64Run, + AIME_PIPELINE128Run, AIME_PIPELINE256Run, AIME_PIPELINE512Run, AIME_PIPELINE1024Run, + AIME_PIPELINE5Run, + AIME_PIPELINEDirect5Run, AIME_PIPELINETag, ) from .dna import DNA_PIPELINE diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index 4f314e4..3fdecc8 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -23,6 +23,7 @@ MajorityVoteTransform, MultiplyTransform, SequenceTransform, + SamplerTransform ) from eureka_ml_insights.data_utils.aime_utils import AIMEExtractAnswer from eureka_ml_insights.data_utils.data import DataLoader @@ -78,7 +79,7 @@ def configure_pipeline( ), output_dir=os.path.join(self.log_dir, "inference_result"), resume_from=resume_from, - max_concurrent=10, + max_concurrent=1, ) # post process the response to extract the answer self.data_post_processing = DataProcessingConfig( @@ -215,6 +216,19 @@ def configure_pipeline( ) return pipeline +class AIME_PIPELINEDirect5Run(AIME_PIPELINE5Run): + """This class specifies the config for running AIME benchmark 5 repeated times""" + + def configure_pipeline( + self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any] + ) -> PipelineConfig: + pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from) + # data preprocessing + self.data_processing_comp.prompt_template_path=os.path.join( + os.path.dirname(__file__), "../prompt_templates/aime_templates/Template_1direct.jinja" + ) + return pipeline + class AIME_PIPELINE16Run(AIME_PIPELINE): """This class specifies the config for running AIME benchmark 5 repeated times""" From 392b55909db18d344bbd553188fb4af1c5570439 Mon Sep 17 00:00:00 2001 From: Lingjiao Chen Date: Wed, 12 Feb 2025 15:28:23 -0800 Subject: [PATCH 7/7] split majority vote performance by year --- eureka_ml_insights/user_configs/aime.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py index 3fdecc8..029f84d 100644 --- a/eureka_ml_insights/user_configs/aime.py +++ b/eureka_ml_insights/user_configs/aime.py @@ -59,6 +59,7 @@ def configure_pipeline( "Answer": "ground_truth", } ), + #SamplerTransform(sample_count=10,random_seed=0), ], ), }, @@ -185,6 +186,18 @@ def configure_pipeline( "normalize": True, }, ), + AggregatorConfig( + BiLevelCountAggregator, + { + "column_names": [ + "NumericMatch_result", + ], + "first_groupby": "ID", + "second_groupby": "Year", + "filename_base": "MajorityVote_byyear", + "normalize": True, + }, + ), ], output_dir=os.path.join(self.log_dir, "eval_report_majorityVote"), )