Updated o1 and o1 preview in bot

Metaculus · Dec 21, 2024 · 089c510
1 parent 8013a75
commit 089c510
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 73 deletions.
diff --git a/forecasting_tools/ai_models/ai_utils/openai_utils.py b/forecasting_tools/ai_models/ai_utils/openai_utils.py
@@ -1,7 +1,5 @@
-import logging
-
-logger = logging.getLogger(__name__)
 import base64
+import logging
 import math
 import re
 from io import BytesIO
@@ -21,6 +19,8 @@
 from pydantic import BaseModel
 from tiktoken import Encoding
 
+logger = logging.getLogger(__name__)
+
 
 class VisionMessageData(BaseModel):
     prompt: str

diff --git a/forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary_o1.py b/forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary_o1.py
diff --git a/forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary_o1_preview.py b/forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary_o1_preview.py
@@ -0,0 +1,8 @@
+from forecasting_tools.ai_models.gpto1preview import GptO1Preview
+from forecasting_tools.forecasting.forecast_bots.experiments.exa_q4_binary import (
+    ExaQ4BinaryBot,
+)
+
+
+class ExaQ4BinaryO1PreviewBot(ExaQ4BinaryBot):
+    FINAL_DECISION_LLM = GptO1Preview()
diff --git a/poetry.lock b/poetry.lock
diff --git a/scripts/benchmark_forecast_bot.py b/scripts/benchmark_forecast_bot.py
@@ -14,8 +14,8 @@
 from forecasting_tools.forecasting.forecast_bots.experiments.exa_q4_binary import (
     ExaQ4BinaryBot,
 )
-from forecasting_tools.forecasting.forecast_bots.experiments.exa_q4_binary_o1 import (
-    ExaQ4BinaryO1Bot,
+from forecasting_tools.forecasting.forecast_bots.experiments.exa_q4_binary_o1_preview import (
+    ExaQ4BinaryO1PreviewBot,
 )
 from forecasting_tools.forecasting.forecast_bots.experiments.q4_main_binary_bot import (
     Q4MainBinaryBot,
@@ -30,7 +30,7 @@
 
 
 async def benchmark_forecast_bot() -> None:
-    questions_to_use = 2
+    questions_to_use = 1
     with MonetaryCostManager() as cost_manager:
         bots = [
             ExaBot(),
@@ -48,7 +48,7 @@ async def benchmark_forecast_bot() -> None:
                 research_reports_per_question=3,
                 predictions_per_research_report=3,
             ),
-            ExaQ4BinaryO1Bot(),
+            ExaQ4BinaryO1PreviewBot(),
         ]
         bots = typeguard.check_type(bots, list[ForecastBot])
         benchmarks = await Benchmarker(