-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
374fa4c
commit 8013a75
Showing 10 changed files with 189 additions and 64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,5 @@ | ||
from typing import Any, Final | ||
from forecasting_tools.ai_models.gpto1preview import GptO1Preview | ||
|
||
from forecasting_tools.ai_models.ai_utils.response_types import ( | ||
TextTokenCostResponse, | ||
) | ||
from forecasting_tools.ai_models.model_archetypes.openai_text_model import ( | ||
OpenAiTextToTextModel, | ||
) | ||
|
||
|
||
class GptO1(OpenAiTextToTextModel):
    """Text-to-text wrapper for the OpenAI model named by ``MODEL_NAME``.

    The constructor rejects system prompts and any temperature other
    than 1. The class constants below configure request/token rate
    limiting and the timeout used by the base model class.
    """

    # See OpenAI Limit on the account dashboard for most up-to-date limit
    MODEL_NAME: Final[str] = "o1-preview"
    REQUESTS_PER_PERIOD_LIMIT: Final[int] = 8_000
    REQUEST_PERIOD_IN_SECONDS: Final[int] = 60
    TIMEOUT_TIME: Final[int] = 120
    TOKENS_PER_PERIOD_LIMIT: Final[int] = 2_000_000
    TOKEN_PERIOD_IN_SECONDS: Final[int] = 60

    def __init__(
        self,
        *args: Any,
        temperature: float = 1,
        system_prompt: str | None = None,
        **kwargs: Any,
    ):
        """Validate the model constraints and initialise the base model.

        Args:
            *args: Positional arguments forwarded to the base class.
            temperature: Sampling temperature; must be exactly 1.
            system_prompt: Must be ``None``. It is accepted only so that
                a supplied value can be rejected; it is not forwarded to
                the base class.
            **kwargs: Keyword arguments forwarded to the base class.

        Raises:
            AssertionError: If a system prompt is given or the
                temperature is not 1.
        """
        # Name the concrete class in the message so the error always
        # matches the class the caller actually instantiated.
        model_label = type(self).__name__
        assert (
            system_prompt is None
        ), f"{model_label} does not support system prompts"
        assert (
            temperature == 1
        ), f"{model_label} must have temperature 1, but {temperature} was given."
        super().__init__(*args, temperature=temperature, **kwargs)

    @classmethod
    def _get_mock_return_for_direct_call_to_model_using_cheap_input(
        cls,
    ) -> TextTokenCostResponse:
        """Return the base class's mock response with reasoning tokens added."""
        response = (
            super()._get_mock_return_for_direct_call_to_model_using_cheap_input()
        )
        response.total_tokens_used += 269  # Add reasoning tokens
        return response
class GptO1(GptO1Preview):
    """Variant of ``GptO1Preview`` that targets the ``o1`` model name."""

    # Only the model identifier differs; everything else is inherited.
    MODEL_NAME: str = "o1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import Any | ||
|
||
from forecasting_tools.ai_models.ai_utils.response_types import ( | ||
TextTokenCostResponse, | ||
) | ||
from forecasting_tools.ai_models.model_archetypes.openai_text_model import ( | ||
OpenAiTextToTextModel, | ||
) | ||
|
||
|
||
class GptO1Preview(OpenAiTextToTextModel):
    """Text-to-text wrapper for the OpenAI model named by ``MODEL_NAME``.

    The constructor rejects system prompts and any temperature other
    than 1. Subclasses may override ``MODEL_NAME`` to target a different
    model while reusing the same validation.
    """

    # See OpenAI Limit on the account dashboard for most up-to-date limit
    MODEL_NAME: str = "o1-preview"
    REQUESTS_PER_PERIOD_LIMIT: int = 8_000
    REQUEST_PERIOD_IN_SECONDS: int = 60
    TIMEOUT_TIME: int = 120
    TOKENS_PER_PERIOD_LIMIT: int = 2_000_000
    TOKEN_PERIOD_IN_SECONDS: int = 60

    def __init__(
        self,
        *args: Any,
        temperature: float = 1,
        system_prompt: str | None = None,
        **kwargs: Any,
    ):
        """Validate the model constraints and initialise the base model.

        Args:
            *args: Positional arguments forwarded to the base class.
            temperature: Sampling temperature; must be exactly 1.
            system_prompt: Must be ``None``. It is accepted only so that
                a supplied value can be rejected; it is not forwarded to
                the base class.
            **kwargs: Keyword arguments forwarded to the base class.

        Raises:
            AssertionError: If a system prompt is given or the
                temperature is not 1.
        """
        # Use the concrete class name so that subclasses inheriting this
        # constructor report themselves rather than "GptO1Preview".
        model_label = type(self).__name__
        assert (
            system_prompt is None
        ), f"{model_label} does not support system prompts"
        assert (
            temperature == 1
        ), f"{model_label} must have temperature 1, but {temperature} was given."
        super().__init__(*args, temperature=temperature, **kwargs)

    @classmethod
    def _get_mock_return_for_direct_call_to_model_using_cheap_input(
        cls,
    ) -> TextTokenCostResponse:
        """Return the base class's mock response with reasoning tokens added."""
        response = (
            super()._get_mock_return_for_direct_call_to_model_using_cheap_input()
        )
        response.total_tokens_used += 269  # Add reasoning tokens
        return response
85 changes: 85 additions & 0 deletions
85
forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from datetime import datetime | ||
|
||
from forecasting_tools.ai_models.ai_utils.ai_misc import clean_indents | ||
from forecasting_tools.ai_models.gpt4o import Gpt4o | ||
from forecasting_tools.forecasting.forecast_bots.template_bot import ( | ||
TemplateBot, | ||
) | ||
from forecasting_tools.forecasting.helpers.smart_searcher import SmartSearcher | ||
from forecasting_tools.forecasting.questions_and_reports.forecast_report import ( | ||
ReasonedPrediction, | ||
) | ||
from forecasting_tools.forecasting.questions_and_reports.questions import ( | ||
BinaryQuestion, | ||
MetaculusQuestion, | ||
) | ||
|
||
|
||
class ExaQ4BinaryBot(TemplateBot):
    """Forecast bot that researches with ``SmartSearcher`` and forecasts
    binary questions with the model held in ``FINAL_DECISION_LLM``.
    """

    # Model that writes the final rationale and probability.
    FINAL_DECISION_LLM = Gpt4o(temperature=0.1)

    async def run_research(self, question: MetaculusQuestion) -> str:
        """Ask a ``SmartSearcher`` for a news rundown on ``question``.

        Args:
            question: The question whose text is embedded in the prompt.

        Returns:
            Whatever the searcher's ``invoke`` call produces for the prompt.
        """
        research_prompt = clean_indents(
            f"""
            You are an assistant to a superforecaster.
            The superforecaster will give you a question they intend to forecast on.
            To be a great assistant, you generate a concise but detailed rundown of the most relevant news, including if the question would resolve Yes or No based on current information.
            You do not produce forecasts yourself.
            Question:
            {question.question_text}
            """
        )

        searcher = SmartSearcher(temperature=0.1)
        return await searcher.invoke(research_prompt)

    async def _run_forecast_on_binary(
        self, question: BinaryQuestion, research: str
    ) -> ReasonedPrediction[float]:
        """Prompt the decision model and parse a probability from its answer.

        Args:
            question: The binary question being forecast.
            research: Research text inserted verbatim into the prompt.

        Returns:
            A ``ReasonedPrediction`` carrying the extracted probability and
            the model's full response as reasoning.

        Raises:
            AssertionError: If ``question`` is not a ``BinaryQuestion``.
        """
        assert isinstance(
            question, BinaryQuestion
        ), "Question must be a BinaryQuestion"

        # Falsy (missing or empty) optional fields fall back to fixed text.
        background = (
            question.background_info or "No background information provided."
        )
        criteria = (
            question.resolution_criteria
            or "No resolution criteria provided."
        )
        fine_print = question.fine_print or "No fine print provided."
        today = datetime.now().strftime("%Y-%m-%d")

        forecast_prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.
            Your interview question is:
            {question.question_text}
            Background information:
            {background}
            Resolution criteria:
            {criteria}
            Fine print:
            {fine_print}
            Your research assistant says:
            ```
            {research}
            ```
            Today is {today}.
            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) What the outcome would be if nothing changed.
            (c) The most important factors that will influence a successful/unsuccessful resolution.
            (d) What do you not know that should give you pause and lower confidence? Remember people are statistically overconfident.
            (e) What you would forecast if you were to only use historical precedent (i.e. how often this happens in the past) without any current information.
            (f) What you would forecast if there was only a quarter of the time left.
            (g) What you would forecast if there was 4x the time left.
            You write your rationale and then the last thing you write is your final answer as: "Probability: ZZ%", 0-100
            """
        )
        rationale = await self.FINAL_DECISION_LLM.invoke(forecast_prompt)
        # Extraction is given explicit bounds of 0.01 and 0.99.
        probability = self._extract_forecast_from_binary_rationale(
            rationale, max_prediction=0.99, min_prediction=0.01
        )
        return ReasonedPrediction(
            prediction_value=probability, reasoning=rationale
        )
8 changes: 8 additions & 0 deletions
8
forecasting_tools/forecasting/forecast_bots/experiments/exa_q4_binary_o1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from forecasting_tools.ai_models.gpto1 import GptO1 | ||
from forecasting_tools.forecasting.forecast_bots.experiments.exa_q4_binary import ( | ||
ExaQ4BinaryBot, | ||
) | ||
|
||
|
||
class ExaQ4BinaryO1Bot(ExaQ4BinaryBot):
    """``ExaQ4BinaryBot`` variant whose final decision model is ``GptO1``."""

    # Replaces the parent's decision model; constructed with defaults.
    FINAL_DECISION_LLM = GptO1()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters