Skip to content

Commit

Permalink
Updated o1 and o1 preview in bot
Browse files (browse the repository at this point in the history)
  • Loading branch information
CodexVeritas committed Dec 22, 2024
1 parent 089c510 commit ec45592
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 19 deletions.
16 changes: 2 additions & 14 deletions scripts/benchmark_forecast_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,20 @@


async def benchmark_forecast_bot() -> None:
questions_to_use = 1
questions_to_use = 120
with MonetaryCostManager() as cost_manager:
bots = [
ExaBot(),
Q4MainBinaryBot(),
ExaBot(
research_reports_per_question=3,
predictions_per_research_report=3,
),
Q4MainBinaryBot(
research_reports_per_question=3,
predictions_per_research_report=3,
),
ExaQ4BinaryBot(),
ExaQ4BinaryBot(
research_reports_per_question=3,
predictions_per_research_report=3,
),
ExaQ4BinaryO1PreviewBot(),
]
bots = typeguard.check_type(bots, list[ForecastBot])
benchmarks = await Benchmarker(
number_of_questions_to_use=questions_to_use,
forecast_bots=bots,
file_path_to_save_reports="logs/forecasts/benchmarks/",
concurrent_question_batch_size=50,
concurrent_question_batch_size=20,
).run_benchmark()
for i, benchmark in enumerate(benchmarks):
logger.info(
Expand Down
27 changes: 22 additions & 5 deletions scripts/simulate_a_tournament.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -488,9 +488,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_463249/3173621166.py:35: UserWarning: Tight layout not applied. The left and right margins cannot be made large enough to accommodate all Axes decorations.\n",
" plt.tight_layout()\n"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
Expand All @@ -515,7 +524,15 @@
" scores = ranking.ranked_scores\n",
" names = [f.name for f in ranking.ranked_forecasters]\n",
"\n",
" sns.barplot(x=scores, y=names, ax=ax, palette='viridis')\n",
" bars = sns.barplot(x=scores, y=names, ax=ax, palette='viridis')\n",
"\n",
" if len(bars) < 0:\n",
" raise ValueError(f\"No bars found for {ranking.scoring_method.name}\")\n",
"\n",
" # Add value labels next to each bar\n",
" for i, score in enumerate(scores):\n",
" ax.text(score, i, f' {score:.2f}', va='center')\n",
"\n",
" ax.set_title(f'Scores using {ranking.scoring_method.name}')\n",
" ax.set_xlabel('Score')\n",
" ax.set_ylabel('Forecaster')\n",
Expand Down Expand Up @@ -862,7 +879,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
"version": "3.10.16"
}
},
"nbformat": 4,
Expand Down

0 comments on commit ec45592

Please sign in to comment.