Merge branch 'main' into fix/get_installed_package_name

UKGovernmentBEIS · Feb 12, 2025 · f7de38b · f7de38b
2 parents 7f3553f + 776b869
commit f7de38b
Show file tree

Hide file tree

Showing 207 changed files with 6,346 additions and 2,217 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -1,11 +1,16 @@
 version: 2
 updates:
-- package-ecosystem: pip
-  directory: "/"
-  schedule:
-    interval: daily
-    time: "13:00"
-  groups:
-    python-packages:
-      patterns:
-        - "*"
+  - package-ecosystem: pip
+    directory: "/"
+    schedule:
+      interval: daily
+      time: "13:00"
+    groups:
+      python-packages:
+        patterns:
+          - "*"
+    ignore:
+      - dependency-name: "quarto-cli"
+        versions: [">=1.6.0"]
+      - dependency-name: "ruff"
+        versions: [">=0.9.5"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,12 +1,22 @@
-# Changelog
 
 ## Unreleased
 
+- [Reference documentation](https://inspect.ai-safety-institute.org.uk/reference/) for Python API and CLI commands.
 - Add support for [clustered standard errors](https://inspect.ai-safety-institute.org.uk/scorers.html#clustered-standard-errors) via a new `cluster` parameter for the `stderr()` metric.
 - Metrics now take `list[SampleScore]` rather than `list[Score]` (previous signature is deprecated but still works with a warning).
 - Use a sample adjustment for the `var()` metric.
+- OpenAI: Native tool calling for o1-mini (upon initial release it required emulated tool calling like o1-preview).
+- Google: Speculative fix for completion candidates not being returned as a list.
+- Python and Bash tools: Add `sandbox` argument for running in non-default sandboxes.
+- Transcript: Log `ScoreEvent` (with `intermediate=True`) when the `score()` function is called.
+- Transcript: Add `source` field to `InfoEvent` and use it for events logged by the human agent.
 - Docker: Support Dockerfiles with `.Dockerfile` extension.
+- Docker: Raise error when there is an explicitly configured `container_name` (incompatible with epochs > 1).
+- Docker: Dynamically set `compose up` timeout when there are `healthcheck` entries for services.
+- Log: Validate that `log_dir` is writeable at startup.
+- Log: Write eval config defaults into log file (rather than `None`).
 - Bugfix: Always honor log-level-transcript setting for transcript logging.
+- Bugfix: Fix some dynamic layout issues for sample sandbox view.
 
 ## v0.3.63 (07 February 2025)
 

diff --git a/docs/.gitignore b/docs/.gitignore
@@ -1,3 +1,3 @@
 /.quarto/
 /_book/
-/_site/
+/_site/
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -3,8 +3,18 @@ project:
    resources: 
       - CNAME
       - llms.txt
-   post-render: scripts/post-render.sh
-
+   pre-render: 
+      - reference/filter/sidebar.py
+   post-render: 
+      - scripts/post-render.sh
+
+metadata-files: 
+  - reference/_sidebar.yml  
+
+filters:
+  - at: pre-quarto
+    path: reference/filter/interlink.lua
+
 website:
    title: "Inspect"
    bread-crumbs: true
@@ -21,66 +31,74 @@ website:
       title: "Inspect"
       description: "Open-source framework for large language model evaluations"
       image: /images/inspect.png
-   sidebar:
-      style: floating
+   navbar:
+      title: "Inspect AI"
+      background: light
       search: true
-      header: >
-         [![](/images/aisi-logo.png){fig-alt="UK AI Safety Institute Website"}](https://aisi.gov.uk/)
-      tools:
-        - icon: github
-          href: https://github.com/UKGovernmentBEIS/inspect_ai
-          text: "Source Code"
-      contents:
-        - text: Welcome
-          href: index.qmd   
-        - section: "Basics"
-          contents:
-            - tutorial.qmd
-            - options.qmd
-            - text: "Evals"
-              href: evals/index.qmd
-            - log-viewer.qmd
-            - text: "VS Code"
-              href: vscode.qmd
-
-        - section: "Components"
-          contents: 
-            - tasks.qmd
-            - datasets.qmd
-            - solvers.qmd
-            - tools.qmd
-            - scorers.qmd
+      logo: images/aisi-logo.png
+      logo-href: https://www.aisi.gov.uk/     
+      left:      
+          - text: "User Guide"
+            href: index.qmd
+          - text: "Reference"
+            href: reference/index.qmd
+          - text: "Evals"
+            href: evals/index.qmd
+      right: 
+          - icon: github
+            href: https://github.com/UKGovernmentBEIS/inspect_ai
+
+   sidebar:
+      - title: Guide
+        style: docked
+        contents:
+         - section: "Basics"
+           contents:
+               - text: "Welcome"
+                 href: index.qmd
+               - tutorial.qmd
+               - options.qmd
+               - log-viewer.qmd
+               - text: "VS Code"
+                 href: vscode.qmd
 
-        - section: "Models"
-          contents:
-            - models.qmd
-            - text: "Providers"
-              href: providers.qmd
-            - caching.qmd
-            - multimodal.qmd
-            - reasoning.qmd
+         - section: "Components"
+           contents: 
+               - tasks.qmd
+               - datasets.qmd
+               - solvers.qmd
+               - tools.qmd
+               - scorers.qmd
 
-        - section: "Agents"
-          contents:
-            - agents.qmd
-            - sandboxing.qmd
-            - agents-api.qmd
-            - agent-bridge.qmd
-            - human-agent.qmd
-            - approval.qmd
+         - section: "Models"
+           contents:
+               - models.qmd
+               - text: "Providers"
+                 href: providers.qmd
+               - caching.qmd
+               - multimodal.qmd
+               - reasoning.qmd
 
-        - section: "Advanced"
-          contents:
-            - eval-logs.qmd
-            - eval-sets.qmd
-            - text: "Errors & Limits"
-              href: errors-and-limits.qmd
-            - typing.qmd
-            - tracing.qmd
-            - parallelism.qmd
-            - interactivity.qmd
-            - extensions.qmd
+         - section: "Agents"
+           contents:
+               - agents.qmd
+               - sandboxing.qmd
+               - agents-api.qmd
+               - agent-bridge.qmd
+               - human-agent.qmd
+               - approval.qmd
 
+         - section: "Advanced"
+           contents:
+               - eval-logs.qmd
+               - eval-sets.qmd
+               - text: "Errors & Limits"
+                 href: errors-and-limits.qmd
+               - typing.qmd
+               - tracing.qmd
+               - parallelism.qmd
+               - interactivity.qmd
+               - extensions.qmd
 
    page-footer: 
       left: