Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
Merge pull request #107 from hdresearch/mp/ex
Browse files Browse the repository at this point in the history
examples: update all, amend config prompting
  • Loading branch information
matildepark authored Jul 11, 2024
2 parents 7113943 + 78b8be1 commit 4f42621
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 147 deletions.
38 changes: 17 additions & 21 deletions examples/login.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import yargs from "yargs/yargs";

import { AgentBrowser } from "../src/agentBrowser";
import { Browser } from "../src/browser";
import { Agent } from "../src/agent/agent";
import { Inventory } from "../src/inventory";
import { completionApiBuilder } from "../src/agent";
import { Logger } from "../src/utils";

import { ModelResponseSchema, ObjectiveComplete } from "../src/types";
import { nolitarc } from "../src/utils/config";

const parser = yargs(process.argv.slice(2)).options({
headless: { type: "boolean", default: true },
});

// this imports your config from running `npx nolita auth`
// if you haven't run `npx nolita auth` yet, you can set ANTHROPIC_API_KEY in your environment
const { agentProvider, agentApiKey, agentModel } = nolitarc();

async function main() {
const argv = await parser.parse();

Expand All @@ -21,14 +23,14 @@ async function main() {
const maxIterations = 10;

const providerOptions = {
apiKey: process.env.ANTHROPIC_API_KEY!,
provider: "anthropic",
apiKey: agentApiKey || process.env.ANTHROPIC_API_KEY!,
provider: agentProvider || "anthropic",
};

// We can create a chat api using the completionApiBuilder.
// These can be swapped out for other providers like OpenAI
const chatApi = completionApiBuilder(providerOptions, {
model: "claude-3-5-sonnet-20240620",
model: agentModel || "claude-3-5-sonnet-20240620",
});

if (!chatApi) {
Expand All @@ -53,24 +55,18 @@ async function main() {
]);

const agent = new Agent({ modelApi: chatApi });
const agentBrowser = new AgentBrowser({
agent,
browser: await Browser.launch(argv.headless, agent),
const browser = await Browser.launch(argv.headless, agent, logger, {
inventory,
logger,
});

const answer = await agentBrowser.browse(
{
startUrl: startUrl,
objective: [objective],
maxIterations: maxIterations,
},
ModelResponseSchema(ObjectiveComplete)
);

console.log("Answer:", answer?.result);
await agentBrowser.close();
const page = await browser.newPage();
await page.goto(startUrl);
const answer = await page.browse(objective, {
maxTurns: maxIterations
});
// @ts-expect-error - we are not using the full response schema
console.log("Answer:", answer?.objectiveComplete?.result);
await browser.close();
}

main();
54 changes: 21 additions & 33 deletions examples/shopping.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import yargs from "yargs/yargs";
import { z } from "zod";

import { AgentBrowser } from "../src/agentBrowser";
import { Browser } from "../src/browser";
import { Agent } from "../src/agent/agent";
import { Inventory } from "../src/inventory";
import { completionApiBuilder } from "../src/agent";
import { Logger } from "../src/utils";
import { nolitarc } from "../src/utils/config";

import { ModelResponseSchema, ObjectiveComplete } from "../src/types";
// these are imported from `npx nolita auth`
// if you haven't set config, you can set the defaults for this example in your environment:
// OPENAI_API_KEY, HDR_API_KEY
const { hdrApiKey, agentProvider, agentModel, agentApiKey } = nolitarc();

const parser = yargs(process.argv.slice(2)).options({
headless: { type: "boolean", default: true },
Expand All @@ -24,10 +27,10 @@ async function main() {
const maxIterations = 15;

const providerOptions = {
apiKey: process.env.OPENAI_API_KEY!,
provider: "openai",
apiKey: agentApiKey || process.env.OPENAI_API_KEY!,
provider: agentProvider || "openai",
};
const chatApi = completionApiBuilder(providerOptions, { model: "gpt-4" });
const chatApi = completionApiBuilder(providerOptions, { model: agentModel || "gpt-4" });

if (!chatApi) {
throw new Error(
Expand All @@ -36,40 +39,25 @@ async function main() {
}
const logger = new Logger(["info"], (msg) => console.log(msg));

// You can pass in collective memory configuration to the agent browser
const collectiveMemoryConfig = {
apiKey: process.env.HDR_API_KEY!,
endpoint: process.env.HDR_ENDPOINT!,
};
const agent = new Agent({ modelApi: chatApi });
const agentBrowser = new AgentBrowser({
agent: new Agent({ modelApi: chatApi }),
browser: await Browser.launch(argv.headless, agent),
logger,
const browser = await Browser.launch(argv.headless, agent, logger, {
apiKey: hdrApiKey || process.env.HDR_API_KEY!,
});
const page = await browser.newPage();
await page.goto(startUrl);
const answer = await page.browse(objective, {
maxTurns: maxIterations,
schema: z.object({
orderTotals: z.array(z.number()).describe("The order total in number format"),
}),
inventory: new Inventory([
{ value: "emma.lopez@gmail.com", name: "email", type: "string" },
{ value: "Password.123", name: "Password", type: "string" },
]),
collectiveMemoryConfig,
});

const orderTotalAnswer = ObjectiveComplete.extend({
orderTotals: z.array(
z.number().describe("The order total in number format")
),
});

const answer = await agentBrowser.browse(
{
startUrl: startUrl,
objective: [objective],
maxIterations: maxIterations,
},
ModelResponseSchema(orderTotalAnswer)
);

console.log("\x1b[32m Answer:", JSON.stringify(answer?.result));
await agentBrowser.close();
// @ts-expect-error - we are not using the full response schema
console.log("\x1b[32m Answer:", JSON.stringify(answer?.objectiveComplete?.orderTotals));
await browser.close();
}

main();
61 changes: 20 additions & 41 deletions examples/wikipedia.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import yargs from "yargs/yargs";
import { z } from "zod";

import { AgentBrowser } from "../src/agentBrowser";
import { Browser } from "../src/browser";
import { Agent } from "../src/agent/agent";
import { completionApiBuilder } from "../src/agent";
import { Logger } from "../src/utils";

import { ModelResponseSchema, ObjectiveComplete } from "../src/types";
import { nolitarc } from "../src/utils/config";

const parser = yargs(process.argv.slice(2)).options({
headless: { type: "boolean", default: true },
Expand All @@ -16,23 +14,19 @@ const parser = yargs(process.argv.slice(2)).options({
maxIterations: { type: "number", default: 10 },
});

// these are imported from `npx nolita auth`
// if you haven't set config, you can set the defaults for this example in your environment:
// OPENAI_API_KEY
const { agentApiKey, agentProvider, agentModel } = nolitarc();

async function main() {
const argv = await parser.parse();
console.log(argv);

if (!argv.startUrl) {
throw new Error("url is not provided");
}

if (!argv.objective) {
throw new Error("objective is not provided");
}

const providerOptions = {
apiKey: process.env.OPENAI_API_KEY!,
provider: "openai",
apiKey: agentApiKey || process.env.OPENAI_API_KEY!,
provider: agentProvider || "openai",
};
const chatApi = completionApiBuilder(providerOptions, { model: "gpt-4" });
const chatApi = completionApiBuilder(providerOptions, { model: agentModel || "gpt-4" });

if (!chatApi) {
throw new Error(
Expand All @@ -42,35 +36,20 @@ async function main() {
const logger = new Logger(["info"], (msg) => console.log(msg));
const agent = new Agent({ modelApi: chatApi });

const agentBrowser = new AgentBrowser({
agent: agent,
browser: await Browser.launch(argv.headless, agent),
logger,
});

// Here we are defining a custom return schema
// Custom schemas extend `ObjectiveComplete` by adding additional fields
// In addition to returning structured data, we find that using these fields
// improves the performance of the model by constraining the conditions
// under which the model can halt
const wikipediaAnswer = ObjectiveComplete.extend({
numberOfEditors: z
.number()
.int()
.describe("The number of editors in int format"),
const browser = await Browser.launch(argv.headless, agent, logger);
const page = await browser.newPage();
await page.goto(argv.startUrl || "https://google.com");
const answer = await page.browse(argv.objective || "How many accounts are on Wikipedia?", {
maxTurns: argv.maxIterations || 10,
schema: z.object({
numberOfEditors: z.number().int().describe("The number of accounts in int format"),
}),
});
const answer = await agentBrowser.browse(
{
startUrl: argv.startUrl,
objective: [argv.objective],
maxIterations: argv.maxIterations,
},
ModelResponseSchema(wikipediaAnswer)
);

console.log("Answer:", answer?.result);
// @ts-expect-error - we are not using the full response schema
console.log("Answer:", answer?.objectiveComplete?.numberOfEditors);

await agentBrowser.close();
await browser.close();
}

main();
1 change: 1 addition & 0 deletions jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const config: Config = {
verbose: true,
preset: "ts-jest",
testEnvironment: "node",
testTimeout: 20000
};

export default config;
4 changes: 3 additions & 1 deletion src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ export class Agent {
memories: Memory[],
config?: { inventory?: Inventory; systemPrompt?: string }
): ChatRequestMessage[] {
const userPrompt = `Here are examples of a request:
const userPrompt = `
${config?.inventory ? `Use the following information to achieve your objective as needed: ${config?.inventory.toString()}` : ""}
Here are examples of a request:
${stringifyObjects(memories)}
remember to return a result only in the form of an ActionStep.
Expand Down
9 changes: 1 addition & 8 deletions src/agent/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ export function handleConfigMessages(
): ChatRequestMessage[] {
const messages: ChatRequestMessage[] = [];

const { systemPrompt, inventory } = config;
const { systemPrompt } = config;

if (systemPrompt) {
console.log("systemPrompt", systemPrompt);
Expand All @@ -87,13 +87,6 @@ export function handleConfigMessages(
});
}

if (inventory) {
messages.push({
role: "user",
content: `Use the following information to achieve your objective as needed: ${inventory.toString()}`,
});
}

return messages;
}

Expand Down
5 changes: 4 additions & 1 deletion src/browser/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,10 @@ export class Page {
) {
let currentTurn = 0;
while (currentTurn < opts.maxTurns) {
const step = await this.step(objective, opts?.schema, opts);
const step = await this.step(objective, opts?.schema, {
...opts,
inventory: opts.inventory ?? this.inventory,
});

if (step?.objectiveComplete) {
return step;
Expand Down
16 changes: 0 additions & 16 deletions tests/agent/agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ import {
ObjectiveComplete,
} from "../../src/types/browser/actionStep.types";

import { Inventory } from "../../src/inventory";

import { z } from "zod";
import { ObjectiveState } from "../../src/types/browser";
import { completionApiBuilder } from "../../src/agent/config";
Expand All @@ -32,20 +30,6 @@ describe("Agent -- configs", () => {
agent = new Agent({ modelApi: chatApi! });
});

test("that configs are handled", async () => {
const prompt = await agent.prompt(
stateActionPair1.objectiveState,
[stateActionPair1],
{
inventory: new Inventory([
{ value: "test", name: "test", type: "string" },
]),
}
);
expect(prompt[0].role).toBe("user");
expect(prompt[0].content).toContain("Use the following information");
});

test("that empty configs are handled", async () => {
const prompt = await agent.prompt(
stateActionPair1.objectiveState,
Expand Down
Loading

0 comments on commit 4f42621

Please sign in to comment.