diff --git a/examples/login.ts b/examples/login.ts index 50c35bb..76364ba 100644 --- a/examples/login.ts +++ b/examples/login.ts @@ -1,18 +1,20 @@ import yargs from "yargs/yargs"; -import { AgentBrowser } from "../src/agentBrowser"; import { Browser } from "../src/browser"; import { Agent } from "../src/agent/agent"; import { Inventory } from "../src/inventory"; import { completionApiBuilder } from "../src/agent"; import { Logger } from "../src/utils"; - -import { ModelResponseSchema, ObjectiveComplete } from "../src/types"; +import { nolitarc } from "../src/utils/config"; const parser = yargs(process.argv.slice(2)).options({ headless: { type: "boolean", default: true }, }); +// this imports your config from running `npx nolita auth` +// if you haven't run `npx nolita auth` yet, you can set ANTHROPIC_API_KEY in your environment +const { agentProvider, agentApiKey, agentModel } = nolitarc(); + async function main() { const argv = await parser.parse(); @@ -21,14 +23,14 @@ async function main() { const maxIterations = 10; const providerOptions = { - apiKey: process.env.ANTHROPIC_API_KEY!, - provider: "anthropic", + apiKey: agentApiKey || process.env.ANTHROPIC_API_KEY!, + provider: agentProvider || "anthropic", }; // We can create a chat api using the completionApiBuilder. // These can be swapped out for other providers like OpenAI const chatApi = completionApiBuilder(providerOptions, { - model: "claude-3-5-sonnet-20240620", + model: agentModel || "claude-3-5-sonnet-20240620", }); if (!chatApi) { @@ -53,24 +55,18 @@ async function main() { ]); const agent = new Agent({ modelApi: chatApi }); - const agentBrowser = new AgentBrowser({ - agent, - browser: await Browser.launch(argv.headless, agent), + const browser = await Browser.launch(argv.headless, agent, logger, { inventory, - logger, }); - const answer = await agentBrowser.browse( - { - startUrl: startUrl, - objective: [objective], - maxIterations: maxIterations, - }, - ModelResponseSchema(ObjectiveComplete) - ); - - console.log("Answer:", answer?.result); - await agentBrowser.close(); + const page = await browser.newPage(); + await page.goto(startUrl); + const answer = await page.browse(objective, { + maxTurns: maxIterations + }); + // @ts-expect-error - we are not using the full response schema + console.log("Answer:", answer?.objectiveComplete?.result); + await browser.close(); } main(); diff --git a/examples/shopping.ts b/examples/shopping.ts index 4b0bc4a..a2cb33d 100644 --- a/examples/shopping.ts +++ b/examples/shopping.ts @@ -1,14 +1,17 @@ import yargs from "yargs/yargs"; import { z } from "zod"; -import { AgentBrowser } from "../src/agentBrowser"; import { Browser } from "../src/browser"; import { Agent } from "../src/agent/agent"; import { Inventory } from "../src/inventory"; import { completionApiBuilder } from "../src/agent"; import { Logger } from "../src/utils"; +import { nolitarc } from "../src/utils/config"; -import { ModelResponseSchema, ObjectiveComplete } from "../src/types"; +// these are imported from `npx nolita auth` +// if you haven't set config, you can set the defaults for this example in your environment: +// OPENAI_API_KEY, HDR_API_KEY +const { hdrApiKey, agentProvider, agentModel, agentApiKey } = nolitarc(); const parser = yargs(process.argv.slice(2)).options({ headless: { type: "boolean", default: true }, @@ -24,10 +27,10 @@ async function main() { const maxIterations = 15; const providerOptions = { - apiKey: process.env.OPENAI_API_KEY!, - provider: "openai", + apiKey: agentApiKey || process.env.OPENAI_API_KEY!, + provider: agentProvider || "openai", }; - const chatApi = completionApiBuilder(providerOptions, { model: "gpt-4" }); + const chatApi = completionApiBuilder(providerOptions, { model: agentModel || "gpt-4" }); if (!chatApi) { throw new Error( @@ -36,40 +39,25 @@ async function main() { } const logger = new Logger(["info"], (msg) => console.log(msg)); - // You can pass in collective memory configuration to the agent browser - const collectiveMemoryConfig = { - apiKey: process.env.HDR_API_KEY!, - endpoint: process.env.HDR_ENDPOINT!, - }; const agent = new Agent({ modelApi: chatApi }); - const agentBrowser = new AgentBrowser({ - agent: new Agent({ modelApi: chatApi }), - browser: await Browser.launch(argv.headless, agent), - logger, + const browser = await Browser.launch(argv.headless, agent, logger, { + apiKey: hdrApiKey || process.env.HDR_API_KEY!, + }); + const page = await browser.newPage(); + await page.goto(startUrl); + const answer = await page.browse(objective, { + maxTurns: maxIterations, + schema: z.object({ + orderTotals: z.array(z.number()).describe("The order total in number format"), + }), inventory: new Inventory([ { value: "emma.lopez@gmail.com", name: "email", type: "string" }, { value: "Password.123", name: "Password", type: "string" }, ]), - collectiveMemoryConfig, - }); - - const orderTotalAnswer = ObjectiveComplete.extend({ - orderTotals: z.array( - z.number().describe("The order total in number format") - ), }); - - const answer = await agentBrowser.browse( - { - startUrl: startUrl, - objective: [objective], - maxIterations: maxIterations, - }, - ModelResponseSchema(orderTotalAnswer) - ); - - console.log("\x1b[32m Answer:", JSON.stringify(answer?.result)); - await agentBrowser.close(); + // @ts-expect-error - we are not using the full response schema + console.log("\x1b[32m Answer:", JSON.stringify(answer?.objectiveComplete?.orderTotals)); + await browser.close(); } main(); diff --git a/examples/wikipedia.ts b/examples/wikipedia.ts index 91ffcd1..39d570d 100644 --- a/examples/wikipedia.ts +++ b/examples/wikipedia.ts @@ -1,13 +1,11 @@ import yargs from "yargs/yargs"; import { z } from "zod"; -import { AgentBrowser } from "../src/agentBrowser"; import { Browser } from "../src/browser"; import { Agent } from "../src/agent/agent"; import { completionApiBuilder } from "../src/agent"; import { Logger } from "../src/utils"; - -import { ModelResponseSchema, ObjectiveComplete } from "../src/types"; +import { nolitarc } from "../src/utils/config"; const parser = yargs(process.argv.slice(2)).options({ headless: { type: "boolean", default: true }, @@ -16,23 +14,19 @@ const parser = yargs(process.argv.slice(2)).options({ maxIterations: { type: "number", default: 10 }, }); +// these are imported from `npx nolita auth` +// if you haven't set config, you can set the defaults for this example in your environment: +// OPENAI_API_KEY +const { agentApiKey, agentProvider, agentModel } = nolitarc(); + async function main() { const argv = await parser.parse(); - console.log(argv); - - if (!argv.startUrl) { - throw new Error("url is not provided"); - } - - if (!argv.objective) { - throw new Error("objective is not provided"); - } const providerOptions = { - apiKey: process.env.OPENAI_API_KEY!, - provider: "openai", + apiKey: agentApiKey || process.env.OPENAI_API_KEY!, + provider: agentProvider || "openai", }; - const chatApi = completionApiBuilder(providerOptions, { model: "gpt-4" }); + const chatApi = completionApiBuilder(providerOptions, { model: agentModel || "gpt-4" }); if (!chatApi) { throw new Error( @@ -42,35 +36,20 @@ async function main() { const logger = new Logger(["info"], (msg) => console.log(msg)); const agent = new Agent({ modelApi: chatApi }); - const agentBrowser = new AgentBrowser({ - agent: agent, - browser: await Browser.launch(argv.headless, agent), - logger, - }); - - // Here we are defining a custom return schema - // Custom schemas extrend `ObjectiveComplete` by adding additional fields - // In addition to returning structured data, we find that using these fields - // improves the performance of the model by constraining the conditions - // under which the model can halt - const wikipediaAnswer = ObjectiveComplete.extend({ - numberOfEditors: z - .number() - .int() - .describe("The number of editors in int format"), + const browser = await Browser.launch(argv.headless, agent, logger); + const page = await browser.newPage(); + await page.goto(argv.startUrl || "https://google.com"); + const answer = await page.browse(argv.objective || "How many accounts are on Wikipedia?", { + maxTurns: argv.maxIterations || 10, + schema: z.object({ + numberOfEditors: z.number().int().describe("The number of accounts in int format"), + }), }); - const answer = await agentBrowser.browse( - { - startUrl: argv.startUrl, - objective: [argv.objective], - maxIterations: argv.maxIterations, - }, - ModelResponseSchema(wikipediaAnswer) - ); - console.log("Answer:", answer?.result); + // @ts-expect-error - we are not using the full response schema + console.log("Answer:", answer?.objectiveComplete?.numberOfEditors); - await agentBrowser.close(); + await browser.close(); } main(); diff --git a/jest.config.ts b/jest.config.ts index 0cc4194..1400cb2 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -4,6 +4,7 @@ const config: Config = { verbose: true, preset: "ts-jest", testEnvironment: "node", + testTimeout: 20000 }; export default config; diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 573ae80..2169922 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -67,7 +67,9 @@ export class Agent { memories: Memory[], config?: { inventory?: Inventory; systemPrompt?: string } ): ChatRequestMessage[] { - const userPrompt = `Here are examples of a request: + const userPrompt = ` + ${config?.inventory ? `Use the following information to achieve your objective as needed: ${config?.inventory.toString()}` : ""} + Here are examples of a request: ${stringifyObjects(memories)} remember to return a result only in the form of an ActionStep. diff --git a/src/agent/messages.ts b/src/agent/messages.ts index b8fccd3..ab42528 100644 --- a/src/agent/messages.ts +++ b/src/agent/messages.ts @@ -77,7 +77,7 @@ export function handleConfigMessages( ): ChatRequestMessage[] { const messages: ChatRequestMessage[] = []; - const { systemPrompt, inventory } = config; + const { systemPrompt } = config; if (systemPrompt) { console.log("systemPrompt", systemPrompt); @@ -87,13 +87,6 @@ export function handleConfigMessages( }); } - if (inventory) { - messages.push({ - role: "user", - content: `Use the following information to achieve your objective as needed: ${inventory.toString()}`, - }); - } - return messages; } diff --git a/src/browser/page.ts b/src/browser/page.ts index 0ad6f0b..8af7c41 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -638,7 +638,10 @@ export class Page { ) { let currentTurn = 0; while (currentTurn < opts.maxTurns) { - const step = await this.step(objective, opts?.schema, opts); + const step = await this.step(objective, opts?.schema, { + ...opts, + inventory: opts.inventory ?? this.inventory, + }); if (step?.objectiveComplete) { return step; diff --git a/tests/agent/agent.test.ts b/tests/agent/agent.test.ts index 8d1da48..9c18f90 100644 --- a/tests/agent/agent.test.ts +++ b/tests/agent/agent.test.ts @@ -10,8 +10,6 @@ import { ObjectiveComplete, } from "../../src/types/browser/actionStep.types"; -import { Inventory } from "../../src/inventory"; - import { z } from "zod"; import { ObjectiveState } from "../../src/types/browser"; import { completionApiBuilder } from "../../src/agent/config"; @@ -32,20 +30,6 @@ describe("Agent -- configs", () => { agent = new Agent({ modelApi: chatApi! }); }); - test("that configs are handled", async () => { - const prompt = await agent.prompt( - stateActionPair1.objectiveState, - [stateActionPair1], - { - inventory: new Inventory([ - { value: "test", name: "test", type: "string" }, - ]), - } - ); - expect(prompt[0].role).toBe("user"); - expect(prompt[0].content).toContain("Use the following information"); - }); - test("that empty configs are handled", async () => { const prompt = await agent.prompt( stateActionPair1.objectiveState, diff --git a/tests/agent/messages.test.ts b/tests/agent/messages.test.ts index 36d84bd..b81171a 100644 --- a/tests/agent/messages.test.ts +++ b/tests/agent/messages.test.ts @@ -15,16 +15,6 @@ describe("handleConfigMessages", () => { expect(messages[0].content).toBe("test"); }); - it("should return an inventory message", () => { - const messages = handleConfigMessages({ - inventory: new Inventory([ - { value: "test", name: "test", type: "string" }, - ]), - }); - expect(messages[0].role).toBe("user"); - expect(messages[0].content).toContain("Use the following information"); - }); - it("should return both messages", () => { const messages = handleConfigMessages({ systemPrompt: "test", @@ -34,8 +24,6 @@ describe("handleConfigMessages", () => { }); expect(messages[0].role).toBe("system"); expect(messages[0].content).toBe("test"); - expect(messages[1].role).toBe("user"); - expect(messages[1].content).toContain("Use the following information"); }); }); @@ -62,17 +50,12 @@ describe("CommandPrompt", () => { } ); - expect(messages[0].role).toBe("user"); - expect(messages[0].content).toContain( - "Use the following information to achieve your objective as needed" - ); - - expect(messages[2].role).toBe("user"); - expect(messages[2].content).toContain( + expect(messages[1].role).toBe("user"); + expect(messages[1].content).toContain( "Please generate the next ActionStep for" ); - expect(messages.length).toBe(3); + expect(messages.length).toBe(2); }); it("should return a command prompt with a system prompt", () => { @@ -147,15 +130,10 @@ describe("getPrompt", () => { expect(messages[0].role).toBe("user"); expect(messages[0].content).toContain( - "Use the following information to achieve your objective as needed" - ); - - expect(messages[1].role).toBe("user"); - expect(messages[1].content).toContain( "Here is the current aria of the page" ); - expect(messages.length).toBe(2); + expect(messages.length).toBe(1); }); it("should return a get prompt with a system prompt", () => {