diff --git a/.env.example b/.env.example
index 4c3900d9c1f..7f49509c2d1 100644
--- a/.env.example
+++ b/.env.example
@@ -115,6 +115,8 @@ DEBUG_OPENAI=false

 # OPENAI_REVERSE_PROXY=

+# OPENAI_ORGANIZATION=
+
 #============#
 # OpenRouter #
 #============#
@@ -143,11 +145,22 @@ AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
 AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
 AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=

-# DALL·E 3
+# DALL·E
+#----------------
+# DALLE_API_KEY= # Key for both DALL-E-2 and DALL-E-3
+# DALLE3_API_KEY= # Key for DALL-E-3 only
+# DALLE2_API_KEY= # Key for DALL-E-2 only
+# DALLE3_SYSTEM_PROMPT="Your DALL-E-3 System Prompt here"
+# DALLE2_SYSTEM_PROMPT="Your DALL-E-2 System Prompt here"
+# DALLE_REVERSE_PROXY= # Reverse proxy for DALL-E-2 and DALL-E-3
+# DALLE3_BASEURL= # Base URL for DALL-E-3
+# DALLE2_BASEURL= # Base URL for DALL-E-2
+
+# DALL·E (via Azure OpenAI)
+# Note: requires some of the variables above to be set
 #----------------
-# DALLE_API_KEY=
-# DALLE3_SYSTEM_PROMPT="Your System Prompt here"
-# DALLE_REVERSE_PROXY=
+# DALLE3_AZURE_API_VERSION= # Azure OpenAI API version for DALL-E-3
+# DALLE2_AZURE_API_VERSION= # Azure OpenAI API version for DALL-E-2

 # Google
 #-----------------
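For reference, a hypothetical Azure OpenAI setup using the new DALL-E-3 variables might look like the sketch below. The resource name, deployment name, key, and API version are placeholders, not values from this diff, and the exact base URL shape depends on how the Azure deployment is configured:

```
# Hypothetical example only — substitute your own Azure resource, deployment, and key
DALLE3_API_KEY=<your-azure-openai-key>
DALLE3_BASEURL=https://<your-resource>.openai.azure.com/openai/deployments/<your-dalle3-deployment>/
DALLE3_AZURE_API_VERSION=2023-12-01-preview
```

When both DALLE3_BASEURL and DALLE3_AZURE_API_VERSION are set, the tool code in this diff points the OpenAI client at that base URL and sends the API version as an `api-version` query parameter.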
diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js
index f832b3ca134..7d0baa40d79 100644
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@@ -954,6 +954,10 @@ ${convo}
       opts.defaultHeaders = { ...opts.defaultHeaders, 'api-key': this.apiKey };
     }

+    if (process.env.OPENAI_ORGANIZATION) {
+      opts.organization = process.env.OPENAI_ORGANIZATION;
+    }
+
     let chatCompletion;
     const openai = new OpenAI({
       apiKey: this.apiKey,
diff --git a/api/app/clients/tools/DALL-E.js b/api/app/clients/tools/DALL-E.js
index 7f0c9520300..4eca7f7932e 100644
--- a/api/app/clients/tools/DALL-E.js
+++ b/api/app/clients/tools/DALL-E.js
@@ -1,7 +1,5 @@
-// From https://platform.openai.com/docs/api-reference/images/create
-// To use this tool, you must pass in a configured OpenAIApi object.
+const path = require('path');
 const OpenAI = require('openai');
-// const { genAzureEndpoint } = require('~/utils/genAzureEndpoints');
 const { v4: uuidv4 } = require('uuid');
 const { Tool } = require('langchain/tools');
 const { HttpsProxyAgent } = require('https-proxy-agent');
@@ -10,46 +8,39 @@ const { processFileURL } = require('~/server/services/Files/process');
 const extractBaseURL = require('~/utils/extractBaseURL');
 const { logger } = require('~/config');

-const { DALLE_REVERSE_PROXY, PROXY } = process.env;
+const {
+  DALLE2_SYSTEM_PROMPT,
+  DALLE_REVERSE_PROXY,
+  PROXY,
+  DALLE2_AZURE_API_VERSION,
+  DALLE2_BASEURL,
+  DALLE2_API_KEY,
+  DALLE_API_KEY,
+} = process.env;

 class OpenAICreateImage extends Tool {
   constructor(fields = {}) {
     super();
     this.userId = fields.userId;
     this.fileStrategy = fields.fileStrategy;
-    let apiKey = fields.DALLE_API_KEY || this.getApiKey();
+    let apiKey = fields.DALLE2_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();
     const config = { apiKey };
     if (DALLE_REVERSE_PROXY) {
       config.baseURL = extractBaseURL(DALLE_REVERSE_PROXY);
     }

+    if (DALLE2_AZURE_API_VERSION && DALLE2_BASEURL) {
+      config.baseURL = DALLE2_BASEURL;
+      config.defaultQuery = { 'api-version': DALLE2_AZURE_API_VERSION };
+      config.defaultHeaders = { 'api-key': DALLE2_API_KEY, 'Content-Type': 'application/json' };
+      config.apiKey = DALLE2_API_KEY;
+    }
+
     if (PROXY) {
       config.httpAgent = new HttpsProxyAgent(PROXY);
     }

-    // let azureKey = fields.AZURE_API_KEY || process.env.AZURE_API_KEY;
-
-    // if (azureKey) {
-    //   apiKey = azureKey;
-    //   const azureConfig = {
-    //     apiKey,
-    //     azureOpenAIApiInstanceName: process.env.AZURE_OPENAI_API_INSTANCE_NAME || fields.azureOpenAIApiInstanceName,
-    //     azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_API_DEPLOYMENT_NAME || fields.azureOpenAIApiDeploymentName,
-    //     azureOpenAIApiVersion: process.env.AZURE_OPENAI_API_VERSION || fields.azureOpenAIApiVersion
-    //   };
-    //   config = {
-    //     apiKey,
-    //     basePath: genAzureEndpoint({
-    //       ...azureConfig,
-    //     }),
-    //     baseOptions: {
-    //       headers: { 'api-key': apiKey },
-    //       params: {
-    //         'api-version': azureConfig.azureOpenAIApiVersion // this might change. I got the current value from the sample code at https://oai.azure.com/portal/chat
-    //       }
-    //     }
-    //   };
-    // }
+
     this.openai = new OpenAI(config);
     this.name = 'dall-e';
     this.description = `You can generate images with 'dall-e'. This tool is exclusively for visual content.
@@ -59,10 +50,24 @@ Guidelines:
 - It's best to follow this format for image creation. Come up with the optional inputs yourself if none are given:
 "Subject: [subject], Style: [style], Color: [color], Details: [details], Emotion: [emotion]"
 - Generate images only once per human query unless explicitly requested by the user`;
+    this.description_for_model =
+      DALLE2_SYSTEM_PROMPT ??
+      `// Whenever a description of an image is given, generate prompts (following these rules), and use dalle to create the image. If the user does not ask for a specific number of images, default to creating 2 prompts to send to dalle that are written to be as diverse as possible. All prompts sent to dalle must abide by the following policies:
+// 1. Prompts must be in English. Translate to English if needed.
+// 2. One image per function call. Create only 1 image per request unless explicitly told to generate more than 1 image.
+// 3. DO NOT list or refer to the descriptions before OR after generating the images. They should ONLY ever be written out ONCE, in the \`"prompts"\` field of the request. You do not need to ask for permission to generate, just do it!
+// 4. Always mention the image type (photo, oil painting, watercolor painting, illustration, cartoon, drawing, vector, render, etc.) at the beginning of the caption. Unless the captions suggests otherwise, make one of the images a photo.
+// 5. Diversify depictions of ALL images with people to always include always DESCENT and GENDER for EACH person using direct terms. Adjust only human descriptions.
+// - EXPLICITLY specify these attributes, not abstractly reference them. The attributes should be specified in a minimal way and should directly describe their physical form.
+// - Your choices should be grounded in reality. For example, all of a given OCCUPATION should not be the same gender or race. Additionally, focus on creating diverse, inclusive, and exploratory scenes via the properties you choose during rewrites. Make choices that may be insightful or unique sometimes.
+// - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
+// - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
+// The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
+// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.`;
   }

   getApiKey() {
-    const apiKey = process.env.DALLE_API_KEY || '';
+    const apiKey = DALLE2_API_KEY ?? DALLE_API_KEY ?? '';
     if (!apiKey) {
       throw new Error('Missing DALLE_API_KEY environment variable.');
     }
@@ -96,17 +101,19 @@ Guidelines:
     }

     const imageBasename = getImageBasename(theImageUrl);
-    let imageName = `image_${uuidv4()}.png`;
-
-    if (imageBasename) {
-      imageName = imageBasename;
-      logger.debug('[DALL-E]', { imageName }); // Output: img-lgCf7ppcbhqQrz6a5ear6FOb.png
-    } else {
-      logger.debug('[DALL-E] No image name found in the string.', {
-        theImageUrl,
-        data: resp.data[0],
-      });
-    }
+    const imageExt = path.extname(imageBasename);
+
+    const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
+    const imageName = `img-${uuidv4()}.${extension}`;
+
+    logger.debug('[DALL-E-2]', {
+      imageName,
+      imageBasename,
+      imageExt,
+      extension,
+      theImageUrl,
+      data: resp.data[0],
+    });

     try {
       const result = await processFileURL({
diff --git a/api/app/clients/tools/manifest.json b/api/app/clients/tools/manifest.json
index d5f2c75d3d5..bde4c8a87a9 100644
--- a/api/app/clients/tools/manifest.json
+++ b/api/app/clients/tools/manifest.json
@@ -89,7 +89,7 @@
     "icon": "https://i.imgur.com/u2TzXzH.png",
     "authConfig": [
       {
-        "authField": "DALLE_API_KEY",
+        "authField": "DALLE2_API_KEY",
         "label": "OpenAI API Key",
         "description": "You can use DALL-E with your API Key from OpenAI."
       }
@@ -102,7 +102,7 @@
     "icon": "https://i.imgur.com/u2TzXzH.png",
     "authConfig": [
       {
-        "authField": "DALLE_API_KEY",
+        "authField": "DALLE3_API_KEY",
         "label": "OpenAI API Key",
         "description": "You can use DALL-E with your API Key from OpenAI."
       }
diff --git a/api/app/clients/tools/structured/DALLE3.js b/api/app/clients/tools/structured/DALLE3.js
index 16326acdeec..33df93e7fcf 100644
--- a/api/app/clients/tools/structured/DALLE3.js
+++ b/api/app/clients/tools/structured/DALLE3.js
@@ -1,6 +1,5 @@
-// From https://platform.openai.com/docs/guides/images/usage?context=node
-// To use this tool, you must pass in a configured OpenAIApi object.
 const { z } = require('zod');
+const path = require('path');
 const OpenAI = require('openai');
 const { v4: uuidv4 } = require('uuid');
 const { Tool } = require('langchain/tools');
@@ -10,19 +9,33 @@ const { processFileURL } = require('~/server/services/Files/process');
 const extractBaseURL = require('~/utils/extractBaseURL');
 const { logger } = require('~/config');

-const { DALLE3_SYSTEM_PROMPT, DALLE_REVERSE_PROXY, PROXY } = process.env;
+const {
+  DALLE3_SYSTEM_PROMPT,
+  DALLE_REVERSE_PROXY,
+  PROXY,
+  DALLE3_AZURE_API_VERSION,
+  DALLE3_BASEURL,
+  DALLE3_API_KEY,
+} = process.env;

 class DALLE3 extends Tool {
   constructor(fields = {}) {
     super();
     this.userId = fields.userId;
     this.fileStrategy = fields.fileStrategy;
-    let apiKey = fields.DALLE_API_KEY || this.getApiKey();
+    let apiKey = fields.DALLE3_API_KEY ?? fields.DALLE_API_KEY ?? this.getApiKey();
     const config = { apiKey };
     if (DALLE_REVERSE_PROXY) {
       config.baseURL = extractBaseURL(DALLE_REVERSE_PROXY);
     }

+    if (DALLE3_AZURE_API_VERSION && DALLE3_BASEURL) {
+      config.baseURL = DALLE3_BASEURL;
+      config.defaultQuery = { 'api-version': DALLE3_AZURE_API_VERSION };
+      config.defaultHeaders = { 'api-key': DALLE3_API_KEY, 'Content-Type': 'application/json' };
+      config.apiKey = DALLE3_API_KEY;
+    }
+
     if (PROXY) {
       config.httpAgent = new HttpsProxyAgent(PROXY);
     }
@@ -46,7 +59,8 @@ class DALLE3 extends Tool {
 // - Use "various" or "diverse" ONLY IF the description refers to groups of more than 3 people. Do not change the number of people requested in the original description.
 // - Don't alter memes, fictional character origins, or unseen people. Maintain the original prompt's intent and prioritize quality.
 // The prompt must intricately describe every part of the image in concrete, objective detail. THINK about what the end goal of the description is, and extrapolate that to what would make satisfying images.
-// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.`;
+// All descriptions sent to dalle should be a paragraph of text that is extremely descriptive and detailed. Each should be more than 3 sentences long.
+// - The "vivid" style is HIGHLY preferred, but "natural" is also supported.`;
     this.schema = z.object({
       prompt: z
         .string()
@@ -71,7 +85,7 @@ class DALLE3 extends Tool {
   }

   getApiKey() {
-    const apiKey = process.env.DALLE_API_KEY || '';
+    const apiKey = process.env.DALLE3_API_KEY ?? process.env.DALLE_API_KEY ?? '';
     if (!apiKey) {
       throw new Error('Missing DALLE_API_KEY environment variable.');
     }
@@ -121,17 +135,19 @@ Error Message: ${error.message}`;
     }

     const imageBasename = getImageBasename(theImageUrl);
-    let imageName = `image_${uuidv4()}.png`;
-
-    if (imageBasename) {
-      imageName = imageBasename;
-      logger.debug('[DALL-E-3]', { imageName }); // Output: img-lgCf7ppcbhqQrz6a5ear6FOb.png
-    } else {
-      logger.debug('[DALL-E-3] No image name found in the string.', {
-        theImageUrl,
-        data: resp.data[0],
-      });
-    }
+    const imageExt = path.extname(imageBasename);
+
+    const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt;
+    const imageName = `img-${uuidv4()}.${extension}`;
+
+    logger.debug('[DALL-E-3]', {
+      imageName,
+      imageBasename,
+      imageExt,
+      extension,
+      theImageUrl,
+      data: resp.data[0],
+    });

     try {
       const result = await processFileURL({
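As a quick illustration of the renaming logic introduced in both tools (the basename below is hypothetical; real values come from getImageBasename(theImageUrl)):

```js
const path = require('path');
const { v4: uuidv4 } = require('uuid');

// Hypothetical basename for illustration only
const imageBasename = 'img-abc123.png';
const imageExt = path.extname(imageBasename); // '.png'
const extension = imageExt.startsWith('.') ? imageExt.slice(1) : imageExt; // 'png'
const imageName = `img-${uuidv4()}.${extension}`; // e.g. 'img-1b9d6bcd-....png'
```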
diff --git a/api/app/clients/tools/structured/specs/DALLE3.spec.js b/api/app/clients/tools/structured/specs/DALLE3.spec.js
index 65a5fbd844d..58771b1459e 100644
--- a/api/app/clients/tools/structured/specs/DALLE3.spec.js
+++ b/api/app/clients/tools/structured/specs/DALLE3.spec.js
@@ -48,6 +48,9 @@ jest.mock('path', () => {
     resolve: jest.fn(),
     join: jest.fn(),
     relative: jest.fn(),
+    extname: jest.fn().mockImplementation((filename) => {
+      return filename.slice(filename.lastIndexOf('.'));
+    }),
   };
 });

@@ -148,7 +151,7 @@ describe('DALLE3', () => {
     await expect(dalle._call(mockData)).rejects.toThrow('Missing required field: prompt');
   });

-  it('should log to console if no image name is found in the URL', async () => {
+  it('should log appropriate debug values', async () => {
     const mockData = {
       prompt: 'A test prompt',
     };
@@ -162,9 +165,13 @@ describe('DALLE3', () => {
     generate.mockResolvedValue(mockResponse);
     await dalle._call(mockData);

-    expect(logger.debug).toHaveBeenCalledWith('[DALL-E-3] No image name found in the string.', {
+    expect(logger.debug).toHaveBeenCalledWith('[DALL-E-3]', {
       data: { url: 'http://example.com/invalid-url' },
       theImageUrl: 'http://example.com/invalid-url',
+      extension: expect.any(String),
+      imageBasename: expect.any(String),
+      imageExt: expect.any(String),
+      imageName: expect.any(String),
     });
   });

diff --git a/api/server/services/Endpoints/custom/initializeClient.js b/api/server/services/Endpoints/custom/initializeClient.js
index 0c0ad9e7e21..978506b7b47 100644
--- a/api/server/services/Endpoints/custom/initializeClient.js
+++ b/api/server/services/Endpoints/custom/initializeClient.js
@@ -22,6 +22,13 @@ const initializeClient = async ({ req, res, endpointOption }) => {
   const CUSTOM_API_KEY = extractEnvVariable(endpointConfig.apiKey);
   const CUSTOM_BASE_URL = extractEnvVariable(endpointConfig.baseURL);

+  let resolvedHeaders = {};
+  if (endpointConfig.headers && typeof endpointConfig.headers === 'object') {
+    Object.keys(endpointConfig.headers).forEach((key) => {
+      resolvedHeaders[key] = extractEnvVariable(endpointConfig.headers[key]);
+    });
+  }
+
   if (CUSTOM_API_KEY.match(envVarRegex)) {
     throw new Error(`Missing API Key for ${endpoint}.`);
   }
@@ -31,6 +38,7 @@ const initializeClient = async ({ req, res, endpointOption }) => {
   }

   const customOptions = {
+    headers: resolvedHeaders,
     addParams: endpointConfig.addParams,
     dropParams: endpointConfig.dropParams,
     titleConvo: endpointConfig.titleConvo,
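For illustration, a custom endpoint entry could now carry per-endpoint headers, with `${VAR}` placeholders resolved from the environment by extractEnvVariable. The sketch below assumes the existing custom-endpoint config shape; the endpoint name, header names, and variable names are made up for the example:

```yaml
endpoints:
  custom:
    - name: 'Example Provider'          # hypothetical endpoint
      apiKey: '${EXAMPLE_API_KEY}'
      baseURL: 'https://api.example.com/v1'
      headers:
        x-api-subscription: '${EXAMPLE_SUBSCRIPTION_KEY}'  # placeholder resolved from process.env
        x-client-name: 'LibreChat'                         # literal values are passed through as-is
```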
diff --git a/api/server/services/ModelService.js b/api/server/services/ModelService.js
index 2e433dbd14e..76ac061546d 100644
--- a/api/server/services/ModelService.js
+++ b/api/server/services/ModelService.js
@@ -53,6 +53,10 @@ const fetchModels = async ({ apiKey, baseURL, name = 'OpenAI', azure = false })
       payload.httpsAgent = new HttpsProxyAgent(PROXY);
     }

+    if (process.env.OPENAI_ORGANIZATION && baseURL.includes('openai')) {
+      payload.headers['OpenAI-Organization'] = process.env.OPENAI_ORGANIZATION;
+    }
+
     const res = await axios.get(`${baseURL}${azure ? '' : '/models'}`, payload);
     models = res.data.data.map((item) => item.id);
   } catch (err) {
diff --git a/client/src/components/Chat/Input/SendButton.tsx b/client/src/components/Chat/Input/SendButton.tsx
index 0751dd51eb0..549363dc139 100644
--- a/client/src/components/Chat/Input/SendButton.tsx
+++ b/client/src/components/Chat/Input/SendButton.tsx
@@ -6,8 +6,7 @@ export default function SendButton({ text, disabled }) {