Skip to content

Commit

Permalink
🖼️ fix: correct image extraction (#3538)
Browse files Browse the repository at this point in the history
* fix: regex issue extracting text with images in markdown

* fix: update addImages function to include only the first observed image path in the response message

* ci: tests for addImages function: correct image extraction

* fix(GoogleClient): linting

---------

Co-authored-by: Dongwoo Jeong <dongwoo.jeong@lge.com>
Co-authored-by: Dongwoo Jeong <dongwoo@duck.com>
  • Loading branch information
3 people authored Aug 5, 2024
1 parent 389b2a6 commit 5baa95b
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 26 deletions.
48 changes: 25 additions & 23 deletions api/app/clients/GoogleClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -626,11 +626,11 @@ class GoogleClient extends BaseClient {
const { onProgress, abortController } = options;
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};

let examples;

let clientOptions = { ...parameters, maxRetries: 2 };

if (this.project_id) {
clientOptions['authOptions'] = {
credentials: {
Expand All @@ -639,16 +639,16 @@ class GoogleClient extends BaseClient {
projectId: this.project_id,
};
}

if (!parameters) {
clientOptions = { ...clientOptions, ...this.modelOptions };
}

if (this.isGenerativeModel && !this.project_id) {
clientOptions.modelName = clientOptions.model;
delete clientOptions.model;
}

if (_examples && _examples.length) {
examples = _examples
.map((ex) => {
Expand All @@ -662,26 +662,26 @@ class GoogleClient extends BaseClient {
};
})
.filter((ex) => ex);

clientOptions.examples = examples;
}

const model = this.createLLM(clientOptions);

let reply = '';
const messages = this.isTextModel ? _payload.trim() : _messages;

if (!this.isVisionModel && context && messages?.length > 0) {
messages.unshift(new SystemMessage(context));
}

const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
if (modelName?.includes('1.5') && !this.project_id) {
const client = model;
const requestOptions = {
contents: _payload,
};

if (this.options?.promptPrefix?.length) {
requestOptions.systemInstruction = {
parts: [
Expand All @@ -691,9 +691,9 @@ class GoogleClient extends BaseClient {
],
};
}

requestOptions.safetySettings = _payload.safetySettings;

const delay = modelName.includes('flash') ? 8 : 14;
const result = await client.generateContentStream(requestOptions);
for await (const chunk of result.stream) {
Expand All @@ -706,15 +706,15 @@ class GoogleClient extends BaseClient {
}
return reply;
}

const stream = await model.stream(messages, {
signal: abortController.signal,
timeout: 7000,
safetySettings: _payload.safetySettings,
});

let delay = this.options.streamRate || 8;

if (!this.options.streamRate) {
if (this.isGenerativeModel) {
delay = 12;
Expand All @@ -723,15 +723,15 @@ class GoogleClient extends BaseClient {
delay = 5;
}
}

for await (const chunk of stream) {
const chunkText = chunk?.content ?? chunk;
await this.generateTextStream(chunkText, onProgress, {
delay,
});
reply += chunkText;
}

return reply;
}

Expand Down Expand Up @@ -869,17 +869,18 @@ class GoogleClient extends BaseClient {

async sendCompletion(payload, opts = {}) {
payload.safetySettings = this.getSafetySettings();

let reply = '';
reply = await this.getCompletion(payload, opts);
return reply.trim();
}

getSafetySettings() {
return [
{
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
threshold: process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
threshold:
process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
},
{
category: 'HARM_CATEGORY_HATE_SPEECH',
Expand All @@ -891,7 +892,8 @@ class GoogleClient extends BaseClient {
},
{
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
threshold: process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
threshold:
process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
},
];
}
Expand Down
6 changes: 3 additions & 3 deletions api/app/clients/output_parsers/addImages.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ function addImages(intermediateSteps, responseMessage) {
if (!observation || !observation.includes('![')) {
return;
}
const observedImagePath = observation.match(/!\[.*\]\([^)]*\)/g);
const observedImagePath = observation.match(/!\[[^(]*\]\([^)]*\)/g);
if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
responseMessage.text += '\n' + observation;
logger.debug('[addImages] added image from intermediateSteps:', observation);
responseMessage.text += '\n' + observedImagePath[0];
logger.debug('[addImages] added image from intermediateSteps:', observedImagePath[0]);
}
});
}
Expand Down
58 changes: 58 additions & 0 deletions api/app/clients/output_parsers/addImages.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,62 @@ describe('addImages', () => {
addImages(intermediateSteps, responseMessage);
expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
});

it('should extract only image markdowns when there is text between them', () => {
const markdownWithTextBetweenImages = `
![image1](/images/image1.png)
Some text between images that should not be included.
![image2](/images/image2.png)
More text that should be ignored.
![image3](/images/image3.png)
`;
intermediateSteps.push({ observation: markdownWithTextBetweenImages });
addImages(intermediateSteps, responseMessage);
expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
});

it('should only return the first image when multiple images are present', () => {
const markdownWithMultipleImages = `
![image1](/images/image1.png)
![image2](/images/image2.png)
![image3](/images/image3.png)
`;
intermediateSteps.push({ observation: markdownWithMultipleImages });
addImages(intermediateSteps, responseMessage);
expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
});

it('should not include any text or metadata surrounding the image markdown', () => {
const markdownWithMetadata = `
Title: Test Document
Author: John Doe
![image1](/images/image1.png)
Some content after the image.
Vector values: [0.1, 0.2, 0.3]
`;
intermediateSteps.push({ observation: markdownWithMetadata });
addImages(intermediateSteps, responseMessage);
expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
});

it('should handle complex markdown with multiple images and only return the first one', () => {
const complexMarkdown = `
# Document Title
## Section 1
Here's some text with an embedded image:
![image1](/images/image1.png)
## Section 2
More text here...
![image2](/images/image2.png)
### Subsection
Even more content
![image3](/images/image3.png)
`;
intermediateSteps.push({ observation: complexMarkdown });
addImages(intermediateSteps, responseMessage);
expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
});
});

0 comments on commit 5baa95b

Please sign in to comment.