Skip to content

Commit

Permalink
removed cleaning functions from Generate Description, added msw serve…
Browse files Browse the repository at this point in the history
…r - missing tests
  • Loading branch information
ghOdisea committed Dec 2, 2024
1 parent 83185d2 commit 07be3a0
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 179 deletions.
1 change: 1 addition & 0 deletions services/wiki/.env
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ HF_API_KEY=your_huggingface_api_key
HF_RES_MAX_LENGTH=350
HF_RES_TEMP=0.75
HF_RES_TOP_P=0.95
HF_MODEL="Qwen2.5-Coder-32B-Instruct"
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { http, HttpResponse } from 'msw'

/**
 * MSW request handler that mocks the Hugging Face text-generation endpoint.
 *
 * Intercepts POST requests sent to the URL held in `HF_API_ENDPOINT` and
 * answers with:
 * - 400 `{ message: 'Invalid request' }` when the body is not valid JSON, is
 *   not an object, or lacks `inputs` / `parameters`;
 * - 200 `{ message: 'Mocked response for: <inputs>' }` otherwise.
 *
 * NOTE(review): the mocked payload exposes `message`, not `generated_text` —
 * confirm this is the shape the consuming tests expect.
 */
export const generateTextHandler = http.post(
  process.env.HF_API_ENDPOINT as string,
  async ({ request }) => {
    const invalidRequest = () =>
      HttpResponse.json({ message: 'Invalid request' }, { status: 400 })

    // request.json() rejects on an empty or malformed body; report that as a
    // bad request instead of letting the resolver throw.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return invalidRequest()
    }

    // A JSON `null` or primitive cannot be destructured safely.
    if (typeof body !== 'object' || body === null) {
      return invalidRequest()
    }

    const { inputs, parameters } = body as {
      inputs?: string
      parameters?: { max_length: number; temperature: number; top_p: number }
    }

    // Mock response logic
    if (!inputs || !parameters) {
      return invalidRequest()
    }

    return HttpResponse.json(
      {
        message: `Mocked response for: ${inputs}`,
      },
      { status: 200 }
    )
  }
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { generateTextHandler } from './generateText'
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
registerHandler,
validateTokenHandler,
} from './ssoHandlers'
import { generateTextHandler } from './huggingFaceHandlers'

const handlers = [
loginHandler,
Expand All @@ -18,5 +19,6 @@ const handlers = [
getUsersNameByIdHandler,
listItinerariesHandler,
]
const huggingFaceHandlers = [generateTextHandler]

export const ssoServer = setupServer(...handlers)
export const ssoServer = setupServer(...handlers, ...huggingFaceHandlers)
44 changes: 9 additions & 35 deletions services/wiki/src/controllers/resources/generateDescription.ts
Original file line number Diff line number Diff line change
@@ -1,54 +1,28 @@
import Koa, { Middleware } from 'koa'
import { HuggingFaceRepository } from '../../repository/huggingFace'
import { getLanguageInput } from '../../helpers/getLanguageInput'
import { TSupportedLanguage } from '../../db/knexTypes'
import {
DefaultError,
MissingParamError,
ServiceFail,
} from '../../helpers/errors'
import { DefaultError } from '../../helpers/errors'
import generateHFDescriptionSchema from '../../schemas/huggingFace/generateHFDescription'

export const generateDescription: Middleware = async (ctx: Koa.Context) => {
const { title, url, topic } = ctx.request.body
const { language } = ctx.query
const { title, url, topic, language } = generateHFDescriptionSchema.parse(
ctx.request.body
)
const huggingFaceRepository = new HuggingFaceRepository()
try {
if (!title || !url || !topic || !language) {
throw new MissingParamError('required params')
}
const languageInput = language as TSupportedLanguage

const input = getLanguageInput(languageInput, title, url, topic)
const input = getLanguageInput(language, title, url, topic)

const response = await huggingFaceRepository.getResponse({
input,
title,
url,
topic,
language: languageInput,
language,
})

const cleanResponse = await huggingFaceRepository.cleanHFResponse(
[{ generated_text: response.generated_text }],
languageInput,
title,
url,
topic
)

if (!cleanResponse || !cleanResponse.generated_text) {
throw new ServiceFail('Failed to process the response from external API')
}

ctx.status = 200
ctx.body = cleanResponse
ctx.body = response
} catch (error: any) {
if (error instanceof DefaultError) {
ctx.status = error.status
ctx.body = { error: error.message }
} else {
ctx.status = 500
ctx.body = { error: error.message }
}
throw new DefaultError(error, 'Failed to generate description')
}
}
6 changes: 3 additions & 3 deletions services/wiki/src/helpers/getLanguageInput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ export const getLanguageInput = (
): string => {
switch (language) {
case 'en':
return `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words. Summary:\n`
return `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words.`
case 'es':
return `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras. Resumen:`
return `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras.`
case 'ca':
return `Si us plau, porporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accesible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 a 300 paraules. RESUM:`
return `Si us plau, porporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accesible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 a 300 paraules.`
default:
throw new Error('Unsupported language')
}
Expand Down
45 changes: 0 additions & 45 deletions services/wiki/src/repository/huggingFace.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { HuggingFaceRepository } from './huggingFace'
import { TSupportedLanguage } from '../db/knexTypes'
import { inputToTest } from '../__tests__/globalSetup'

global.fetch = vi.fn()
Expand Down Expand Up @@ -58,48 +57,4 @@ describe('HuggingFaceRepository', () => {
'HTTP error! status: 500'
)
})

it('should clean the response text properly', async () => {
const output = [{ generated_text: 'Summary:\nThis is a test summary.' }]
const options = {
summaryPrefix: 'Summary:\n',
removeInputTemplate: 'Summary:\n',
}

const result = await repo.cleanText(output, options)

expect(result).toBe('This is a test summary.')
})

it('should return a cleaned HuggingFace response', async () => {
const output = [{ generated_text: 'Mocked summary text' }]

const result = await repo.cleanHFResponse(
output,
TSupportedLanguage.English,
inputToTest.title,
inputToTest.url,
inputToTest.topic
)

expect(result).toEqual({
generated_text: expect.any(String),
})
})

it('should return an error message if cleaning the text fails', async () => {
const output = [{ generated_text: null }]

const result = await repo.cleanHFResponse(
output,
TSupportedLanguage.English,
inputToTest.title,
inputToTest.url,
inputToTest.topic
)

expect(result).toEqual({
generated_text: 'Error: Unable to clean text.',
})
})
})
99 changes: 5 additions & 94 deletions services/wiki/src/repository/huggingFace.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
import 'dotenv/config'
import {
TCleanTextOptions,
THuggingFaceResponseInput,
TResponse,
TSupportedLanguage,
} from '../db/knexTypes'
import { extractSummary, trimDoubleBackspace } from '../helpers/getHFCleanText'
import { THuggingFaceResponseInput, TResponse } from '../db/knexTypes'

export class HuggingFaceRepository {
private apiEndpoint: string
Expand All @@ -29,101 +23,18 @@ export class HuggingFaceRepository {
max_length: Number(process.env.HF_RES_MAX_LENGTH),
temperature: Number(process.env.HF_RES_TEMPERATURE),
top_p: Number(process.env.HF_RES_TOP_P),
return_full_text: false,
},
}),
})

if (!fetchResponse.ok) {
await fetchResponse.text()
throw new Error(`HTTP error! status: ${fetchResponse.status}`)
}
const output = await fetchResponse.json()
const response = this.cleanHFResponse(
output[0],
input.language as TSupportedLanguage,
input.title,
input.url,
input.topic
)
return response as unknown as TResponse
}

async cleanText(
output: { generated_text: string }[],
options: TCleanTextOptions & { removeInputTemplate?: string },
title?: string
): Promise<string> {
const originalText = output[0]?.generated_text

if (typeof originalText !== 'string') {
return ''
}

let cleanText = originalText

if (options.removeInputTemplate) {
const indexOfTemplate = cleanText.indexOf(options.removeInputTemplate)
if (indexOfTemplate !== -1) {
cleanText = cleanText
.slice(indexOfTemplate + options.removeInputTemplate.length)
.trim()
}
}

const extractedSummary = extractSummary(cleanText, options, title)

return trimDoubleBackspace(extractedSummary)
}
async cleanHFResponse(
output: { generated_text: any }[] | { generated_text: any },
language: TSupportedLanguage,
title: string,
url: string,
topic: string
): Promise<{ generated_text: string }> {
if (!Array.isArray(output)) {
// eslint-disable-next-line no-param-reassign
output = [output]
}
const response = await fetchResponse.json()

if (!output[0] || typeof output[0].generated_text !== 'string') {
return { generated_text: 'Error: Unable to clean text.' }
}

let text = ''

try {
const languageOptions = {
es: {
summaryPrefix: 'Resumen: ',
removeInputTemplate: `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras. Resumen:`,
},
en: {
summaryPrefix: 'Summary:\n',
removeInputTemplate: `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words. Summary:\n`,
},
ca: {
summaryPrefix: 'RESUM: ',
removeInputTemplate: `Si us plau, porporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accesible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 a 300 paraules. RESUM:`,
},
}

const options = languageOptions[language]

if (!options) {
throw new Error(`Unsupported language: ${language}`)
}
text = await this.cleanText(
output,
{ ...options, titleIncluded: true },
title
)
if (!text || text.trim().length === 0) {
throw new Error('Cleaned text is empty')
}
} catch (error) {
return { generated_text: 'Error: Unable to clean text.' }
}

return { generated_text: text }
return response as unknown as TResponse
}
}
15 changes: 15 additions & 0 deletions services/wiki/src/schemas/huggingFace/generateHFDescription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import z from 'zod'
import { TSupportedLanguage } from '../../db/knexTypes'

/**
 * Validation schema for the payload of the "generate description" request.
 *
 * - `url`, `title`, `topic`: required, non-empty strings. Empty strings are
 *   rejected so that a blank field is treated the same as a missing one.
 * - `language`: must be one of the supported languages (Spanish, English,
 *   Catalan).
 *
 * `parse` throws when the payload does not satisfy these rules.
 */
const generateHFDescriptionSchema = z.object({
  url: z.string().min(1),
  title: z.string().min(1),
  topic: z.string().min(1),
  language: z.enum([
    TSupportedLanguage.Spanish,
    TSupportedLanguage.English,
    TSupportedLanguage.Catalan,
  ]),
})

export default generateHFDescriptionSchema
2 changes: 1 addition & 1 deletion services/wiki/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"esModuleInterop": true,
"skipLibCheck": true
},
"include": ["src/**/*.ts"],
"include": ["src/**/*.ts", "src/repository/huggingFace.ts"],
"exclude": [
"node_modules",
"dist",
Expand Down

0 comments on commit 07be3a0

Please sign in to comment.