diff --git a/services/wiki/.env b/services/wiki/.env index aa2f01e88..fc9d51f8d 100644 --- a/services/wiki/.env +++ b/services/wiki/.env @@ -15,3 +15,4 @@ HF_API_KEY=your_huggingface_api_key HF_RES_MAX_LENGTH=350 HF_RES_TEMP=0.75 HF_RES_TOP_P=0.95 +HF_MODEL="Qwen2.5-Coder-32B-Instruct" diff --git a/services/wiki/src/__tests__/mocks/huggingFaceHandlers/generateText.ts b/services/wiki/src/__tests__/mocks/huggingFaceHandlers/generateText.ts new file mode 100644 index 000000000..7ada58f4e --- /dev/null +++ b/services/wiki/src/__tests__/mocks/huggingFaceHandlers/generateText.ts @@ -0,0 +1,24 @@ +import { http, HttpResponse } from 'msw' + +export const generateTextHandler = http.post( + process.env.HF_API_ENDPOINT as string, + async ({ request }) => { + const { inputs, parameters } = (await request.json()) as { + inputs: string + parameters: { max_length: number; temperature: number; top_p: number } + } + + // Mock response logic + if (!inputs || !parameters) { + return HttpResponse.json({ message: 'Invalid request' }, { status: 400 }) + } + + return HttpResponse.json( + { + message: `Mocked response for: ${inputs}`, + }, + + { status: 200 } + ) + } +) diff --git a/services/wiki/src/__tests__/mocks/huggingFaceHandlers/index.ts b/services/wiki/src/__tests__/mocks/huggingFaceHandlers/index.ts new file mode 100644 index 000000000..159a8e535 --- /dev/null +++ b/services/wiki/src/__tests__/mocks/huggingFaceHandlers/index.ts @@ -0,0 +1 @@ +export { generateTextHandler } from './generateText' diff --git a/services/wiki/src/__tests__/mocks/ssoServer.ts b/services/wiki/src/__tests__/mocks/mswServer.ts similarity index 68% rename from services/wiki/src/__tests__/mocks/ssoServer.ts rename to services/wiki/src/__tests__/mocks/mswServer.ts index 9b6a70f9f..a669da55f 100644 --- a/services/wiki/src/__tests__/mocks/ssoServer.ts +++ b/services/wiki/src/__tests__/mocks/mswServer.ts @@ -8,6 +8,7 @@ import { registerHandler, validateTokenHandler, } from './ssoHandlers' +import { 
generateTextHandler } from './huggingFaceHandlers' const handlers = [ loginHandler, @@ -18,5 +19,6 @@ const handlers = [ getUsersNameByIdHandler, listItinerariesHandler, ] +const huggingFaceHandlers = [generateTextHandler] -export const ssoServer = setupServer(...handlers) +export const ssoServer = setupServer(...handlers, ...huggingFaceHandlers) diff --git a/services/wiki/src/controllers/resources/generateDescription.ts b/services/wiki/src/controllers/resources/generateDescription.ts index d4300365a..fca1f196f 100644 --- a/services/wiki/src/controllers/resources/generateDescription.ts +++ b/services/wiki/src/controllers/resources/generateDescription.ts @@ -1,54 +1,28 @@ import Koa, { Middleware } from 'koa' import { HuggingFaceRepository } from '../../repository/huggingFace' import { getLanguageInput } from '../../helpers/getLanguageInput' -import { TSupportedLanguage } from '../../db/knexTypes' -import { - DefaultError, - MissingParamError, - ServiceFail, -} from '../../helpers/errors' +import { DefaultError } from '../../helpers/errors' +import generateHFDescriptionSchema from '../../schemas/huggingFace/generateHFDescription' export const generateDescription: Middleware = async (ctx: Koa.Context) => { - const { title, url, topic } = ctx.request.body - const { language } = ctx.query + const { title, url, topic, language } = generateHFDescriptionSchema.parse( + ctx.request.body + ) const huggingFaceRepository = new HuggingFaceRepository() try { - if (!title || !url || !topic || !language) { - throw new MissingParamError('required params') - } - const languageInput = language as TSupportedLanguage - - const input = getLanguageInput(languageInput, title, url, topic) + const input = getLanguageInput(language, title, url, topic) const response = await huggingFaceRepository.getResponse({ input, title, url, topic, - language: languageInput, + language, }) - const cleanResponse = await huggingFaceRepository.cleanHFResponse( - [{ generated_text: response.generated_text 
}], - languageInput, - title, - url, - topic - ) - - if (!cleanResponse || !cleanResponse.generated_text) { - throw new ServiceFail('Failed to process the response from external API') - } - ctx.status = 200 - ctx.body = cleanResponse + ctx.body = response } catch (error: any) { - if (error instanceof DefaultError) { - ctx.status = error.status - ctx.body = { error: error.message } - } else { - ctx.status = 500 - ctx.body = { error: error.message } - } + throw new DefaultError(error, 'Failed to generate description') } } diff --git a/services/wiki/src/helpers/getLanguageInput.ts b/services/wiki/src/helpers/getLanguageInput.ts index fa08a28b7..a0a320051 100644 --- a/services/wiki/src/helpers/getLanguageInput.ts +++ b/services/wiki/src/helpers/getLanguageInput.ts @@ -6,11 +6,11 @@ export const getLanguageInput = ( ): string => { switch (language) { case 'en': - return `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words. Summary:\n` + return `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words.` case 'es': - return `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras. Resumen:` + return `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. 
Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras.` case 'ca': - return `Si us plau, porporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accesible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 a 300 paraules. RESUM:` + return `Si us plau, proporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accessible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 i 300 paraules.` default: throw new Error('Unsupported language') } diff --git a/services/wiki/src/repository/huggingFace.test.ts b/services/wiki/src/repository/huggingFace.test.ts index ce090eb20..71aebeb84 100644 --- a/services/wiki/src/repository/huggingFace.test.ts +++ b/services/wiki/src/repository/huggingFace.test.ts @@ -1,6 +1,5 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { HuggingFaceRepository } from './huggingFace' -import { TSupportedLanguage } from '../db/knexTypes' import { inputToTest } from '../__tests__/globalSetup' global.fetch = vi.fn() @@ -58,48 +57,4 @@ describe('HuggingFaceRepository', () => { 'HTTP error! status: 500' ) }) - - it('should clean the response text properly', async () => { - const output = [{ generated_text: 'Summary:\nThis is a test summary.' 
}] - const options = { - summaryPrefix: 'Summary:\n', - removeInputTemplate: 'Summary:\n', - } - - const result = await repo.cleanText(output, options) - - expect(result).toBe('This is a test summary.') - }) - - it('should return a cleaned HuggingFace response', async () => { - const output = [{ generated_text: 'Mocked summary text' }] - - const result = await repo.cleanHFResponse( - output, - TSupportedLanguage.English, - inputToTest.title, - inputToTest.url, - inputToTest.topic - ) - - expect(result).toEqual({ - generated_text: expect.any(String), - }) - }) - - it('should return an error message if cleaning the text fails', async () => { - const output = [{ generated_text: null }] - - const result = await repo.cleanHFResponse( - output, - TSupportedLanguage.English, - inputToTest.title, - inputToTest.url, - inputToTest.topic - ) - - expect(result).toEqual({ - generated_text: 'Error: Unable to clean text.', - }) - }) }) diff --git a/services/wiki/src/repository/huggingFace.ts b/services/wiki/src/repository/huggingFace.ts index f07d62a61..190780257 100644 --- a/services/wiki/src/repository/huggingFace.ts +++ b/services/wiki/src/repository/huggingFace.ts @@ -1,11 +1,5 @@ import 'dotenv/config' -import { - TCleanTextOptions, - THuggingFaceResponseInput, - TResponse, - TSupportedLanguage, -} from '../db/knexTypes' -import { extractSummary, trimDoubleBackspace } from '../helpers/getHFCleanText' +import { THuggingFaceResponseInput, TResponse } from '../db/knexTypes' export class HuggingFaceRepository { private apiEndpoint: string @@ -29,101 +23,18 @@ export class HuggingFaceRepository { max_length: Number(process.env.HF_RES_MAX_LENGTH), temperature: Number(process.env.HF_RES_TEMPERATURE), top_p: Number(process.env.HF_RES_TOP_P), + return_full_text: false, }, }), }) + if (!fetchResponse.ok) { await fetchResponse.text() throw new Error(`HTTP error! 
status: ${fetchResponse.status}`) } - const output = await fetchResponse.json() - const response = this.cleanHFResponse( - output[0], - input.language as TSupportedLanguage, - input.title, - input.url, - input.topic - ) - return response as unknown as TResponse - } - - async cleanText( - output: { generated_text: string }[], - options: TCleanTextOptions & { removeInputTemplate?: string }, - title?: string - ): Promise { - const originalText = output[0]?.generated_text - - if (typeof originalText !== 'string') { - return '' - } - let cleanText = originalText - - if (options.removeInputTemplate) { - const indexOfTemplate = cleanText.indexOf(options.removeInputTemplate) - if (indexOfTemplate !== -1) { - cleanText = cleanText - .slice(indexOfTemplate + options.removeInputTemplate.length) - .trim() - } - } - - const extractedSummary = extractSummary(cleanText, options, title) - - return trimDoubleBackspace(extractedSummary) - } - async cleanHFResponse( - output: { generated_text: any }[] | { generated_text: any }, - language: TSupportedLanguage, - title: string, - url: string, - topic: string - ): Promise<{ generated_text: string }> { - if (!Array.isArray(output)) { - // eslint-disable-next-line no-param-reassign - output = [output] - } + const response = await fetchResponse.json() - if (!output[0] || typeof output[0].generated_text !== 'string') { - return { generated_text: 'Error: Unable to clean text.' } - } - - let text = '' - - try { - const languageOptions = { - es: { - summaryPrefix: 'Resumen: ', - removeInputTemplate: `Por favor, proporciona una resumen detallado de la siguiente fuente ${title}, incluyendo los puntos clave, el propósito principal y los conceptos relevantes. Usa un tono claro y accesible. La fuente puede ser encontrada en ${url}, y su tema es ${topic}. El resumen debe estar entre 200 y 300 palabras. 
Resumen:`, - }, - en: { - summaryPrefix: 'Summary:\n', - removeInputTemplate: `Please provide a detailed summary of the following resource ${title}, including the key points, the main purpose, and the most relevant concepts. Use a clear and accessible tone. The resource can be found at ${url}, and its topic is ${topic}. The summary should be between 200 and 300 words. Summary:\n`, - }, - ca: { - summaryPrefix: 'RESUM: ', - removeInputTemplate: `Si us plau, porporciona un resum detallat de la següent font ${title}, incloent els punts clau, el propòsit principal i els conceptes més rellevants. Empra un to clar i accesible. La font es pot trobar a ${url}, i el seu tema és ${topic}. El resum ha de tenir entre 200 a 300 paraules. RESUM:`, - }, - } - - const options = languageOptions[language] - - if (!options) { - throw new Error(`Unsupported language: ${language}`) - } - text = await this.cleanText( - output, - { ...options, titleIncluded: true }, - title - ) - if (!text || text.trim().length === 0) { - throw new Error('Cleaned text is empty') - } - } catch (error) { - return { generated_text: 'Error: Unable to clean text.' 
} - - return { generated_text: text } + return response as unknown as TResponse } } diff --git a/services/wiki/src/schemas/huggingFace/generateHFDescription.ts b/services/wiki/src/schemas/huggingFace/generateHFDescription.ts new file mode 100644 index 000000000..deb17fe32 --- /dev/null +++ b/services/wiki/src/schemas/huggingFace/generateHFDescription.ts @@ -0,0 +1,16 @@ +import z from 'zod' +import { TSupportedLanguage } from '../../db/knexTypes' + +/** Request-body schema for description generation: url, title and topic must be non-empty strings (the previous manual check rejected empty values), and language must be one of the listed TSupportedLanguage members. */ +const generateHFDescriptionSchema = z.object({ + url: z.string().min(1), + title: z.string().min(1), + topic: z.string().min(1), + language: z.enum([ + TSupportedLanguage.Spanish, + TSupportedLanguage.English, + TSupportedLanguage.Catalan, + ]), +}) + +export default generateHFDescriptionSchema diff --git a/services/wiki/tsconfig.json b/services/wiki/tsconfig.json index f0c614b82..00d86b02b 100644 --- a/services/wiki/tsconfig.json +++ b/services/wiki/tsconfig.json @@ -13,7 +13,7 @@ "esModuleInterop": true, "skipLibCheck": true }, - "include": ["src/**/*.ts"], + "include": ["src/**/*.ts", "src/repository/huggingFace.ts"], "exclude": [ "node_modules", "dist",