Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add knowledge retrieval by using RAG #805

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
2 changes: 2 additions & 0 deletions frontend/apps/migration-web/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ OPENAI_API_KEY="YOUR_API_KEY"
LANGFUSE_PUBLIC_KEY=""
LANGFUSE_SECRET_KEY=""
LANGFUSE_BASE_URL="https://cloud.langfuse.com"
SUPABASE_URL="YOUR_SUPABASE_URL"
SUPABASE_SERVICE_ROLE_KEY="YOUR_SERVICE_ROLE_KEY"
37 changes: 37 additions & 0 deletions frontend/apps/migration-web/app/api/vectorize/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { vectorizeUrl } from '@/lib/vectorization'
import type { NextRequest } from 'next/server'

// Next.js route segment config: requests this route handler to run on the
// 'edge' runtime.
// NOTE(review): the handler calls vectorizeUrl, which pulls in HTML-parsing
// and vector-store libraries — confirm they are supported on this runtime.
export const runtime = 'edge'

/**
 * POST handler for the vectorize API route.
 *
 * Expects a JSON body of the shape `{ "url": string }`, passes the URL to
 * `vectorizeUrl`, and reports the stored document id and chunk count.
 *
 * Responses (all JSON):
 * - 200 `{ success, message, id, chunkCount }` on success.
 * - 400 `{ error }` when the body is not valid JSON, has no string `url`,
 *   or the URL is not an absolute http(s) URL.
 * - 500 `{ error }` when vectorization itself fails.
 */
export async function POST(req: NextRequest) {
  // Single place to build responses so every one declares its content type.
  const jsonResponse = (payload: Record<string, unknown>, status: number) =>
    new Response(JSON.stringify(payload), {
      status,
      headers: { 'Content-Type': 'application/json' },
    })

  const badRequest = () =>
    jsonResponse(
      { error: 'URL is not provided or is in an invalid format' },
      400,
    )

  // Only absolute http(s) URLs are accepted; anything the URL parser rejects
  // (including the empty string) is treated as invalid.
  const isHttpUrl = (value: string): boolean => {
    try {
      const { protocol } = new URL(value)
      return protocol === 'http:' || protocol === 'https:'
    } catch {
      return false
    }
  }

  // A malformed body is the client's fault, so it is handled separately
  // instead of falling through to the generic 500 path below.
  let body: unknown
  try {
    body = await req.json()
  } catch {
    return badRequest()
  }

  // The body may legally be any JSON value (null, number, array, ...), so
  // narrow before reading `url` from it.
  const url =
    typeof body === 'object' && body !== null && 'url' in body
      ? body.url
      : undefined

  if (typeof url !== 'string' || !isHttpUrl(url)) {
    return badRequest()
  }

  // NOTE(review): the server fetches a caller-supplied URL. The scheme is
  // restricted above, but internal/private hosts are not blocked here —
  // confirm whether this endpoint needs authentication or a host allow-list.
  try {
    const result = await vectorizeUrl(url)

    return jsonResponse(
      {
        success: true,
        message: 'Content vectorized and stored successfully',
        id: result.documentId,
        chunkCount: result.chunkCount,
      },
      200,
    )
  } catch (error) {
    console.error('Error in vectorize API:', error)
    return jsonResponse(
      { error: 'An error occurred while processing the URL' },
      500,
    )
  }
}
2 changes: 2 additions & 0 deletions frontend/apps/migration-web/app/review/page.tsx
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { ReviewWindow } from '@/components/ReviewWindow'
import { UrlVectorizer } from '@/components/UrlVectorizer'
import styles from './page.module.css'

export default function ReviewPage() {
return (
<div className={styles.container}>
<h1 className={styles.title}>Database Schema Review</h1>
<UrlVectorizer endpoint="api/vectorize" />
<ReviewWindow
endpoint="api/review"
placeholder="CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(255), ...);"
Expand Down
74 changes: 74 additions & 0 deletions frontend/apps/migration-web/components/UrlVectorizer.module.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* Outer card that wraps the whole URL vectorizer widget. */
.container {
margin-top: 2rem;
padding: 1.5rem;
border: 1px solid #e0e0e0;
border-radius: 8px;
background-color: #f9f9f9;
}

/* Widget heading. */
.title {
font-size: 1.5rem;
margin-bottom: 1rem;
color: #333;
}

/* Form body: children are stacked vertically with a uniform gap. */
.form {
display: flex;
flex-direction: column;
gap: 1rem;
}

/* Row holding the text input and the submit button side by side. */
.inputGroup {
display: flex;
gap: 0.5rem;
}

/* URL text field; flex: 1 lets it take the row width left over by the button. */
.input {
flex: 1;
padding: 0.75rem;
border: 1px solid #ccc;
border-radius: 4px;
font-size: 1rem;
}

/* Submit button (default state). */
.button {
padding: 0.75rem 1.5rem;
background-color: #0070f3;
color: white;
border: none;
border-radius: 4px;
font-size: 1rem;
cursor: pointer;
transition: background-color 0.2s;
}

/* Slightly darker background on hover. */
.button:hover {
background-color: #0060df;
}

/* Disabled state. Declared after :hover with equal specificity, so the grey
   background also wins while a disabled button is hovered. */
.button:disabled {
background-color: #ccc;
cursor: not-allowed;
}

/* Error message banner (red text on a pale red background). */
.error {
color: #e53e3e;
padding: 0.5rem;
border-radius: 4px;
background-color: #fff5f5;
border: 1px solid #fed7d7;
}

/* Success message banner (green text on a pale green background). */
.success {
color: #38a169;
padding: 0.5rem;
border-radius: 4px;
background-color: #f0fff4;
border: 1px solid #c6f6d5;
}

/* Small helper text shown under the form controls. */
.description {
font-size: 0.875rem;
color: #666;
line-height: 1.5;
}
92 changes: 92 additions & 0 deletions frontend/apps/migration-web/components/UrlVectorizer.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
'use client'

import { type FormEvent, useState } from 'react'
import styles from './UrlVectorizer.module.css'

/** Props accepted by the `UrlVectorizer` component. */
export type UrlVectorizerProps = {
// API path the form posts to, written without a leading slash; the
// component prepends "/" when building the request URL.
endpoint: string
}

/**
 * Form that submits a URL to `/${endpoint}` as `{ url }` via POST and shows
 * either the id returned by the server or an error message.
 *
 * The input and button are disabled while a request is in flight.
 *
 * @param endpoint - API path (no leading slash) that receives the POST.
 */
export const UrlVectorizer = ({ endpoint }: UrlVectorizerProps) => {
  const [url, setUrl] = useState<string>('')
  const [isLoading, setIsLoading] = useState<boolean>(false)
  const [error, setError] = useState<string | null>(null)
  const [success, setSuccess] = useState<string | null>(null)

  const handleSubmit = async (e: FormEvent) => {
    e.preventDefault()

    // Reset both banners up front so a message from an earlier submit never
    // lingers next to a fresh validation error.
    setError(null)
    setSuccess(null)

    // Validate and send the trimmed value: pasted URLs often carry stray
    // whitespace that would otherwise fail the scheme check below.
    const trimmedUrl = url.trim()

    if (!trimmedUrl) {
      setError('Please enter a URL')
      return
    }

    if (
      !trimmedUrl.startsWith('http://') &&
      !trimmedUrl.startsWith('https://')
    ) {
      setError('Please enter a valid URL (must start with http:// or https://)')
      return
    }

    setIsLoading(true)

    try {
      const response = await fetch(`/${endpoint}`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ url: trimmedUrl }),
      })

      // The body may not be JSON (e.g. an HTML error page). Tolerate that so
      // the user sees the HTTP status instead of a JSON parse error.
      let data: { id?: string; error?: string } | null = null
      try {
        data = await response.json()
      } catch {
        data = null
      }

      if (!response.ok) {
        throw new Error(data?.error || `Error: ${response.statusText}`)
      }

      setSuccess(
        `URL content successfully vectorized and stored. ID: ${data?.id}`,
      )
      setUrl('')
    } catch (err) {
      console.error('Error during vectorization:', err)
      setError(
        err instanceof Error
          ? err.message
          : 'An error occurred while processing the URL',
      )
    } finally {
      setIsLoading(false)
    }
  }

  return (
    <div className={styles.container}>
      <h2 className={styles.title}>URL Content Vectorization</h2>
      <form onSubmit={handleSubmit} className={styles.form}>
        <div className={styles.inputGroup}>
          <input
            type="text"
            value={url}
            onChange={(e) => setUrl(e.target.value)}
            placeholder="https://example.com"
            className={styles.input}
            disabled={isLoading}
          />
          <button type="submit" className={styles.button} disabled={isLoading}>
            {isLoading ? 'Processing...' : 'Vectorize'}
          </button>
        </div>

        {error && <div className={styles.error}>{error}</div>}
        {success && <div className={styles.success}>{success}</div>}

        <p className={styles.description}>
          Enter a URL to fetch its content, vectorize it, and store it in the
          database.
        </p>
      </form>
    </div>
  )
}
2 changes: 2 additions & 0 deletions frontend/apps/migration-web/lib/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
export * from './langfuse'
export * from './supabase'
export * from './vectorization'
6 changes: 6 additions & 0 deletions frontend/apps/migration-web/lib/supabase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { createClient } from '@supabase/supabase-js'

// Connection settings are read from the environment; an unset (or empty)
// variable falls back to the empty string.
const supabaseUrl = process.env.SUPABASE_URL || ''
const supabaseServiceRoleKey = process.env.SUPABASE_SERVICE_ROLE_KEY || ''

/**
 * Shared Supabase client for this app, created once at module load.
 *
 * NOTE(review): it is built with the service-role key, so it should only be
 * imported from server-side code — confirm no client bundle reaches it.
 */
export const supabaseClient = createClient(supabaseUrl, supabaseServiceRoleKey)
65 changes: 65 additions & 0 deletions frontend/apps/migration-web/lib/vectorization.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio'
import { HtmlToTextTransformer } from '@langchain/community/document_transformers/html_to_text'
import { SupabaseVectorStore } from '@langchain/community/vectorstores/supabase'
import { OpenAIEmbeddings } from '@langchain/openai'
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
import { supabaseClient } from '.'

// Embedding model shared by every vectorization call in this module.
const embeddings = new OpenAIEmbeddings({
modelName: 'text-embedding-3-small',
})

// Vector store built on the shared Supabase client. It targets the
// `documents` table and is configured with `match_documents` as its query
// name (presumably the database function used for similarity search —
// confirm against the Supabase schema).
const vectorStore = new SupabaseVectorStore(embeddings, {
client: supabaseClient,
tableName: 'documents',
queryName: 'match_documents',
})

/** Summary returned by `vectorizeUrl`. */
export type VectorizationResult = {
// First id returned by the vector store for the stored chunks, if any.
documentId?: string
// Number of text chunks produced from the page.
chunkCount: number
}

/**
 * Loads the page at `url`, converts its HTML to text, splits the text into
 * chunks, and stores the chunks in the module-level vector store.
 *
 * Each stored chunk is tagged with `source: url` and `type: 'webpage'`
 * metadata.
 *
 * @param url - Address of the page to load.
 * @returns The first id returned by the vector store (absent when nothing
 *   was stored) and the number of chunks produced.
 * @throws Propagates any error raised while loading, transforming,
 *   embedding, or storing the content.
 */
export async function vectorizeUrl(url: string): Promise<VectorizationResult> {
  const loader = new CheerioWebBaseLoader(url)
  const docs = await loader.load()

  // First pass: split along HTML structure, then convert each piece to text.
  const transformer = new HtmlToTextTransformer()
  const sequence =
    RecursiveCharacterTextSplitter.fromLanguage('html').pipe(transformer)
  const newDocuments = await sequence.invoke(docs)

  // Flatten line breaks into spaces. This is plain synchronous string work,
  // so it needs no async wrapper. Metadata is reset to an empty object and
  // filled in again after the second split below.
  const extractedDocs = newDocuments.map((doc) => ({
    pageContent: doc.pageContent.split('\n').join(' '),
    metadata: {},
  }))

  // Second pass: split the flattened text into small, non-overlapping chunks.
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: 300,
    chunkOverlap: 0,
  })

  const chunks = await splitter.splitDocuments(extractedDocs)

  // Nothing to embed or store (e.g. the page had no extractable text): skip
  // the embedding/database round trip entirely.
  if (chunks.length === 0) {
    return { chunkCount: 0 }
  }

  for (const chunk of chunks) {
    chunk.metadata = {
      ...chunk.metadata,
      source: url,
      type: 'webpage',
    }
  }

  const ids = await vectorStore.addDocuments(chunks)

  return {
    documentId: ids[0],
    chunkCount: chunks.length,
  }
}
6 changes: 4 additions & 2 deletions frontend/apps/migration-web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
"private": true,
"version": "0.1.0",
"dependencies": {
"@langchain/community": "0.3.33",
"@langchain/core": "0.3.42",
"@langchain/openai": "0.4.4",
"@supabase/supabase-js": "2.49.1",
"html-to-text": "9.0.5",
"langchain": "0.3.19",
"langfuse-langchain": "3.36.0",
"next": "15.1.2",
Expand All @@ -16,7 +19,6 @@
"@types/node": "22.9.0",
"@types/react": "18",
"@types/react-dom": "18",
"supabase": "2.15.8",
"typed-css-modules": "0.9.1",
"typescript": "5"
},
Expand All @@ -32,7 +34,7 @@
"lint": "pnpm run '/^lint:.*/'",
"lint:biome": "biome check .",
"start": "next start",
"supabase:pull": "pnpm supabase pull",
"supabase:pull": "pnpm supabase db pull",
"supabase:reset": "pnpm supabase db reset",
"supabase:start": "pnpm supabase start",
"supabase:stop": "pnpm supabase stop"
Expand Down
Loading