-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupload.py
37 lines (28 loc) · 1.05 KB
/
upload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import io
import os

import numpy as np
import pinecone
import PyPDF2
from fastapi import FastAPI, File, UploadFile
# Set up Pinecone.
# The API key is a secret and must not live in source control: it is read from
# the PINECONE_API_KEY environment variable, and the import fails fast with a
# KeyError when that variable is missing. Any key that was previously committed
# to this file should be treated as leaked and rotated.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ.get("PINECONE_ENVIRONMENT", "us-central1-gcp"),
)
# One-time setup (not run on import): the target index must already exist with
# dimension 512, e.g. pinecone.create_index("pdf", dimension=512).
# NOTE(review): that setup call names the index "pdf" while upload_pdf opens
# "pdfs" -- confirm which name is correct.

app = FastAPI()
@app.post("/upload-pdf/")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and upsert a vector for it into Pinecone.

    Args:
        file: The uploaded PDF, received as multipart form data.

    Returns:
        A dict ``{"text": <extracted text>}`` holding the text of every page
        concatenated in page order.
    """
    # Read the whole upload into memory; PyPDF2 needs a seekable file object.
    pdf_bytes = await file.read()

    # PdfReader is the supported replacement for PdfFileReader, which is
    # deprecated and raises an error in PyPDF2 3.0+.
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))

    # Join once instead of repeated "+=", and tolerate pages for which no text
    # could be extracted.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Convert the text to a vector representation.
    # NOTE: placeholder only -- a random 512-dim vector carries no information
    # about the text; replace with a real embedding for your use case.
    vector = np.random.rand(512)

    # Upload the vector to Pinecone. The client expects (id, values) tuples
    # whose values are plain Python floats, not a NumPy array.
    pinecone_index = pinecone.Index(index_name="pdfs")
    pinecone_index.upsert(vectors=[(file.filename, vector.tolist())])

    return {"text": text}