Skip to content

Commit

Permalink
fix: wip upload
Browse files Browse the repository at this point in the history
  • Loading branch information
cherninlab committed Nov 14, 2024
1 parent 695ffa1 commit d5fa361
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 84 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,6 @@ uploads/

# Google Cloud
*.json


app.log
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ gcloud iam service-accounts keys create credentials.json --iam-account=vertex-ra
- The service account needs both Vertex AI and Storage permissions to function properly
- You can use more granular permissions instead of storage.admin if needed

## Usage
## Usage

1. Start the application:

```bash
flask run
# FLASK_DEBUG=1 FLASK_APP=src/app flask run --debug
```

2. Open http://localhost:5000 in your browser
Expand Down
10 changes: 7 additions & 3 deletions src/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ def create_app(config_class=Config):
app = Flask(__name__)
app.config.from_object(config_class)

# Configure logging
# Enhanced logging configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
level=logging.DEBUG if app.debug else logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler('app.log')
]
)
app.logger.setLevel(logging.INFO)

Expand Down
17 changes: 12 additions & 5 deletions src/app/routes/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from flask import Blueprint, jsonify, request
from flask import Blueprint, jsonify, request, current_app
from pydantic import BaseModel

from app.services.vertex_service import RagConfig, VertexService
Expand All @@ -23,13 +23,13 @@ def query():
try:
config = RagConfig(project_id=data.project_id)
service = VertexService(config)

response = service.query(
corpus=data.corpus_name,
query=data.query,
top_k=data.top_k
)

return jsonify(response)
except Exception as e:
return jsonify({"error": str(e)}), 500
Expand All @@ -54,7 +54,7 @@ def list_files(corpus_name: str):
def delete_document(corpus_name: str):
project_id = request.args.get("project_id")
document_id = request.args.get("document_id")

if not all([project_id, document_id]):
return jsonify({"error": "Missing required parameters"}), 400

Expand All @@ -64,4 +64,11 @@ def delete_document(corpus_name: str):
service.delete_files(corpus_name, [document_id])
return jsonify({"success": True})
except Exception as e:
return jsonify({"error": str(e)}), 500
return jsonify({"error": str(e)}), 500


@bp.route("/upload/status", methods=["GET"])
def upload_status():
    """Report the progress record stored for one upload.

    Query parameters:
        upload_id: identifier of the upload whose status is requested.

    Returns:
        A 400 response with an error payload when ``upload_id`` is missing
        or empty. Otherwise the value stored in the app config under
        ``upload_status_<upload_id>``, or an empty object when nothing is
        stored under that key.
    """
    upload_id = request.args.get("upload_id")
    if not upload_id:
        # Without this guard the lookup key would be "upload_status_None"
        # and the caller would get a misleading 200 with an empty body.
        return jsonify({"error": "Missing required parameters"}), 400

    status = current_app.config.get(f"upload_status_{upload_id}", {})
    return jsonify(status)
130 changes: 56 additions & 74 deletions src/app/routes/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uuid
import os
from pathlib import Path

from flask import (
Blueprint, current_app, flash, redirect,
render_template, request, url_for, jsonify
Expand Down Expand Up @@ -48,86 +49,67 @@ def create_new_bucket():
def upload():
"""Handle document uploads and processing."""
if request.method == "POST":
upload_id = str(uuid.uuid4())
temp_file = None

try:
upload_dir = Path(current_app.config["UPLOAD_FOLDER"])
upload_dir.mkdir(mode=0o755, parents=True, exist_ok=True)
current_app.logger.info(
f"Upload request received - ID: {upload_id}")
current_app.logger.debug(
f"Request headers: {dict(request.headers)}")

# Check if file was submitted
if "file" not in request.files:
current_app.logger.error("No file part in request")
flash("No file part", "error")
return redirect(request.url)

# Validate bucket name
project_id = get_project_id()
bucket_name = request.form.get("bucket_name")
if not bucket_name:
current_app.logger.error("Missing bucket name")
flash("Bucket name is required", "error")
return redirect(request.url)
raise ValueError("No file uploaded")

file = request.files["file"]
if file.filename == "":
current_app.logger.error("No selected file")
flash("No selected file", "error")
return redirect(request.url)

if file and allowed_file(file.filename):
try:
current_app.config["LAST_BUCKET_NAME"] = bucket_name
filename = secure_filename(file.filename)
filepath = upload_dir / filename

# Log file details
current_app.logger.info(f"Processing file: {filename}")
current_app.logger.info(
f"File size: {len(file.read())} bytes")
file.seek(0) # Reset file pointer

file.save(filepath)

# Process document
doc_service = DocumentService()
chunks = doc_service.process_document(filepath)

if not chunks:
current_app.logger.error(
f"No content extracted from {filename}")
flash("No text content found in document", "error")
return redirect(request.url)

# Create corpus and import chunks
config = RagConfig(
project_id=project_id,
bucket_name=bucket_name,
display_name=f"corpus_{filename}"
)
service = VertexService(config)
corpus = service.create_corpus()
service.import_chunks(corpus, chunks)

flash(f"Successfully processed document: {
filename}", "success")
return redirect(url_for("main.chat", corpus_name=corpus.name, project_id=project_id))

except Exception as e:
current_app.logger.error(
f"Upload processing error: {str(e)}")
flash(f"Error processing upload: {str(e)}", "error")
return redirect(request.url)
finally:
# Clean up uploaded file
if filepath.exists():
filepath.unlink()
else:
current_app.logger.error(f"Invalid file type: {file.filename}")
flash("Invalid file type", "error")
return redirect(request.url)
if not file.filename:
raise ValueError("Empty filename")

# Create temporary file
filename = secure_filename(file.filename)
upload_dir = Path(current_app.config["UPLOAD_FOLDER"])
upload_dir.mkdir(exist_ok=True)
temp_file = upload_dir / f"{upload_id}_{filename}"

# Save uploaded file temporarily
file.save(temp_file)
current_app.logger.info(f"Saved temp file: {temp_file}")

# Update status
current_app.config[f"upload_status_{upload_id}"] = {
"status": "processing",
"step": "validating",
"progress": 10
}

# Process document
doc_service = DocumentService()
chunks = doc_service.process_document(str(temp_file))

if not chunks:
raise ValueError("No content could be extracted from document")

# Return success response
return jsonify({
"status": "success",
"message": "Document processed successfully",
"redirect": url_for("main.chat")
})

except Exception as e:
current_app.logger.error(f"Unexpected error: {str(e)}")
flash("An unexpected error occurred", "error")
return redirect(request.url)
current_app.logger.error(
f"Upload failed - ID: {upload_id}", exc_info=True)
error_msg = str(
e) if current_app.debug else "Document processing failed"
return jsonify({
"status": "error",
"message": error_msg
}), 400

finally:
# Clean up temporary file
if temp_file and os.path.exists(temp_file):
os.unlink(temp_file)

# GET request handling
project_id = get_project_id()
Expand Down
2 changes: 1 addition & 1 deletion src/app/services/vertex_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Any
import tempfile
import os
import uuid

from google.cloud import storage
from vertexai.preview import rag
Expand Down
35 changes: 35 additions & 0 deletions src/app/templates/upload.html
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,13 @@ <h3 class="text-lg leading-6 font-medium text-gray-900 text-center" id="processS
errorDiv.classList.remove('hidden');
}

/**
 * Display a success message to the user.
 *
 * Writes the text into the element with id "success-message" and then
 * removes its "hidden" class.
 *
 * @param {string} message - Text to place in the success element.
 */
function showSuccess(message) {
    const banner = document.getElementById('success-message');
    banner.textContent = message;
    banner.classList.remove('hidden');
}

// Cancel upload
document.getElementById('cancelUpload').addEventListener('click', function() {
if (xhr && xhr.readyState !== 4) {
Expand Down Expand Up @@ -346,5 +353,33 @@ <h3 class="text-lg leading-6 font-medium text-gray-900 text-center" id="processS
console.error('Error:', error);
}
});

/**
 * POST the form data to the upload endpoint and surface the outcome.
 *
 * On a JSON payload with status "success" the message is shown and, when a
 * redirect URL is present, the browser navigates to it. On status "error",
 * a non-OK HTTP response, or a network failure, the error is shown via
 * showError. Nothing is returned and nothing is rethrown.
 *
 * @param {FormData} formData - Body sent with the POST request.
 * @returns {Promise<void>}
 */
async function uploadFile(formData) {
    try {
        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        // Error pages from a proxy or an unhandled server failure are often
        // HTML or empty. Parse defensively so the user sees the HTTP failure
        // rather than a JSON syntax error.
        let result = null;
        try {
            result = await response.json();
        } catch (parseError) {
            result = null;
        }

        if (!response.ok) {
            const serverMessage = result && result.message;
            throw new Error(serverMessage || `server responded with HTTP ${response.status}`);
        }

        if (!result) {
            // A 2xx response without a JSON body is not a usable outcome.
            throw new Error('server returned an unreadable response');
        }

        if (result.status === 'error') {
            showError(result.message);
        } else if (result.status === 'success') {
            showSuccess(result.message);
            if (result.redirect) {
                window.location.href = result.redirect;
            }
        }
    } catch (error) {
        showError(`Upload failed: ${error.message}`);
        console.error('Upload error:', error);
    }
}
</script>
{% endblock %}

0 comments on commit d5fa361

Please sign in to comment.