diff --git a/.gitignore b/.gitignore index 1242dbf..9625a2b 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,6 @@ uploads/ # Google Cloud *.json + + +app.log \ No newline at end of file diff --git a/README.md b/README.md index 6ec0059..3883c84 100644 --- a/README.md +++ b/README.md @@ -84,12 +84,13 @@ gcloud iam service-accounts keys create credentials.json --iam-account=vertex-ra - The service account needs both Vertex AI and Storage permissions to function properly - You can use more granular permissions instead of storage.admin if needed -## Usage +## Usage 1. Start the application: ```bash flask run +# FLASK_DEBUG=1 FLASK_APP=src/app flask run --debug ``` 2. Open http://localhost:5000 in your browser diff --git a/src/app/__init__.py b/src/app/__init__.py index 8a09c29..9a49b9b 100644 --- a/src/app/__init__.py +++ b/src/app/__init__.py @@ -8,10 +8,14 @@ def create_app(config_class=Config): app = Flask(__name__) app.config.from_object(config_class) - # Configure logging + # Enhanced logging configuration logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + level=logging.DEBUG if app.debug else logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(), + logging.FileHandler('app.log') + ] ) app.logger.setLevel(logging.INFO) diff --git a/src/app/routes/api.py b/src/app/routes/api.py index c7c6e46..d7ca639 100644 --- a/src/app/routes/api.py +++ b/src/app/routes/api.py @@ -1,4 +1,4 @@ -from flask import Blueprint, jsonify, request +from flask import Blueprint, jsonify, request, current_app from pydantic import BaseModel from app.services.vertex_service import RagConfig, VertexService @@ -23,13 +23,13 @@ def query(): try: config = RagConfig(project_id=data.project_id) service = VertexService(config) - + response = service.query( corpus=data.corpus_name, query=data.query, top_k=data.top_k ) - + return jsonify(response) except Exception as e: return jsonify({"error": str(e)}), 500 @@ -54,7 +54,7 @@ def list_files(corpus_name: str): def delete_document(corpus_name: str): project_id = request.args.get("project_id") document_id = request.args.get("document_id") - + if not all([project_id, document_id]): return jsonify({"error": "Missing required parameters"}), 400 @@ -64,4 +64,11 @@ def delete_document(corpus_name: str): service.delete_files(corpus_name, [document_id]) return jsonify({"success": True}) except Exception as e: - return jsonify({"error": str(e)}), 500 \ No newline at end of file + return jsonify({"error": str(e)}), 500 + + +@bp.route("/upload/status", methods=["GET"]) +def upload_status(): + upload_id = request.args.get("upload_id") + status = current_app.config.get(f"upload_status_{upload_id}", {}) + return jsonify(status) diff --git a/src/app/routes/main.py b/src/app/routes/main.py index 6a4e0a1..bffdb98 100644 --- a/src/app/routes/main.py +++ b/src/app/routes/main.py @@ -1,5 +1,6 @@ +import uuid +import os from pathlib import Path - from flask import ( Blueprint, current_app, flash, redirect, render_template, request, url_for, jsonify @@ -48,86 +49,67 @@ def create_new_bucket(): def upload(): """Handle document uploads and processing.""" if request.method == "POST": + upload_id = str(uuid.uuid4()) + temp_file = None + try: - upload_dir = Path(current_app.config["UPLOAD_FOLDER"]) - upload_dir.mkdir(mode=0o755, parents=True, exist_ok=True) + current_app.logger.info( + f"Upload request received - ID: {upload_id}") + current_app.logger.debug( + f"Request headers: {dict(request.headers)}") - # Check if file was submitted if "file" not in request.files: - current_app.logger.error("No file part in request") - flash("No file part", "error") - return redirect(request.url) - - # Validate bucket name - project_id = get_project_id() - bucket_name = request.form.get("bucket_name") - if not bucket_name: - current_app.logger.error("Missing bucket name") - flash("Bucket name is required", "error") - return redirect(request.url) + raise ValueError("No file uploaded") file = request.files["file"] - if file.filename == "": - current_app.logger.error("No selected file") - flash("No selected file", "error") - return redirect(request.url) - - if file and allowed_file(file.filename): - try: - current_app.config["LAST_BUCKET_NAME"] = bucket_name - filename = secure_filename(file.filename) - filepath = upload_dir / filename - - # Log file details - current_app.logger.info(f"Processing file: {filename}") - current_app.logger.info( - f"File size: {len(file.read())} bytes") - file.seek(0) # Reset file pointer - - file.save(filepath) - - # Process document - doc_service = DocumentService() - chunks = doc_service.process_document(filepath) - - if not chunks: - current_app.logger.error( - f"No content extracted from {filename}") - flash("No text content found in document", "error") - return redirect(request.url) - - # Create corpus and import chunks - config = RagConfig( - project_id=project_id, - bucket_name=bucket_name, - display_name=f"corpus_{filename}" - ) - service = VertexService(config) - corpus = service.create_corpus() - service.import_chunks(corpus, chunks) - - flash(f"Successfully processed document: { - filename}", "success") - return redirect(url_for("main.chat", corpus_name=corpus.name, project_id=project_id)) - - except Exception as e: - current_app.logger.error( - f"Upload processing error: {str(e)}") - flash(f"Error processing upload: {str(e)}", "error") - return redirect(request.url) - finally: - # Clean up uploaded file - if filepath.exists(): - filepath.unlink() - else: - current_app.logger.error(f"Invalid file type: {file.filename}") - flash("Invalid file type", "error") - return redirect(request.url) + if not file.filename: + raise ValueError("Empty filename") + + # Create temporary file + filename = secure_filename(file.filename) + upload_dir = Path(current_app.config["UPLOAD_FOLDER"]) + upload_dir.mkdir(exist_ok=True) + temp_file = upload_dir / f"{upload_id}_{filename}" + + # Save uploaded file temporarily + file.save(temp_file) + current_app.logger.info(f"Saved temp file: {temp_file}") + + # Update status + current_app.config[f"upload_status_{upload_id}"] = { + "status": "processing", + "step": "validating", + "progress": 10 + } + + # Process document + doc_service = DocumentService() + chunks = doc_service.process_document(str(temp_file)) + + if not chunks: + raise ValueError("No content could be extracted from document") + + # Return success response + return jsonify({ + "status": "success", + "message": "Document processed successfully", + "redirect": url_for("main.chat") + }) except Exception as e: - current_app.logger.error(f"Unexpected error: {str(e)}") - flash("An unexpected error occurred", "error") - return redirect(request.url) + current_app.logger.error( + f"Upload failed - ID: {upload_id}", exc_info=True) + error_msg = str( + e) if current_app.debug else "Document processing failed" + return jsonify({ + "status": "error", + "message": error_msg + }), 400 + + finally: + # Clean up temporary file + if temp_file and os.path.exists(temp_file): + os.unlink(temp_file) # GET request handling project_id = get_project_id() diff --git a/src/app/services/vertex_service.py b/src/app/services/vertex_service.py index f18921a..7123a57 100644 --- a/src/app/services/vertex_service.py +++ b/src/app/services/vertex_service.py @@ -1,9 +1,9 @@ +import uuid from dataclasses import dataclass from pathlib import Path from typing import List, Dict, Any import tempfile import os -import uuid from google.cloud import storage from vertexai.preview import rag diff --git a/src/app/templates/upload.html b/src/app/templates/upload.html index 0302f51..c3c0f36 100644 --- a/src/app/templates/upload.html +++ b/src/app/templates/upload.html @@ -290,6 +290,13 @@