diff --git a/.gitignore b/.gitignore index 2b2e393..8597d3a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ mlruns mlartifacts/ mlruns/ +node_modules/ + # Distribution / packaging .Python diff --git a/Dockerfile b/Dockerfile index 76625ad..50d7b1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,43 +1,24 @@ -# Use official Python image as the base image +# Use the official Python image as a base image FROM python:3.10 -# Set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 -ENV FLASK_APP=app - -# Copy and install system dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - libpq-dev \ - curl \ - && rm -rf /var/lib/apt/lists/* - -# Install Node.js and npm for TailwindCSS -RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ - apt-get install -y nodejs && \ - apt-get clean && rm -rf /var/lib/apt/lists/* - -# Set the working directory +# Set the working directory in the container WORKDIR /app -# Copy project dependencies first to improve caching +# Copy the requirements file into the container COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r requirements.txt -# Copy Node dependencies and install them -COPY app/package.json . -RUN npm install +# Install the dependencies +RUN pip install --no-cache-dir -r requirements.txt -# Copy the rest of the project files -COPY app/. . +# Copy the entire application into the container +COPY . . -# Build CSS with Tailwind -RUN npm run create-css +# Set environment variables +ENV FLASK_APP=app +ENV FLASK_ENV=development -# Expose port 5000 for the Flask app +# Expose the port the app runs on EXPOSE 5000 -# Run the Flask app +# Command to run the application CMD ["flask", "run", "--host=0.0.0.0"] diff --git a/app/__init__.py b/app/__init__.py index de0472e..c703a71 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -2,7 +2,7 @@ from dotenv import load_dotenv from flask import Flask -from app import pages, transactions, data, predict +from app import pages, transactions, data, xgb_model from app.models import db from app.database import init_db_command from app.dashboard import ( @@ -35,7 +35,7 @@ def create_app(): app.register_blueprint(pages.bp) app.register_blueprint(transactions.bp) app.register_blueprint(data.bp) - app.register_blueprint(predict.bp) + app.register_blueprint(xgb_model.bp) # Initialize Dash apps and pass the `db` object for querying the database create_summary_dash_app(app, db) diff --git a/app/dashboard.py b/app/dashboard.py index f399765..0f5420b 100644 --- a/app/dashboard.py +++ b/app/dashboard.py @@ -1,7 +1,7 @@ import pandas as pd import plotly.express as px from dash import Dash, dcc, html, Input, Output -from sqlalchemy import create_engine, func +from sqlalchemy import func from app.models import Features def create_summary_dash_app(flask_app, db): @@ -101,7 +101,7 @@ def update_device_browser_analysis(_): ) device_browser_df = pd.DataFrame(device_browser_data, columns=['device_id', 'browser', 'fraud_cases']) - fig = px.bar(device_browser_df, x='device_id', y='fraud_cases', color='browser', title='Fraud by Device and Browser') + fig = px.bar(device_browser_df, x='fraud_cases', y='device_id', color='browser', title='Fraud by Device and Browser') return fig return dash_app diff --git a/app/data.py b/app/data.py index 5741ea4..5834462 100644 --- a/app/data.py +++ b/app/data.py @@ -3,31 +3,13 @@ import pandas as pd import matplotlib.pyplot as plt from dotenv import load_dotenv -from flask import Blueprint, render_template, jsonify +from flask import Blueprint, render_template from sqlalchemy import create_engine -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix -from sklearn.preprocessing import LabelEncoder -from imblearn.over_sampling import SMOTE -from xgboost import XGBClassifier -from sklearn import preprocessing -import mlflow -from mlflow import pyfunc -import xgboost as xgb -from dash import Dash, dcc, html, Input, Output -from flask import Blueprint -from dash import Dash -import dash_core_components as dcc -import dash_html_components as html load_dotenv() bp = Blueprint("data", __name__) -# Load the model -model = xgb.Booster() -model = model.load_model("model.xgb") - @bp.route("/data") def data(): # Retrieve database URL from environment variables @@ -59,56 +41,3 @@ def data(): features = df.to_dict(orient='records') return render_template('data/data.html', features=features) - -@bp.route('/result') -def result(): - database_url = os.getenv('DATABASE_URL') - if not database_url: - raise RuntimeError("DATABASE_URL environment variable not set.") - - engine = create_engine(database_url) - - # Fetch data from database - data = pd.read_sql("SELECT * FROM transactions ORDER BY ctid DESC LIMIT 1", engine) - - ID = data['user_id'] - - df = data - - # Initialize a LabelEncoder object - label_encoder = LabelEncoder() - - # Iterate through each column in the dataframe - for column in df.columns: - # Check if the column datatype is not numeric - if df[column].dtype not in ['int64', 'float64']: - # Fit label encoder and transform values - df[column] = label_encoder.fit_transform(df[column]) - - # Get the numeric columns - numeric_columns = df.select_dtypes(include=['number']).columns - - # Create the StandardScaler - transform = preprocessing.StandardScaler() - - # Fit the scaler on the data (calculate mean and standard deviation) - transform.fit(df[numeric_columns]) - - # Transform the data using the fitted transform and reassign it to X - df[numeric_columns] = transform.transform(df[numeric_columns]) - - df = xgb.DMatrix(df) - - prediction = None - - if model is None: - print("Model is not loaded. Please check the loading process.") - else: - prediction = model.predict(df) - - - # Make prediction - #prediction = model.predict(df) - print("Prediction: ", prediction) - - return render_template('data/result.html', prediction=prediction, ID=ID) diff --git a/app/model.xgb b/app/model.xgb deleted file mode 100644 index cb1d13e..0000000 Binary files a/app/model.xgb and /dev/null differ diff --git a/app/models.py b/app/models.py index 7530800..8de6ad7 100644 --- a/app/models.py +++ b/app/models.py @@ -33,7 +33,7 @@ class Features(db.Model): sex = db.Column(db.String) age = db.Column(db.Integer) ip_address = db.Column(db.String) - class_ = db.Column('class', db.Integer) # Fraud class: 0 = non-fraud, 1 = fraud + class_ = db.Column('class', db.Integer) lower_bound_ip_address = db.Column(db.BigInteger) upper_bound_ip_address = db.Column(db.BigInteger) country = db.Column(db.String) diff --git a/app/predict.py b/app/predict.py deleted file mode 100644 index 80bcf85..0000000 --- a/app/predict.py +++ /dev/null @@ -1,36 +0,0 @@ -from flask import Blueprint, render_template, send_file, request, jsonify -import matplotlib.pyplot as plt -from app.models import db, Transaction -import io -import pandas as pd -import pickle - -from datetime import datetime, timezone -import pickle -from sklearn.preprocessing import LabelEncoder -import pandas as pd - - -bp = Blueprint("predict", __name__) - - -@bp.route('/plot.png') -def plot_png(): - fig = create_figure() - output = io.BytesIO() - fig.savefig(output, format='png') - output.seek(0) - return send_file(output, mimetype='image/png') - -def create_figure(): - # Sample data - categories = ['TransactionID', 'Category', 'Amount', 'AnomalyScore', 'Timestamp'] - values = [30, 30, 15, 10, 20] - - fig, ax = plt.subplots() - ax.bar(categories, values) - ax.set_xlabel('Features') - ax.set_ylabel('Importance (%)') - ax.set_title('Feature Importance Visualization') - - return fig diff --git a/app/schema.sql b/app/schema.sql index c753413..edfcbea 100644 --- a/app/schema.sql +++ b/app/schema.sql @@ -1,17 +1,35 @@ DROP TABLE IF EXISTS transactions; CREATE TABLE transactions ( - user_id INT PRIMARY KEY, -- user_id as the unique identifier - signup_time TIMESTAMP NOT NULL, -- Signup time as timestamp - purchase_time TIMESTAMP NOT NULL, -- Purchase time as timestamp - purchase_value FLOAT NOT NULL, -- Purchase value as a float - device_id VARCHAR NOT NULL, -- Device ID as a string - source VARCHAR NOT NULL, -- Source as a string (e.g., app, web) - browser VARCHAR NOT NULL, -- Browser as a string - sex VARCHAR NOT NULL, -- Sex as a string - age INT NOT NULL, -- Age as integer - ip_address FLOAT NOT NULL, -- IP address as a float - lower_bound_ip_address FLOAT NOT NULL, -- Lower bound of IP address - upper_bound_ip_address FLOAT NOT NULL, -- Upper bound of IP address - country VARCHAR NOT NULL -- Country as a string + user_id INT PRIMARY KEY, + signup_time TIMESTAMP NOT NULL, + purchase_time TIMESTAMP NOT NULL, + purchase_value FLOAT NOT NULL, + device_id VARCHAR NOT NULL, + source VARCHAR NOT NULL, + browser VARCHAR NOT NULL, + sex VARCHAR NOT NULL, + age INT NOT NULL, + ip_address FLOAT NOT NULL, + lower_bound_ip_address FLOAT NOT NULL, + upper_bound_ip_address FLOAT NOT NULL, + country VARCHAR NOT NULL + +DROP TABLE IF EXISTS features; + +CREATE TABLE features ( + user_id SERIAL PRIMARY KEY, + signup_time TIMESTAMP, + purchase_time TIMESTAMP, + purchase_value FLOAT, + device_id VARCHAR, + source VARCHAR, + browser VARCHAR, + sex VARCHAR, + age INTEGER, + ip_address VARCHAR, + class INTEGER, + lower_bound_ip_address BIGINT, + upper_bound_ip_address BIGINT, + country VARCHAR ); diff --git a/app/static/css/main.css b/app/static/css/main.css index a55d0b6..1354ed0 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -616,6 +616,11 @@ video { position: relative; } +.inset-x-0 { + left: 0px; + right: 0px; +} + .start-0 { inset-inline-start: 0px; } @@ -628,6 +633,14 @@ video { top: 0.75rem; } +.bottom-0 { + bottom: 0px; +} + +.end-0 { + inset-inline-end: 0px; +} + .-z-10 { z-index: -10; } @@ -636,6 +649,10 @@ video { z-index: 0; } +.z-10 { + z-index: 10; +} + .z-20 { z-index: 20; } @@ -670,6 +687,10 @@ video { margin-bottom: 1.5rem; } +.ms-3 { + margin-inline-start: 0.75rem; +} + .mt-4 { margin-top: 1rem; } @@ -702,36 +723,40 @@ video { height: 2.5rem; } +.h-2\.5 { + height: 0.625rem; +} + .h-5 { height: 1.25rem; } -.h-32 { - height: 8rem; +.h-full { + height: 100%; } -.h-8 { - height: 2rem; +.h-screen { + height: 100vh; } .w-10 { width: 2.5rem; } -.w-5 { - width: 1.25rem; +.w-2\.5 { + width: 0.625rem; } -.w-full { - width: 100%; +.w-44 { + width: 11rem; } -.w-32 { - width: 8rem; +.w-5 { + width: 1.25rem; } -.w-8 { - width: 2rem; +.w-full { + width: 100%; } .min-w-full { @@ -815,6 +840,11 @@ video { border-bottom-width: calc(1px * var(--tw-divide-y-reverse)); } +.divide-gray-100 > :not([hidden]) ~ :not([hidden]) { + --tw-divide-opacity: 1; + border-color: rgb(243 244 246 / var(--tw-divide-opacity)); +} + .divide-gray-200 > :not([hidden]) ~ :not([hidden]) { --tw-divide-opacity: 1; border-color: rgb(229 231 235 / var(--tw-divide-opacity)); @@ -825,10 +855,6 @@ video { border-color: rgb(55 65 81 / var(--tw-divide-opacity)); } -.self-center { - align-self: center; -} - .overflow-hidden { overflow: hidden; } @@ -837,10 +863,6 @@ video { overflow-x: auto; } -.whitespace-nowrap { - white-space: nowrap; -} - .rounded { border-radius: 0.25rem; } @@ -913,6 +935,11 @@ video { background-color: rgb(17 45 50 / var(--tw-bg-opacity)); } +.bg-dg-dark-grey { + --tw-bg-opacity: 1; + background-color: rgb(79 74 65 / var(--tw-bg-opacity)); +} + .bg-dg-light-blue { --tw-bg-opacity: 1; background-color: rgb(37 78 88 / var(--tw-bg-opacity)); @@ -937,11 +964,6 @@ video { background-color: rgb(255 255 255 / var(--tw-bg-opacity)); } -.bg-dg-dark-grey { - --tw-bg-opacity: 1; - background-color: rgb(79 74 65 / var(--tw-bg-opacity)); -} - .p-10 { padding: 2.5rem; } @@ -979,6 +1001,11 @@ video { padding-right: 12rem; } +.px-5 { + padding-left: 1.25rem; + padding-right: 1.25rem; +} + .py-2 { padding-top: 0.5rem; padding-bottom: 0.5rem; @@ -1028,11 +1055,6 @@ video { font-family: Montserrat, sans-serif; } -.text-2xl { - font-size: 1.5rem; - line-height: 2rem; -} - .text-4xl { font-size: 2.25rem; line-height: 2.5rem; @@ -1069,10 +1091,6 @@ video { font-weight: 400; } -.font-semibold { - font-weight: 600; -} - .leading-none { line-height: 1; } @@ -1081,9 +1099,9 @@ video { letter-spacing: -0.025em; } -.text-blue-400 { +.text-gray-100 { --tw-text-opacity: 1; - color: rgb(96 165 250 / var(--tw-text-opacity)); + color: rgb(243 244 246 / var(--tw-text-opacity)); } .text-gray-500 { @@ -1101,11 +1119,6 @@ video { color: rgb(255 255 255 / var(--tw-text-opacity)); } -.text-gray-100 { - --tw-text-opacity: 1; - color: rgb(243 244 246 / var(--tw-text-opacity)); -} - .shadow { --tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1); --tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color); @@ -1143,11 +1156,26 @@ video { background-color: rgb(29 78 216 / var(--tw-bg-opacity)); } +.hover\:bg-blue-800:hover { + --tw-bg-opacity: 1; + background-color: rgb(30 64 175 / var(--tw-bg-opacity)); +} + +.hover\:bg-dg-light-blue:hover { + --tw-bg-opacity: 1; + background-color: rgb(37 78 88 / var(--tw-bg-opacity)); +} + .hover\:bg-gray-100:hover { --tw-bg-opacity: 1; background-color: rgb(243 244 246 / var(--tw-bg-opacity)); } +.hover\:bg-dg-dark-grey:hover { + --tw-bg-opacity: 1; + background-color: rgb(79 74 65 / var(--tw-bg-opacity)); +} + .hover\:underline:hover { text-decoration-line: underline; } diff --git a/app/templates/_footer.html b/app/templates/_footer.html index 51fb527..56ec08c 100644 --- a/app/templates/_footer.html +++ b/app/templates/_footer.html @@ -1,6 +1,6 @@ -