diff --git a/.gitignore b/.gitignore
index 9fa4cb0..9383775 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ dist
dist-ssr
*.local
.env
+*.csv
# Editor directories and files
.vscode/*
diff --git a/python/data_science_demo.ipynb b/python/data_science_demo.ipynb
deleted file mode 100644
index 43317dd..0000000
--- a/python/data_science_demo.ipynb
+++ /dev/null
@@ -1,571 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Initialize imports\n",
- "import pickle\n",
- "import pandas as pd\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import accuracy_score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load the diabetes dataset from the URL\n",
- "url = \"https://classfiles.blob.core.windows.net/edp/diabetes.csv\"\n",
- "data = pd.read_csv(url)\n",
- "\n",
- "y = data['Outcome']\n",
- "X = data[['Glucose']]\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Split the data into training and testing data\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
- ],
- "text/plain": [
- "LogisticRegression()"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Train the model\n",
- "model = LogisticRegression()\n",
- "model.fit(X_train, y_train)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Create a prediction\n",
- "prediction = model.predict(X_test)\n",
- "\n",
- "#display(prediction)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.7532467532467533"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Display the accuracy of our prediction\n",
- "accuracy = accuracy_score(y_test, prediction)\n",
- "display(accuracy)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Preserve regression model on disk\n",
- "filename = \"edp_logistic_regression_demo_model.pkl\"\n",
- "with open(filename, 'wb') as file:\n",
- " pickle.dump(model, file)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Retrieve regression model from disk\n",
- "with open(filename, 'rb') as file:\n",
- " model = pickle.load(file)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([0, 1, 1, 0])"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Make a prediction with new data\n",
- "patient_data = [141, 142, 185, 100]\n",
- "patient_df = pd.DataFrame(patient_data, columns=[\"Glucose\"])\n",
- "patient_prediction = model.predict(patient_df)\n",
- "display(patient_prediction)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}