Create ecommerce

This Python data analysis project analyzes ecommerce sales to predict the yearly amount spent by customers based on features such as average session length, time on app, time on website, and length of membership. After loading and exploring the data, a linear regression model is built to examine the relationship between these features and yearly spending. The data is split into training and testing sets, and the model is trained on the training data. The model's accuracy is evaluated using metrics like Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE), with residuals analyzed to assess model performance.
bamiro · Aug 29, 2024 · 148ba3d · 148ba3d
1 parent 2071d84
commit 148ba3d
Showing 1 changed file with 44 additions and 0 deletions.
diff --git a/ecommerce b/ecommerce
@@ -0,0 +1,44 @@
+# Import the libraries
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+import math
+import pylab
+import scipy.stats as stats
+
+df = pd.read_csv("/Users/tikkybamiro/Documents/linear practice/ecommerce.csv")
+df.head()
+df.describe()
+
+lm = LinearRegression()
+
+X = df[['Avg. Session Length', 'Time on App', 'Time on Website', 'Length of Membership']]
+Y = df['Yearly Amount Spent']
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
+
+lm.fit(X_train, Y_train)
+
+cdf = pd.DataFrame(lm.coef_, X.columns, columns=['Coefficients'])
+cdf
+
+predictions = lm.predict(X_test)
+
+sns.scatterplot(x = predictions, y= Y_test)
+plt.xlabel("Predictions")
+
+print("Mean Absolute Error: ", mean_absolute_error(Y_test, predictions))
+print("Mean Squared Error: ", mean_squared_error(Y_test, predictions))
+print("RMSE: ", math.sqrt(mean_squared_error(Y_test, predictions)))
+
+residuals = Y_test - predictions
+
+sns.displot(residuals, bins=20, kde=True)
+plt.ylabel("Frequency")
+plt.xlabel("Residuals")
+
+stats.probplot(residuals, dist="norm", plot=pylab)
+pylab.show()