Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
jackturner83 committed Mar 22, 2024
1 parent 9d3e5a7 commit 98bb930
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 3 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
44 changes: 44 additions & 0 deletions metricdata/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to create line graph and save it for each file
def create_line_graph(file_path, output_dir=None):
    """Plot the 'Prediction' and 'Actual' columns of a CSV file and save a PNG.

    The CSV file must provide the columns 'Date', 'Prediction' and 'Actual'.
    The image is named '<CSV base name>_line_graph.png'.

    Args:
        file_path: Path of the CSV file to read.
        output_dir: Optional directory to write the image into. When omitted
            (the default) the image name is used as-is, i.e. it is written
            relative to the current working directory.

    Returns:
        The path that was passed to ``plt.savefig``.
    """
    # Read CSV file; 'Date' is parsed so it is plotted as dates, not strings
    df = pd.read_csv(file_path, parse_dates=['Date'])

    # Extract data before opening a figure, so a missing column fails early
    date = df['Date']
    prediction = df['Prediction']
    actual = df['Actual']

    # The CSV base name (no directory, no extension) labels title and image
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    image_name = base_name + '_line_graph.png'
    if output_dir is None:
        output_path = image_name
    else:
        output_path = os.path.join(output_dir, image_name)

    figure = plt.figure(figsize=(10, 6))
    try:
        # Plot line graph
        plt.plot(date, prediction, label='Prediction', marker='o')
        plt.plot(date, actual, label='Actual', marker='o')

        # Add title and labels
        plt.title(f'{base_name} - Prediction vs Actual')
        plt.xlabel('Date')
        plt.ylabel('Value')

        # Rotate x-axis labels for better readability
        plt.xticks(rotation=45)

        # Add legend
        plt.legend()

        # bbox_inches='tight' makes the rotated dates fit in the picture
        plt.savefig(output_path, bbox_inches='tight')
    finally:
        # Close the figure even when plotting or saving fails; otherwise it
        # stays registered in pyplot while further files are processed
        plt.close(figure)

    return output_path

# Directory path containing CSV files
directory_path = 'metricdata'

# Create one line graph per CSV file found in the directory
for entry in os.listdir(directory_path):
    if not entry.endswith('.csv'):
        continue
    create_line_graph(os.path.join(directory_path, entry))
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
# Import necessary libraries
import pandas as pd
import numpy as np
import os
from math import sqrt
from sklearn.metrics import r2_score, explained_variance_score as ev_score
import matplotlib.pyplot as plt

# Function to calculate performance metrics
def calculate_metrics(data):
# Define column names for actual and predicted values
actual_col = 'Actual'
predicted_col = 'Prediction'

# Extract actual and predicted values from the data
actual = data[actual_col]
predicted = data[predicted_col]

# Calculate various performance metrics
metrics = {}
metrics['MAE'] = mean_absolute_error(actual, predicted)
metrics['RMSE'] = root_mean_squared_error(actual, predicted)
Expand All @@ -18,51 +25,69 @@ def calculate_metrics(data):
metrics['Median_Absolute_Error'] = median_absolute_error(actual, predicted)
return metrics

# Function to calculate Mean Absolute Error (MAE)
def mean_absolute_error(actual, predicted):
    """Return the mean of the element-wise absolute differences.

    Args:
        actual: Observed values; must support element-wise subtraction
            with ``predicted`` (e.g. a NumPy array or a pandas Series).
        predicted: Predicted values.

    Returns:
        ``np.mean`` of ``|actual - predicted|``.
    """
    absolute_errors = np.abs(actual - predicted)
    return np.mean(absolute_errors)

# Function to calculate Root Mean Squared Error (RMSE)
def root_mean_squared_error(actual, predicted):
    """Return the square root of ``mean_squared_error(actual, predicted)``.

    Args:
        actual: Observed values.
        predicted: Predicted values.

    Returns:
        The result of ``math.sqrt`` applied to the mean squared error.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# Function to calculate R-squared value
def r_squared(actual, predicted):
    """Return the R-squared score computed by scikit-learn's ``r2_score``.

    Args:
        actual: Observed values, passed as the first argument of ``r2_score``.
        predicted: Predicted values, passed as the second argument.

    Returns:
        Whatever ``r2_score(actual, predicted)`` returns.
    """
    score = r2_score(actual, predicted)
    return score

# Function to calculate Median Absolute Error
def median_absolute_error(actual, predicted):
    """Return the median of the element-wise absolute differences.

    Args:
        actual: Observed values; must support element-wise subtraction
            with ``predicted`` (e.g. a NumPy array or a pandas Series).
        predicted: Predicted values.

    Returns:
        ``np.median`` of ``|actual - predicted|``.
    """
    absolute_errors = np.abs(actual - predicted)
    return np.median(absolute_errors)

# Function to calculate Mean Squared Error (MSE)
def mean_squared_error(actual, predicted):
    """Return the mean of the element-wise squared differences.

    Args:
        actual: Observed values; must support element-wise subtraction
            with ``predicted`` (e.g. a NumPy array or a pandas Series).
        predicted: Predicted values.

    Returns:
        ``np.mean`` of ``(actual - predicted) ** 2``.
    """
    residuals = actual - predicted
    return np.mean(residuals ** 2)

# Function to load data from CSV files in a directory
def load_data(data_dir):
    """Read every '.csv' file directly inside ``data_dir`` into a DataFrame.

    Args:
        data_dir: Directory whose entries are listed with ``os.listdir``.

    Returns:
        A dict mapping each CSV file name without its extension (used as the
        ticker symbol) to the DataFrame returned by ``pd.read_csv``.
    """
    csv_names = (name for name in os.listdir(data_dir) if name.endswith('.csv'))
    # The ticker symbol is the file name with its extension stripped
    return {
        os.path.splitext(name)[0]: pd.read_csv(os.path.join(data_dir, name))
        for name in csv_names
    }

# Function to analyze performance metrics for all files in a directory
def analyze_all_files(directory_path):
    """Compute the performance metrics of every CSV file in a directory.

    Args:
        directory_path: Directory handed to ``load_data``.

    Returns:
        A dict mapping each ticker returned by ``load_data`` to the result of
        ``calculate_metrics`` for that ticker's DataFrame.
    """
    frames_by_ticker = load_data(directory_path)
    return {
        ticker: calculate_metrics(frame)
        for ticker, frame in frames_by_ticker.items()
    }

# Function to plot performance metrics for each ticker
def plot_metrics(metrics_results):
    """Display a bar chart of the performance metrics for each ticker.

    Args:
        metrics_results: Dict of per-ticker metric dicts; it is converted
            with ``pd.DataFrame(metrics_results).T`` before plotting.
    """
    # Transpose so the outer keys (tickers) become the rows of the table
    per_ticker = pd.DataFrame(metrics_results).T
    per_ticker.plot(kind='bar', figsize=(12, 8))

    # Title, axis labels and tick rotation
    plt.title('Performance Metrics for Each Ticker')
    plt.xlabel('Ticker')
    plt.ylabel('Metric Value')
    plt.xticks(rotation=45)

    # Anchor the legend's upper-left corner at the axes' upper-right corner
    plt.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0))
    plt.tight_layout()

    # Show plot
    plt.show()

# Directory path containing CSV files
directory_path = 'metricdata'

# Analyze performance metrics for all files in the directory
all_metrics = analyze_all_files(directory_path)

# Print metrics results
print(all_metrics)

# Plot performance metrics
plot_metrics(all_metrics)

0 comments on commit 98bb930

Please sign in to comment.