Skip to content

Commit

Permalink
Correctly implemented dbs and data
Browse files Browse the repository at this point in the history
  • Loading branch information
jackturner83 committed Feb 29, 2024
1 parent a429f62 commit a7c9419
Show file tree
Hide file tree
Showing 10 changed files with 333 additions and 28 deletions.
File renamed without changes.
31 changes: 31 additions & 0 deletions data/avg_sent_score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sqlite3
import pandas as pd

# Join daily stock prices (SQLite) with daily average sentiment scores (CSV)
# on (symbol, date) and write the result to combined_data.csv.

# Read daily average sentiment scores from CSV
sentiment_data = pd.read_csv('data/daily_average_sentiment_scores.csv')

# The sentiment CSV labels rows by company name (e.g. "amazon") while the
# stock_prices table is keyed by ticker (e.g. "AMZN"). Without translating
# one into the other the inner merge below matches nothing.
symbol_mapping = {
    'AMAZON': 'AMZN',
    'APPLE': 'AAPL',
    'GOOGLE': 'GOOGL',
    'MICROSOFT': 'MSFT',
    'TESLA': 'TSLA',
}

# Upper-case first so the lookup is case-insensitive. replace() leaves values
# that are not in the mapping untouched, so rows already labelled with a
# ticker keep working.
sentiment_data['symbol'] = (
    sentiment_data['symbol'].str.upper().replace(symbol_mapping)
)

# Query to fetch stock prices data from the database
query = """
SELECT "Date", "Open", "High", "Low", "Volume", "symbol"
FROM stock_prices
"""

# Fetch stock prices data into a DataFrame. The connection is closed in a
# finally block so it is released even when the query fails (for example
# when the stock_prices table does not exist yet).
conn_prices = sqlite3.connect('data/stock_prices.db')
try:
    stock_prices_data = pd.read_sql_query(query, conn_prices)
finally:
    conn_prices.close()

# Parse both date columns so the join compares real timestamps rather than
# strings. NOTE(review): the stored format of "Date" in the database is not
# visible from this script; parsing makes the join independent of it.
sentiment_data['date'] = pd.to_datetime(sentiment_data['date'])
stock_prices_data['Date'] = pd.to_datetime(stock_prices_data['Date'])

# Merge sentiment data with stock prices data based on symbol and date.
# The inner join keeps only days that have both a price row and a score.
combined_data = pd.merge(
    stock_prices_data,
    sentiment_data,
    how='inner',
    left_on=['symbol', 'Date'],
    right_on=['symbol', 'date'],
)

# Drop the duplicate date column
combined_data.drop(columns=['date'], inplace=True)

# An empty result almost always means the join keys did not line up.
if combined_data.empty:
    print("Warning: no rows matched on symbol and date; the output file will be empty")

# Save the combined data to a CSV file
combined_data.to_csv('combined_data.csv', index=False)

print("Combined data saved to combined_data.csv")
19 changes: 0 additions & 19 deletions data/combine_data.py

This file was deleted.

105 changes: 105 additions & 0 deletions data/combined_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
symbol,Date,Open,High,Low,Volume,avg_sentiment_score
AAPL,2024-01-30,190.94000244140625,191.8000030517578,187.47000122070312,55859400,0.1718614718614718
AAPL,2024-01-31,187.0399932861328,187.10000610351562,184.35000610351562,55467800,0.125
AAPL,2024-02-01,183.99000549316406,186.9499969482422,183.82000732421875,64885400,0.0916666666666666
AAPL,2024-02-02,179.86000061035156,187.3300018310547,179.25,102518000,0.1151515151515151
AAPL,2024-02-05,188.14999389648438,189.25,185.83999633789062,69668800,-0.1
AAPL,2024-02-06,186.86000061035156,189.30999755859375,186.77000427246094,43490800,0.053030303030303
AAPL,2024-02-07,190.63999938964844,191.0500030517578,188.61000061035156,53439000,0.2337121212121212
AAPL,2024-02-08,189.38999938964844,189.5399932861328,187.35000610351562,40962000,0.2194444444444444
AAPL,2024-02-09,188.64999389648438,189.99000549316406,188.0,45155200,-0.1333333333333333
AAPL,2024-02-12,188.4199981689453,188.6699981689453,186.7899932861328,41781900,0.0446969696969697
AAPL,2024-02-13,185.77000427246094,186.2100067138672,183.50999450683594,56529500,0.1886904761904762
AAPL,2024-02-14,185.32000732421875,185.52999877929688,182.44000244140625,54630500,0.1836363636363636
AAPL,2024-02-15,183.5500030517578,184.49000549316406,181.35000610351562,65434500,0.0249999999999999
AAPL,2024-02-16,183.4199981689453,184.85000610351562,181.6699981689453,49701400,0.2583333333333333
AAPL,2024-02-20,181.7899932861328,182.42999267578125,180.0,53665600,0.0
AAPL,2024-02-21,181.94000244140625,182.88999938964844,180.66000366210938,41529700,0.3181818181818182
AAPL,2024-02-22,183.47999572753906,184.9600067138672,182.4600067138672,52292200,-0.0666666666666666
AAPL,2024-02-23,185.00999450683594,185.0399932861328,182.22999572753906,45074500,0.2291666666666666
AAPL,2024-02-26,182.24000549316406,182.75999450683594,180.64999389648438,40867400,0.0
AAPL,2024-02-27,181.10000610351562,183.9199981689453,179.55999755859375,54318900,-0.05
AAPL,2024-02-28,182.50999450683594,183.1199951171875,180.1300048828125,48894200,-0.04
GOOGL,2024-01-30,152.8000030517578,153.6199951171875,151.19000244140625,36331800,-0.05
GOOGL,2024-01-31,143.6199951171875,144.0,139.8699951171875,71910000,0.1393939393939394
GOOGL,2024-02-01,142.1199951171875,143.05999755859375,140.7899932861328,40466500,-0.0238095238095238
GOOGL,2024-02-02,139.25999450683594,142.6199951171875,136.5,62470600,0.1960227272727272
GOOGL,2024-02-05,142.82000732421875,145.47000122070312,142.77999877929688,38505400,0.1666666666666666
GOOGL,2024-02-06,144.64999389648438,145.36000061035156,143.19000244140625,29128200,0.0
GOOGL,2024-02-07,144.75999450683594,145.6199951171875,143.92999267578125,25208900,0.109090909090909
GOOGL,2024-02-08,145.8300018310547,146.3300018310547,145.10000610351562,22563800,0.2382440476190476
GOOGL,2024-02-09,146.67999267578125,149.44000244140625,146.17999267578125,26829500,0.0
GOOGL,2024-02-12,148.4199981689453,149.33999633789062,147.3699951171875,21564100,-0.0159090909090909
GOOGL,2024-02-13,144.9199981689453,146.6699981689453,143.69000244140625,27837700,0.0681818181818181
GOOGL,2024-02-14,146.0800018310547,146.52000427246094,144.08999633789062,22704200,-0.0291666666666666
GOOGL,2024-02-15,143.13999938964844,143.52000427246094,140.4600067138672,37590700,0.0795454545454545
GOOGL,2024-02-16,142.99000549316406,143.19000244140625,140.13999938964844,31451100,0.35
GOOGL,2024-02-20,139.66000366210938,142.0800018310547,139.49000549316406,25144700,0.0
GOOGL,2024-02-21,141.4499969482422,142.69000244140625,140.67999267578125,23315700,0.0409090909090909
GOOGL,2024-02-22,144.92999267578125,145.0,142.8000030517578,27191900,0.0729365079365079
GOOGL,2024-02-23,143.6699981689453,144.67999267578125,143.42999267578125,19475800,-0.0561728395061728
GOOGL,2024-02-26,142.13999938964844,142.44000244140625,137.38999938964844,53641800,0.0
GOOGL,2024-02-27,138.02000427246094,139.25,137.08999633789062,33099200,0.05
GOOGL,2024-02-28,137.89999389648438,138.00999450683594,135.41000366210938,37278600,-0.075
AMZN,2024-01-30,160.6999969482422,161.72999572753906,158.49000549316406,45207400,0.0
AMZN,2024-01-31,157.0,159.00999450683594,154.80999755859375,50284400,0.2239583333333333
AMZN,2024-02-01,155.8699951171875,159.75999450683594,155.6199951171875,76542400,0.1805194805194805
AMZN,2024-02-02,169.19000244140625,172.5,167.3300018310547,117154900,0.0642676767676767
AMZN,2024-02-05,170.1999969482422,170.5500030517578,167.6999969482422,55081300,0.4375
AMZN,2024-02-06,169.38999938964844,170.7100067138672,167.64999389648438,42505500,0.142547928262214
AMZN,2024-02-07,169.47999572753906,170.8800048828125,168.94000244140625,47174100,0.1339285714285714
AMZN,2024-02-08,169.64999389648438,171.42999267578125,168.8800048828125,42316500,0.2380952380952381
AMZN,2024-02-09,170.89999389648438,175.0,170.5800018310547,56986000,0.2222222222222222
AMZN,2024-02-12,174.8000030517578,175.38999938964844,171.5399932861328,51050400,0.0
AMZN,2024-02-13,167.72999572753906,170.9499969482422,165.75,56345100,0.0437777777777777
AMZN,2024-02-14,169.2100067138672,171.2100067138672,168.27999877929688,42815500,0.0
AMZN,2024-02-15,170.5800018310547,171.1699981689453,167.58999633789062,49855200,0.2628968253968254
AMZN,2024-02-16,168.74000549316406,170.4199981689453,167.1699981689453,48074600,0.4166666666666667
AMZN,2024-02-20,167.8300018310547,168.7100067138672,165.74000549316406,41980300,-0.0777777777777777
AMZN,2024-02-21,168.94000244140625,170.22999572753906,167.13999938964844,44575600,0.3090909090909091
AMZN,2024-02-22,173.10000610351562,174.8000030517578,171.77000427246094,55392400,0.3767195767195767
AMZN,2024-02-23,174.27999877929688,175.75,173.6999969482422,59662900,0.2644444444444444
AMZN,2024-02-26,175.6999969482422,176.3699951171875,174.25999450683594,44368600,0.1999999999999999
AMZN,2024-02-27,174.0800018310547,174.6199951171875,172.86000061035156,31141700,0.5
AMZN,2024-02-28,172.44000244140625,174.0500030517578,172.27000427246094,28093900,0.0482142857142857
MSFT,2024-01-30,412.260009765625,413.04998779296875,406.45001220703125,33477600,-0.0166666666666666
MSFT,2024-01-31,406.9599914550781,415.32000732421875,397.2099914550781,47871100,-0.0012396694214876
MSFT,2024-02-01,401.8299865722656,408.0,401.79998779296875,30657700,0.2727272727272727
MSFT,2024-02-02,403.80999755859375,412.6499938964844,403.55999755859375,28245000,0.2
MSFT,2024-02-05,409.8999938964844,411.1600036621094,403.989990234375,25352300,0.0
MSFT,2024-02-06,405.8800048828125,407.9700012207031,402.9100036621094,18382600,0.0
MSFT,2024-02-07,407.44000244140625,414.29998779296875,407.3999938964844,22340500,0.1727272727272727
MSFT,2024-02-08,414.04998779296875,415.55999755859375,412.5299987792969,21225300,0.0698412698412698
MSFT,2024-02-09,415.25,420.82000732421875,415.0899963378906,22032800,0.1036796536796536
MSFT,2024-02-12,420.55999755859375,420.739990234375,414.75,21202900,0.2020833333333333
MSFT,2024-02-13,404.94000244140625,410.07000732421875,403.3900146484375,27824900,-0.0263636363636363
MSFT,2024-02-14,408.07000732421875,409.8399963378906,404.57000732421875,20401200,0.135
MSFT,2024-02-15,408.1400146484375,409.1300048828125,404.2900085449219,21825500,0.0189393939393939
MSFT,2024-02-16,407.9599914550781,408.2900085449219,403.44000244140625,22281100,0.0
MSFT,2024-02-20,403.239990234375,404.489990234375,398.010009765625,24307900,0.0
MSFT,2024-02-21,400.1700134277344,402.2900085449219,397.2200012207031,18631100,-0.1
MSFT,2024-02-22,410.19000244140625,412.8299865722656,408.57000732421875,27009900,0.0
MSFT,2024-02-26,411.4599914550781,412.1600036621094,407.3599853515625,16193500,-0.0467532467532467
MSFT,2024-02-27,407.989990234375,408.32000732421875,403.8500061035156,14835800,-0.0272727272727272
MSFT,2024-02-28,408.17999267578125,409.29998779296875,405.32000732421875,13159700,0.0
TSLA,2024-01-30,195.3300018310547,196.36000061035156,190.61000061035156,109982300,0.0
TSLA,2024-01-31,187.0,193.97000122070312,185.85000610351562,103221400,0.0593181818181818
TSLA,2024-02-01,188.5,189.8800048828125,184.27999877929688,91843300,0.1714285714285714
TSLA,2024-02-02,185.0399932861328,188.69000244140625,182.0,110505100,0.0317155067155067
TSLA,2024-02-05,184.25999450683594,184.67999267578125,175.00999450683594,134294400,0.1875
TSLA,2024-02-06,177.2100067138672,186.49000549316406,177.11000061035156,122676000,-0.2375
TSLA,2024-02-07,188.17999267578125,189.7899932861328,182.67999267578125,111535200,0.0083333333333333
TSLA,2024-02-08,189.0,191.6199951171875,185.5800018310547,83034000,0.2
TSLA,2024-02-09,190.17999267578125,194.1199951171875,189.47999572753906,84476300,-0.0416666666666666
TSLA,2024-02-12,192.11000061035156,194.72999572753906,187.27999877929688,95498600,0.1166666666666666
TSLA,2024-02-13,183.99000549316406,187.25999450683594,182.11000061035156,86759500,0.2
TSLA,2024-02-14,185.3000030517578,188.88999938964844,183.35000610351562,81203000,0.06
TSLA,2024-02-15,189.16000366210938,200.8800048828125,188.86000061035156,120831800,-0.0337752525252525
TSLA,2024-02-16,202.05999755859375,203.1699981689453,197.39999389648438,111173600,0.225
TSLA,2024-02-20,196.1300048828125,198.60000610351562,189.1300048828125,104545800,0.25
TSLA,2024-02-21,193.36000061035156,199.44000244140625,191.9499969482422,103844000,-0.0625
TSLA,2024-02-22,194.0,198.32000732421875,191.36000061035156,92739500,0.3125
TSLA,2024-02-23,195.30999755859375,197.57000732421875,191.5,78670300,0.0
TSLA,2024-02-26,192.2899932861328,201.77999877929688,192.0,111747100,0.0
TSLA,2024-02-27,204.0399932861328,205.60000610351562,198.25999450683594,108645400,-0.046875
TSLA,2024-02-28,200.4199981689453,205.3000030517578,198.44000244140625,99588500,0.0136363636363636
59 changes: 59 additions & 0 deletions data/combined_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pandas as pd
import sqlite3
from datetime import datetime

# Join daily stock prices (SQLite) with daily average sentiment scores (CSV)
# on (symbol, date) and write the result to data/combined_data.csv.

# Read daily average sentiment scores from CSV
sentiment_data = pd.read_csv('data/daily_average_sentiment_scores.csv')

# Mapping dictionary to map symbols in sentiment data to match the symbols in stock prices data
symbol_mapping = {
    'AMAZON': 'AMZN',
    'APPLE': 'AAPL',
    'GOOGLE': 'GOOGL',
    'MICROSOFT': 'MSFT',
    'TESLA': 'TSLA',
}

# Convert symbol to uppercase so the mapping lookup is case-insensitive
sentiment_symbols = sentiment_data['symbol'].str.upper()

# Report names the mapping does not know about instead of silently turning
# them into NaN, which is what Series.map() would do.
unmapped_symbols = sorted(
    set(sentiment_symbols.dropna())
    - set(symbol_mapping)
    - set(symbol_mapping.values())
)
if unmapped_symbols:
    print("Warning: no ticker mapping for sentiment symbols:", unmapped_symbols)

# Translate company names to tickers. replace() leaves values that are not
# in the mapping untouched, so rows already labelled with a ticker still join.
sentiment_data['symbol'] = sentiment_symbols.replace(symbol_mapping)

# Query to fetch stock prices data from the database
query = """
SELECT "symbol", "Date", "Open", "High", "Low", "Volume"
FROM stock_prices
"""

# Fetch stock prices data into a DataFrame. The connection is opened only
# around the query and closed in a finally block so it is released even
# when the query fails.
conn_prices = sqlite3.connect('data/stock_prices.db')
try:
    stock_prices_data = pd.read_sql_query(query, conn_prices)
finally:
    conn_prices.close()

# Convert both date columns to datetime so the join compares timestamps
# rather than differently formatted strings
sentiment_data['date'] = pd.to_datetime(sentiment_data['date'])
stock_prices_data['Date'] = pd.to_datetime(stock_prices_data['Date'])

# Print unique symbols in both datasets
print("Unique symbols in stock prices data:", stock_prices_data['symbol'].unique())
print("Unique symbols in sentiment data:", sentiment_data['symbol'].unique())

# Print data types of the 'Date' columns in both datasets
print("Data type of 'Date' column in stock prices data:", stock_prices_data['Date'].dtype)
print("Data type of 'date' column in sentiment data:", sentiment_data['date'].dtype)

# Merge sentiment data with stock prices data based on symbol and date.
# The inner join keeps only days that have both a price row and a score.
combined_data = pd.merge(
    stock_prices_data,
    sentiment_data,
    how='inner',
    left_on=['symbol', 'Date'],
    right_on=['symbol', 'date'],
)

# Drop the duplicate date column
combined_data.drop(columns=['date'], inplace=True)

# Save the combined data to a CSV file
combined_data.to_csv('data/combined_data.csv', index=False)

# Name the path that was actually written
print("\nCombined data saved to data/combined_data.csv")
132 changes: 132 additions & 0 deletions data/daily_average_sentiment_scores.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
symbol,date,avg_sentiment_score
amazon,2024-01-30,0.0
amazon,2024-01-31,0.22395833333333334
amazon,2024-02-01,0.1805194805194805
amazon,2024-02-02,0.06426767676767677
amazon,2024-02-03,0.3181818181818182
amazon,2024-02-04,0.18333333333333335
amazon,2024-02-05,0.4375
amazon,2024-02-06,0.142547928262214
amazon,2024-02-07,0.13392857142857142
amazon,2024-02-08,0.2380952380952381
amazon,2024-02-09,0.2222222222222222
amazon,2024-02-10,0.3
amazon,2024-02-11,0.011111111111111115
amazon,2024-02-12,0.0
amazon,2024-02-13,0.043777777777777784
amazon,2024-02-14,0.0
amazon,2024-02-15,0.2628968253968254
amazon,2024-02-16,0.4166666666666667
amazon,2024-02-17,0.28409090909090906
amazon,2024-02-19,0.1
amazon,2024-02-20,-0.07777777777777779
amazon,2024-02-21,0.3090909090909091
amazon,2024-02-22,0.37671957671957673
amazon,2024-02-23,0.2644444444444444
amazon,2024-02-25,0.0
amazon,2024-02-26,0.19999999999999998
amazon,2024-02-27,0.5
amazon,2024-02-28,0.04821428571428571
apple,2024-01-30,0.17186147186147188
apple,2024-01-31,0.125
apple,2024-02-01,0.09166666666666667
apple,2024-02-02,0.11515151515151516
apple,2024-02-03,0.26666666666666666
apple,2024-02-04,0.0
apple,2024-02-05,-0.1
apple,2024-02-06,0.05303030303030303
apple,2024-02-07,0.2337121212121212
apple,2024-02-08,0.21944444444444444
apple,2024-02-09,-0.13333333333333333
apple,2024-02-10,0.07954545454545454
apple,2024-02-12,0.0446969696969697
apple,2024-02-13,0.1886904761904762
apple,2024-02-14,0.18363636363636365
apple,2024-02-15,0.024999999999999994
apple,2024-02-16,0.25833333333333336
apple,2024-02-17,-0.04444444444444446
apple,2024-02-18,0.4166666666666667
apple,2024-02-20,0.0
apple,2024-02-21,0.3181818181818182
apple,2024-02-22,-0.06666666666666667
apple,2024-02-23,0.22916666666666666
apple,2024-02-24,0.43333333333333335
apple,2024-02-26,0.0
apple,2024-02-27,-0.05
apple,2024-02-28,-0.04
google,2024-01-30,-0.05000000000000001
google,2024-01-31,0.1393939393939394
google,2024-02-01,-0.023809523809523808
google,2024-02-02,0.19602272727272727
google,2024-02-05,0.16666666666666666
google,2024-02-06,0.0
google,2024-02-07,0.10909090909090909
google,2024-02-08,0.2382440476190476
google,2024-02-09,0.0
google,2024-02-10,0.0
google,2024-02-12,-0.015909090909090914
google,2024-02-13,0.06818181818181818
google,2024-02-14,-0.02916666666666667
google,2024-02-15,0.07954545454545454
google,2024-02-16,0.35
google,2024-02-17,0.06818181818181818
google,2024-02-20,0.0
google,2024-02-21,0.04090909090909091
google,2024-02-22,0.07293650793650794
google,2024-02-23,-0.056172839506172835
google,2024-02-24,0.0
google,2024-02-26,0.0
google,2024-02-27,0.05
google,2024-02-28,-0.075
microsoft,2024-01-30,-0.016666666666666666
microsoft,2024-01-31,-0.0012396694214876062
microsoft,2024-02-01,0.2727272727272727
microsoft,2024-02-02,0.2
microsoft,2024-02-03,0.0
microsoft,2024-02-04,0.0
microsoft,2024-02-05,0.0
microsoft,2024-02-06,0.0
microsoft,2024-02-07,0.17272727272727273
microsoft,2024-02-08,0.06984126984126984
microsoft,2024-02-09,0.10367965367965366
microsoft,2024-02-10,0.5
microsoft,2024-02-12,0.20208333333333334
microsoft,2024-02-13,-0.026363636363636367
microsoft,2024-02-14,0.135
microsoft,2024-02-15,0.018939393939393936
microsoft,2024-02-16,0.0
microsoft,2024-02-17,0.0
microsoft,2024-02-19,-0.00833333333333334
microsoft,2024-02-20,0.0
microsoft,2024-02-21,-0.1
microsoft,2024-02-22,0.0
microsoft,2024-02-26,-0.046753246753246755
microsoft,2024-02-27,-0.02727272727272727
microsoft,2024-02-28,0.0
tesla,2024-01-30,0.0
tesla,2024-01-31,0.059318181818181825
tesla,2024-02-01,0.17142857142857143
tesla,2024-02-02,0.031715506715506714
tesla,2024-02-03,0.2
tesla,2024-02-04,-0.016666666666666663
tesla,2024-02-05,0.1875
tesla,2024-02-06,-0.23750000000000004
tesla,2024-02-07,0.008333333333333333
tesla,2024-02-08,0.2
tesla,2024-02-09,-0.04166666666666664
tesla,2024-02-10,0.0
tesla,2024-02-12,0.11666666666666667
tesla,2024-02-13,0.2
tesla,2024-02-14,0.06
tesla,2024-02-15,-0.03377525252525252
tesla,2024-02-16,0.225
tesla,2024-02-19,0.0
tesla,2024-02-20,0.25
tesla,2024-02-21,-0.0625
tesla,2024-02-22,0.3125
tesla,2024-02-23,0.0
tesla,2024-02-24,0.25
tesla,2024-02-25,0.330952380952381
tesla,2024-02-26,0.0
tesla,2024-02-27,-0.046875
tesla,2024-02-28,0.013636363636363636
Binary file modified data/stock_news.db
Binary file not shown.
Binary file modified data/stock_prices.db
Binary file not shown.
4 changes: 3 additions & 1 deletion src/fetch_historical_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def main(tickers):

# Iterate over ticker symbols
for ticker in tickers:
# Delete existing data for the ticker
conn.execute(f"DELETE FROM stock_prices WHERE symbol = '{ticker}'")

# Fetch stock prices
stock_prices = fetch_stock_prices(ticker)

Expand All @@ -45,7 +48,6 @@ def main(tickers):
# Close the database connection
conn.close()


if __name__ == "__main__":
# Ticker symbols you want to fetch historical prices for
ticker_symbols = ['AAPL', 'GOOGL', 'AMZN', 'MSFT', 'TSLA']
Expand Down
11 changes: 3 additions & 8 deletions src/test.py → src/multiple_ticker_sent.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,11 @@
conn = sqlite3.connect(database_path)
c = conn.cursor()

# Check if the 'articles' table exists
c.execute('''SELECT count(name) FROM sqlite_master WHERE type='table' AND name='articles' ''')
if c.fetchone()[0] == 0:
# If the 'articles' table does not exist, create it
c.execute('''CREATE TABLE articles
(symbol TEXT, title TEXT, date TEXT, sentiment_score REAL)''')
conn.commit()

# Iterate over ticker symbols
for symbol in ticker_symbols:
# Delete existing articles for the symbol
c.execute("DELETE FROM articles WHERE symbol = ?", (symbol,))

# Make a request to get everything related to a specific ticker symbol for the past month
response = api.get_everything(q=symbol, from_param=start_date_str, to=end_date_str)

Expand Down

0 comments on commit a7c9419

Please sign in to comment.