# loadData.py
# Fetches daily price data for the NIFTY 50 constituents (and the NIFTY 50
# index itself) from Quandl and combines the closing prices and volumes
# into a single CSV file.
import quandl
import os
import pandas as pd
import pickle
import bs4 as bs
import requests
# Quandl API key. Create an account on the Quandl website to obtain one; a key
# lets you make more than 50 API calls in a day.
# SECURITY: a key committed to source control should be treated as leaked.
# Set the QUANDL_API_KEY environment variable to supply your own key; the
# literal below is kept only as a fallback so existing setups keep working.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'wsMvHhXo7XzCEwzGp_qa')
# Date range requested from Quandl, formatted yyyy-mm-dd.
startdate = "2012-11-22"
enddate = "2019-11-22"
def nifty_50_list():
    """Scrape the NIFTY 50 constituent symbols from Wikipedia.

    The symbols are taken from the second cell of every row (after the first)
    of the first ``wikitable sortable`` table on the page. They are then
    adjusted to the codes Quandl expects, cached in ``nifty50_list.pickle``
    and returned. The pickle holds exactly the list that is returned, so a
    later read of the cache yields the same Quandl-ready symbols.

    Returns:
        list[str]: symbols with everything from the first ``.`` onwards
        removed (``'INFY.NS'`` becomes ``'INFY'``), plus the manually added
        ``'BAJAJ_AUTO'``, ``'MM'`` and ``'NIFTY_50'`` codes, minus the
        excluded ``'VEDL'`` and ``'UPL'``.

    Raises:
        requests.HTTPError: if Wikipedia answers with an error status.
        ValueError: if the constituents table is not found on the page.
    """
    response = requests.get('https://en.wikipedia.org/wiki/NIFTY_50', timeout=30)
    response.raise_for_status()
    soup = bs.BeautifulSoup(response.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError("NIFTY 50 constituents table not found on the Wikipedia page")

    tickers = []
    # Skip the first row (presumably the header row of the table).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Rows without a second cell carry no symbol; ignore them.
        if len(cells) > 1:
            tickers.append(cells[1].text.strip())

    # Added manually: the ticker name obtained from Wikipedia contains a
    # hyphen whereas the Quandl code expects an underscore.
    tickers.append('BAJAJ_AUTO.NS')
    # Added manually: the Quandl code differs from the Wikipedia symbol (M&M).
    tickers.append('MM.NS')
    # The NIFTY 50 index itself, whose price we want to predict.
    tickers.append('NIFTY_50')

    # Keep only the part before the first '.', dropping the exchange suffix.
    corrected = [symbol.split('.')[0] for symbol in tickers]

    # Symbols deliberately left out (the original code does not say why).
    # Filtering on the corrected name works whether or not Wikipedia still
    # lists them, and whether or not it shows them with a suffix.
    excluded = {'VEDL', 'UPL'}
    corrected = [symbol for symbol in corrected if symbol not in excluded]

    with open("nifty50_list.pickle", "wb") as f:
        pickle.dump(corrected, f)
    return corrected
def get_nifty50_list(scrap=False):
    """Return the NIFTY 50 ticker list, scraping Wikipedia only when needed.

    Args:
        scrap: when true, always scrape a fresh list with ``nifty_50_list()``.
            When false (the default), read the list cached in
            ``nifty50_list.pickle`` and scrape only if that file is missing.

    Returns:
        list[str]: the ticker symbols.
    """
    if not scrap:
        try:
            with open("nifty50_list.pickle", "rb") as f:
                # NOTE: unpickling executes arbitrary code; only ever load a
                # cache file that this script wrote itself.
                return pickle.load(f)
        except FileNotFoundError:
            # Nothing cached yet: fall through and scrape instead of crashing.
            print("nifty50_list.pickle not found, scraping the list instead")
    return nifty_50_list()
def getStockdataFromQuandl(ticker):
    """Download one ticker's price history from Quandl unless already cached.

    The data set ``NSE/<ticker>`` is requested for the module-level
    ``startdate``..``enddate`` range and written to
    ``stock_data/<ticker>.csv``. If that file already exists, no API call is
    made, which avoids duplicate calls to the Quandl API.

    Args:
        ticker: Quandl symbol without the ``NSE/`` prefix.

    Returns:
        None. A ticker unknown to Quandl is reported on stdout and no file
        is written for it.
    """
    csv_path = f'stock_data/{ticker}.csv'
    if os.path.exists(csv_path):
        print(f"stock data for {ticker} already exists")
        return

    # Create the cache directory up front so a successful (rate-limited)
    # API call is never wasted on a failing write.
    os.makedirs('stock_data', exist_ok=True)
    try:
        data = quandl.get("NSE/" + ticker, start_date=startdate, end_date=enddate)
    except quandl.errors.quandl_error.NotFoundError as e:
        print(ticker)
        print(str(e))
        return
    data.to_csv(csv_path)
# getStockdataFromQuandl('INFY')
def load():
    """Build the combined close/volume table for all tickers and save it.

    For every ticker returned by ``get_nifty50_list(True)`` the price data is
    fetched (or reused from ``stock_data/``), and its closing price and
    traded volume are added as the columns ``<ticker>_Volume`` and
    ``<ticker>_Close``. Tickers whose CSV is missing or lacks the expected
    columns are reported on stdout and skipped.

    The combined table is written to ``nifty50_closingprices.csv`` before
    rows with missing values are dropped, so the file may still contain
    incomplete rows while the returned frame does not.

    Returns:
        pandas.DataFrame: the combined table without rows that contain a
        missing value.
    """
    tickers = get_nifty50_list(True)
    columns = []
    for ticker in tickers:
        getStockdataFromQuandl(ticker)
        # The index CSV names its volume column differently from the stocks.
        volume_column = 'Shares Traded' if ticker == "NIFTY_50" else 'Total Trade Quantity'
        try:
            data = pd.read_csv(f'stock_data/{ticker}.csv')
            volume = data[volume_column].rename(f"{ticker}_Volume")
            close = data['Close'].rename(f"{ticker}_Close")
        except (OSError, KeyError, ValueError) as e:
            # Missing file, missing column or unparsable CSV: skip the ticker.
            print(f"couldn't find {ticker}")
            print(str(e))
            continue
        columns.extend([volume, close])

    # Concatenate once instead of growing the frame inside the loop, which
    # re-copied every previously added column on each iteration.
    # NOTE(review): the CSVs are read without an index column, so rows are
    # aligned by position rather than by date - confirm that every file
    # covers the same trading days.
    df = pd.concat(columns, axis=1) if columns else pd.DataFrame()
    df.to_csv('nifty50_closingprices.csv')
    df.dropna(inplace=True)
    return df
# Runs the whole download-and-merge pipeline as soon as this module is
# executed or imported (there is no __main__ guard).
load()