-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathloadData.py
84 lines (68 loc) · 2.81 KB
/
loadData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import quandl
import os
import pandas as pd
import pickle
import bs4 as bs
import requests
#To get API key create an account on Quandl website.Obtaining API Key lets you make more than 50 API calls in a day
quandl.ApiConfig.api_key='3up2RunMM6af9-CP8E6c'
startdate="2012-11-22"
enddate="2018-11-22"
def nifty_50_list():
resp = requests.get('https://en.wikipedia.org/wiki/NIFTY_50')
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'},'tbody')
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[1].text
# print(f"ticker{ticker}")
tickers.append(ticker)
with open("nifty50_list.pickle","wb") as f:
pickle.dump(tickers,f)
tickers.append('BAJAJ_AUTO')#Adding it manually since ticker name obtained from Wikipedia contains a hypen whereas quandl code expects an underscore
tickers.append('MM')#Adding it manually since quandl code is different than the ticker symbol obtained from Wiki which is M&M
tickers.append('NIFTY_50')#Fetching data for NIFTY50 index whose price we want to predict
tickers.remove('VEDL')
tickers.remove('UPL')
tickers.remove('IBULHSGFIN')
return tickers
#function to scrap NIFTY50 list from WIKI only if not already obtained
def get_nifty50_list(scrap=False):
if scrap:
tickers=nifty_50_list()
else:
with open("nifty50_list.pickle","rb") as f:
tickers=pickle.load(f)
return tickers
#function to fetch stock prices from Quandl and then storing them to avoid making duplicate calls to Quandl API
def getStockdataFromQuandl(ticker):
quandl_code="NSE/"+ticker
try:
if not os.path.exists(f'stock_data/{ticker}.csv'):
data=quandl.get(quandl_code,start_date=startdate,end_date=enddate)
data.to_csv(f'stock_data/{ticker}.csv')
else:
print(f"stock data for {ticker} already exists")
except quandl.errors.quandl_error.NotFoundError as e:
print(ticker)
print(str(e))
# getStockdataFromQuandl('INFY')
def load():
tickers=get_nifty50_list(True)
df=pd.DataFrame()
for ticker in tickers:
getStockdataFromQuandl(ticker)
try:
data=pd.read_csv(f'stock_data/{ticker}.csv')
if(ticker == "NIFTY_50"):
data.rename(columns={'Close':f"{ticker}_Close",'Shares Traded':f"{ticker}_Volume"},inplace=True)
else:
data.rename(columns={'Close':f"{ticker}_Close",'Total Trade Quantity':f"{ticker}_Volume"},inplace=True)
df=pd.concat([df,data[f'{ticker}_Volume'],data[f'{ticker}_Close']],axis=1)
except Exception as e:
print(f"couldn't find {ticker}")
print(str(e))
df.to_csv('nifty50_closingprices.csv')
df.dropna(inplace=True)
return df
# load()