-
Notifications
You must be signed in to change notification settings - Fork 94
/
Copy pathcrawler_allTickers.py
executable file
·57 lines (47 loc) · 1.58 KB
/
crawler_allTickers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/python
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import io
import sys
def preprocess(cap):
    """Convert a market-cap string such as ``"$1.5B"`` into a float.

    A leading/trailing whitespace and any ``$`` signs are removed first.
    A trailing ``K``, ``M``, ``B`` or ``T`` scales the number by
    10**3, 10**6, 10**9 or 10**12 respectively; a string without one of
    these suffixes is parsed as a plain number.

    Args:
        cap: The raw market-cap value, normally a string.

    Returns:
        The market cap as a float, or NaN when ``cap`` is not a string or
        contains nothing once whitespace and ``$`` are removed.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    # Power of ten associated with each magnitude suffix.
    exponents = {'K': 3, 'M': 6, 'B': 9, 'T': 12}

    # Non-string values (e.g. NaN from a missing CSV cell) are "missing".
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    if not isinstance(cap, str):
        return np.nan

    cap = cap.strip().replace('$', '')
    if not cap:
        # Nothing left to parse; indexing cap[-1] below would fail.
        return np.nan

    suffix = cap[-1]
    if suffix in exponents:
        return float(cap[:-1]) * float(pow(10, exponents[suffix]))
    return float(cap)
def getTickers(percent):
    """Download the NASDAQ/NYSE/AMEX company lists and save the largest ones.

    For each exchange the company CSV is downloaded and tagged with an
    ``Exchange`` column. The combined table is reduced to the columns
    ``Symbol``, ``Name``, ``Exchange`` and ``MarketCap``, rows with missing
    values are dropped, ``MarketCap`` is converted to a float with
    ``preprocess``, and only the companies whose market cap lies in the top
    ``percent`` percent are kept. The result is written to
    ``input/tickerList.csv`` (the ``input`` directory is created if needed).

    An exchange whose download or parsing fails is reported and skipped so
    the remaining exchanges are still processed.

    Args:
        percent: Share of companies to keep, ranked by market cap; must
            satisfy ``0 < percent <= 100`` (100 keeps every company).

    Returns:
        The filtered ``pandas.DataFrame`` that was written to disk.

    Raises:
        ValueError: If ``percent`` is outside ``(0, 100]``.
        RuntimeError: If no exchange list could be downloaded at all.
        KeyError: If the downloaded data lacks one of the expected columns.
    """
    import os  # local import: only needed here to create the output directory

    if not 0 < percent <= 100:
        raise ValueError('percent must be in the range (0, 100], got %r' % (percent,))

    frames = []
    for exchange in ["NASDAQ", "NYSE", "AMEX"]:
        url = "https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=" + exchange.lower() + "&render=download"
        try:
            # A timeout keeps a stalled connection from hanging the script;
            # raise_for_status stops an HTTP error page being parsed as CSV.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            frame = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
        except (requests.RequestException, UnicodeDecodeError,
                pd.errors.ParserError, pd.errors.EmptyDataError) as err:
            print('ERROR', exchange, err)
            continue
        frame['Exchange'] = exchange
        frames.append(frame)

    if not frames:
        raise RuntimeError('could not download a company list from any exchange')

    columns = ['Symbol', 'Name', 'Exchange', 'MarketCap']
    tot_data = pd.concat(frames, ignore_index=True)[columns]
    tot_data = tot_data.dropna().reset_index(drop=True)
    print('Number of Samples:', tot_data.shape[0])

    tot_data['MarketCap'] = tot_data['MarketCap'].apply(preprocess)
    # preprocess yields NaN for values it treats as missing; those rows
    # cannot be ranked, and NaN would poison the percentile below.
    tot_data = tot_data.dropna(subset=['MarketCap'])

    if not tot_data.empty:
        # Top `percent` percent == everything at or above the
        # (100 - percent)-th percentile of market cap.
        cutoff = np.percentile(tot_data['MarketCap'], 100 - percent)
        tot_data = tot_data[tot_data['MarketCap'] >= cutoff]
    tot_data = tot_data.reset_index(drop=True)

    os.makedirs('input', exist_ok=True)
    tot_data.to_csv('input/tickerList.csv')
    return tot_data
def main():
    """Command-line entry point: ``crawler_allTickers.py <percent>``.

    Reads the percentage from the first command-line argument, converts it
    to a float and passes it to ``getTickers``. Exits with a usage or error
    message (non-zero status) when the argument is missing or not numeric.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <percent>' % sys.argv[0])
    try:
        percent = float(sys.argv[1])
    except ValueError:
        sys.exit('percent must be a number, got %r' % (sys.argv[1],))
    getTickers(percent)  # keep the top N% market-cap companies


if __name__ == "__main__":
    main()