add force arg for crawler.
terryding77 committed Jul 30, 2016
1 parent 64c52e5 commit 762370b
Showing 2 changed files with 27 additions and 7 deletions.
fund_net_value_crawler.py (22 changes: 18 additions & 4 deletions)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 from urllib2 import urlopen
+import os
 import json
 import argparse
 
@@ -12,7 +13,13 @@ def get_data(url):
     return urlopen(url).read()
 
 
-def get_net_value(fund_number, begin_date='', end_date=''):
+def get_net_value(fund_number, begin_date='', end_date='', force=False):
+    if os.path.isfile('./%s.csv' % fund_number) and not force:
+        with open('./%s.csv' % fund_number, 'r') as f:
+            data = [[t.strip() for t in l.split(',')] for l in f.readlines()]
+        titles = data[0]
+        x = sorted([dict(zip(titles, net_value)) for net_value in data[1:]], key=lambda x: x['fbrq'])
+        return x
     url_base = 'http://stock.finance.sina.com.cn/fundInfo/api/openapi.php/CaihuiFundInfoService.getNav'
     args = {
         'symbol': fund_number,
@@ -32,7 +39,7 @@ def get_net_value(fund_number, begin_date='', end_date=''):
         data = get_data(get_url(url_base, args)).decode('gbk')
         data = json.loads(data)['result']['data']['data']
         net_values += data
-    print("\n".join([",".join(["%s=%s" % (k, v) for k, v in net_value.items()]) for net_value in net_values]))
+    # print("\n".join([",".join(["%s=%s" % (k, v) for k, v in net_value.items()]) for net_value in net_values]))
     titles = sorted(list(set([k for l in net_values for k in l])))
     with open("./%s.csv" % fund_number, 'w') as f:
         f.write(", ".join(titles))
@@ -43,7 +50,14 @@ def get_net_value(fund_number, begin_date='', end_date=''):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-n', "--fund_number", required=True, type=str)
+    parser.add_argument('-n', "--fund_number", type=str)
+    parser.add_argument("all")
     args = parser.parse_args()
-    get_net_value(args.fund_number)
+    if args.all:
+        from funds_crawler import crawler_all_fund
+        funds = crawler_all_fund(force=True)
+        for fund in funds:
+            get_net_value(fund_number=fund['symbol'], force=True)
+    else:
+        get_net_value(args.fund_number, force=True)
 
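
Both files gate the network crawl behind the same cache-or-fetch pattern: if the CSV written by a previous run exists and force is false, the function parses it back into a list of dicts instead of re-hitting the Sina API. A minimal standalone sketch of that pattern, assuming the CSV layout the crawler writes (a title row followed by data rows; the load_cached_csv helper name is mine, not from the commit):

#!/usr/bin/env python
# Sketch of the cache-or-fetch pattern introduced by the new `force` argument.
# `load_cached_csv` is a hypothetical helper, not part of the committed code.
import os


def load_cached_csv(path, sort_key):
    """Return rows of a previously written CSV as dicts sorted by sort_key, or None."""
    if not os.path.isfile(path):
        return None
    with open(path, 'r') as f:
        rows = [[cell.strip() for cell in line.split(',')] for line in f]
    titles = rows[0]  # the first line of the CSV holds the column names
    return sorted([dict(zip(titles, row)) for row in rows[1:]],
                  key=lambda row: row[sort_key])


def get_net_value_cached(fund_number, force=False):
    cached = None if force else load_cached_csv('./%s.csv' % fund_number, 'fbrq')
    if cached is not None:
        return cached  # cache hit: skip the network round trip entirely
    # ...otherwise fall through to the real crawl, as get_net_value() does above.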

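One argparse detail in the new __main__ block: parser.add_argument("all") declares a required positional, so the committed script must always be given a value for it (for example python fund_net_value_crawler.py 1 -n 110011), and args.all is truthy for any non-empty string, which makes the else branch reachable only by passing an empty string. If an on/off switch was the intent, an alternative sketch, not the committed code, would use an optional flag:

# Hypothetical variant: `--all` as an optional boolean flag, so that
# `-n FUND_NUMBER` keeps working on its own.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-n', '--fund_number', type=str)
parser.add_argument('--all', action='store_true',
                    help='re-crawl every fund instead of a single one')
args = parser.parse_args()
# args.all is False unless --all is passed explicitly.
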
funds_crawler.py (12 changes: 9 additions & 3 deletions)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import sys
 from urllib2 import urlopen
+import os
 import re
 import json
 
@@ -20,7 +21,12 @@ def get_data(url):
     return urlopen(url).read()
 
 
-def crawler_all_fund():
+def crawler_all_fund(force=False):
+    if os.path.isfile('./funds.csv') and not force:
+        with open('./funds.csv', 'r') as f:
+            data = [[t.strip() for t in l.split(',')] for l in f.readlines()]
+        titles = data[0]
+        return sorted([dict(zip(titles, fund)) for fund in data[1:]], key=lambda x: x['symbol'])
     url_base = 'http://vip.stock.finance.sina.com.cn/fund_center/data/jsonp.php/IO/NetValue_Service.getNetValueOpen'
     args = {
         'page': 0,
@@ -50,12 +56,12 @@ def crawler_all_fund():
         f.write(", ".join(titles))
         f.write('\n')
         for l in funds:
-            print(", ".join(["%s = %s" % (k,v) for k, v in l.items()]))
+            print(", ".join(["%s = %s" % (k, v) for k, v in l.items()]))
             f.write(", ".join([unicode(l.get(k, "")).encode("gbk") for k in titles]))
             f.write('\n')
     return funds
 
 
 if __name__ == '__main__':
     print("start all fund crawler.")
-    crawler_all_fund()
+    crawler_all_fund(force=True)
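
With force threaded through crawler_all_fund() as well, callers can choose between the cached fund list and a fresh crawl. A short usage sketch, assuming a prior run has already written funds.csv:

# Usage sketch (assumes funds.csv exists from an earlier run).
from funds_crawler import crawler_all_fund

funds = crawler_all_fund()             # force=False: read funds.csv from disk
fresh = crawler_all_fund(force=True)   # ignore any cache and re-crawl Sina

for fund in funds[:3]:
    print(fund['symbol'])              # entries are dicts keyed by the CSV titles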
