-
Notifications
You must be signed in to change notification settings - Fork 65
/
Copy pathparsexml.py
56 lines (33 loc) · 1018 Bytes
/
parsexml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import csv
import requests
import xml.etree.ElementTree as ET
def loadRSS():
    """Download the Hindustan Times top-news RSS feed to 'topnewsfeed.xml'.

    The response body is written unmodified, in binary mode, so the XML
    parser later sees exactly the bytes the server sent.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = 'http://www.hindustantimes.com/rss/topnews/rssfeed.xml'
    # Bound the wait: requests applies no timeout by default and would
    # otherwise block indefinitely on an unresponsive server.
    resp = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as the feed.
    resp.raise_for_status()
    with open('topnewsfeed.xml', 'wb') as f:
        f.write(resp.content)
def parseXML(xmlfile):
    """Parse an RSS document and return its items as a list of dicts.

    Args:
        xmlfile: Path to (or open file object of) the RSS XML document.

    Returns:
        One dict per ``./channel/item`` element. Each child element is
        stored under its tag name with its text encoded as UTF-8 bytes;
        a child without text is stored as ``b''``. A Yahoo Media RSS
        ``content`` child is stored under the key ``'media'`` and holds
        the element's ``url`` attribute (a ``str``).

    Raises:
        xml.etree.ElementTree.ParseError: if the document is not
            well-formed XML.
        KeyError: if a media ``content`` element has no ``url`` attribute.
    """
    tree = ET.parse(xmlfile)
    root = tree.getroot()
    newsitems = []
    for item in root.findall('./channel/item'):
        news = {}
        for child in item:
            if child.tag == '{http://search.yahoo.com/mrss/}content':
                # The media element carries its payload in an attribute,
                # not in its text.
                news['media'] = child.attrib['url']
            else:
                # Element.text is None for empty elements such as
                # <description/>; fall back to '' so encoding cannot fail.
                news[child.tag] = (child.text or '').encode('utf8')
        newsitems.append(news)
    return newsitems
def savetoCSV(newsitems, filename):
    """Write news items to a CSV file with a fixed set of columns.

    Args:
        newsitems: Iterable of dicts keyed by column name. Values may be
            ``str`` or UTF-8 encoded ``bytes``; bytes are decoded so the
            file contains the text itself rather than a ``b'...'`` literal.
            Columns missing from an item are written as empty cells.
        filename: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: if an item contains a key that is not one of the
            columns (the ``csv.DictWriter`` default).
    """
    fields = ['guid', 'title', 'pubDate', 'description', 'link', 'media']

    def _as_text(value):
        # Decode UTF-8 bytes to text; pass every other value through as is.
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    # newline='' lets the csv module control line endings (otherwise extra
    # blank rows appear on Windows); UTF-8 keeps non-ASCII headlines
    # writable regardless of the platform's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(
            {key: _as_text(value) for key, value in item.items()}
            for item in newsitems
        )
def main():
    """Run the pipeline: download the feed, parse it, export it as CSV."""
    # Step 1: fetch the feed; loadRSS() saves it as 'topnewsfeed.xml'.
    loadRSS()
    # Steps 2 and 3: parse the saved XML and write the items straight
    # to 'topnews.csv'.
    savetoCSV(parseXML('topnewsfeed.xml'), 'topnews.csv')
# Run the pipeline only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()