Skip to content

Commit

Permalink
Merge pull request #11 from th0ma7/fix-html.unescape
Browse files Browse the repository at this point in the history
zap2it: Multiple fixes and added tags
  • Loading branch information
daniel-widrick authored Jun 1, 2021
2 parents 03684f8 + f845c74 commit 008af85
Showing 1 changed file with 57 additions and 44 deletions.
101 changes: 57 additions & 44 deletions zap2it-GuideScrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,47 +20,45 @@

def sanitizeData(data):
    """Escape the five XML-reserved characters in *data*.

    Replaces ``&``, ``"``, ``'``, ``<`` and ``>`` with their predefined XML
    entities so the result can be embedded in element text or in a quoted
    attribute value.

    Args:
        data: The string to escape.

    Returns:
        The escaped string.
    """
    # https://stackoverflow.com/questions/1091945/what-characters-do-i-need-to-escape-in-xml-documents
    # The ampersand MUST be handled first: every other replacement emits an
    # '&', and escaping it afterwards would turn '&lt;' into '&amp;lt;'.
    replacements = (
        ('&', '&amp;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
    )
    for char, entity in replacements:
        data = data.replace(char, entity)
    return data

def buildXMLChannel(channel):
    """Build the XMLTV ``<channel>`` element for one channel.

    Reads ``channelId``, ``channelNo`` and ``callSign`` from *channel*
    (required), plus ``affiliateName`` and ``thumbnail`` (skipped when
    missing, ``None`` or empty).

    Args:
        channel: Mapping describing one channel.

    Returns:
        The tab-indented ``<channel>`` block as a string ending in a newline.

    Raises:
        KeyError: If one of the required keys is absent.
    """
    def xmlSafe(value):
        # Decode any HTML entities already present in the value, then
        # re-escape, so the output is valid XML whether or not the input
        # was pre-escaped ('A&E' and 'A&amp;E' both become 'A&amp;E').
        return html.escape(html.unescape(value), quote=True)

    channelNo = channel["channelNo"]
    callSign = channel["callSign"]

    lines = [
        "\t" + '<channel id="' + xmlSafe(channel["channelId"]) + '">',
        "\t\t" + '<display-name>' + xmlSafe(channelNo + " " + callSign) + '</display-name>',
        "\t\t" + '<display-name>' + xmlSafe(channelNo) + '</display-name>',
        "\t\t" + '<display-name>' + xmlSafe(callSign) + '</display-name>',
    ]

    affiliateName = channel.get("affiliateName")
    if affiliateName:
        # Decode before title-casing: title() would turn '&amp;' into
        # '&Amp;', which is no longer a recognised entity.
        titled = html.unescape(affiliateName).title()
        lines.append("\t\t" + '<display-name>' + xmlSafe(titled) + '</display-name>')

    thumbnail = channel.get("thumbnail")
    if thumbnail:
        # Drop the query string and prefix the scheme; the value is
        # presumably a protocol-relative URL ('//host/path?...') -- confirm.
        iconUrl = "http:" + thumbnail.partition('?')[0]
        lines.append("\t\t" + '<icon src="' + xmlSafe(iconUrl) + '" />')

    lines.append("\t" + '</channel>')
    return "\n".join(lines) + "\n"

def buildXMLProgram(event,channelId):
#2018-04-11T21:00:00Z
#20180408120000 +0000
xml = ""
xml = xml + ' <programme start="' + buildXMLDate(event["startTime"]) + '" '
xml = xml + 'stop="' + buildXMLDate(event["endTime"]) + '" channel="' + sanitizeData(channelId) + '">' + "\n"
xml = xml + ' <title lang="' + optLanguage + '">' + sanitizeData(event["program"]["title"]) + '</title>' + "\n"
season = "0"
episode = "0"

xml = xml + "\t" + '<programme start="' + buildXMLDate(event["startTime"]) + '" '
xml = xml + 'stop="' + buildXMLDate(event["endTime"]) + '" channel="' + html.unescape(channelId) + '">' + "\n"
xml = xml + "\t\t" + '<title lang="' + optLanguage + '">' + sanitizeData(event["program"]["title"]) + '</title>' + "\n"
if event["program"]["episodeTitle"] is not None:
xml = xml + ' <sub-title lang="' + optLanguage + '">' + sanitizeData(event["program"]["episodeTitle"]) + ' </sub-title>' + "\n"
xml = xml + "\t\t" + '<sub-title lang="' + optLanguage + '">' + sanitizeData(event["program"]["episodeTitle"]) + '</sub-title>' + "\n"
if event["program"]["shortDesc"] is None:
event["program"]["shortDesc"] = "Unavailable"
xml = xml + ' <desc lang="' + optLanguage + '">' + html.escape(event["program"]["shortDesc"]) + '</desc>' + "\n"
xml = xml + ' <length units="minutes">' + sanitizeData(event["duration"]) + '</length>' + "\n"
for category in event["filter"]:
xml = xml + ' <category>' + sanitizeData(category.replace('filter-','')) + '</category>' + "\n"
xml = xml + "\t\t" + '<desc lang="' + optLanguage + '">' + sanitizeData(event["program"]["shortDesc"]) + '</desc>' + "\n"
xml = xml + "\t\t" + '<length units="minutes">' + html.unescape(event["duration"]) + '</length>' + "\n"
if event["thumbnail"] is not None:
xml = xml + ' <thumbnail>http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg</thumbnail>' + "\n"
xml = xml + ' <icon src="http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg" />' + "\n"
if event["rating"] is not None:
xml = xml + ' <rating>' + "\n"
xml = xml + ' <value>' + event["rating"] + '</value>' + "\n"
xml = xml + ' </rating>' + "\n"
xml = xml + ' <subtitles type="teletext" />' + "\n"
season = "0"
episode = "0"
episodeid = ""
xml = xml + "\t\t" + '<thumbnail>http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg</thumbnail>' + "\n"
xml = xml + "\t\t" + '<icon src="http://zap2it.tmsimg.com/assets/' + event["thumbnail"] + '.jpg" />' + "\n"

xml = xml + "\t\t" + '<url>https://tvlistings.zap2it.com//overview.html?programSeriesId=' + event["seriesId"] + '&amp;tmsId=' + event["program"]["id"] + '</url>' + "\n"

try:
#if "season" in event:
Expand All @@ -69,26 +67,41 @@ def buildXMLProgram(event,channelId):
if event["program"]["episode"] is not None:
episode = str(event["program"]["episode"])

#if "id" in event:
if event["program"]["id"] is not None:
episodeid = str(event["program"]["id"])
except KeyError:
print("no season for:" + event["program"]["title"])

for category in event["filter"]:
xml = xml + "\t\t" + '<category lang="en">' + html.unescape(category.replace('filter-','')) + '</category>' + "\n"

#print season + "." + episode
if ((int(season) != 0) and (int(episode) != 0)):
if int(season) < 10:
season = "0" + str(season)
if int(episode) < 10:
episode = "0" + str(episode)
xml = xml + ' <episode-num system="SxxExx">S' + season + "E" + episode + "</episode-num>" + "\n"
xml = xml + ' <episode-num system="common">S' + season + "E" + episode + "</episode-num>" + "\n"

showid = event["seriesId"].replace('SH','')
episodeid = episodeid.replace('EP' + showid,'')
xml = xml + ' <episode-num system="dd_progid">EP' + sanitizeData(showid + '.' + episodeid) + '</episode-num>' + "\n"
xml = xml + "\t\t" + '<category lang="en">Series</category>' + "\n"
xml = xml + "\t\t" + '<episode-num system="common">S' + str(season).zfill(2) + "E" + str(episode).zfill(2) + "</episode-num>" + "\n"
xml = xml + "\t\t" + '<episode-num system="xmltv_ns">' + str(int(season) - 1) + "." + str(int(episode) - 1) + ".</episode-num>" + "\n"

if event["program"]["id"][-4:] == "0000":
xml = xml + "\t\t" + '<episode-num system="dd_progid">' + event["seriesId"] + '.' + event["program"]["id"][-4:] + '</episode-num>' + "\n"
else:
xml = xml + "\t\t" + '<episode-num system="dd_progid">' + event["seriesId"].replace('SH','EP') + '.' + event["program"]["id"][-4:] + '</episode-num>' + "\n"

for flag in event["flag"]:
if (flag == "New"):
xml = xml + "\t\t<new />\n"
elif (flag == "Finale"):
xml = xml + "\t\t<last-chance />\n"
elif (flag == "Premiere"):
xml = xml + "\t\t<premiere />\n"

for tag in event["tags"]:
if (tag == "CC"):
xml = xml + "\t\t" + '<subtitles type="teletext" />' + "\n"

if event["rating"] is not None:
xml = xml + "\t\t" + '<rating>' + "\n"
xml = xml + "\t\t\t" + '<value>' + event["rating"] + '</value>' + "\n"
xml = xml + "\t\t" + '</rating>' + "\n"

xml = xml + ' </programme>'+"\n"
xml = xml + "\t" + '</programme>'+"\n"
return xml

def buildXMLDate(inputDateString):
Expand Down Expand Up @@ -197,7 +210,7 @@ def buildXMLDate(inputDateString):
closestTimestamp = closestTimestamp + (60*60*3)

guideXML = '<?xml version="1.0" encoding="UTF-8"?>' + "\n"
guideXML = guideXML + '<!DOCTYPE tv SYSTEM "xmltv.dtd">' + "\n"
guideXML = guideXML + '<!DOCTYPE tv SYSTEM "xmltv.dtd">' + "\n\n"

guideXML = guideXML + '<tv source-info-url="http://tvlistings.zap2it.com/" source-info-name="zap2it.com" generator-info-name="zap2it-GuideScraping" generator-info-url="daniel@widrick.net">' + "\n"

Expand Down

0 comments on commit 008af85

Please sign in to comment.