-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpoemBot.py
67 lines (56 loc) · 2.07 KB
/
poemBot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
# main.py
from __future__ import print_function
import subprocess
import time
import socket
import csv
import textwrap
import random
import requests
from bs4 import BeautifulSoup
# from lib.bs4 import BeautifulSoup
from markupsafe import Markup
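# Load up all poems from CSV
# poem CSV is structured with the columns: number,title,author,poem,book
# goldenTreasuryPoems.csv was parsed from the text of Project Gutenberg EBook #19221
# http://www.gutenberg.org/ebooks/19221
# the poem column contains the full text of the poem with no markup, only \n
# the CSV is in PC437 encoding since the printer only supports this character set
# NOTE(review): the code below opens poembot_poems_2020.csv, and getURL() hands
# column index 3 to requests.get() as a URL, so the file name and column layout
# described above may be out of date - confirm against the current CSV.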
# Read every row of the poem CSV into memory once, when the module is loaded.
with open("poembot_poems_2020.csv") as csvPoems:
    allPoems = []
    # "," is csv.reader's default delimiter, so it is not passed explicitly.
    allPoems.extend(csv.reader(csvPoems))
def getURL():
    """Pick one row of ``allPoems`` at random and return its fourth field.

    Returns:
        The value stored at index 3 of the chosen row. getbs4Poems() passes
        this value to requests.get() as the page address.
    """
    return random.choice(allPoems)[3]
# Startup banner: emit the two status lines on stdout, one per line.
print("Hello!", "Ready to print", sep="\n")
def getbs4Poems(timeout=10):
    """Scrape one randomly chosen poem page and return its display fields.

    The page address comes from getURL(); the page is downloaded with
    requests and parsed with BeautifulSoup using the html5lib parser.

    Args:
        timeout: Seconds to wait for the server before requests gives up.
            Defaults to 10.

    Returns:
        list: ``[title, author, date, poem, url]``. Elements that were found
        on the page are ``Markup`` strings holding the matched HTML (for the
        title, the first child of the heading); ``url`` is a plain string.
        When the expected markup is missing, ``date`` falls back to ``" "``,
        ``author`` to a secondary lookup and then ``" "``, and ``title`` to
        an apology message.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            server does not answer within ``timeout``.
    """
    URL = str(getURL())
    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(URL, timeout=timeout)
    # If this line causes an error, run 'pip install html5lib' or install html5lib
    soup = BeautifulSoup(r.content, "html5lib")

    # find() returns None when nothing matches, so the tags themselves are
    # tested. Wrapping first (Markup(str(None)) is the text "None") made the
    # former `== None` fallbacks unreachable, and a missing <h1> raised
    # AttributeError on `.contents` before any fallback could run.
    poem_tag = soup.find("div", attrs={"class": "poem__body px-md-4 font-serif"})
    title_tag = soup.find("h1", attrs={"class": "card-title"})
    date_tag = soup.find("span", attrs={"class": "dates"})
    author_tag = soup.find("a", attrs={"itemprop": "author"})
    if author_tag is None:
        # card-subtitle
        # NOTE(review): "card-subtitle" looks like a CSS class rather than an
        # itemprop value - confirm against the page markup.
        author_tag = soup.find("a", attrs={"itemprop": "card-subtitle"})

    # Markup marks the scraped HTML as safe so Flask renders it as HTML
    # instead of escaping it; it does not sanitize anything.
    # NOTE(review): this trusts whatever HTML the remote page serves.
    poem = Markup(str(poem_tag))

    # handle poems whose URL has changed and can't scrape poem info
    if title_tag is not None and title_tag.contents:
        title = Markup(str(title_tag.contents[0]))
    else:
        title = "Sorry, this poem's URL has changed"
    date = Markup(str(date_tag)) if date_tag is not None else " "
    author = Markup(str(author_tag)) if author_tag is not None else " "
    url = str(URL)

    print("author", author)
    return [title, author, date, poem, url]