-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrexburg_pass.py
145 lines (125 loc) · 4.86 KB
/
rexburg_pass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import requests
from bs4 import BeautifulSoup
import re
from datetime import datetime
def fetch_html(url, timeout=10):
    """Download the body of *url* with an HTTP GET.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is given, so without it a stalled
            connection would block the caller indefinitely.

    Returns:
        The response body as bytes, or ``None`` when the request fails
        (connection problem, timeout, or an HTTP error status). Failures are
        reported on stdout rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they take the failure path.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None
    return response.content
def parse_initial_parking_pass_info(html_content):
    """Extract the selectable parking pass options from the landing page.

    The page is expected to contain a ``<select name="inv_id">`` whose
    options read like ``"<coverage> (<count> ...)"``.

    Args:
        html_content: Markup of the landing page (bytes or str).

    Returns:
        A list of ``(coverage_dates, available, pass_id)`` tuples, one per
        option that carries a non-empty ``value``. ``available`` is the first
        word after the opening parenthesis, or ``''`` when the option text has
        no parenthesis. The list is empty when the select element is missing.
    """
    passes_info = []
    soup = BeautifulSoup(html_content, 'html.parser')

    select = soup.find('select', {'name': 'inv_id'})
    if select is None:
        # Page layout changed or an error page was served; report and bail
        # out instead of relying on an AttributeError further down.
        print("Failed to scrape City of Rexburg website: "
              "no <select name='inv_id'> element found")
        return passes_info

    for option in select.find_all('option'):
        # .get() tolerates <option> tags without a value attribute; those and
        # empty-valued placeholders are skipped.
        pass_id = option.get('value')
        if not pass_id:
            continue

        pass_text = option.text.strip()
        parts = pass_text.split('(')
        coverage_dates = parts[0].strip()
        # An option lacking "(...)" no longer aborts the whole scan; its
        # availability is simply reported as unknown.
        available = parts[1].split(' ')[0].strip() if len(parts) > 1 else ''
        passes_info.append((coverage_dates, available, pass_id))

    return passes_info
def fetch_pass_details(pass_id, timeout=10):
    """Request the detail page for one parking pass option.

    The payment form is POSTed back to itself with the chosen option in the
    ``inv_id`` field.

    Args:
        pass_id: Value of the selected ``<option>`` on the landing page.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is given, so without it a stalled
            connection would block the caller indefinitely.

    Returns:
        The response body as bytes, or ``None`` when the request fails
        (connection problem, timeout, or an HTTP error status). Failures are
        reported on stdout rather than raised.
    """
    url = "https://secure.xpressbillpay.com/portal/payment_forms/?id=MzYzNQ%3D%3D"
    payload = {
        'inv_id': pass_id
    }
    try:
        response = requests.post(url, data=payload, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they take the failure path.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching details for pass ID {pass_id}: {e}")
        return None
    return response.content
def clean_html(raw_html):
    """Reduce a chunk of markup to single-spaced plain text.

    Tags are removed, backslash-escaped byte sequences that show up when the
    input is the ``str()`` of a bytes object (the escaped UTF-8 non-breaking
    space and the escaped CR/LF pair) are turned into spaces, and every run of
    whitespace is collapsed to one space.

    Args:
        raw_html: Markup as a string.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # DOTALL lets a tag whose attributes wrap onto another line be removed;
    # without it "." stops at the newline and the tag is left in the text.
    text = re.sub(r'<.*?>', '', raw_html, flags=re.DOTALL)
    # Replace the literal escape sequences BEFORE collapsing whitespace so a
    # sequence sitting next to a real space does not leave a double space
    # behind (which would defeat single-space patterns applied afterwards).
    text = re.sub(r'\\xc2\\xa0|\\r\\n', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def parse_pass_details(html_content):
    """Extract cost and validity period from a pass detail page.

    The cost is read from the ``<strong>`` inside the first
    ``<td class="instruct">``. The validity period is located in the page
    text with two patterns:

    * ``Valid beginning <Month D> thru <Month D>, <YYYY>`` - the year is
      shared by both dates;
    * ``Valid from <Month D, YYYY> to <Month D, YYYY>``.

    Args:
        html_content: Markup of the detail page (bytes or str).

    Returns:
        A ``(cost, valid_from, valid_to)`` tuple of strings. Pieces that
        cannot be found are replaced by the placeholder texts
        ``"Cost information not found"`` /
        ``"Validity information not found"``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    cost = "Cost information not found"
    cost_tag = soup.find('td', class_='instruct')
    if cost_tag:
        # The cell may exist without a <strong>; treat that as "not found"
        # instead of crashing on None.text.
        strong_tag = cost_tag.find('strong')
        if strong_tag:
            cost = strong_tag.text.strip()

    # Search the parsed document's text rather than str(bytes): the latter is
    # the b'...' repr, in which line breaks and non-ASCII characters appear as
    # literal escape sequences and HTML entities stay undecoded.
    clean_content = clean_html(soup.get_text())

    # Define regex patterns for different validity formats
    semester_pattern = re.compile(r'Valid beginning (\w+ \d{1,2}) thru (\w+ \d{1,2}), (\d{4})')
    annual_pattern = re.compile(r'Valid from (\w+ \d{1,2}, \d{4}) to (\w+ \d{1,2}, \d{4})')

    semester_match = semester_pattern.search(clean_content)
    if semester_match:
        # Only one year is printed in this format; attach it to both dates.
        start, end, year = semester_match.groups()
        return cost, f"{start}, {year}", f"{end}, {year}"

    annual_match = annual_pattern.search(clean_content)
    if annual_match:
        return cost, annual_match.group(1).strip(), annual_match.group(2).strip()

    return cost, "Validity information not found", "Validity information not found"
def scrape_parking_pass_info():
    """Collect cost and validity details for every listed parking pass.

    Downloads the payment form, reads the pass options from it, then fetches
    and parses the detail page of each option. Options whose detail page
    cannot be fetched are left out.

    Returns:
        A list of dicts with the keys ``"Pass Type"``, ``"Cost"``,
        ``"Available"``, ``"Valid From"`` and ``"Valid To"``; empty when the
        payment form itself cannot be downloaded.
    """
    url = "https://secure.xpressbillpay.com/portal/payment_forms/?id=MzYzNQ%3D%3D"
    landing_page = fetch_html(url)
    if not landing_page:
        return []

    options = parse_initial_parking_pass_info(landing_page)
    collected = []
    for coverage_dates, available, pass_id in options:
        detail_page = fetch_pass_details(pass_id)
        if detail_page:
            cost, valid_from, valid_to = parse_pass_details(detail_page)
            record = {
                "Pass Type": coverage_dates,
                'Cost': cost,
                "Available": available,
                "Valid From": valid_from,
                "Valid To": valid_to,
            }
            collected.append(record)
    return collected
def print_pass_details(pass_details):
    """Write each pass record to stdout as labelled lines.

    Every record produces five ``Label: value`` lines followed by one empty
    line.

    Args:
        pass_details: Iterable of dicts holding the keys ``"Pass Type"``,
            ``"Cost"``, ``"Available"``, ``"Valid From"`` and ``"Valid To"``.
    """
    # (label shown to the user, key in the record); note that the "Valid To"
    # value is displayed under the label "Valid Through".
    layout = (
        ("Pass Type", "Pass Type"),
        ("Cost", "Cost"),
        ("Available", "Available"),
        ("Valid From", "Valid From"),
        ("Valid Through", "Valid To"),
    )
    for record in pass_details:
        for label, key in layout:
            print(f"{label}: {record[key]}")
        print("")
if __name__ == "__main__":
    # Script entry point: scrape the current pass listing and print it.
    print_pass_details(scrape_parking_pass_info())