-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebsite-title
executable file
·83 lines (60 loc) · 1.81 KB
/
website-title
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3
"""Retrieve a website and extract the title
Default to OpenGraph Title property, and fall back to <title>.
"""
import os
import sys
import requests
from bs4 import BeautifulSoup
# Fallback URL: fetched when the script is run without any arguments.
fallback_url = 'https://github.com/malte70/scripts'
def usage(output=sys.stdout) -> None:
    """Print a one-line usage summary.

    Args:
        output: File-like object the message is written to
            (standard output unless specified otherwise).
    """
    # Show only the script's file name, not the full path it was invoked with.
    program = os.path.basename(sys.argv[0])
    print("Usage:", program, "<url>", file=output)
def get_title(url: str, timeout: float = 10.0) -> str | None:
    """Get a web site's title.

    Uses the OpenGraph ``og:title`` meta property by default and falls back
    to the classic ``<title>`` tag. Returns None if neither yields a
    non-empty title.

    Args:
        url (str): Website URL
        timeout (float): Seconds to wait for the server before giving up,
            passed through to ``requests.get``.

    Returns:
        str | None: The title with surrounding whitespace removed, or None
            if no title could be detected.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` when the
            page cannot be retrieved (e.g. invalid URL, connection failure,
            timeout).
    """
    # Without a timeout a non-responding server would block the script forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    # Preferred source: <meta property="og:title" content="...">.
    og_tag = soup.find("meta", property="og:title")
    if og_tag is not None:
        # .get() tolerates a meta tag that has no content attribute,
        # where og_tag["content"] would raise KeyError.
        og_title = (og_tag.get("content") or "").strip()
        if og_title:
            return og_title

    # Fallback: the document's <title> element.
    title_tag = soup.find("title")
    if title_tag is not None:
        title_text = title_tag.get_text().strip()
        if title_text:
            return title_text

    # No title found :(
    return None
if __name__ == "__main__":
    # Command-line entry point: print the title of every URL given as an
    # argument, or of the fallback URL when called without arguments.
    if len(sys.argv) == 1:
        print("Using fallback URL...")
        url = fallback_url
        try:
            title = get_title(url)
        except requests.RequestException as err:
            # Report network/URL problems concisely instead of a traceback.
            print(f"Error: could not retrieve {url}: {err}", file=sys.stderr)
            sys.exit(1)
        print(f"Website-URL: {url}")
        print(f"Title: {title}")
    else:
        for arg in sys.argv[1:]:
            if arg in ("--help", "-h"):
                usage()
            elif arg.startswith("-"):
                # startswith() is safe for an empty-string argument,
                # where arg[0] would raise IndexError.
                print(f"Unknown option: {arg}", file=sys.stderr)
                usage(sys.stderr)
                sys.exit(1)
            else:
                url = arg
                try:
                    title = get_title(url)
                except requests.RequestException as err:
                    # Report network/URL problems concisely instead of a traceback.
                    print(f"Error: could not retrieve {url}: {err}", file=sys.stderr)
                    sys.exit(1)
                print(title)