-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbys.js
97 lines (97 loc) · 3.22 KB
/
bys.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import { write, stdout } from "bun";
/**
 * Result field -> CSS selector of the <meta> tag whose `content` attribute
 * supplies that field. Each selector gets its own handler so values are keyed
 * by what matched instead of by their position in the document.
 * NOTE: `videoThumbnai` is misspelled, but it is the key callers already
 * receive, so it is kept for backward compatibility.
 */
const META_SELECTORS = Object.freeze({
  title: 'meta[name="title"]',
  videoThumbnai: 'meta[property="og:image"]',
  description: 'meta[name="description"]',
  keywords: 'meta[name="keywords"]',
  datePublished: 'meta[itemprop="datePublished"]',
  uploadDate: 'meta[itemprop="uploadDate"]',
  videoId: 'meta[itemprop="videoId"]',
});

/** Patterns applied to the raw page text; capture group 1 is the wanted value. */
const PAGE_PATTERNS = Object.freeze({
  viewCount: /\{"viewCount":\{"simpleText":"(.*?)"/,
  likesCount:
    /defaultText":\{"accessibility":\{"accessibilityData":\{"label":"(.*?)"/,
  approxDurationMs: /approxDurationMs":"(.*?)"/,
  // Escape-aware: a `\"` inside the description must not end the capture.
  largeDescription: /description":\{"simpleText":"((?:[^"\\]|\\.)*)"/,
});

/** Returns capture group 1 of `pattern` in `text`, or null when there is no match. */
const firstCapture = (text, pattern) => text.match(pattern)?.[1] ?? null;

/** Decodes the JSON-escaped description body and flattens line breaks to spaces. */
const decodeDescription = (raw) => {
  try {
    return JSON.parse(`"${raw}"`).replace(/\r?\n/g, " ");
  } catch {
    // Not a valid JSON string body: fall back to the minimal manual unescape.
    return raw.replace(/\\n/g, " ").replace(/\\"/g, '"');
  }
};

/** Turns a bare video id or a full URL into a validated youtube.com URL string. */
const resolveVideoUrl = (url) => {
  if (typeof url !== "string" || url.trim() === "") {
    throw new Error("No url provided");
  }
  const trimmed = url.trim();
  // Anything without an http(s) scheme is treated as a video id.
  const target = /^https?:\/\//i.test(trimmed)
    ? trimmed
    : `https://youtube.com/watch?v=${encodeURIComponent(trimmed)}`;

  let hostname;
  try {
    ({ hostname } = new URL(target));
  } catch (cause) {
    throw new Error("Invalid url", { cause });
  }
  // Check the host itself; a substring test would accept
  // e.g. https://example.com/?next=youtube.com.
  if (hostname !== "youtube.com" && !hostname.endsWith(".youtube.com")) {
    throw new Error("Invalid url");
  }
  return target;
};

/**
 * Scrapes metadata of a YouTube video page, prints it as JSON to stdout and
 * returns it.
 *
 * @param {string} url - A video id (e.g. "WzcCVPoX2wQ") or a full
 *   http(s) URL on youtube.com or one of its subdomains.
 * @returns {Promise<object>} Object with the keys url, title, videoId,
 *   videoThumbnai, description, largeDescription, keywords, uploadDate,
 *   datePublished, approxDurationMs, viewCount and likesCount. A value that
 *   could not be found on the page is null.
 * @throws {Error} "No url provided" for a missing/blank url, "Invalid url"
 *   for a non-YouTube or malformed URL, "Scrape shield encountered!" when the
 *   HTTP response is not ok.
 */
const BunYoutubeScraper = async (url) => {
  const videoUrl = resolveVideoUrl(url);

  const response = await fetch(videoUrl);
  if (!response.ok) throw new Error("Scrape shield encountered!");

  // Collect the `content` attribute of each wanted <meta> tag (first match wins).
  const meta = {};
  const rewriter = new HTMLRewriter();
  for (const [field, selector] of Object.entries(META_SELECTORS)) {
    rewriter.on(selector, {
      element(el) {
        meta[field] ??= el.getAttribute("content");
      },
    });
  }

  // The element handlers run while the body is consumed, so the text must be
  // awaited before any of the collected meta values are read.
  const text = await rewriter.transform(response).text();

  const rawDescription = firstCapture(text, PAGE_PATTERNS.largeDescription);

  const videoDataObject = {
    url: videoUrl,
    title: meta.title ?? null,
    videoId: meta.videoId ?? null,
    videoThumbnai: meta.videoThumbnai ?? null,
    description: meta.description ?? null,
    largeDescription:
      rawDescription === null ? null : decodeDescription(rawDescription),
    keywords: meta.keywords ?? null,
    uploadDate: meta.uploadDate ?? null,
    datePublished: meta.datePublished ?? null,
    approxDurationMs: firstCapture(text, PAGE_PATTERNS.approxDurationMs),
    viewCount: firstCapture(text, PAGE_PATTERNS.viewCount),
    likesCount: firstCapture(text, PAGE_PATTERNS.likesCount),
  };

  // Print the scraped data; awaited so a failed write is not silently dropped.
  await write(stdout, JSON.stringify(videoDataObject, null, 2));

  return videoDataObject;
};
// Script entry point: scrape the video given as the first CLI argument, either
// a video id ("WzcCVPoX2wQ") or a full URL
// (https://www.youtube.com/watch?v=WzcCVPoX2wQ).
// With no argument the empty string is passed on, which is rejected with
// "No url provided".
const bys = await BunYoutubeScraper(process.argv[2] ?? "");