This repository has been archived by the owner on Oct 11, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 51
/
index.js
50 lines (42 loc) · 1.59 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
const { parse } = require('url')
const { send } = require('micro')
const got = require('got');
const cache = require('memory-cache')
// Metadata extractor built once at module load from the rule bundles below.
// Each bundle is registered exactly once; the order of the list is kept as-is
// because several bundles target the same field (e.g. the logo bundles).
const metascraper = require('metascraper').load([
  require('metascraper-author')(),
  require('metascraper-date')(),
  require('metascraper-description')(),
  require('metascraper-image')(),
  require('metascraper-logo')(),
  require('metascraper-clearbit-logo')(),
  require('metascraper-logo-favicon')(),
  require('metascraper-publisher')(),
  require('metascraper-title')(),
  require('metascraper-url')(),
  require('metascraper-amazon')(),
  require('metascraper-youtube')(),
  require('metascraper-soundcloud')(),
  require('metascraper-video-provider')()
])
// Lifetime passed to cache.put for scraped results: 24 hours, in milliseconds.
const TWENTY_FOUR_HOURS = 24 * 60 * 60 * 1000
module.exports = async (req, res) => {
res.setHeader('Access-Control-Allow-Origin', '*')
const { query: { url } } = parse(req.url, true)
if (!url) return send(res, 401, { message: 'Please supply an URL to be scraped in the url query parameter.' })
const cachedResult = cache.get(url)
if (cachedResult) return send(res, 200, cachedResult)
let statusCode, data
try {
const { body: html } = await got(url);
data = await metascraper({ url, html })
statusCode = 200
} catch (err) {
console.log(err)
statusCode = 401
data = { message: `Scraping the open graph data from "${url}" failed.`, suggestion: 'Make sure your URL is correct and the webpage has open graph data, meta tags or twitter card data.' }
}
send(res, statusCode, data)
// Cache results for 24 hours
cache.put(url, data, TWENTY_FOUR_HOURS)
}