diff --git a/package.json b/package.json index 9a37f9f..db884ee 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "fix-path": "^2.1.0", "fluent-ffmpeg": "2.1.2", "js-video-url-parser": "^0.2.8", + "miniget": "^4.2.3", "mkdirp": "^0.5.1", "mobx": "^5.8.0", "mobx-react": "^5.4.3", diff --git a/src/services/playlist-scraper.ts b/src/services/playlist-scraper.ts index adb0d5a..12dda53 100644 --- a/src/services/playlist-scraper.ts +++ b/src/services/playlist-scraper.ts @@ -1,48 +1,65 @@ import * as cheerio from 'cheerio'; +import * as miniget from 'miniget'; + +const fetchWithMiniget = async (url: string) => { + return miniget(url, { + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36', + }, + }).text(); +} const getPlaylistPageContent = async (playlistURL: string) => { - const playlistId = new URL(playlistURL).searchParams.get('list'); - const response = await fetch(`https://www.youtube.com/playlist?list=${playlistId}`); - if (!response.ok) { - switch (response.status) { + try { + const playlistId = new URL(playlistURL).searchParams.get('list'); + const response = await fetchWithMiniget(`https://www.youtube.com/playlist?list=${playlistId}`); + return response; + } catch (error /*: Miniget.MinigetError */) { + console.log('failed to fetch playlist page', error); + switch (error.statusCode) { case 403: throw new Error('Playlist is private or not accessible to the app'); case 400: - case 404: throw new Error('Invalid playlist URL'); + case 404: + throw new Error('Playlist not found'); default: throw new Error('Failed to fetch playlist page'); } } - return response.text(); } export const scrap = async (playlistURL: string) => { const data = await getPlaylistPageContent(playlistURL); - const $ = cheerio.load(data); - const ytInitialData = $('script').filter((_index, tag) => { - const html = cheerio.html(tag); - return html.includes('ytInitialData'); - })[0]; + try { + const $ = cheerio.load(data); + const ytInitialData = $('script').filter((_index, tag) => { + const html = cheerio.html(tag); + return html.includes('ytInitialData'); + })[0]; - const [jsonStr] = /{.*}/gm.exec(cheerio.html(ytInitialData)); - const info = JSON.parse(jsonStr); - const { - contents, - } = info.contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer; - const songs = contents - .filter(({ playlistVideoRenderer }) => playlistVideoRenderer) - .map(({ playlistVideoRenderer }) => ({ - id: playlistVideoRenderer.videoId, - name: playlistVideoRenderer.title.runs[0].text, - })); + const [jsonStr] = /{.*}/gm.exec(cheerio.html(ytInitialData)); + const info = JSON.parse(jsonStr); + const { + contents, + } = info.contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer; + const songs = contents + .filter(({ playlistVideoRenderer }) => playlistVideoRenderer) + .map(({ playlistVideoRenderer }) => ({ + id: playlistVideoRenderer.videoId, + name: playlistVideoRenderer.title.runs[0].text, + })); - const hasMore = contents.length > songs.length + const hasMore = contents.length > songs.length - return { - name: info.metadata.playlistMetadataRenderer.title, - playlist: songs, - hasMore, - }; + return { + name: info.metadata.playlistMetadataRenderer.title, + playlist: songs, + hasMore, + }; + } catch (err) { + console.error(err); + throw new Error('The playlist may be private, or the URL is invalid'); + } }; diff --git a/yarn.lock b/yarn.lock index 184d22c..bf81b9e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5398,6 +5398,11 @@ miniget@^4.2.2: resolved "https://registry.yarnpkg.com/miniget/-/miniget-4.2.2.tgz#db20320f265efdc4c1826a0be431d56753074475" integrity sha512-a7voNL1N5lDMxvTMExOkg+Fq89jM2vY8pAi9ZEWzZtfNmdfP6RXkvUtFnCAXoCv2T9k1v/fUJVaAEuepGcvLYA== +miniget@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/miniget/-/miniget-4.2.3.tgz#3707a24c7c11c25d359473291638ab28aab349bd" + integrity sha512-SjbDPDICJ1zT+ZvQwK0hUcRY4wxlhhNpHL9nJOB2MEAXRGagTljsO8MEDzQMTFf0Q8g4QNi8P9lEm/g7e+qgzA== + minimatch@3.0.4: version "3.0.4" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"