-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(scrap-youtube): bypass consent screen (#133)
- Loading branch information
Showing
3 changed files
with
51 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,65 @@ | ||
import * as cheerio from 'cheerio'; | ||
import * as miniget from 'miniget'; | ||
|
||
/**
 * Downloads `url` with miniget and resolves with the response body as text.
 *
 * A desktop-browser `User-Agent` header is sent with the request.
 * NOTE(review): going by the commit title ("bypass consent screen"), this
 * header is presumably what keeps YouTube from serving its consent page
 * instead of the playlist — confirm before changing it.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body as a string.
 * @throws Whatever miniget rejects with; `getPlaylistPageContent` reads a
 *   `statusCode` property from that error.
 */
const fetchWithMiniget = async (url: string): Promise<string> => {
  return miniget(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    },
  }).text();
};
|
||
/**
 * Fetches the HTML of the YouTube playlist page referenced by `playlistURL`.
 *
 * Only the `list` query parameter of the given URL is used; the request always
 * goes to `https://www.youtube.com/playlist`.
 *
 * @param playlistURL Any URL carrying the playlist id in its `list` parameter.
 * @returns The playlist page body as text.
 * @throws Error 'Invalid playlist URL' when the URL cannot be parsed, has no
 *   `list` parameter, or the request fails with status 400 or 404.
 * @throws Error 'Playlist is private or not accessible to the app' on 403.
 * @throws Error 'Failed to fetch playlist page' on any other failure.
 */
const getPlaylistPageContent = async (playlistURL: string): Promise<string> => {
  // Validate the input before going to the network, so a malformed URL is
  // reported as such instead of as a generic fetch failure.
  let playlistId: string | null;
  try {
    playlistId = new URL(playlistURL).searchParams.get('list');
  } catch {
    throw new Error('Invalid playlist URL');
  }
  if (!playlistId) {
    throw new Error('Invalid playlist URL');
  }

  try {
    // searchParams.get() returns the decoded value, so re-encode it before
    // placing it back into a query string.
    return await fetchWithMiniget(
      `https://www.youtube.com/playlist?list=${encodeURIComponent(playlistId)}`,
    );
  } catch (error: unknown) {
    console.error('failed to fetch playlist page', error);
    // The caught value is untyped; read `statusCode` only if it is present.
    const statusCode =
      typeof error === 'object' && error !== null && 'statusCode' in error
        ? (error as { statusCode?: unknown }).statusCode
        : undefined;
    switch (statusCode) {
      case 403:
        throw new Error('Playlist is private or not accessible to the app');
      case 400:
      case 404:
        throw new Error('Invalid playlist URL');
      default:
        throw new Error('Failed to fetch playlist page');
    }
  }
};
|
||
/**
 * One entry of the playlist's `contents` array, limited to the fields read by
 * `scrap`. Assumed shape, inferred from how the fields are accessed — TODO
 * confirm against a real `ytInitialData` payload.
 */
type PlaylistEntry = {
  playlistVideoRenderer?: {
    videoId: string;
    title: { runs: { text: string }[] };
  };
};

/**
 * Scrapes a YouTube playlist page and extracts its title and videos.
 *
 * The page is fetched with `getPlaylistPageContent`; errors from that call
 * propagate unchanged. The `ytInitialData` JSON embedded in a `<script>` tag is
 * then parsed and the playlist entries are read from it.
 *
 * @param playlistURL URL carrying the playlist id in its `list` parameter.
 * @returns `name` (playlist title), `playlist` (the `{ id, name }` of each
 *   video entry) and `hasMore` (true when `contents` holds entries that are
 *   not videos).
 * @throws Error 'The playlist may be private, or the URL is invalid' when the
 *   page cannot be parsed into the expected structure.
 */
export const scrap = async (playlistURL: string) => {
  const data = await getPlaylistPageContent(playlistURL);

  try {
    const $ = cheerio.load(data);
    // First <script> tag whose markup mentions ytInitialData.
    const ytInitialData = $('script').filter((_index, tag) => {
      const html = cheerio.html(tag);
      return html.includes('ytInitialData');
    })[0];
    if (!ytInitialData) {
      throw new Error('ytInitialData script tag not found');
    }

    // Greedy match from the first "{" to the last "}" on a single line.
    const match = /{.*}/gm.exec(cheerio.html(ytInitialData));
    if (!match) {
      throw new Error('ytInitialData JSON not found in script tag');
    }
    const info = JSON.parse(match[0]);

    // Any missing level in this path throws and is reported by the catch below.
    const contents: PlaylistEntry[] = info.contents.twoColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[0].itemSectionRenderer.contents[0].playlistVideoListRenderer.contents;

    const songs: { id: string; name: string }[] = [];
    for (const { playlistVideoRenderer } of contents) {
      // Skip entries that are not videos.
      if (playlistVideoRenderer) {
        songs.push({
          id: playlistVideoRenderer.videoId,
          name: playlistVideoRenderer.title.runs[0].text,
        });
      }
    }

    // A non-video entry is presumably a continuation marker for further
    // pages — verify against the payload.
    const hasMore = contents.length > songs.length;

    return {
      name: info.metadata.playlistMetadataRenderer.title,
      playlist: songs,
      hasMore,
    };
  } catch (err) {
    console.error(err);
    throw new Error('The playlist may be private, or the URL is invalid');
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters