diff --git a/README.md b/README.md index e31c45a..5e5cd35 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,6 @@ - [2.3.2. Development](#232-development) - [3. Usage](#3-usage) - [3.1. Development](#31-development) -- [4. Modules](#4-modules) - - [4.1. requests\_package.py](#41-requests_packagepy) - [5. Roadmap](#5-roadmap) - [6. FAQ](#6-faq) - [7. License](#7-license) @@ -89,10 +87,6 @@ To run this project, you will need to add the following environment variables to Create the virutual environment and install dependencies ```bash -python -m venv .venv - -.venv\Scripts\activate.bat - pip install flit ``` @@ -132,24 +126,6 @@ Use this space to tell a little more about your project and how it can be used. 4. Review the dependencies under `pyproject.toml` and remove as needed. 5. Remove unneeded dependencies from `src\\` -## 4. Modules - -### 4.1. requests_package.py - -Includes frequently used requests packages, functions, classes and defaults -The following functions are defined in the `req` class - -|Function Name| Purpose| -|-------------|--------| -|`randomize_header`|Randomize request headers by updating both referer and useragent| -|`change_useragent`|Change request useragent to random one| -|`change_referer`|Randomly set google.com as referer| -|`get_from_list`|Complete requests to a list of urls and return the list of responses| -|`get`|URL request with header randomization, timeout, proxy and retries builtin| -|`proxy_get_from_list`|Complete requests to a list of urls and return the list of responses using proxy ips| -|`proxy_get`|completes `get` request using proxies| -|`create_session`|Generate sessions object with adequate headers and adapters| - ## 5. Roadmap diff --git a/database.pkl b/database.pkl new file mode 100644 index 0000000..5eb3815 Binary files /dev/null and b/database.pkl differ diff --git a/pyproject.toml b/pyproject.toml index 1fe35b3..f3b2de5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,17 +14,17 @@ dependencies = [ "typer[all]", #cli_app.py "icecream", "slack_sdk", #Slack.py - "requests", #request_package + "ak_requests", #request_package "beautifulsoup4", #request_package "ffmpy", - "eyed3" + "eyed3", + "keyring","brotlipy" ] [project.optional-dependencies] test = [ "pytest", - "ipykernel", - "pandasgui" + "ipykernel" ] [project.urls] diff --git a/src/jiosaavn/API/SaavnMe_Parser.py b/src/jiosaavn/API/SaavnMe_Parser.py new file mode 100644 index 0000000..ccdf958 --- /dev/null +++ b/src/jiosaavn/API/SaavnMe_Parser.py @@ -0,0 +1,84 @@ +from ak_requests import RequestsSession +import brotli + +from jiosaavn.file_parser import Song +import json + +from jiosaavn.utils import log + +class SaavnMe: + BASEURL: str = 'https://saavn.me/' + SESSION = RequestsSession() + log.info('SaavnMe Instance Initialized') + + def __init__(self) -> None: + self.SESSION.MIN_REQUEST_GAP = 1.5 #To prevent Ratelimit in free API + + def __str__(self) -> str: + return 'Instance of SaavnMe class for saavn.me parser' + + def __repr__(self) -> str: + return 'SaavnMe()' + + def playlist(self, id: int|str) -> list[Song]: + """Provides a list of Song dataclass from the playlist id""" + log.info(f'Extracting Playlist Info with ID: {id}') + url: str = f'{self.BASEURL}playlists?id={id}' + _res = self.SESSION.get(url) + try: + data: dict = json.loads(_res.content)['data'] + except Exception: + data: dict = json.loads(brotli.decompress(_res.content))['data'] + + log.debug(f'Playlist: {data.get("name")}\nSongCount:{data.get("songCount")}\nFollowers:{data.get("followerCount")}\nURL:{data.get("url")}') + + return _parse_playlist_results( + song_list=data['songs'] + ) + + def song(self, url: str) -> Song: + """Returns Song dataclass from provided jiosaavn url""" + req_url: str = f'{self.BASEURL}songs?link={url}' + log.info(f'Extracting Song from URL: {url}') + data: dict = self.SESSION.get(req_url).json()['data'] + return _parse_song_dict(song_dict=data) + +def _parse_song_dict(song_dict: dict) -> Song: + # get media url + download_urls: list[dict] = song_dict['downloadUrl'] + _currentkbps: int = 0 + media_url: str = '' + + for download_url in download_urls: + __curr = int(download_url['quality'].replace('kbps','')) + if __curr > _currentkbps: + media_url = download_url['link'] + _currentkbps = __curr + + #primary_artists + primary_artists = song_dict['primaryArtists'].split(', ') + + # get image url + urls: list[dict] = song_dict['image'] + _currentkbps: int = 0 + image_url: str = '' + + for url in urls: + __curr = int(url['quality'].split('x')[0]) + if __curr > _currentkbps: + image_url = url['link'] + _currentkbps = __curr + + return Song( + song_id=song_dict['id'], + name=song_dict['name'], + album=song_dict['album']['name'], + media_url=media_url, + primary_artists=primary_artists, + artists=primary_artists, + year=int(song_dict['year']), + image_url=image_url + ) + +def _parse_playlist_results(song_list: list[dict]) -> list[Song]: + return [_parse_song_dict(song_dict) for song_dict in song_list] \ No newline at end of file diff --git a/src/jiosaavn/API/__init__.py b/src/jiosaavn/API/__init__.py new file mode 100644 index 0000000..ddad8e9 --- /dev/null +++ b/src/jiosaavn/API/__init__.py @@ -0,0 +1 @@ +from .SaavnMe_Parser import SaavnMe \ No newline at end of file diff --git a/src/jiosaavn/Slack.py b/src/jiosaavn/Slack.py index 09b0e30..7692420 100644 --- a/src/jiosaavn/Slack.py +++ b/src/jiosaavn/Slack.py @@ -1,28 +1,67 @@ -# Import WebClient from Python SDK (github.com/slackapi/python-slack-sdk) from slack_sdk import WebClient from slack_sdk.errors import SlackApiError -from jiosaavn.credentials import getpwd -class Slack_instance: +from jiosaavn.utils import getpwd +from jiosaavn.utils import log +class Blocks: + def __init__(self) -> None: + self.DIVIDER = {"type": "divider"} + + def TEXT(self, text: str, img_url: str='', img_alt: str ='') -> dict: + block = { + "type": "section", + "text": { "type": "mrkdwn", "text": text} + } + + if img_url != '': + block["accessory"] = { + "type": "image", + "image_url": img_url, + "alt_text": img_alt + } + return block + + def IMAGE(self, img_url: str, alt_txt: str='', text: str='') -> dict: + block: dict = { + "type": "image", + "image_url": img_url, + "alt_text": alt_txt + } + + if text != '': + block['title'] = { + "type": "plain_text", + "text": text, + "emoji": True + } + return block + + def HEADER(self, text: str) -> dict: + return { + "type": "header", + "text": { + "type": "plain_text", + "text": text, + "emoji": True + } + } + +class Slack: def __init__(self): - #GetSlack self.client = WebClient(token=getpwd('Slack-pythonbot', 'token')) + self.BLOCK = Blocks() + log.info('Slack Instance Initialized') return - - # Destructor - def __del__(self): - return - - def do_actions(self): - return 0 + + def __str__(self) -> str: + return "SlackAPI Instance" + + def __repr__(self) -> str: + return "Slack()" def channel_id(self, channel_name:str) -> str: """Returns channel id for the specified channel name - Args: - channel_name (str): Name of the slack channel - Returns: - str: Channel ID """ return getpwd('Slack-pythonbot', channel_name) @@ -30,67 +69,33 @@ def init_block(self): self.block = [] return - def msg(self, message:str, channel:str="python"): + def msg(self, message:str, channel:str="python") -> int: """Sends Slack message - Args: - message (str): Message to be sent - channel (str, optional): Slack channel to send the message to. Defaults to "#python". """ err = 0 try: _ = self.client.chat_postMessage( channel=self.channel_id(channel), text=message) + log.debug('Slack message sent') except SlackApiError as e: # You will get a SlackApiError if "ok" is False - print(f'NG - Slack message not sent: {str(e)}') + log.error(f'NG - Slack message not sent: {str(e)}') err = 1 return err - - def add_text(self, text, image_url=None,image_alt_text=""): - """Adds markdown element to block message - Args: - text (str): Text to display - image_url (str, optional): Image to display. Defaults to None. - image_alt_text (str, optional): Alt string for image. Defaults to "". - """ - if not image_url: - self.block.append( - { - "type": "section", - "text": { "type": "mrkdwn", "text": text} - }) - else: - self.block.append( - { - "type": "section", - "text": {"type": "mrkdwn", "text": text}, - "accessory": { - "type": "image", - "image_url": image_url, - "alt_text": image_alt_text - } - }) - - return - - def add_divider(self): - """Adds divider to the block message - """ - self.block.append({"type": "divider"}) - return - - def post_block(self, channel): + def post_block(self, channel: str, blocks: list[dict]): """Posts the currently constructed block to slack chat Args: channel (str): Channel name """ err = 0 try: - response = self.client.chat_postMessage(channel=self.channel_id(channel),blocks=self.block) + _ = self.client.chat_postMessage(channel=self.channel_id(channel), + blocks=blocks) + log.debug('Slack block sent successfully') except SlackApiError as e: # You will get a SlackApiError if "ok" is False - print(f'NG - Slack message not sent: {str(e)}') + log.error(f'NG - Slack message not sent: {str(e)}') err = 1 return err \ No newline at end of file diff --git a/src/jiosaavn/__init__.py b/src/jiosaavn/__init__.py index 64e3bb8..14fb167 100644 --- a/src/jiosaavn/__init__.py +++ b/src/jiosaavn/__init__.py @@ -1,4 +1,19 @@ "Placeholder module info" __version__ = "0.0.1" -from jiosaavn.debugger import * \ No newline at end of file +from jiosaavn.utils import log, ic +from jiosaavn.main import JiosaavnDownload + + +log.info('Jiosaavn Module Initialized') + +ic.disable() +ic.enable() # Comment this line out to enable debugger + + +if ic.enabled: + log.setLevel(10) #debug +else: + log.setLevel(20) #info + +log.debug(f'Icecream Debugger: {ic.enabled}') \ No newline at end of file diff --git a/src/jiosaavn/api_parser.py b/src/jiosaavn/api_parser.py index 7017562..3050775 100644 --- a/src/jiosaavn/api_parser.py +++ b/src/jiosaavn/api_parser.py @@ -1,10 +1,13 @@ from dataclasses import dataclass from jiosaavn.file_parser import Song -from jiosaavn.debugger import ic -from jiosaavn.request_package import Req +from pathlib import Path +from ak_requests import RequestsSession +from . import log, ic +ic.configureOutput(prefix=f'{Path(__file__).name} -> ') + class SaavnAPI: - session = Req() + session = RequestsSession() def __init__(self, baseurl: str, port: int = 80): self.baseurl = baseurl self.port = port @@ -20,14 +23,14 @@ def song(self, url: str) -> Song: data = self.session.get(f"{self.url}/song/?query={url}").json() return _song_from_json(data) - def playlist(self, url: str) -> tuple[Song]: + def playlist(self, url: str) -> list[Song]: data = self.session.get(f"{self.url}/result/?query={url}").json() - return (_song_from_json(song) for song in data.get('songs')) + return [_song_from_json(song) for song in data.get('songs')] def _song_from_json(data: dict) -> Song: - if type(data.get('artistMap')) == dict: - artists = list(data.get('artistMap').keys()) + if isinstance(_artist_map:=data.get('artistMap'), dict): + artists = list(_artist_map.keys()) else: artists = [] @@ -39,5 +42,5 @@ def _song_from_json(data: dict) -> Song: primary_artists= data['primary_artists'].split(', '), artists= artists, year = int(data.get('year', 0)), - image_url= data.get('image') + image_url= data.get('image') # type: ignore ) \ No newline at end of file diff --git a/src/jiosaavn/cli_app.py b/src/jiosaavn/cli_app.py index c26bf05..ca6a3ea 100644 --- a/src/jiosaavn/cli_app.py +++ b/src/jiosaavn/cli_app.py @@ -1,10 +1,9 @@ import typer from rich import print -from typing import Optional from pathlib import Path #Default import to globally enable/disable debugger -from jiosaavn.debugger import * +from . import log, ic ic.configureOutput(prefix=f'{Path(__file__).name} -> ') app = typer.Typer() @@ -15,6 +14,7 @@ def template_fn( template_bool: bool = False): """This is a sample function to be executed through the cli app """ + log.info('Calling the Template Function') ic(template_bool) ic(template_str) diff --git a/src/jiosaavn/debugger.py b/src/jiosaavn/debugger.py deleted file mode 100644 index 9c573cc..0000000 --- a/src/jiosaavn/debugger.py +++ /dev/null @@ -1,4 +0,0 @@ -from icecream import ic - -ic.enable() -# ic.disable() diff --git a/src/jiosaavn/file_parser.py b/src/jiosaavn/file_parser.py index 36c6eca..3b01876 100644 --- a/src/jiosaavn/file_parser.py +++ b/src/jiosaavn/file_parser.py @@ -1,17 +1,22 @@ -import unicodedata, re, time +from ak_requests import RequestsSession +import eyed3 from ffmpy import FFmpeg + from datetime import datetime +import html from pathlib import Path +import re +import shutil +import time from typing import Optional -from jiosaavn.request_package import Req -from jiosaavn.debugger import ic -import eyed3 + +from jiosaavn.utils import log, sanitize class Song: - session = Req() + session = RequestsSession() def __init__(self, song_id: str, name: str, album: str, media_url: str, - primary_artists: list[str], - artists: list[str], year: int, image_url: str, **kwargs) -> None: + primary_artists: list[str], artists: list[str], year: int, + image_url: str, debug_mode: bool = False, **kwargs) -> None: self.id = song_id self.name = name self.album = album @@ -23,6 +28,7 @@ def __init__(self, song_id: str, name: str, album: str, media_url: str, self.artists = artists self.year = year self.image_url = image_url + self.debug_mode = debug_mode def __str__(self) -> str: return f"Song {self.name} from {self.album}" @@ -38,21 +44,22 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): if exc_type is not None: - print(f"There was an {str(exc_type)} error on {self.name}({self.id}) from {self.album}.") - print(f"\nError:\n{exc_value}") - print(f"\nTraceback:\n{traceback}") + log.error(f"There was an {str(exc_type)} error on {self.name}({self.id}) from {self.album}.") + log.error(f"\nError:\n{exc_value}") + log.error(f"\nTraceback:\n{traceback}") - - def download(self, final_name: str, media_url: Optional[str] = None) -> Path: + def download(self, final_name: str|None=None, media_url: Optional[str] = None) -> Path: """Downloads the mp3 to local folder Args: - final_name (str): Name for the mp3 file + final_name (str): Name for the mp3 file, can pass `None` to autoset from metadata. media_url (str, optional): url for the mp3. If none, tries to get url from `self.media_url`. Defaults to None. Returns: Path: path of the downloaded file """ + if final_name is None: + final_name = self.filename if media_url: self.media_url = media_url else: @@ -61,105 +68,86 @@ def download(self, final_name: str, media_url: Optional[str] = None) -> Path: final_name = sanitize(final_name) if final_name.endswith('.mp3') else sanitize(final_name + '.mp3') filepath = Path(final_name) - ic(f"Downloading from {media_url} to {filepath.absolute()}") - start = time.time() - FFmpeg( - inputs={media_url:None}, - outputs={final_name: None} - ).run() + if not self.debug_mode: + log.debug(f"Downloading from {media_url} to {filepath.absolute()}") + + start = time.time() + FFmpeg( + inputs={media_url:None}, + outputs={final_name: None} + ).run() + self.download_date = datetime.now() + size = filepath.stat().st_size/(1024*1024) #In MB + time_taken = time.time() - start + log.info(f"Download completed ({size:.2f} MB) in {time_taken:.1f} sec(s) at {size/time_taken:.1f} MB/s") + else: + log.debug(f'Run Downloaded: {self.media_url} -> {self.filepath}') self.filepath = filepath - self.download_date = datetime.now() - size = filepath.stat().st_size/(1024*1024) #In MB - time_taken = time.time() - start - ic(f"Download completed ({size:.2f} MB) in {time_taken:.1f} sec(s) at {size/time_taken:.1f} MB/s") return filepath - def move(self, finalpath: Path) -> Path: - if type(finalpath) == str: - finalpath = Path(finalpath) - - ic(f"Moving {self.filepath} --> {finalpath}") + def move(self, finalpath: Path|str) -> Path: + """Move file to final destination""" + finalpath = Path(str(finalpath)) + assert self.filepath is not None if finalpath.is_dir(): finalpath = finalpath / self.filepath.name - - self.filepath.rename(finalpath) + + if not self.debug_mode: + log.debug(f"Moving {self.filepath} --> {finalpath}") + #self.filepath.rename(finalpath) + shutil.move(src=self.filepath, dst=finalpath) + else: + log.debug(f'Move {self.filepath} -> {finalpath}') self.filepath = finalpath return finalpath def embed_metadata(self) -> Path: + """Write Metadata to `mp3`""" + assert self.filepath is not None filepath = self.filepath - ic(f"Writing metadata to {filepath}") - audiofile = eyed3.load(filepath) - audiofile.initTag() - audiofile.tag.artist = ', '.join(self.primary_artists) - audiofile.tag.album = self.album - - - audiofile.tag.album_artist = "" if self.artists == [] else ', '.join(self.artists) - audiofile.tag.title = self.name - - if self.year != 0: - audiofile.tag.year = self.year - - if self.image_url: - audiofile.tag.images.set(3, self.image, "image/jpeg", u"cover") - audiofile.tag.save() - - ic(f"Metadata written for {self.name}") + if not self.debug_mode: + log.debug(f"Writing metadata to {filepath}") + audiofile = eyed3.load(filepath) + if audiofile is None: + return self.filepath + audiofile.initTag() + if audiofile.tag is None: + return self.filepath + audiofile.tag.artist = ', '.join(self.primary_artists) + audiofile.tag.album = self.album + + audiofile.tag.album_artist = "" if self.artists == [] else ', '.join(self.artists) + audiofile.tag.title = self.name + + if self.year != 0: + audiofile.tag.year = self.year + + if self.image_url: + audiofile.tag.images.set(3, self.image, "image/jpeg", u"cover") + audiofile.tag.save() + + log.info(f"Metadata written for {self.name}") + else: + log.debug(f'Embed metadata for {self.filepath}') return self.filepath @property def image(self): + """Return contents of `.image_url` url""" if self.image_url == "": - ic('`self.image_url` is ""') + log.debug('`self.image_url` is ""') return None - ic(f"Initiating requests: {self.image_url}") + log.debug(f"Initiating requests: {self.image_url}") return self.session.get(url=self.image_url, timeout=10).content - -def sanitize(filename: str) -> str: - """Return a fairly safe version of the filename. - - We don't limit ourselves to ascii, because we want to keep municipality - names, etc, but we do want to get rid of anything potentially harmful, - and make sure we do not exceed Windows filename length limits. - Hence a less safe blacklist, rather than a whitelist. - """ - blacklist = ["\\", "/", ":", "*", "?", "\"", "<", ">", "|", "\0"] - reserved = [ - "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", - "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", - "LPT6", "LPT7", "LPT8", "LPT9", - ] # Reserved words on Windows - filename = "".join(c for c in filename if c not in blacklist) - # Remove all charcters below code point 32 - filename = "".join(c for c in filename if 31 < ord(c)) - filename = unicodedata.normalize("NFKD", filename) - filename = filename.rstrip(". ") # Windows does not allow these at end - filename = filename.strip() - if all([x == "." for x in filename]): - filename = "__" + filename - if filename in reserved: - filename = "__" + filename - if len(filename) == 0: - filename = "__" - if len(filename) > 255: - parts = re.split(r"/|\\", filename)[-1].split(".") - if len(parts) > 1: - ext = "." + parts.pop() - filename = filename[:-len(ext)] - else: - ext = "" - if filename == "": - filename = "__" - if len(ext) > 254: - ext = ext[254:] - maxl = 255 - len(ext) - filename = filename[:maxl] - filename = filename + ext - # Re-check last character (if there was no extension) - filename = filename.rstrip(". ") - if len(filename) == 0: - filename = "__" - return filename + + @property + def filename(self) -> str: + song_name:str = html.unescape(self.name) + song_name = re.sub(r' ?\(From ".*"\)', '', song_name) + album_name: str = html.unescape(self.album) + if _album:=re.findall(r'\(From "(.*)"\)',album_name): + album_name = _album[0] + log.debug(f'{song_name=}, {album_name=}') + return f'{song_name}-{album_name}({self.year if self.year !=0 else ""}).mp3' \ No newline at end of file diff --git a/src/jiosaavn/main.py b/src/jiosaavn/main.py index a8dfddc..74c8b7e 100644 --- a/src/jiosaavn/main.py +++ b/src/jiosaavn/main.py @@ -1,61 +1,51 @@ -from jiosaavn.debugger import ic from jiosaavn.api_parser import SaavnAPI +from jiosaavn.API import SaavnMe from jiosaavn.file_parser import Song -from jiosaavn.Cacher import Cache +from jiosaavn.utils import Cache from pathlib import Path +from typing import Literal -class MainApp(): - def __init__(self, baseurl: str, port: str, song_urls: list[str] = [], - playlist_urls: list[str] = [], cache_filepath: str= None, - skip_downloaded: bool = True, final_location: str = None - ) -> None: - self.song_urls = song_urls - self.playlist_urls = playlist_urls - self.cache_filepath = cache_filepath - self.skip_downloaded = skip_downloaded - self.final_location = final_location - self.cache = Cache(Path(str(cache_filepath))) - self.saavn = SaavnAPI(baseurl=baseurl, port=port) +from . import log, ic +ic.configureOutput(prefix=f'{Path(__file__).name} -> ') - - def run(self) -> list[Song]: - cache_data = self.cache.data - for song in self.songlist: - try: - with song as f: - f.download(final_name=f"") - f.embed_metadata() - if self.final_location: - f.move(finalpath=Path(self.final_location)) - if self.skip_downloaded: - cache_data.append(song) - self.cache.write(cache_data) - except Exception as e: - print(str(e)) - - @property - def songlist(self) -> list[Song]: - song_list = [] - for url in self.song_urls: - try: - _song = self.saavn.song(url) - except Exception as e: - print(str(e)) - if self.skip_downloaded and _song in self.cache.data: - continue - else: - song_list.append(_song) - - for url in self.playlist_urls: - for _song in self.saavn.song(url): - if self.skip_downloaded and _song in self.cache.data: - continue - else: - song_list.append(_song) - return song_list +class JiosaavnDownload: + def __init__(self, cache_filepath: str|Path = Path('database.pkl'), final_location: Path|str = Path('.')) -> None: + self.cache_filepath: Path = Path(str(cache_filepath)) + self.cache = Cache(filepath=self.cache_filepath) + self.set_downloader() + self.final_location: Path = Path(str(final_location)) + + def set_downloader(self, downloader: SaavnMe = SaavnMe()): + self.ApiProvider = downloader - def __str__(self) -> str: - return "MainApp Class for downloading from JioSaavn" + def song(self, url: str, skip_downloaded: bool = True, debug_only: bool=False): + self._download_song(song=self.ApiProvider.song(url=url), + skip_downloaded=skip_downloaded, debug_only=debug_only) + + def _download_song(self, song: Song, skip_downloaded: bool, debug_only: bool): + with song: + if debug_only: + song.debug_mode = True + if skip_downloaded and self.check_downloaded(song): + log.debug(f'Skipping {song.name} from {song.album}, Downloaded on {song.download_date}') + return + _download_song(song=song, final_location=self.final_location) + if not debug_only: + _cache_data = self.cache.data + _cache_data.append(song) + self.cache.write(data = _cache_data) + else: + log.debug('Cache will be updated here.') + + def playlist(self, id: str|int, skip_downloaded: bool = True, debug_only: bool=False): + for song in self.ApiProvider.playlist(id=id): + self._download_song(song=song, skip_downloaded=skip_downloaded, debug_only=debug_only) + + def check_downloaded(self, song: Song) -> bool: + """Checks if the specified song has previously been downloaded""" + return song.id in [each.id for each in self.cache.data] - def __repr__(self) -> str: - return f"MainApp({self.song_urls=},{self.playlist_urls=}, {self.cache_filepath=}, {self.skip_downloaded}, {self.final_location=})" \ No newline at end of file +def _download_song(song: Song, final_location: Path|str) -> None: + song.download() + song.embed_metadata() + song.move(finalpath=final_location) \ No newline at end of file diff --git a/src/jiosaavn/request_package.py b/src/jiosaavn/request_package.py deleted file mode 100644 index 6e0e856..0000000 --- a/src/jiosaavn/request_package.py +++ /dev/null @@ -1,230 +0,0 @@ -# -import requests -from requests.adapters import HTTPAdapter, Retry - -from bs4 import BeautifulSoup -import random, time -import urllib3 -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -from pathlib import Path -from jiosaavn.debugger import ic - -# -DEFAULT_TIMEOUT_s = 5 #seconds - -# -class TimeoutHTTPAdapter(HTTPAdapter): - #Courtesy of https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ - def __init__(self, *args, **kwargs): - self.timeout = DEFAULT_TIMEOUT_s - if "timeout" in kwargs: - self.timeout = kwargs["timeout"] - del kwargs["timeout"] - super().__init__(*args, **kwargs) - - def send(self, request, **kwargs): - timeout = kwargs.get("timeout") - if timeout is None: - kwargs["timeout"] = self.timeout - return super().send(request, **kwargs) - -class Req: - def __init__(self,MIN_TIME_BET_REQ_s:float = 1): - self.MIN_TIME_BET_REQ_s = MIN_TIME_BET_REQ_s #seconds - self.DEFAULT_TIMEOUT_s = DEFAULT_TIMEOUT_s #seconds - self.last_request = time.time() - self.headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip, deflate', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1' - } - - @staticmethod - def useragent_list()-> list: - return ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36', - 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148', - 'Mozilla/5.0 (Linux; Android 11; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Mobile Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'] - - def default_headers(self) -> dict: - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip, deflate', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1' - } - - @staticmethod - def default_referers() -> list: - return ['','https://www.google.com/'] - - def _get_new_useragent(self) -> str: - if (not hasattr(self, "_list_of_useragent")) or (self._list_of_useragent == []): - self._list_of_useragent = self.useragent_list() - random.shuffle(self._list_of_useragent) - return self._list_of_useragent.pop() - - def _get_new_referer(self) -> str: - if (not hasattr(self, "_list_of_referer")) or (self._list_of_referer == []): - self._list_of_referer = self.default_referers() - random.shuffle(self._list_of_referer) - return self._list_of_referer.pop() - - - def _change_useragent(self) -> None: - """Change request useragent to random one - """ - self.headers['User-Agent'] = self._get_new_useragent() - - def _change_referer(self): - self.headers['Referer'] = self._get_new_referer() - - def get_from_list(self, url_list:list[str], randomize_useragent:bool=False, randomize_referer:bool=False, custom_headers:dict =None, allow_redirects: bool=True, verify_ssl:bool = True) -> list[requests.Response]: - """Complete requests to a list of urls and return the list of responses - """ - duplicate_list = url_list[:] - random.shuffle(duplicate_list) - - req = {} - for url in duplicate_list: - req[url] = self.get(url, randomize_useragent=randomize_useragent, randomize_referer=randomize_referer, custom_headers=custom_headers, allow_redirects=allow_redirects, verify_ssl=verify_ssl) - - return [req[url] for url in url_list] - - def get(self, url, randomize_useragent:bool=False, randomize_referer:bool=False, timeout:float =None, retry:int = 5, custom_headers:dict=None, allow_redirects:bool=True, verify_ssl:bool=True, data:dict=None) -> requests.Response: - """URL request with header randomization, timeout, and retries builtin - - Args: - url (str): URL to request - randomize_header (bool, optional): Randomize useragent and referer. Defaults to False. - timeout (float, optional): request timeout. Defaults to None. - retry(bool, optional): Number of times to retry on failure, Defaults to 5 - custom_headers(dict, optional): Custom headers, Defaults to None - - Returns: - request object: request object - """ - - if randomize_referer: - self._change_referer() - if randomize_useragent: - self._change_useragent() - - headers = custom_headers if custom_headers else self.headers - - if not timeout: - timeout = self.DEFAULT_TIMEOUT_s - - time_elapsed = time.time() - self.last_request - time.sleep(max(0, self.MIN_TIME_BET_REQ_s - time_elapsed)) - for i in range(retry): - try: - res = requests.get( - url, - headers=headers, - timeout=timeout, - allow_redirects=allow_redirects, - verify=verify_ssl) - res.raise_for_status() - break - except Exception as e: - time.sleep(0.5 * (2 ** (i))) - res = None - return res - - def create_session(self, retry:int = 5) -> requests.Session: - """Generate sessions object with adequate headers and adapters - - Args: - retry (int, optional): Number of times to retry on failed request. Defaults to 5. - - Returns: - sessions obj: sessions object - """ - s = requests.Session() - s.headers = self.headers - retries = Retry(total=retry, - backoff_factor=0.5, - status_forcelist=[429, 500, 502, 503, 504], - method_whitelist=["HEAD", "GET", "OPTIONS"] - ) - s.mount('http://', TimeoutHTTPAdapter(max_retries=retries)) - s.mount('https://', TimeoutHTTPAdapter(max_retries=retries)) - self.session = s - return s - - def session_get(self, url: str, custom_headers: dict = None, data:dict = None) -> requests.Response: - if custom_headers: - headers = custom_headers - else: - headers = self.headers - - return self.session.get(url, data=data, headers=headers) - - def session_get_from_list(self, url_list:list[str], data:dict=None, custom_headers:dict=None) -> list[requests.Response]: - duplicate_list = url_list[:] - random.shuffle(duplicate_list) - - req = {} - for url in duplicate_list: - req[url] = self.session_get(url, custom_headers=custom_headers, data=data) - - return [req[url] for url in url_list] - - def __repr__(self) -> str: - return f"req(MIN_TIME_BET_REQ_s={self.MIN_TIME_BET_REQ_s})" - - def __str__(self) -> str: - class_def = f""" - Requests Class - Min time between requests : {self.MIN_TIME_BET_REQ_s:.2f}s - Default Timeout : {self.DEFAULT_TIMEOUT_s:.2f}s - Headers: - """ - - header_def = "" - for key, value in self.headers.items(): - header_def += f"{key}:{value}\n" - - return class_def + header_def - - def download(self, url: str, save_to_file: bool = True) -> Path: - filepath = Path(url.split('/')[-1]) - ic(f"Downloading from {url} to {filepath}") - start = time.time() - with requests.get(url, stream=True, headers=self.headers) as r: - r.raise_for_status() - with open(filepath, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - size = filepath.stat().st_size/(1024*1024) #In MB - time_taken = time.time() - start - ic(f"Download completed ({round(size,2)} MB) in {round(time_taken,1)} sec(s) at {round(size/time_taken, 1)} MB/s") - return filepath - -class BS: - def __init__(self) -> BeautifulSoup: - self.bs = BeautifulSoup - return None - - def get_soup(self, res: requests.Response): - return self.bs(res.text, "html.parser") - - def get_soup_list(self, res_text_list: list[str]) -> list: - return [self.get_soup(res_text) for res_text in res_text_list] - - def __repr__(self) -> str: - return "BS()" - - def __str__(self) -> str: - return "Beautifulsoup class with useful function methods" - \ No newline at end of file diff --git a/src/jiosaavn/utils/__init__.py b/src/jiosaavn/utils/__init__.py new file mode 100644 index 0000000..9e42988 --- /dev/null +++ b/src/jiosaavn/utils/__init__.py @@ -0,0 +1,4 @@ +from .logger import log, ic +from .credentials import getpwd +from .cacher import Cache +from .file import sanitize \ No newline at end of file diff --git a/src/jiosaavn/Cacher.py b/src/jiosaavn/utils/cacher.py similarity index 50% rename from src/jiosaavn/Cacher.py rename to src/jiosaavn/utils/cacher.py index 4820983..c37894b 100644 --- a/src/jiosaavn/Cacher.py +++ b/src/jiosaavn/utils/cacher.py @@ -1,11 +1,14 @@ from pathlib import Path import pickle -from jiosaavn.debugger import ic + +from jiosaavn.utils import log class Cache: - def __init__(self, filepath: str) -> None: - self.filepath = Path(filepath) + def __init__(self, filepath: str|Path) -> None: + self.filepath = Path(str(filepath)) + self.initialize() self.cache_data = None + log.info(f'Initialized Cache Object at {self.filepath}') def __str__(self) -> str: return f"Cache file for Jiosaavn downloaded from {self.filepath.name}\n\nData:\n{self.cache_data}" @@ -16,25 +19,25 @@ def __repr__(self) -> str: @property def data(self) -> list: if self.cache_data is None: - if not self.filepath.is_file(): - self.cache_data = self._write_to_pickle([]) - else: - with open(self.filepath, 'rb') as f: - self.cache_data = pickle.load(f) + with open(self.filepath, 'rb') as f: + self.cache_data = pickle.load(f) return self.cache_data - def write(self, data: list) -> list: + def write(self, data: list) -> None: """Write the `data` to cache file - - Returns: - list: `data` """ + log.debug(f'Writing cache data to {self.filepath}') self.cache_data = self._write_to_pickle(data) - return self.data + return None def _write_to_pickle(self, data: list) -> list: with open(self.filepath, 'wb') as f: pickle.dump(data, f) - ic('Cache Updated') + log.debug('Cache Updated') return data - \ No newline at end of file + + def initialize(self) -> None: + """Creates the database file if not exist""" + if not self.filepath.is_file(): + self._write_to_pickle([]) + log.info('Cache file created') \ No newline at end of file diff --git a/src/jiosaavn/credentials.py b/src/jiosaavn/utils/credentials.py similarity index 93% rename from src/jiosaavn/credentials.py rename to src/jiosaavn/utils/credentials.py index a42bc48..3ccca48 100644 --- a/src/jiosaavn/credentials.py +++ b/src/jiosaavn/utils/credentials.py @@ -1,7 +1,8 @@ import sys if sys.platform=="win32": #pip install keyring - import keyring, getpass + import keyring + import getpass else: import getpass diff --git a/src/jiosaavn/utils/file.py b/src/jiosaavn/utils/file.py new file mode 100644 index 0000000..0637654 --- /dev/null +++ b/src/jiosaavn/utils/file.py @@ -0,0 +1,48 @@ +import unicodedata +import re + +def sanitize(filename: str) -> str: + """Return a fairly safe version of the filename. + + We don't limit ourselves to ascii, because we want to keep municipality + names, etc, but we do want to get rid of anything potentially harmful, + and make sure we do not exceed Windows filename length limits. + Hence a less safe blacklist, rather than a whitelist. + """ + blacklist = ["\\", "/", ":", "*", "?", "\"", "<", ">", "|", "\0"] + reserved = [ + "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", + "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", + "LPT6", "LPT7", "LPT8", "LPT9", + ] # Reserved words on Windows + filename = "".join(c for c in filename if c not in blacklist) + # Remove all charcters below code point 32 + filename = "".join(c for c in filename if 31 < ord(c)) + filename = unicodedata.normalize("NFKD", filename) + filename = filename.rstrip(". ") # Windows does not allow these at end + filename = filename.strip() + if all([x == "." for x in filename]): + filename = "__" + filename + if filename in reserved: + filename = "__" + filename + if len(filename) == 0: + filename = "__" + if len(filename) > 255: + parts = re.split(r"/|\\", filename)[-1].split(".") + if len(parts) > 1: + ext = "." + parts.pop() + filename = filename[:-len(ext)] + else: + ext = "" + if filename == "": + filename = "__" + if len(ext) > 254: + ext = ext[254:] + maxl = 255 - len(ext) + filename = filename[:maxl] + filename = filename + ext + # Re-check last character (if there was no extension) + filename = filename.rstrip(". ") + if len(filename) == 0: + filename = "__" + return filename diff --git a/src/jiosaavn/logger.py b/src/jiosaavn/utils/logger.py similarity index 80% rename from src/jiosaavn/logger.py rename to src/jiosaavn/utils/logger.py index c24ea9e..a5d93d1 100644 --- a/src/jiosaavn/logger.py +++ b/src/jiosaavn/utils/logger.py @@ -1,6 +1,7 @@ #myLog.py import logging, getpass, time from pathlib import Path +from icecream import ic class Log(object): #class CALog(logging.Logger): @@ -9,14 +10,15 @@ class Log(object): def __init__(self): user=getpass.getuser() self.logger=logging.getLogger(user) - self.logger.setLevel(logging.DEBUG) + self.logger.setLevel(logging.INFO) format='%(asctime)s-%(levelname)s: %(message)s' formatter=logging.Formatter(format, datefmt='%Y%m%d-%H%M%S') streamhandler=logging.StreamHandler() streamhandler.setFormatter(formatter) self.logger.addHandler(streamhandler) - Path('logs').mkdir(exist_ok=True) - logfile = Path('logs') / f'{user}{time.strftime("-%Y-%b")}.log' + log_dir: Path = Path(__file__).parent.parent.parent / 'logs' + log_dir.mkdir(exist_ok=True) + logfile = log_dir / f'{user}{time.strftime("-%Y-%b")}.log' filehandler=logging.FileHandler(logfile, encoding="utf-8") filehandler.setFormatter(formatter) self.logger.addHandler(filehandler) @@ -35,4 +37,6 @@ def log(self, level, msg): def setLevel(self, level): self.logger.setLevel(level) def disable(self): - logging.disable(50) \ No newline at end of file + logging.disable(50) + +log = Log() \ No newline at end of file