From 52be5a192b5310e399054aae9f974453c250e109 Mon Sep 17 00:00:00 2001
From: Felipe Adeildo
Date: Mon, 2 Dec 2024 15:21:54 -0300
Subject: [PATCH] chore(lint): improve code formatting and readability across multiple files

---
 pysus/ftp/__init__.py          | 23 ++++++++++++++++-------
 pysus/online_data/ESUS.py      |  4 ++--
 pysus/online_data/IBGE.py      | 17 +++++++++--------
 pysus/online_data/Infogripe.py |  2 +-
 pysus/preprocessing/ESUS.py    |  8 +++++---
 pysus/tests/test_ftp.py        |  5 +++--
 6 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/pysus/ftp/__init__.py b/pysus/ftp/__init__.py
index c90cd32..93d0dd0 100644
--- a/pysus/ftp/__init__.py
+++ b/pysus/ftp/__init__.py
@@ -22,11 +22,10 @@
 import humanize
 from aioftp import Client
 from loguru import logger
+from pysus.data.local import Data
 from tqdm import tqdm
 from typing_extensions import Self
 
-from pysus.data.local import Data
-
 # Type aliases
 PathLike = Union[str, pathlib.Path]
 FileContent = Dict[str, Union["Directory", "File"]]
@@ -400,13 +399,17 @@ def load_directory_content(path: str) -> FileContent:
     def line_parser(line: str):
         if "<DIR>" in line:
             date, time, _, name = line.strip().split(maxsplit=3)
-            modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p")
+            modify = datetime.strptime(
+                f"{date} {time}", "%m-%d-%y %I:%M%p"
+            )
             info = {"size": 0, "type": "dir", "modify": modify}
             xpath = f"{path}/{name}"
             content[name] = Directory(xpath)
         else:
             date, time, size, name = line.strip().split(maxsplit=3)
-            modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p")
+            modify = datetime.strptime(
+                f"{date} {time}", "%m-%d-%y %I:%M%p"
+            )
             info: FileInfo = {
                 "size": size,
                 "type": "file",
@@ -478,7 +481,9 @@ def content(self) -> List[Union[Directory, File]]:
         inside content, `load()` the directory and call `content` again.
         """
         if not self.__content__:
-            logger.info("content is not loaded, use `load()` to load default paths")
+            logger.info(
+                "content is not loaded, use `load()` to load default paths"
+            )
             return []
 
         return sorted(list(self.__content__.values()), key=str)
@@ -543,7 +548,9 @@ def get_files(self, *args, **kwargs) -> list[File]:
         """
         ...
 
-    def download(self, files: List[File], local_dir: str = CACHEPATH) -> List[str]:
+    def download(
+        self, files: List[File], local_dir: str = CACHEPATH
+    ) -> List[str]:
         """
         Downloads a list of Files.
""" @@ -558,7 +565,9 @@ def download(self, files: List[File], local_dir: str = CACHEPATH) -> List[str]: return dfiles[0] return dfiles - async def async_download(self, files: List[File], local_dir: str = CACHEPATH): + async def async_download( + self, files: List[File], local_dir: str = CACHEPATH + ): """ Asynchronously downloads a list of files """ diff --git a/pysus/online_data/ESUS.py b/pysus/online_data/ESUS.py index dbe9008..ef2b990 100644 --- a/pysus/online_data/ESUS.py +++ b/pysus/online_data/ESUS.py @@ -21,7 +21,7 @@ def download(uf, cache=True, checkmemory=True): today = date.today() dt = today.strftime("_%d_%m_%Y") base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve- - url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" + url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231 out = f"ESUS_{uf}_{dt}.parquet" cachefile = os.path.join(CACHEPATH, out) @@ -36,7 +36,7 @@ def download(uf, cache=True, checkmemory=True): fname = fetch(base, uf, url) size = os.stat(fname).st_size if size > 50e6 and checkmemory: - print(f"Downloaded data is to large:{size / 1e6} MB compressed.") + print(f"Downloaded data is to large: {size / 1e6} MB compressed.") print( "Only loading the first 1000 rows. If your computer has enough" + " memory, set 'checkmemory' to False" diff --git a/pysus/online_data/IBGE.py b/pysus/online_data/IBGE.py index 3771b2e..33fba90 100644 --- a/pysus/online_data/IBGE.py +++ b/pysus/online_data/IBGE.py @@ -1,6 +1,7 @@ """ Helper functions to download official statistics from IBGE SIDRA """ + import ssl # Builtin from pathlib import Path from tempfile import TemporaryDirectory @@ -142,7 +143,7 @@ def get_sidra_table( print(f"Requesting data from {url}") try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: df = pd.DataFrame(response.json()) except HTTPError: response = requests.get(url) @@ -163,7 +164,7 @@ def list_agregados(**kwargs): url += "&".join([f"{k}={v}" for k, v in kwargs.items()]) print(f"Fetching Data groupings from {url}") try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: table = pd.DataFrame(response.json()) except requests.exceptions.SSLError as e: print(f"Failed fetching aggregates: {e}") @@ -183,7 +184,7 @@ def localidades_por_agregado(agregado: int, nivel: str): """ url = APIBASE + f"agregados/{agregado}/localidades/{nivel}" try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: table = pd.DataFrame(response.json()) except Exception as e: print(f"Could not download from {url}\n{e}") @@ -199,7 +200,7 @@ def metadados(agregado: int): """ url = APIBASE + f"agregados/{agregado}/metadados" try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: data = response.json() except Exception as e: print(f"Could not download from {url}\n{e}") @@ -215,7 +216,7 @@ def lista_periodos(agregado: int): """ url = APIBASE + f"agregados/{agregado}/periodos" try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: table = pd.DataFrame(response.json()) except Exception: return None @@ -309,10 +310,10 @@ def __init__( def _fetch_JSON(self): try: print(f"Fetching {self.url}") - with (get_legacy_session() as s, s.get(self.url) as response): + with get_legacy_session() as s, s.get(self.url) as response: 
                 self.JSON = response.json()
         except Exception as e:
-            print(f"Couldn't download data:\n{e}")
+            print("Couldn't download data:", e, sep="\n")
 
     def to_dataframe(self):
         return pd.DataFrame(self.JSON)
@@ -389,7 +390,7 @@ def get_population(
         opts = ["ALF", "ESCA", "ESCB", "IDOSO", "RENDA"]
         if not censo_data or censo_data not in opts:
             raise ValueError(
-                f"Incorrect `censo_data` parameter. Options: {opts}"
+                f"Incorrect 'censo_data' parameter. Options: {opts}"
             )
         file = [f for f in files if censo_data in f.name][0].download()
     else:
diff --git a/pysus/online_data/Infogripe.py b/pysus/online_data/Infogripe.py
index a576219..bd496c7 100644
--- a/pysus/online_data/Infogripe.py
+++ b/pysus/online_data/Infogripe.py
@@ -18,6 +18,6 @@ def list_datasets():
 
 
 def download(dataset_name):
-    url = BASEURL + DATASETS[dataset_name]
+    url = BASEURL + DATASETS[dataset_name] + "?inline=false"
     df = pd.read_csv(url, delimiter=";", decimal=",")
     return df
diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py
index 5552f58..110215c 100644
--- a/pysus/preprocessing/ESUS.py
+++ b/pysus/preprocessing/ESUS.py
@@ -26,8 +26,8 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
         inplace=True,
     )
     print(
-        f"Removed {old_size - len(df)} rows with missing dates of symptoms,"
-        " notification or testing"
+        f"Removed {old_size - len(df)} rows with missing dates of symptoms, "
+        "notification or testing"
     )
 
     # Desconsiderando os resultados negativos ou inconclusivos
@@ -52,7 +52,9 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
     ini = np.arange(0, 81, 5)
     fin = np.arange(5, 86, 5)
     fin[-1] = 120
-    faixa_etaria = {f"[{i},{f})": (i, f) for i, f in zip(ini, fin)}
+    faixa_etaria = {
+        f"[{i},{f})": (i, f) for i, f in zip(ini, fin)  # noqa: E231
+    }
     labels = list(faixa_etaria.keys())
 
     df["faixa_etaria"] = [
diff --git a/pysus/tests/test_ftp.py b/pysus/tests/test_ftp.py
index f424966..9f78dfd 100644
--- a/pysus/tests/test_ftp.py
+++ b/pysus/tests/test_ftp.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 import pandas as pd
-
 from pysus.data.local import ParquetSet
 from pysus.ftp import DIRECTORY_CACHE, Database, Directory, File
 from pysus.ftp.databases import (
@@ -41,7 +40,9 @@ def _test_database(testcase: unittest.TestCase, database: Database):
     )
     testcase.assertTrue(isinstance(downloaded_file, ParquetSet))
     testcase.assertTrue(Path(downloaded_file.path).exists())
-    testcase.assertTrue(isinstance(downloaded_file.to_dataframe(), pd.DataFrame))
+    testcase.assertTrue(
+        isinstance(downloaded_file.to_dataframe(), pd.DataFrame)
+    )
     testcase.assertTrue(not downloaded_file.to_dataframe().empty)
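
Reviewer note on the IBGE.py hunks: the repeated rewrite of `with (get_legacy_session() as s, s.get(url) as response):` to the unparenthesized form is more than cosmetic. Parenthesized multi-item `with` statements are only officially supported since Python 3.10, so dropping the parentheses also keeps the module importable on older interpreters. A minimal, self-contained sketch of the two forms; the `io.StringIO` objects stand in for the real session and response and are an invention of this example, not code from the patch:

    import io

    # Unparenthesized multi-context form: valid on any Python 3.
    with io.StringIO("ibge") as a, io.StringIO("sidra") as b:
        print(a.read(), b.read())  # both objects are closed on exit

    # The parenthesized variant below is a syntax error before
    # Python 3.10, which is what the patch avoids:
    # with (io.StringIO("ibge") as a, io.StringIO("sidra") as b):
    #     ...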
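
Similarly, for the reflowed `strptime` calls in `line_parser` (pysus/ftp/__init__.py), here is a short sketch of the parsing they perform. The two listing lines are invented to mimic the Windows-style LIST output of the DATASUS FTP server; only the `"<DIR>"` marker test, the `split(maxsplit=3)` call, and the `"%m-%d-%y %I:%M%p"` format string come from the patched code:

    from datetime import datetime

    # Hypothetical listing lines; names, sizes and dates are made up.
    lines = [
        "12-01-24  10:30AM       <DIR>          SIASUS",
        "12-01-24  10:30AM              123456 ABAC3112.dbc",
    ]

    for line in lines:
        # Mirrors line_parser(): split into at most four fields, then
        # parse the timestamp with the same format string as the patch.
        date, time, marker_or_size, name = line.strip().split(maxsplit=3)
        modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p")
        kind = "dir" if marker_or_size == "<DIR>" else "file"
        print(kind, name, modify)

Each iteration yields the (type, name, modified-at) triple that `line_parser` packs into its `info` dict.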