From 04899348b8baacdc0eca7a95065580a5f7eeeb47 Mon Sep 17 00:00:00 2001 From: "S.Nakano" <35072092+sumeshi@users.noreply.github.com> Date: Fri, 22 Nov 2024 20:02:15 +0000 Subject: [PATCH] release: v0.3.7 --- README.md | 4 ++++ pyproject.toml | 2 +- src/qsv/controllers/CsvController.py | 4 +++- src/qsv/controllers/DataFrameController.py | 4 ++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 67340c8..aa78108 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,10 @@ Loads the specified CSV files. ``` Arguments: *path: tuple[str] + +Options: + separator: str = ',' + low_memory: bool = False ``` examples diff --git a/pyproject.toml b/pyproject.toml index 9e1fd81..bc1f66f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "qsv" -version = "0.3.6" +version = "0.3.7" description = "A tool that provides elastic and rapid filtering for efficient analysis of huge CSV files, such as eventlogs." readme = "README.md" authors = [ diff --git a/src/qsv/controllers/CsvController.py b/src/qsv/controllers/CsvController.py index fa310b3..a116c2e 100644 --- a/src/qsv/controllers/CsvController.py +++ b/src/qsv/controllers/CsvController.py @@ -5,11 +5,13 @@ class CsvController(object): def __init__(self, path): self.path: Path = path - def get_dataframe(self) -> pl.DataFrame: + def get_dataframe(self, separator: str = ',', low_memory: bool = False) -> pl.DataFrame: df = pl.scan_csv( self.path, try_parse_dates=True, rechunk=True, truncate_ragged_lines=True, + separator=separator, + low_memory=low_memory, ) return df diff --git a/src/qsv/controllers/DataFrameController.py b/src/qsv/controllers/DataFrameController.py index af7732e..9baed6f 100644 --- a/src/qsv/controllers/DataFrameController.py +++ b/src/qsv/controllers/DataFrameController.py @@ -53,11 +53,11 @@ def quilt(self, config: str, *path: tuple[str]) -> None: getattr(self, k)() # -- initializer -- - def load(self, *path: tuple[str]): + def load(self, *path: tuple[str], separator: str = ',', low_memory: bool = False): """[initializer] Loads the specified CSV files.""" logger.debug(f"{len(path)} files are loaded. [{', '.join(path)}]") self.__check_exists_path(path) - self.df = CsvController(path=path).get_dataframe() + self.df = CsvController(path=path).get_dataframe(separator=separator, low_memory=low_memory) return self # -- chainable --