Merge pull request #14 from sumeshi/refactor/v0.3.9

Refactor/v0.3.9
sumeshi · Dec 18, 2024 · 51c48ab · 51c48ab
2 parents 3ec8e94 + 61bc443
commit 51c48ab
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 52 deletions.
diff --git a/README.md b/README.md
@@ -106,7 +106,7 @@ Filters rows where the specified column matches the given regex.
 | Category | Parameter  | Data Type | Default Value | Description                                                          |
 | -------- | ---------- | --------- | ------------- | -------------------------------------------------------------------- |
 | Argument | colname    | str       |               | The name of the column to test against the regex pattern.            |
-| Argument | regex      | str       |               | A regular expression pattern used for matching values in the column. |
+| Argument | pattern    | str       |               | A regular expression pattern used for matching values in the column. |
 | Argument | ignorecase | bool      | False         | If True, performs case-insensitive pattern matching.                 |
 
 ```
@@ -116,12 +116,12 @@ $ qsv load ./Security.csv - contains 'Date and Time' '10/6/2016'
 #### sed
 Replaces values using the specified regex.
 
-| Category | Parameter     | Data Type | Default Value | Description                                                            |
-| -------- | ------------- | --------- | ------------- | ---------------------------------------------------------------------- |
-| Argument | colname       | str       |               | The name of the column whose values will be modified.                  |
-| Argument | regex         | str       |               | A regular expression pattern identifying substrings to replace.        |
-| Argument | replaced_text | str       |               | The text that replaces matched substrings.                             |
-| Argument | ignorecase    | bool      | False         | If True, the regex matching is performed in a case-insensitive manner. |
+| Category | Parameter   | Data Type | Default Value | Description                                                            |
+| -------- | ----------- | --------- | ------------- | ---------------------------------------------------------------------- |
+| Argument | colname     | str       |               | The name of the column whose values will be modified.                  |
+| Argument | pattern     | str       |               | A regular expression pattern identifying substrings to replace.        |
+| Argument | replacement | str       |               | The text that replaces matched substrings.                             |
+| Argument | ignorecase  | bool      | False         | If True, the regex matching is performed in a case-insensitive manner. |
 
 ```
 $ qsv load ./Security.csv - sed 'Date and Time' '/' '-'
@@ -133,7 +133,7 @@ This function is similar to running a grep command while preserving the header r
 
 | Category | Parameter  | Data Type | Default Value | Description                                                                     |
 | -------- | ---------- | --------- | ------------- | ------------------------------------------------------------------------------- |
-| Argument | regex      | str       |               | A regular expression pattern used to filter rows. Any row with a match is kept. |
+| Argument | pattern    | str       |               | A regular expression pattern used to filter rows. Any row with a match is kept. |
 | Argument | ignorecase | bool      | False         | If True, the regex match is case-insensitive.                                   |
 
 ```
@@ -195,7 +195,7 @@ The datetime format strings follow the same conventions as [Python](https://docs
 | -------- | --------------- | --------- | ------------- | ---------------------------------------------------------------------------------------------- |
 | Argument | colname         | str       |               | The name of the date/time column to convert.                                                   |
 | Option   | timezone_from   | str       | "UTC"         | The original timezone of the column's values.                                                  |
-| Option   | timezone_to     | str       | "Asia/Tokyo"  | The target timezone to convert values into.                                                    |
+| Option   | timezone_to     | str       | "UTC"         | The target timezone to convert values into.                                                    |
 | Option   | datetime_format | str       | AutoDetect    | The datetime format for parsing values. If not provided, the format is automatically inferred. |
 
 ```

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "qsv"
-version = "0.3.8"
+version = "0.3.9"
 description = "A tool that provides elastic and rapid filtering for efficient analysis of huge CSV files, such as eventlogs."
 readme = "README.md"
 authors = [
@@ -9,9 +9,9 @@ authors = [
 requires-python = ">=3.11"
 dependencies = [
     "fire>=0.7.0",
-    "polars>=1.11.0",
+    "polars>=1.17.1",
     "pyyaml>=6.0.2",
-    "rich>=13.9.3",
+    "rich>=13.9.4",
 ]
 
 [build-system]
@@ -20,8 +20,8 @@ build-backend = "hatchling.build"
 
 [dependency-groups]
 dev = [
-    "pdoc>=15.0.0",
-    "pytest>=8.3.3",
+    "pdoc>=15.0.1",
+    "pytest>=8.3.4",
 ]
 
 [project.scripts]

diff --git a/src/qsv/controllers/DataFrameController.py b/src/qsv/controllers/DataFrameController.py
@@ -100,27 +100,27 @@ def isin(self, colname: str, values: list):
         self.df = self.df.filter(pl.col(colname).is_in(values))
         return self
 
-    def contains(self, colname: str, regex: str, ignorecase: bool = False):
+    def contains(self, colname: str, pattern: str, ignorecase: bool = False):
         """[chainable] Filters rows where the specified column matches the given regex."""
-        logger.debug(f"filter condition: {regex} contains {colname}")
+        logger.debug(f"filter condition: {pattern} contains {colname}")
         self.__check_exists_colnames([colname])
-        regex = regex if type(regex) is str else str(regex)
+        pattern = pattern if type(pattern) is str else str(pattern)
         self.df = self.df.filter(
-            pl.col(colname).str.contains(f"(?i){regex}") if ignorecase else pl.col(colname).str.contains(regex)
+            pl.col(colname).str.contains(f"(?i){pattern}") if ignorecase else pl.col(colname).str.contains(pattern)
         )
         return self
 
-    def sed(self, colname: str, regex: str, replaced_text: str, ignorecase: bool = False):
+    def sed(self, colname: str, pattern: str, replacement: str, ignorecase: bool = False):
         """[chainable] Replaces values using the specified regex."""
-        logger.debug(f"sed condition: {regex} on {colname}")
+        logger.debug(f"sed condition: {pattern} on {colname}")
         self.__check_exists_colnames([colname])
-        regex = regex if type(regex) is str else str(regex)
+        pattern = pattern if type(pattern) is str else str(pattern)
         self.df = self.df.with_columns(
-            pl.col(colname).cast(pl.String).str.replace(f"(?i){regex}", replaced_text) if ignorecase else pl.col(colname).cast(pl.String).str.replace(regex, replaced_text)
+            pl.col(colname).cast(pl.String).str.replace(f"(?i){pattern}", replacement) if ignorecase else pl.col(colname).cast(pl.String).str.replace(pattern, replacement)
         )
         return self
 
-    def grep(self, regex: str, ignorecase: bool = False):
+    def grep(self, pattern: str, ignorecase: bool = False):
         """[chainable] Treats all columns as strings and filters rows where any column matches the specified regex."""
         self.df = self.df.with_columns(
             pl.concat_str(
@@ -129,7 +129,7 @@ def grep(self, regex: str, ignorecase: bool = False):
             ).alias('___combined')
         )
         self.df = self.df.filter(
-            pl.col('___combined').str.contains(f"(?i){regex}") if ignorecase else pl.col('___combined').str.contains(regex)
+            pl.col('___combined').str.contains(f"(?i){pattern}") if ignorecase else pl.col('___combined').str.contains(pattern)
         )
         self.df = self.df.drop(['___combined'])
         return self
@@ -176,7 +176,7 @@ def changetz(
             self,
             colname: str,
             timezone_from: str = "UTC",
-            timezone_to: str = "Asia/Tokyo",
+            timezone_to: str = "UTC",
             datetime_format: str = None
         ):
         """[chainable] Changes the timezone of the specified date column."""
@@ -209,7 +209,7 @@ def headers(self, plain: bool = False) -> None:
         else:
             digits = len(str(len(self.df.collect_schema().names())))
             TableView.print(
-                headers=["#", "Column Name"],
+                headers=[f"{''.join([' ' for _ in range(0, digits-1)])}#", "Column Name"],
                 values=[[str(i).zfill(digits), c] for i, c in enumerate(self.df.collect_schema().names())]
             )
 

diff --git a/src/qsv/views/TableView.py b/src/qsv/views/TableView.py
@@ -1,11 +1,12 @@
-from rich.console import Console
+from rich import box
 from rich.table import Table
+from rich.console import Console
 
 
 class TableView(object):
     @staticmethod
     def print(headers: list[str], values: list[list[str]]):
-        table = Table(show_header=True)
+        table = Table(show_header=True, box=box.SQUARE_DOUBLE_HEAD)
 
         for header in headers:
             table.add_column(header)

diff --git a/uv.lock b/uv.lock