Skip to content

Commit

Permalink
Merge pull request #14 from sumeshi/refactor/v0.3.9
Browse files (browse the repository at this point in the history)
Refactor/v0.3.9
  • Loading branch information
sumeshi authored Dec 18, 2024
2 parents 3ec8e94 + 61bc443 commit 51c48ab
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 52 deletions.
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ Filters rows where the specified column matches the given regex.
| Category | Parameter | Data Type | Default Value | Description |
| -------- | ---------- | --------- | ------------- | -------------------------------------------------------------------- |
| Argument | colname | str | | The name of the column to test against the regex pattern. |
| Argument | regex | str | | A regular expression pattern used for matching values in the column. |
| Argument | pattern | str | | A regular expression pattern used for matching values in the column. |
| Argument | ignorecase | bool | False | If True, performs case-insensitive pattern matching. |

```
Expand All @@ -116,12 +116,12 @@ $ qsv load ./Security.csv - contains 'Date and Time' '10/6/2016'
#### sed
Replaces values using the specified regex.

| Category | Parameter | Data Type | Default Value | Description |
| -------- | ------------- | --------- | ------------- | ---------------------------------------------------------------------- |
| Argument | colname | str | | The name of the column whose values will be modified. |
| Argument | regex | str | | A regular expression pattern identifying substrings to replace. |
| Argument | replaced_text | str | | The text that replaces matched substrings. |
| Argument | ignorecase | bool | False | If True, the regex matching is performed in a case-insensitive manner. |
| Category | Parameter | Data Type | Default Value | Description |
| -------- | ----------- | --------- | ------------- | ---------------------------------------------------------------------- |
| Argument | colname | str | | The name of the column whose values will be modified. |
| Argument | pattern | str | | A regular expression pattern identifying substrings to replace. |
| Argument | replacement | str | | The text that replaces matched substrings. |
| Argument | ignorecase | bool | False | If True, the regex matching is performed in a case-insensitive manner. |

```
$ qsv load ./Security.csv - sed 'Date and Time' '/' '-'
Expand All @@ -133,7 +133,7 @@ This function is similar to running a grep command while preserving the header r

| Category | Parameter | Data Type | Default Value | Description |
| -------- | ---------- | --------- | ------------- | ------------------------------------------------------------------------------- |
| Argument | regex | str | | A regular expression pattern used to filter rows. Any row with a match is kept. |
| Argument | pattern | str | | A regular expression pattern used to filter rows. Any row with a match is kept. |
| Argument | ignorecase | bool | False | If True, the regex match is case-insensitive. |

```
Expand Down Expand Up @@ -195,7 +195,7 @@ The datetime format strings follow the same conventions as [Python](https://docs
| -------- | --------------- | --------- | ------------- | ---------------------------------------------------------------------------------------------- |
| Argument | colname | str | | The name of the date/time column to convert. |
| Option | timezone_from | str | "UTC" | The original timezone of the column's values. |
| Option | timezone_to | str | "Asia/Tokyo" | The target timezone to convert values into. |
| Option | timezone_to | str | "UTC" | The target timezone to convert values into. |
| Option | datetime_format | str | AutoDetect | The datetime format for parsing values. If not provided, the format is automatically inferred. |

```
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "qsv"
version = "0.3.8"
version = "0.3.9"
description = "A tool that provides elastic and rapid filtering for efficient analysis of huge CSV files, such as eventlogs."
readme = "README.md"
authors = [
Expand All @@ -9,9 +9,9 @@ authors = [
requires-python = ">=3.11"
dependencies = [
"fire>=0.7.0",
"polars>=1.11.0",
"polars>=1.17.1",
"pyyaml>=6.0.2",
"rich>=13.9.3",
"rich>=13.9.4",
]

[build-system]
Expand All @@ -20,8 +20,8 @@ build-backend = "hatchling.build"

[dependency-groups]
dev = [
"pdoc>=15.0.0",
"pytest>=8.3.3",
"pdoc>=15.0.1",
"pytest>=8.3.4",
]

[project.scripts]
Expand Down
24 changes: 12 additions & 12 deletions src/qsv/controllers/DataFrameController.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,27 +100,27 @@ def isin(self, colname: str, values: list):
self.df = self.df.filter(pl.col(colname).is_in(values))
return self

def contains(self, colname: str, regex: str, ignorecase: bool = False):
def contains(self, colname: str, pattern: str, ignorecase: bool = False):
"""[chainable] Filters rows where the specified column matches the given regex."""
logger.debug(f"filter condition: {regex} contains {colname}")
logger.debug(f"filter condition: {pattern} contains {colname}")
self.__check_exists_colnames([colname])
regex = regex if type(regex) is str else str(regex)
pattern = pattern if type(pattern) is str else str(pattern)
self.df = self.df.filter(
pl.col(colname).str.contains(f"(?i){regex}") if ignorecase else pl.col(colname).str.contains(regex)
pl.col(colname).str.contains(f"(?i){pattern}") if ignorecase else pl.col(colname).str.contains(pattern)
)
return self

def sed(self, colname: str, regex: str, replaced_text: str, ignorecase: bool = False):
def sed(self, colname: str, pattern: str, replacement: str, ignorecase: bool = False):
"""[chainable] Replaces values using the specified regex."""
logger.debug(f"sed condition: {regex} on {colname}")
logger.debug(f"sed condition: {pattern} on {colname}")
self.__check_exists_colnames([colname])
regex = regex if type(regex) is str else str(regex)
pattern = pattern if type(pattern) is str else str(pattern)
self.df = self.df.with_columns(
pl.col(colname).cast(pl.String).str.replace(f"(?i){regex}", replaced_text) if ignorecase else pl.col(colname).cast(pl.String).str.replace(regex, replaced_text)
pl.col(colname).cast(pl.String).str.replace(f"(?i){pattern}", replacement) if ignorecase else pl.col(colname).cast(pl.String).str.replace(pattern, replacement)
)
return self

def grep(self, regex: str, ignorecase: bool = False):
def grep(self, pattern: str, ignorecase: bool = False):
"""[chainable] Treats all columns as strings and filters rows where any column matches the specified regex."""
self.df = self.df.with_columns(
pl.concat_str(
Expand All @@ -129,7 +129,7 @@ def grep(self, regex: str, ignorecase: bool = False):
).alias('___combined')
)
self.df = self.df.filter(
pl.col('___combined').str.contains(f"(?i){regex}") if ignorecase else pl.col('___combined').str.contains(regex)
pl.col('___combined').str.contains(f"(?i){pattern}") if ignorecase else pl.col('___combined').str.contains(pattern)
)
self.df = self.df.drop(['___combined'])
return self
Expand Down Expand Up @@ -176,7 +176,7 @@ def changetz(
self,
colname: str,
timezone_from: str = "UTC",
timezone_to: str = "Asia/Tokyo",
timezone_to: str = "UTC",
datetime_format: str = None
):
"""[chainable] Changes the timezone of the specified date column."""
Expand Down Expand Up @@ -209,7 +209,7 @@ def headers(self, plain: bool = False) -> None:
else:
digits = len(str(len(self.df.collect_schema().names())))
TableView.print(
headers=["#", "Column Name"],
headers=[f"{''.join([' ' for _ in range(0, digits-1)])}#", "Column Name"],
values=[[str(i).zfill(digits), c] for i, c in enumerate(self.df.collect_schema().names())]
)

Expand Down
5 changes: 3 additions & 2 deletions src/qsv/views/TableView.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from rich.console import Console
from rich import box
from rich.table import Table
from rich.console import Console


class TableView(object):
@staticmethod
def print(headers: list[str], values: list[list[str]]):
table = Table(show_header=True)
table = Table(show_header=True, box=box.SQUARE_DOUBLE_HEAD)

for header in headers:
table.add_column(header)
Expand Down
48 changes: 24 additions & 24 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 51c48ab

Please sign in to comment.