Skip to content

Commit

Permalink
Implement "ne" built-in check for the Ibis backend
Browse files Browse the repository at this point in the history
Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
  • Loading branch information
deepyaman committed Feb 8, 2025
1 parent bd88b25 commit 9cdcae4
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 4 deletions.
14 changes: 14 additions & 0 deletions pandera/backends/ibis/builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,17 @@ def equal_to(data: IbisData, value: Any) -> ir.Table:
equal to this value.
"""
return data.table[data.key] == value


@register_builtin_check(

Check warning on line 28 in pandera/backends/ibis/builtin_checks.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/builtin_checks.py#L28

Added line #L28 was not covered by tests
aliases=["ne"],
error="not_equal_to({value})",
)
def not_equal_to(data: IbisData, value: Any) -> ir.Table:

Check warning on line 32 in pandera/backends/ibis/builtin_checks.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/builtin_checks.py#L32

Added line #L32 was not covered by tests
"""Ensure no element of a data container equals a certain value.
:param data: NamedTuple IbisData contains the table and column name for the check. The key
to access the table is "table", and the key to access the column name is "key".
:param value: This value must not occur in the checked data structure.
"""
return data.table[data.key] != value

Check warning on line 39 in pandera/backends/ibis/builtin_checks.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/builtin_checks.py#L39

Added line #L39 was not covered by tests
2 changes: 1 addition & 1 deletion pandera/backends/pandas/builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def equal_to(data: PandasData, value: Any) -> PandasData:
error="not_equal_to({value})",
)
def not_equal_to(data: PandasData, value: Any) -> PandasData:
"""Ensure no elements of a data container equals a certain value.
"""Ensure no element of a data container equals a certain value.
:param value: This value must not occur in the checked
:class:`pandas.Series`.
Expand Down
2 changes: 1 addition & 1 deletion pandera/backends/polars/builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def equal_to(data: PolarsData, value: Any) -> pl.LazyFrame:
error="not_equal_to({value})",
)
def not_equal_to(data: PolarsData, value: Any) -> pl.LazyFrame:
"""Ensure no elements of a data container equals a certain value.
"""Ensure no element of a data container equals a certain value.
:param data: NamedTuple PolarsData contains the dataframe and column name for the check. The key
to access the dataframe is "dataframe", and the key to access the column name is "key".
Expand Down
4 changes: 2 additions & 2 deletions pandera/backends/pyspark/builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def equal_to(data: PysparkDataframeColumnObject, value: Any) -> bool:
)
)
def not_equal_to(data: PysparkDataframeColumnObject, value: Any) -> bool:
"""Ensure no elements of a data container equals a certain value.
"""Ensure no element of a data container equals a certain value.
:param data: NamedTuple PysparkDataframeColumnObject contains the dataframe and column name for the check. The keys
to access the dataframe is "dataframe" and column name using "column_name".
to access the dataframe is "dataframe" and column name using "column_name".
:param value: This value must not occur in the checked
"""
cond = col(data.column_name) != value
Expand Down
59 changes: 59 additions & 0 deletions tests/ibis/test_ibis_builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def check_function(


class TestEqualToCheck(BaseClass):
"""This class is used to test the equal to check"""

sample_numeric_data = {
"test_pass_data": [("foo", 30), ("bar", 30)],
"test_fail_data": [("foo", 30), ("bar", 31)],
Expand Down Expand Up @@ -169,3 +171,60 @@ def test_equal_to_check(self, check_fn, datatype, data) -> None:
datatype,
data["test_expression"],
)


class TestNotEqualToCheck(BaseClass):
    """Tests for the ``not_equal_to`` built-in check and its ``ne`` alias."""

    # Each fixture holds rows expected to pass, rows expected to fail, and
    # the value handed to the check ("test_expression").
    sample_numeric_data = {
        "test_pass_data": [("foo", 31), ("bar", 32)],
        "test_fail_data": [("foo", 30), ("bar", 31)],
        "test_expression": 30,
    }

    sample_string_data = {
        "test_pass_data": [("foo", "b"), ("bar", "c")],
        "test_fail_data": [("foo", "a"), ("bar", "a")],
        "test_expression": "a",
    }

    def pytest_generate_tests(self, metafunc):
        """Parametrize the test being collected from ``get_data_param``.

        The parameter sets are looked up by the name of the test function,
        and the argument names are the sorted keys of the first set.
        """
        # Invoked once for every test function.
        test_name = metafunc.function.__name__
        cases = self.get_data_param()[test_name]
        arg_names = sorted(cases[0].keys())

        rows = []
        for case in cases:
            rows.append([case[arg] for arg in arg_names])

        metafunc.parametrize(arg_names, rows)

    def get_data_param(self):
        """Return the parameter sets for each test, keyed by test name.

        Every entry pairs a datatype with the sample data to run against it.
        """
        numeric = self.sample_numeric_data
        as_float = self.convert_data(self.sample_numeric_data, "float64")

        cases = [
            {"datatype": dt.Int32, "data": numeric},
            {"datatype": dt.Int64, "data": numeric},
            {"datatype": dt.String, "data": self.sample_string_data},
            {"datatype": dt.Float64, "data": as_float},
        ]
        return {"test_not_equal_to_check": cases}

    @pytest.mark.parametrize("check_fn", [pa.Check.not_equal_to, pa.Check.ne])
    def test_not_equal_to_check(self, check_fn, datatype, data) -> None:
        """Run the not-equal-to check against the passing and failing data."""
        passing = data["test_pass_data"]
        failing = data["test_fail_data"]
        forbidden_value = data["test_expression"]
        self.check_function(
            check_fn,
            passing,
            failing,
            datatype,
            forbidden_value,
        )

0 comments on commit 9cdcae4

Please sign in to comment.