Typing/mypy settings #1911

Draft · wants to merge 15 commits into main
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -353,7 +353,7 @@ def filter(self, record: pylogging.LogRecord) -> bool:

if (
msg.strip().startswith("document isn't included in any toctree")
-and record.location == "_tags/tagsindex"
+and record.location == "_tags/tagsindex" # type: ignore [attr-defined]
):
# ignore this warning, since we don't want the side nav to be
# cluttered with the tags index page.
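`record.location` is attached to the log record by Sphinx at runtime, so it does not exist on the `logging.LogRecord` stubs mypy checks against; the `# type: ignore [attr-defined]` silences exactly that error. A minimal sketch of the same pattern (the helper name is hypothetical, not part of this PR):

```python
import logging


def is_tags_index_warning(record: logging.LogRecord) -> bool:
    # Sphinx sets `location` dynamically, so it is absent from the
    # logging.LogRecord stubs; without the ignore mypy reports:
    #   error: "LogRecord" has no attribute "location"  [attr-defined]
    return record.location == "_tags/tagsindex"  # type: ignore[attr-defined]
```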
8 changes: 4 additions & 4 deletions pandera/api/pyspark/column_schema.py
@@ -1,7 +1,7 @@
"""Core pyspark column specification."""

import copy
-from typing import Any, List, Optional, Type, TypeVar, cast
+from typing import Any, Optional, Type, TypeVar, cast


import pyspark.sql as ps

@@ -25,7 +25,7 @@
checks: Optional[CheckList] = None,
nullable: bool = False,
coerce: bool = False,
-name: Any = None,
+name: Optional[Any] = None,
title: Optional[str] = None,
description: Optional[str] = None,
metadata: Optional[dict] = None,
@@ -95,7 +95,7 @@
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
"""Validate a specific column in a dataframe.
@@ -165,7 +165,7 @@
# Schema Transforms Methods #
#############################

-def update_checks(self, checks: List[Check]):
+def update_checks(self, checks: CheckList):

"""Create a new Schema with a new set of Checks

:param checks: checks to set on the new schema
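Most of the signature changes in this file (and across the PR) follow one pattern: under mypy's `no_implicit_optional` behaviour, the default since mypy 0.990, a parameter with a `None` default must be annotated `Optional[...]` explicitly. A minimal before/after sketch, using a stand-in `ErrorHandler` class rather than pandera's real one:

```python
from typing import Optional


class ErrorHandler:
    """Stand-in for pandera's error handler, for illustration only."""


# Rejected by mypy:
#   error: Incompatible default for argument "error_handler"
#   (default has type "None", argument has type "ErrorHandler")
# def validate(error_handler: ErrorHandler = None) -> None: ...


def validate(error_handler: Optional[ErrorHandler] = None) -> None:
    # The Optional makes the None default explicit and type-safe.
    handler = error_handler if error_handler is not None else ErrorHandler()
    print(type(handler).__name__)


validate()  # falls back to a fresh ErrorHandler
```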
4 changes: 2 additions & 2 deletions pandera/api/pyspark/components.py
@@ -14,7 +14,7 @@ class Column(ColumnSchema):

def __init__(
self,
-dtype: PySparkDtypeInputTypes = None,
+dtype: Optional[PySparkDtypeInputTypes] = None,
checks: Optional[CheckList] = None,
nullable: bool = False,
coerce: bool = False,
@@ -125,7 +125,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = True,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
) -> ps.DataFrame:
"""Validate a Column in a DataFrame object.

4 changes: 2 additions & 2 deletions pandera/api/pyspark/container.py
@@ -47,7 +47,7 @@ def __init__(
Dict[Any, pandera.api.pyspark.components.Column] # type: ignore [name-defined]
] = None,
checks: Optional[CheckList] = None,
-dtype: PySparkDtypeInputTypes = None,
+dtype: Optional[PySparkDtypeInputTypes] = None,
coerce: bool = False,
strict: StrictType = False,
name: Optional[str] = None,
@@ -354,7 +354,7 @@ def _validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
return self.get_backend(check_obj).validate(
check_obj=check_obj,
2 changes: 1 addition & 1 deletion pandera/api/pyspark/model.py
@@ -366,7 +366,7 @@
Similar to inspect.get_members but bypass descriptors __get__.
"""
bases = inspect.getmro(cls)[:-1] # bases -> DataFrameModel -> object
-attrs = {}
+attrs: Dict[str, Any] = {}

for base in reversed(bases):
if issubclass(base, DataFrameModel):
attrs.update(base.__dict__)
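The new annotation on `attrs` addresses another common mypy requirement: an empty literal gives mypy nothing to infer an element type from, so it asks for an explicit annotation. A small illustration (the keys and values below are made up):

```python
from typing import Any, Dict

# Without the annotation, mypy typically reports something like:
#   error: Need type annotation for "attrs" (hint: "attrs: Dict[<type>, <type>] = ...")
attrs: Dict[str, Any] = {}
attrs.update({"field_a": 1, "field_b": "x"})  # illustrative values only
print(attrs)
```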
28 changes: 14 additions & 14 deletions pandera/api/pyspark/model_components.py
@@ -43,7 +43,7 @@ def _to_schema_component(
self,
dtype: PySparkDtypeInputTypes,
component: Type[SchemaComponent],
-checks: CheckArg = None,
+checks: Optional[CheckArg] = None,
**kwargs: Any,
) -> SchemaComponent:
if self.dtype_kwargs:
@@ -54,9 +54,9 @@ def _to_schema_component(
def to_column(
self,
dtype: PySparkDtypeInputTypes,
-checks: CheckArg = None,
+checks: Optional[CheckArg] = None,
required: bool = True,
-name: str = None,
+name: Optional[str] = None,
) -> Column:
"""Create a schema_components.Column from a field."""
return self._to_schema_component(
@@ -92,15 +92,15 @@ def properties(self) -> Dict[str, Any]:

def Field(
*,
-eq: Any = None,
-ne: Any = None,
-gt: Any = None,
-ge: Any = None,
-lt: Any = None,
-le: Any = None,
-in_range: Dict[str, Any] = None,
-isin: Iterable = None,
-notin: Iterable = None,
+eq: Optional[Any] = None,
+ne: Optional[Any] = None,
+gt: Optional[Any] = None,
+ge: Optional[Any] = None,
+lt: Optional[Any] = None,
+le: Optional[Any] = None,
+in_range: Optional[Dict[str, Any]] = None,
+isin: Optional[Iterable] = None,
+notin: Optional[Iterable] = None,
str_contains: Optional[str] = None,
str_endswith: Optional[str] = None,
str_length: Optional[Dict[str, Any]] = None,
@@ -112,8 +112,8 @@ def Field(
regex: bool = False,
ignore_na: bool = True,
raise_warning: bool = False,
-n_failure_cases: int = None,
-alias: Any = None,
+n_failure_cases: Optional[int] = None,
+alias: Optional[Any] = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
6 changes: 3 additions & 3 deletions pandera/api/pyspark/types.py
@@ -1,7 +1,7 @@
"""Utility functions for pyspark validation."""

from functools import lru_cache
-from typing import List, NamedTuple, Tuple, Type, Union
+from typing import List, NamedTuple, Tuple, Type, Union, Any

from numpy import bool_ as np_bool
from packaging import version

@@ -92,7 +92,7 @@
)


-def is_table(obj):
+def is_table(obj: Any) -> bool:

"""Verifies whether an object is table-like.

Where a table is a 2-dimensional data matrix of rows and columns, which
@@ -101,6 +101,6 @@
return isinstance(obj, supported_types().table_types)


-def is_bool(x):
+def is_bool(x: Any) -> bool:

"""Verifies whether an object is a boolean type."""
return isinstance(x, (bool, type(pst.BooleanType()), np_bool))
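Adding `obj: Any` / `x: Any` and explicit `bool` return types is what a stricter mypy setup (for example `disallow_untyped_defs`) asks for: every function needs parameter and return annotations, with `Any` as the explicit escape hatch when the input really can be anything. A rough stand-in for the pattern, not the pyspark-backed checks themselves:

```python
from typing import Any


def is_table(obj: Any) -> bool:
    # Stand-in predicate; the real function tests against pyspark table types.
    return isinstance(obj, (list, tuple))


print(is_table([(1, "a"), (2, "b")]))  # True
print(is_table(42))                    # False
```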
5 changes: 3 additions & 2 deletions pandera/backends/polars/builtin_checks.py
@@ -1,6 +1,7 @@
"""Built-in checks for polars."""

import re
+from collections.abc import Collection

from typing import Any, Iterable, Optional, TypeVar, Union

import polars as pl
@@ -140,7 +141,7 @@
@register_builtin_check(
error="isin({allowed_values})",
)
-def isin(data: PolarsData, allowed_values: Iterable) -> pl.LazyFrame:
+def isin(data: PolarsData, allowed_values: Collection) -> pl.LazyFrame:

"""Ensure only allowed values occur within a series.

This checks whether all elements of a :class:`polars.Series`
@@ -160,7 +161,7 @@
@register_builtin_check(
error="notin({forbidden_values})",
)
-def notin(data: PolarsData, forbidden_values: Iterable) -> pl.LazyFrame:
+def notin(data: PolarsData, forbidden_values: Collection) -> pl.LazyFrame:

"""Ensure some defined values don't occur within a series.

Like :meth:`Check.isin` this check operates on single characters if
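Switching `allowed_values` and `forbidden_values` from `Iterable` to `Collection` tightens the contract: a `Collection` guarantees `__contains__`, `__len__`, and `__iter__`, whereas a bare iterator only promises a single pass. A small sketch of why that matters for membership checks (the helper below is illustrative, not pandera's API):

```python
from collections.abc import Collection
from typing import Any


def isin_example(allowed_values: Collection, candidate: Any) -> bool:
    # A Collection can be sized and iterated more than once, which is what a
    # membership check needs; sets, lists, and tuples all qualify.
    return candidate in allowed_values


print(isin_example({"a", "b", "c"}, "a"))   # True
print(isinstance(iter(["a"]), Collection))  # False: an iterator is Iterable but not a Collection
```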
4 changes: 2 additions & 2 deletions pandera/backends/pyspark/checks.py
@@ -56,8 +56,8 @@ def preprocess(
def apply(
self,
check_obj: Union[DataFrameTypes, is_table],
-column_name: str = None,
-kwargs: dict = None,
+column_name: Optional[str] = None,
+kwargs: Optional[Dict] = None,
):
if column_name and kwargs:
check_obj_and_col_name = PysparkDataframeColumnObject(
2 changes: 1 addition & 1 deletion pandera/backends/pyspark/column.py
@@ -66,7 +66,7 @@ def validate(
random_state: Optional[int] = None, # pylint: disable=unused-argument
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
# pylint: disable=too-many-locals
check_obj = self.preprocess(check_obj, inplace)
2 changes: 1 addition & 1 deletion pandera/backends/pyspark/components.py
@@ -30,7 +30,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
) -> DataFrame:
"""Validation backend implementation for pyspark dataframe columns.."""

6 changes: 3 additions & 3 deletions pandera/backends/pyspark/container.py
@@ -115,7 +115,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""
Parse and validate a check object, returning type-coerced and validated
@@ -401,7 +401,7 @@ def coerce_dtype(
check_obj: DataFrame,
*,
schema=None,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""Coerces check object to the expected type."""
assert schema is not None, "The `schema` argument must be provided."
@@ -508,7 +508,7 @@ def unique(
check_obj: DataFrame,
*,
schema=None,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""Check uniqueness in the check object."""
assert schema is not None, "The `schema` argument must be provided."
4 changes: 2 additions & 2 deletions pandera/backends/pyspark/decorators.py
@@ -4,7 +4,7 @@
import logging
import warnings
from contextlib import contextmanager
-from typing import List, Type
+from typing import List, Type, Optional


from pyspark.sql import DataFrame

@@ -17,7 +17,7 @@


def register_input_datatypes(
-acceptable_datatypes: List[Type[PysparkDefaultTypes]] = None,
+acceptable_datatypes: Optional[List[Type[PysparkDefaultTypes]]] = None,
):
"""
This decorator is used to register the input datatype for the check.