Typing/mypy settings #1911

Draft · wants to merge 15 commits into main
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -353,7 +353,7 @@ def filter(self, record: pylogging.LogRecord) -> bool:

if (
msg.strip().startswith("document isn't included in any toctree")
-and record.location == "_tags/tagsindex"
+and record.location == "_tags/tagsindex" # type: ignore [attr-defined]
):
# ignore this warning, since we don't want the side nav to be
# cluttered with the tags index page.
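`record.location` is attached to the log record by Sphinx at runtime, so it does not exist on the `logging.LogRecord` stubs mypy checks against; the `# type: ignore [attr-defined]` silences exactly that error. A minimal sketch of the same pattern (the helper name is hypothetical, not part of this PR):

```python
import logging


def is_tags_index_warning(record: logging.LogRecord) -> bool:
    # Sphinx sets `location` dynamically, so it is absent from the
    # logging.LogRecord stubs; without the ignore mypy reports:
    #   error: "LogRecord" has no attribute "location"  [attr-defined]
    return record.location == "_tags/tagsindex"  # type: ignore[attr-defined]
```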
8 changes: 4 additions & 4 deletions pandera/api/pyspark/column_schema.py
@@ -1,7 +1,7 @@
"""Core pyspark column specification."""

import copy
-from typing import Any, List, Optional, Type, TypeVar, cast
+from typing import Any, Optional, Type, TypeVar, cast


import pyspark.sql as ps

@@ -25,7 +25,7 @@
checks: Optional[CheckList] = None,
nullable: bool = False,
coerce: bool = False,
-name: Any = None,
+name: Optional[Any] = None,
title: Optional[str] = None,
description: Optional[str] = None,
metadata: Optional[dict] = None,
@@ -95,7 +95,7 @@
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
"""Validate a specific column in a dataframe.
@@ -165,7 +165,7 @@
# Schema Transforms Methods #
#############################

-def update_checks(self, checks: List[Check]):
+def update_checks(self, checks: CheckList):

"""Create a new Schema with a new set of Checks

:param checks: checks to set on the new schema
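Most of the signature changes in this file (and across the PR) follow one pattern: under mypy's `no_implicit_optional` behaviour, the default since mypy 0.990, a parameter with a `None` default must be annotated `Optional[...]` explicitly. A minimal before/after sketch, using a stand-in `ErrorHandler` class rather than pandera's real one:

```python
from typing import Optional


class ErrorHandler:
    """Stand-in for pandera's error handler, for illustration only."""


# Rejected by mypy:
#   error: Incompatible default for argument "error_handler"
#   (default has type "None", argument has type "ErrorHandler")
# def validate(error_handler: ErrorHandler = None) -> None: ...


def validate(error_handler: Optional[ErrorHandler] = None) -> None:
    # The Optional makes the None default explicit and type-safe.
    handler = error_handler if error_handler is not None else ErrorHandler()
    print(type(handler).__name__)


validate()  # falls back to a fresh ErrorHandler
```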
4 changes: 2 additions & 2 deletions pandera/api/pyspark/components.py
@@ -14,7 +14,7 @@ class Column(ColumnSchema):

def __init__(
self,
-dtype: PySparkDtypeInputTypes = None,
+dtype: Optional[PySparkDtypeInputTypes] = None,
checks: Optional[CheckList] = None,
nullable: bool = False,
coerce: bool = False,
@@ -125,7 +125,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = True,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
) -> ps.DataFrame:
"""Validate a Column in a DataFrame object.

4 changes: 2 additions & 2 deletions pandera/api/pyspark/container.py
@@ -47,7 +47,7 @@ def __init__(
Dict[Any, pandera.api.pyspark.components.Column] # type: ignore [name-defined]
] = None,
checks: Optional[CheckList] = None,
-dtype: PySparkDtypeInputTypes = None,
+dtype: Optional[PySparkDtypeInputTypes] = None,
coerce: bool = False,
strict: StrictType = False,
name: Optional[str] = None,
@@ -354,7 +354,7 @@ def _validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
return self.get_backend(check_obj).validate(
check_obj=check_obj,
2 changes: 1 addition & 1 deletion pandera/api/pyspark/model.py
@@ -366,7 +366,7 @@
Similar to inspect.get_members but bypass descriptors __get__.
"""
bases = inspect.getmro(cls)[:-1] # bases -> DataFrameModel -> object
-attrs = {}
+attrs: Dict[str, Any] = {}

for base in reversed(bases):
if issubclass(base, DataFrameModel):
attrs.update(base.__dict__)
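The new annotation on `attrs` addresses another common mypy requirement: an empty literal gives mypy nothing to infer an element type from, so it asks for an explicit annotation. A small illustration (the keys and values below are made up):

```python
from typing import Any, Dict

# Without the annotation, mypy typically reports something like:
#   error: Need type annotation for "attrs" (hint: "attrs: Dict[<type>, <type>] = ...")
attrs: Dict[str, Any] = {}
attrs.update({"field_a": 1, "field_b": "x"})  # illustrative values only
print(attrs)
```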
28 changes: 14 additions & 14 deletions pandera/api/pyspark/model_components.py
@@ -43,7 +43,7 @@ def _to_schema_component(
self,
dtype: PySparkDtypeInputTypes,
component: Type[SchemaComponent],
-checks: CheckArg = None,
+checks: Optional[CheckArg] = None,
**kwargs: Any,
) -> SchemaComponent:
if self.dtype_kwargs:
@@ -54,9 +54,9 @@ def _to_schema_component(
def to_column(
self,
dtype: PySparkDtypeInputTypes,
-checks: CheckArg = None,
+checks: Optional[CheckArg] = None,
required: bool = True,
-name: str = None,
+name: Optional[str] = None,
) -> Column:
"""Create a schema_components.Column from a field."""
return self._to_schema_component(
@@ -92,15 +92,15 @@ def properties(self) -> Dict[str, Any]:

def Field(
*,
-eq: Any = None,
-ne: Any = None,
-gt: Any = None,
-ge: Any = None,
-lt: Any = None,
-le: Any = None,
-in_range: Dict[str, Any] = None,
-isin: Iterable = None,
-notin: Iterable = None,
+eq: Optional[Any] = None,
+ne: Optional[Any] = None,
+gt: Optional[Any] = None,
+ge: Optional[Any] = None,
+lt: Optional[Any] = None,
+le: Optional[Any] = None,
+in_range: Optional[Dict[str, Any]] = None,
+isin: Optional[Iterable] = None,
+notin: Optional[Iterable] = None,
str_contains: Optional[str] = None,
str_endswith: Optional[str] = None,
str_length: Optional[Dict[str, Any]] = None,
@@ -112,8 +112,8 @@ def Field(
regex: bool = False,
ignore_na: bool = True,
raise_warning: bool = False,
-n_failure_cases: int = None,
-alias: Any = None,
+n_failure_cases: Optional[int] = None,
+alias: Optional[Any] = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
6 changes: 3 additions & 3 deletions pandera/api/pyspark/types.py
@@ -1,7 +1,7 @@
"""Utility functions for pyspark validation."""

from functools import lru_cache
-from typing import List, NamedTuple, Tuple, Type, Union
+from typing import List, NamedTuple, Tuple, Type, Union, Any

from numpy import bool_ as np_bool
from packaging import version

@@ -92,7 +92,7 @@
)


-def is_table(obj):
+def is_table(obj: Any) -> bool:

"""Verifies whether an object is table-like.

Where a table is a 2-dimensional data matrix of rows and columns, which
@@ -101,6 +101,6 @@
return isinstance(obj, supported_types().table_types)


-def is_bool(x):
+def is_bool(x: Any) -> bool:

"""Verifies whether an object is a boolean type."""
return isinstance(x, (bool, type(pst.BooleanType()), np_bool))
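Adding `obj: Any` / `x: Any` and explicit `bool` return types is what a stricter mypy setup (for example `disallow_untyped_defs`) asks for: every function needs parameter and return annotations, with `Any` as the explicit escape hatch when the input really can be anything. A rough stand-in for the pattern, not the pyspark-backed checks themselves:

```python
from typing import Any


def is_table(obj: Any) -> bool:
    # Stand-in predicate; the real function tests against pyspark table types.
    return isinstance(obj, (list, tuple))


print(is_table([(1, "a"), (2, "b")]))  # True
print(is_table(42))                    # False
```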
5 changes: 3 additions & 2 deletions pandera/backends/polars/builtin_checks.py
@@ -1,6 +1,7 @@
"""Built-in checks for polars."""

import re
+from collections.abc import Collection

from typing import Any, Iterable, Optional, TypeVar, Union

import polars as pl
@@ -140,7 +141,7 @@
@register_builtin_check(
error="isin({allowed_values})",
)
-def isin(data: PolarsData, allowed_values: Iterable) -> pl.LazyFrame:
+def isin(data: PolarsData, allowed_values: Collection) -> pl.LazyFrame:

"""Ensure only allowed values occur within a series.

This checks whether all elements of a :class:`polars.Series`
@@ -160,7 +161,7 @@
@register_builtin_check(
error="notin({forbidden_values})",
)
-def notin(data: PolarsData, forbidden_values: Iterable) -> pl.LazyFrame:
+def notin(data: PolarsData, forbidden_values: Collection) -> pl.LazyFrame:

"""Ensure some defined values don't occur within a series.

Like :meth:`Check.isin` this check operates on single characters if
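Switching `allowed_values` and `forbidden_values` from `Iterable` to `Collection` tightens the contract: a `Collection` guarantees `__contains__`, `__len__`, and `__iter__`, whereas a bare iterator only promises a single pass. A small sketch of why that matters for membership checks (the helper below is illustrative, not pandera's API):

```python
from collections.abc import Collection
from typing import Any


def isin_example(allowed_values: Collection, candidate: Any) -> bool:
    # A Collection can be sized and iterated more than once, which is what a
    # membership check needs; sets, lists, and tuples all qualify.
    return candidate in allowed_values


print(isin_example({"a", "b", "c"}, "a"))   # True
print(isinstance(iter(["a"]), Collection))  # False: an iterator is Iterable but not a Collection
```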
4 changes: 2 additions & 2 deletions pandera/backends/pyspark/checks.py
@@ -56,8 +56,8 @@ def preprocess(
def apply(
self,
check_obj: Union[DataFrameTypes, is_table],
-column_name: str = None,
-kwargs: dict = None,
+column_name: Optional[str] = None,
+kwargs: Optional[Dict] = None,
):
if column_name and kwargs:
check_obj_and_col_name = PysparkDataframeColumnObject(
2 changes: 1 addition & 1 deletion pandera/backends/pyspark/column.py
@@ -66,7 +66,7 @@ def validate(
random_state: Optional[int] = None, # pylint: disable=unused-argument
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
# pylint: disable=too-many-locals
check_obj = self.preprocess(check_obj, inplace)
2 changes: 1 addition & 1 deletion pandera/backends/pyspark/components.py
@@ -30,7 +30,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
) -> DataFrame:
"""Validation backend implementation for pyspark dataframe columns.."""

6 changes: 3 additions & 3 deletions pandera/backends/pyspark/container.py
@@ -115,7 +115,7 @@ def validate(
random_state: Optional[int] = None,
lazy: bool = False,
inplace: bool = False,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""
Parse and validate a check object, returning type-coerced and validated
@@ -401,7 +401,7 @@ def coerce_dtype(
check_obj: DataFrame,
*,
schema=None,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""Coerces check object to the expected type."""
assert schema is not None, "The `schema` argument must be provided."
@@ -508,7 +508,7 @@ def unique(
check_obj: DataFrame,
*,
schema=None,
-error_handler: ErrorHandler = None,
+error_handler: Optional[ErrorHandler] = None,
):
"""Check uniqueness in the check object."""
assert schema is not None, "The `schema` argument must be provided."
4 changes: 2 additions & 2 deletions pandera/backends/pyspark/decorators.py
@@ -4,7 +4,7 @@
import logging
import warnings
from contextlib import contextmanager
-from typing import List, Type
+from typing import List, Type, Optional


from pyspark.sql import DataFrame

@@ -17,7 +17,7 @@


def register_input_datatypes(
-acceptable_datatypes: List[Type[PysparkDefaultTypes]] = None,
+acceptable_datatypes: Optional[List[Type[PysparkDefaultTypes]]] = None,
):
"""
This decorator is used to register the input datatype for the check.