Skip to content

Commit

Permalink
masked attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
sdaza committed Jan 3, 2025
1 parent 22f9dd3 commit 3e0e3ac
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 45 deletions.
90 changes: 45 additions & 45 deletions experiment_utils/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def __init__(self, treatment_col: str, instrument_col: Optional[str] = None,
min_ps_score: float = 0.05, max_ps_score: float = 0.95,
polynomial_ipw: bool = False) -> None:

self.logger = get_logger('Estimators')
self.treatment_col = treatment_col
self.instrument_col = instrument_col
self.target_ipw_effect = target_ipw_effect
self.alpha = alpha
self.max_ps_score = max_ps_score
self.min_ps_score = min_ps_score
self.polynomial_ipw = polynomial_ipw
self._logger = get_logger('Estimators')
self._treatment_col = treatment_col
self._instrument_col = instrument_col
self._target_ipw_effect = target_ipw_effect
self._alpha = alpha
self._max_ps_score = max_ps_score
self._min_ps_score = min_ps_score
self._polynomial_ipw = polynomial_ipw

def __create_formula(self, outcome_variable: str, covariates: Optional[List[str]], model_type: str = 'regression') -> str:
"""
Expand All @@ -44,8 +44,8 @@ def __create_formula(self, outcome_variable: str, covariates: Optional[List[str]
"""

formula_dict = {
'regression': f"{outcome_variable} ~ 1 + {self.treatment_col}",
'iv': f"{outcome_variable} ~ 1 + [{self.treatment_col} ~ {self.instrument_col}]"
'regression': f"{outcome_variable} ~ 1 + {self._treatment_col}",
'iv': f"{outcome_variable} ~ 1 + [{self._treatment_col} ~ {self._instrument_col}]"
}
if covariates:
standardized_covariates = [f"z_{covariate}" for covariate in covariates]
Expand Down Expand Up @@ -81,23 +81,23 @@ def linear_regression(self, data: pd.DataFrame, outcome_variable: str, covariate
model = smf.ols(formula, data=data)
results = model.fit(cov_type="HC3")

coefficient = results.params[self.treatment_col]
coefficient = results.params[self._treatment_col]
intercept = results.params["Intercept"]
relative_effect = coefficient / intercept
standard_error = results.bse[self.treatment_col]
pvalue = results.pvalues[self.treatment_col]
standard_error = results.bse[self._treatment_col]
pvalue = results.pvalues[self._treatment_col]

return {
"outcome": outcome_variable,
"treated_units": data[self.treatment_col].sum(),
"control_units": data[self.treatment_col].count() - data[self.treatment_col].sum(),
"treated_units": data[self._treatment_col].sum(),
"control_units": data[self._treatment_col].count() - data[self._treatment_col].sum(),
"control_value": intercept,
"treatment_value": intercept + coefficient,
"absolute_effect": coefficient,
"relative_effect": relative_effect,
"standard_error": standard_error,
"pvalue": pvalue,
"stat_significance": 1 if pvalue < self.alpha else 0
"stat_significance": 1 if pvalue < self._alpha else 0
}

def weighted_least_squares(self, data: pd.DataFrame, outcome_variable: str,
Expand Down Expand Up @@ -132,23 +132,23 @@ def weighted_least_squares(self, data: pd.DataFrame, outcome_variable: str,
)
results = model.fit(cov_type="HC3")

coefficient = results.params[self.treatment_col]
coefficient = results.params[self._treatment_col]
intercept = results.params["Intercept"]
relative_effect = coefficient / intercept
standard_error = results.bse[self.treatment_col]
pvalue = results.pvalues[self.treatment_col]
standard_error = results.bse[self._treatment_col]
pvalue = results.pvalues[self._treatment_col]

return {
"outcome": outcome_variable,
"treated_units": data[self.treatment_col].sum().astype(int),
"control_units": (data[self.treatment_col].count() - data[self.treatment_col].sum()).astype(int),
"treated_units": data[self._treatment_col].sum().astype(int),
"control_units": (data[self._treatment_col].count() - data[self._treatment_col].sum()).astype(int),
"control_value": intercept,
"treatment_value": intercept + coefficient,
"absolute_effect": coefficient,
"relative_effect": relative_effect,
"standard_error": standard_error,
"pvalue": pvalue,
"stat_significance": 1 if pvalue < self.alpha else 0
"stat_significance": 1 if pvalue < self._alpha else 0
}

def iv_regression(self, data: pd.DataFrame, outcome_variable: str, covariates: Optional[List[str]] = None) -> Dict[str, Union[str, int, float]]:
Expand All @@ -173,30 +173,30 @@ def iv_regression(self, data: pd.DataFrame, outcome_variable: str, covariates: O
- "pvalue" (float): The p-value of the treatment coefficient.
- "stat_significance" (int): Indicator of statistical significance (1 if p-value < alpha, else 0).
"""
if not self.instrument_col:
log_and_raise_error(self.logger, "Instrument column must be specified for IV adjustment")
if not self._instrument_col:
log_and_raise_error(self._logger, "Instrument column must be specified for IV adjustment")

formula = self.__create_formula(outcome_variable=outcome_variable, model_type='iv', covariates=covariates)
model = IV2SLS.from_formula(formula, data)
results = model.fit(cov_type='robust')

coefficient = results.params[self.treatment_col]
coefficient = results.params[self._treatment_col]
intercept = results.params["Intercept"]
relative_effect = coefficient / intercept
standard_error = results.std_errors[self.treatment_col]
pvalue = results.pvalues[self.treatment_col]
standard_error = results.std_errors[self._treatment_col]
pvalue = results.pvalues[self._treatment_col]

return {
"outcome": outcome_variable,
"treated_units": data[self.treatment_col].sum().astype(int),
"control_units": (data[self.treatment_col].count() - data[self.treatment_col].sum()).astype(int),
"treated_units": data[self._treatment_col].sum().astype(int),
"control_units": (data[self._treatment_col].count() - data[self._treatment_col].sum()).astype(int),
"control_value": intercept,
"treatment_value": intercept + coefficient,
"absolute_effect": coefficient,
"relative_effect": relative_effect,
"standard_error": standard_error,
"pvalue": pvalue,
"stat_significance": 1 if pvalue < self.alpha else 0
"stat_significance": 1 if pvalue < self._alpha else 0
}

def ipw_logistic(self, data: pd.DataFrame, covariates: List[str], penalty: str = 'l2', C: float = 1.0, max_iter: int = 5000) -> pd.DataFrame:
Expand Down Expand Up @@ -224,23 +224,23 @@ def ipw_logistic(self, data: pd.DataFrame, covariates: List[str], penalty: str =

logistic_model = LogisticRegression(penalty=penalty, C=C, max_iter=max_iter)

if self.polynomial_ipw:
if self._polynomial_ipw:
poly = PolynomialFeatures()
X = poly.fit_transform(data[covariates])
feature_names = poly.get_feature_names_out(covariates)
X = pd.DataFrame(X, columns=feature_names)
else:
X = data[covariates]

y = data[self.treatment_col]
y = data[self._treatment_col]
logistic_model.fit(X, y)

if not logistic_model.n_iter_[0] < logistic_model.max_iter:
self.logger.warning("Logistic regression model did not converge. Consider increasing the number of iterations or adjusting other parameters.")
self._logger.warning("Logistic regression model did not converge. Consider increasing the number of iterations or adjusting other parameters.")

data['propensity_score'] = logistic_model.predict_proba(X)[:, 1]
data['propensity_score'] = np.minimum(self.max_ps_score, data['propensity_score'])
data['propensity_score'] = np.maximum(self.min_ps_score, data['propensity_score'])
data['propensity_score'] = np.minimum(self._max_ps_score, data['propensity_score'])
data['propensity_score'] = np.maximum(self._min_ps_score, data['propensity_score'])

data = self.__calculate_stabilized_weights(data)
return data
Expand All @@ -258,14 +258,14 @@ def ipw_xgboost(self, data: pd.DataFrame, covariates: List[str]) -> pd.DataFrame
"""

X = data[covariates]
y = data[self.treatment_col]
y = data[self._treatment_col]

xgb_model = XGBClassifier(eval_metric='logloss')
xgb_model.fit(X, y)

data['propensity_score'] = xgb_model.predict_proba(X)[:, 1]
data['propensity_score'] = np.minimum(self.max_ps_score, data['propensity_score'])
data['propensity_score'] = np.maximum(self.min_ps_score, data['propensity_score'])
data['propensity_score'] = np.minimum(self._max_ps_score, data['propensity_score'])
data['propensity_score'] = np.maximum(self._min_ps_score, data['propensity_score'])
data = self.__calculate_stabilized_weights(data)

return data
Expand All @@ -285,24 +285,24 @@ def __calculate_stabilized_weights(self, data: pd.DataFrame) -> pd.DataFrame:
Data with the calculated stabilized weights
"""
num_units = data.shape[0]
p_treatment = sum(data[self.treatment_col]) / num_units
p_treatment = sum(data[self._treatment_col]) / num_units

data["ips_stabilized_weight"] = data[self.treatment_col] / data[
data["ips_stabilized_weight"] = data[self._treatment_col] / data[
"propensity_score"
] * p_treatment + (1 - data[self.treatment_col]) / (
] * p_treatment + (1 - data[self._treatment_col]) / (
1 - data["propensity_score"]
) * (
1 - p_treatment
)

data["tips_stabilized_weight"] = data[self.treatment_col] * p_treatment + (
1 - data[self.treatment_col]
data["tips_stabilized_weight"] = data[self._treatment_col] * p_treatment + (
1 - data[self._treatment_col]
) * data["propensity_score"] / (1 - data["propensity_score"]) * (1 - p_treatment)

data["cips_stabilized_weight"] = data[self.treatment_col] * (
data["cips_stabilized_weight"] = data[self._treatment_col] * (
1 - data["propensity_score"]
) / data["propensity_score"] * p_treatment + (
1 - data[self.treatment_col]
1 - data[self._treatment_col]
) * (
1 - p_treatment
)
Expand Down
6 changes: 6 additions & 0 deletions experiment_utils/experiment_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class ExperimentAnalyzer:
Target IPW effect (ATT, ATE, ATC), by default "ATT"
propensity_score_method : str, optional
Propensity score method (logistic, xgboost), by default 'logistic'
min_ps_score : float, optional
Lower bound for estimated propensity scores; scores below it are clipped to this value, by default 0.05
max_ps_score : float, optional
Upper bound for estimated propensity scores; scores above it are clipped to this value, by default 0.95
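polynomial_ipw : bool, optional
Expand the covariates with polynomial and interaction features before fitting the logistic propensity score model (not applied by the xgboost method), by default True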
assess_overlap : bool, optional
Assess overlap between treatment and control groups (slow) when using IPW to adjust covariates, by default False
instrument_col : str, optional
Expand Down

0 comments on commit 3e0e3ac

Please sign in to comment.