-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest.py
113 lines (95 loc) · 5.64 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import inps
import pandas as pd
import numpy as np
from numpy.random import default_rng
from xgboost import XGBRegressor, XGBClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.neural_network import MLPRegressor
# Seeded generator: every run of this script draws the same synthetic data.
rng = default_rng(0)

# Sizes of the population and of the two samples (n rows / N rows).
pop_size, n, N = 10000, 1000, 2000

# Raw standard-normal matrices. The draw order (np_sample, p_sample,
# population) and the flat-then-reshape form are kept so the random stream
# is consumed exactly as before.
np_sample = rng.standard_normal(n * 5).reshape(n, 5)
p_sample = rng.standard_normal(N * 3).reshape(N, 3)
population = rng.standard_normal(pop_size * 3).reshape(pop_size, 3)

# Weights for p_sample: the first half of the rows gets 0.8x the base
# weight pop_size / N, the second half gets 1.2x.
_half = int(N / 2)
_base_weight = pop_size / N
weights = [_base_weight * 0.8] * _half + [_base_weight * 1.2] * _half
def to_category(num_series):
    """Binarize numeric values into a "Yes"/"No" categorical series.

    Parameters
    ----------
    num_series : array-like of numbers
        Values to threshold; this script passes DataFrame columns.

    Returns
    -------
    pandas.Series
        Series of dtype "category" holding "Yes" where the value is
        strictly greater than 0 and "No" otherwise. It is built from a
        plain NumPy array, so it carries a default positional index.
    """
    labels = np.where(num_series > 0, "Yes", "No")
    return pd.Series(labels, dtype="category", copy=False)
# Wrap the raw matrices in labelled DataFrames. All three share the columns
# "A", "B" and "cat"; np_sample additionally has the two target columns.
_shared_columns = ["A", "B", "cat"]
np_sample = pd.DataFrame(
    np_sample,
    columns=_shared_columns + ["target", "target_cat"],
    copy=False,
)
p_sample = pd.DataFrame(p_sample, columns=_shared_columns, copy=False)
population = pd.DataFrame(population, columns=_shared_columns, copy=False)

# Replace the numeric "cat"/"target_cat" columns by their "Yes"/"No"
# categorical versions.
for _frame, _columns in (
    (np_sample, ("target_cat", "cat")),
    (p_sample, ("cat",)),
    (population, ("cat",)),
):
    for _column in _columns:
        _frame[_column] = to_category(_frame[_column])

# Attach the weights list to p_sample as a column.
p_sample["weights"] = weights

print(np_sample)
print(p_sample)
# --- Calibration -----------------------------------------------------------
# Totals for columns "A" and "B" passed to inps.calibration_weights.
population_totals = pd.Series([10, 5], index=["A", "B"])
print(population_totals)

# Calibration of np_sample, supplying the population size explicitly.
calibration_weights = inps.calibration_weights(
    np_sample, population_totals, population_size=pop_size
)
print(calibration_weights)

# Calibration of p_sample, pointing at its "weights" column instead.
calibration_weights2 = inps.calibration_weights(
    p_sample, population_totals, weights_column="weights"
)
print(calibration_weights2)

# Estimate and confidence interval for the numeric target.
_target = np_sample["target"]
mean_estimation = inps.estimation(_target, calibration_weights)
mean_interval = inps.confidence_interval(_target, calibration_weights)
for _result in (mean_estimation, mean_interval):
    print(_result)

# Same for the boolean indicator target_cat == "Yes".
_is_yes = np_sample["target_cat"] == "Yes"
proportion_estimation = inps.estimation(_is_yes, calibration_weights)
proportion_interval = inps.confidence_interval(_is_yes, calibration_weights)
for _result in (proportion_estimation, proportion_interval):
    print(_result)
# --- PSA weights -----------------------------------------------------------
# The first three calls exercise increasingly explicit argument combinations.
# Each one rebinds psa_weights, so only the last result is used further down.
_psa_args = (np_sample, p_sample, pop_size)
psa_weights = inps.psa_weights(*_psa_args)
psa_weights = inps.psa_weights(*_psa_args, weights_column="weights")
psa_weights = inps.psa_weights(
    *_psa_args,
    weights_column="weights",
    covariates=["A", "B", "cat"],
)

# Same call with explicitly chosen models; results are kept under separate
# names and not used again in this part of the script.
psa_weights2 = inps.psa_weights(
    *_psa_args,
    weights_column="weights",
    model=XGBClassifier(enable_categorical=True, tree_method="hist"),
)
psa_weights3 = inps.psa_weights(
    *_psa_args,
    weights_column="weights",
    model=inps.make_preprocess_estimator(BernoulliNB()),
)

print(psa_weights["np"])
print(psa_weights["p"])

# Estimate and confidence interval for the numeric target, using the "np"
# entry of the PSA result as weights.
_target = np_sample["target"]
mean_estimation = inps.estimation(_target, psa_weights["np"])
mean_interval = inps.confidence_interval(_target, psa_weights["np"])
for _result in (mean_estimation, mean_interval):
    print(_result)

# Same for the boolean indicator target_cat == "Yes".
_is_yes = np_sample["target_cat"] == "Yes"
proportion_estimation = inps.estimation(_is_yes, psa_weights["np"])
proportion_interval = inps.confidence_interval(_is_yes, psa_weights["np"])
for _result in (proportion_estimation, proportion_interval):
    print(_result)
# --- Matching --------------------------------------------------------------
# Matching values for the numeric target and for target_cat with "Yes" passed
# as the extra positional argument. matching_values is rebound by the call
# with explicit covariates, so that later result is the one used below.
matching_values = inps.matching_values(np_sample, p_sample, "target")
cat_matching_values = inps.matching_values(np_sample, p_sample, "target_cat", "Yes")
matching_values = inps.matching_values(
    np_sample, p_sample, "target", covariates=["A", "B", "cat"]
)

# Same call with explicitly chosen models; results are kept under separate
# names and not used again in this part of the script.
matching_values2 = inps.matching_values(
    np_sample,
    p_sample,
    "target",
    model=XGBRegressor(enable_categorical=True, tree_method="hist"),
)
matching_values3 = inps.matching_values(
    np_sample,
    p_sample,
    "target",
    model=inps.make_preprocess_estimator(MLPRegressor()),
)

print(matching_values["p"])
print(matching_values["np"])
print(cat_matching_values["p"])
print(cat_matching_values["np"])

# Estimate and confidence interval from the "p" matching values, weighted by
# the "weights" column of p_sample.
mean_estimation = inps.estimation(matching_values["p"], p_sample["weights"])
mean_interval = inps.confidence_interval(matching_values["p"], p_sample["weights"])
print(mean_estimation)
print(mean_interval)

proportion_estimation = inps.estimation(cat_matching_values["p"], p_sample["weights"])
# Fix: the interval used to be assigned to proportion_estimation, which
# overwrote the estimate and left proportion_interval holding the value
# computed in the earlier PSA section.
proportion_interval = inps.confidence_interval(
    cat_matching_values["p"], p_sample["weights"]
)
print(proportion_estimation)
print(proportion_interval)
# --- Doubly robust estimation and training ---------------------------------
_covariates = ["A", "B", "cat"]
# Keyword options shared by every XGBoost model built below; a fresh model
# instance is still created for each call.
_xgb_options = {"enable_categorical": True, "tree_method": "hist"}

doubly_robust_estimation = inps.doubly_robust_estimation(
    np_sample, p_sample, "target", covariates=list(_covariates)
)
cat_doubly_robust_estimation = inps.doubly_robust_estimation(
    np_sample, p_sample, "target_cat", "Yes", covariates=list(_covariates)
)
doubly_robust_estimation2 = inps.doubly_robust_estimation(
    np_sample,
    p_sample,
    "target",
    psa_model=XGBClassifier(**_xgb_options),
    matching_model=XGBRegressor(**_xgb_options),
)
print(doubly_robust_estimation)
print(cat_doubly_robust_estimation)

# Training values with explicit PSA and matching models; only the "p" entry
# of the result is printed.
training_values = inps.training_values(
    np_sample,
    p_sample,
    "target",
    psa_model=XGBClassifier(**_xgb_options),
    matching_model=XGBRegressor(**_xgb_options),
)
print(training_values["p"])
# --- KW weights ------------------------------------------------------------
kw_weights = inps.kw_weights(
    np_sample,
    p_sample,
    pop_size,
    weights_column="weights",
    covariates=["A", "B", "cat"],
)
print(kw_weights)

# Estimate for the boolean indicator target_cat == "Yes" under those weights.
_is_yes = np_sample["target_cat"] == "Yes"
proportion_estimation = inps.estimation(_is_yes, kw_weights)
print(proportion_estimation)
# --- Imputation over the population frame ----------------------------------
print(population)

# Training values with the population DataFrame in the second position, for
# the numeric target and for target_cat with "Yes" as the extra argument.
imputed_values = inps.training_values(np_sample, population, "target")
cat_imputed_values = inps.training_values(
    np_sample, population, "target_cat", "Yes"
)

# Estimates and intervals from the "p" entries; no weights are passed here.
_numeric_imputed = imputed_values["p"]
mean_estimation = inps.estimation(_numeric_imputed)
mean_interval = inps.confidence_interval(_numeric_imputed)
for _result in (mean_estimation, mean_interval):
    print(_result)

_cat_imputed = cat_imputed_values["p"]
proportion_estimation = inps.estimation(_cat_imputed)
proportion_interval = inps.confidence_interval(_cat_imputed)
for _result in (proportion_estimation, proportion_interval):
    print(_result)