Nice notebook for ERUPT std estimates

py-why · Nov 29, 2024 · a02d290 · a02d290
1 parent e5d26ce
commit a02d290
Showing 1 changed file with 70 additions and 66 deletions.
diff --git a/notebooks/ERUPT basics.ipynb b/notebooks/ERUPT basics.ipynb
@@ -103,45 +103,45 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.062077</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1.201419</td>\n",
+       "      <td>0.452636</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.684484</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>0.485400</td>\n",
+       "      <td>0.380215</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.683959</td>\n",
+       "      <td>0.745268</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.888416</td>\n",
+       "      <td>0.584036</td>\n",
        "      <td>1</td>\n",
-       "      <td>1.462598</td>\n",
+       "      <td>0.762300</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>0.747222</td>\n",
+       "      <td>0.505191</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.813349</td>\n",
+       "      <td>1.425354</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>0.903443</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.718212</td>\n",
+       "      <td>0.384110</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.834628</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "          X  T1        Y1\n",
-       "0  0.062077   1  1.201419\n",
-       "1  0.485400   0  0.683959\n",
-       "2  0.888416   1  1.462598\n",
-       "3  0.747222   0  0.813349\n",
-       "4  0.903443   0  0.718212"
+       "0  0.452636   0  1.684484\n",
+       "1  0.380215   0  0.745268\n",
+       "2  0.584036   1  0.762300\n",
+       "3  0.505191   0  1.425354\n",
+       "4  0.384110   1  1.834628"
       ]
      },
      "execution_count": 2,
@@ -216,65 +216,65 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.062077</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1.201419</td>\n",
-       "      <td>0.531039</td>\n",
+       "      <td>0.452636</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.468961</td>\n",
-       "      <td>1.901956</td>\n",
+       "      <td>1.684484</td>\n",
+       "      <td>0.726318</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.273682</td>\n",
+       "      <td>0.904259</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>0.485400</td>\n",
+       "      <td>0.380215</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.683959</td>\n",
-       "      <td>0.742700</td>\n",
+       "      <td>0.745268</td>\n",
+       "      <td>0.690108</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.742700</td>\n",
-       "      <td>1.386464</td>\n",
+       "      <td>0.690108</td>\n",
+       "      <td>1.930383</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.888416</td>\n",
+       "      <td>0.584036</td>\n",
        "      <td>1</td>\n",
-       "      <td>1.462598</td>\n",
-       "      <td>0.944208</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.055792</td>\n",
-       "      <td>0.035300</td>\n",
+       "      <td>0.762300</td>\n",
+       "      <td>0.792018</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.792018</td>\n",
+       "      <td>0.959608</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>0.747222</td>\n",
+       "      <td>0.505191</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.813349</td>\n",
-       "      <td>0.873611</td>\n",
+       "      <td>1.425354</td>\n",
+       "      <td>0.752596</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.873611</td>\n",
-       "      <td>1.511816</td>\n",
+       "      <td>0.752596</td>\n",
+       "      <td>1.017777</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>0.903443</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.718212</td>\n",
-       "      <td>0.951722</td>\n",
+       "      <td>0.384110</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.834628</td>\n",
+       "      <td>0.692055</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.951722</td>\n",
-       "      <td>2.741387</td>\n",
+       "      <td>0.692055</td>\n",
+       "      <td>2.374030</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "          X  T1        Y1         p  T2  p_of_actual        Y2\n",
-       "0  0.062077   1  1.201419  0.531039   0     0.468961  1.901956\n",
-       "1  0.485400   0  0.683959  0.742700   1     0.742700  1.386464\n",
-       "2  0.888416   1  1.462598  0.944208   0     0.055792  0.035300\n",
-       "3  0.747222   0  0.813349  0.873611   1     0.873611  1.511816\n",
-       "4  0.903443   0  0.718212  0.951722   1     0.951722  2.741387"
+       "0  0.452636   0  1.684484  0.726318   0     0.273682  0.904259\n",
+       "1  0.380215   0  0.745268  0.690108   1     0.690108  1.930383\n",
+       "2  0.584036   1  0.762300  0.792018   1     0.792018  0.959608\n",
+       "3  0.505191   0  1.425354  0.752596   1     0.752596  1.017777\n",
+       "4  0.384110   1  1.834628  0.692055   1     0.692055  2.374030"
       ]
      },
      "execution_count": 3,
@@ -319,24 +319,26 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Average outcome of the actual biased assignment experiment: 1.4103322213139171\n",
-      "Estimated outcome of random assignment: 1.2642961174989582\n",
-      "Average outcome of the actual random assignment experiment: 1.2553285100413232\n"
+      "Average outcome of the actual biased assignment experiment: 1.411675477573636\n",
+      "Estimated outcome of random assignment: 1.251567372523789\n",
+      "95% confidence interval for estimated outcome: 1.2311928820519622 1.2719418629956158\n",
+      "Average outcome of the actual random assignment experiment: 1.2559621877416332\n"
      ]
     }
    ],
    "source": [
-    "from causaltune.score.erupt import erupt\n",
+    "from causaltune.score.erupt_core import erupt_with_std\n",
     "\n",
     "# Let's use data from biased assignment experiment to estimate the average effect of fully random assignment\n",
-    "est = erupt(actual_propensity=df[\"p_of_actual\"], \n",
+    "est, std = erupt_with_std(actual_propensity=df[\"p_of_actual\"], \n",
     "                     actual_treatment=df[\"T2\"],\n",
     "                     actual_outcome=df[\"Y2\"],\n",
     "                     hypothetical_policy=df[\"T1\"])\n",
     "\n",
     "\n",
     "print(\"Average outcome of the actual biased assignment experiment:\", df[\"Y2\"].mean())\n",
     "print(\"Estimated outcome of random assignment:\", est)\n",
+    "print(\"95% confidence interval for estimated outcome:\", est-2*std, est + 2*std)\n",
     "print(\"Average outcome of the actual random assignment experiment:\",  df[\"Y1\"].mean())"
    ]
   },
@@ -358,9 +360,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Average outcome of the actual random assignment experiment: 1.2553285100413232\n",
-      "Estimated outcome of biased assignment: 1.4128941840680447\n",
-      "Average outcome of the actual biased assignment experiment: 1.4103322213139171\n"
+      "Average outcome of the actual random assignment experiment: 1.2559621877416332\n",
+      "Estimated outcome of biased assignment: 1.4147647990746988\n",
+      "Confidence interval for estimated outcome: 1.398423601541284 1.4311059966081134\n",
+      "Average outcome of the actual biased assignment experiment: 1.411675477573636\n"
      ]
     }
    ],
@@ -369,31 +372,32 @@
     "\n",
     "# Now the reverse: use data from the random assignment experiment to estimate the average outcome of the biased assignment policy\n",
     "hypothetical_policy = df[\"T2\"]\n",
-    "est = erupt(actual_propensity=0.5*pd.Series(np.ones(len(df))), \n",
+    "est, std = erupt_with_std(actual_propensity=0.5*pd.Series(np.ones(len(df))), \n",
     "                     actual_treatment=df[\"T1\"],\n",
     "                     actual_outcome=df[\"Y1\"],\n",
     "                     hypothetical_policy= df[\"T2\"])\n",
     "\n",
     "print(\"Average outcome of the actual random assignment experiment:\", df[\"Y1\"].mean())\n",
     "print(\"Estimated outcome of biased assignment:\", est)\n",
+    "print(\"Confidence interval for estimated outcome:\", est-2*std, est + 2*std)\n",
     "print(\"Average outcome of the actual biased assignment experiment:\",  df[\"Y2\"].mean())"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "a54530bf",
+   "id": "f724dbc3",
    "metadata": {},
    "source": [
-    "For more details on the math behind ERUPT, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957), who call it policy value. Note also that we assume that treatment takes integer values from 0 to n."
+    "As you can see, in both experiments the actual average outcome lies well within the confidence interval estimated by ERUPT."
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8e99cd00-f2af-4cfa-bd79-5c2a4cbc1828",
+   "cell_type": "markdown",
+   "id": "a54530bf",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "For more details on the math behind ERUPT, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957), who call it policy value. Note also that we assume that treatment takes integer values from 0 to n."
+   ]
   }
  ],
  "metadata": {