Merge pull request #19 from VeriFIT/eval-update

Eval: update jupyter script
VeriFIT · Aug 30, 2024 · 6e72a16 · 6e72a16
2 parents ad94605 + 0a1717d
commit 6e72a16
Showing 1 changed file with 76 additions and 20 deletions.
diff --git a/eval/FinalInterpretation.ipynb b/eval/FinalInterpretation.ipynb
@@ -34,7 +34,6 @@
     "warnings.simplefilter(action='ignore', category=UserWarning)\n",
     "\n",
     "# Short hack for display of images in jupyter notebook\n",
-    "\n",
     "from IPython.display import display, HTML\n",
     "display(HTML(\"<style>div.output_area pre {white-space: pre;}</style>\"))"
    ]
@@ -85,7 +84,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "DATA_SOURCE = \"../results/data/experiments-test-awali\""
+    "DATA_SOURCE = \"../results/data/tacas24\""
    ]
   },
   {
@@ -96,6 +95,14 @@
     "## Creating DataFrame"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "27e19cac",
+   "metadata": {},
+   "source": [
+    "The following functions `to_operation`, `to_tool_and_lang` and `to_bench` classify and translate particular parts of the results into the notation used in the paper (i.e., tools correspond to their names in the paper, benchmarks to their naming in the paper, and operations are unified, as we used different notations in different tools)."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -232,14 +239,33 @@
     "    elif val == 'ERR':\n",
     "        return 'ERR'\n",
     "    elif val == 'MISSING':\n",
-    "        return numpy.NAN\n",
-    "        #return TIMEOUT\n",
+    "        #return numpy.NAN\n",
+    "        return TIMEOUT\n",
     "    elif val == 'TIMEOUT' or val == 'TO':\n",
     "        return TIMEOUT\n",
     "    print(f\"{val} unhandled\")\n",
     "    assert False"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "9b8273ba",
+   "metadata": {},
+   "source": [
+    "Master function for creating a `pandas.DataFrame` from a directory containing `.csv` files.\n",
+    "\n",
+    "The `.csv` files are in the following structure: `instance;tool1-op1;tool1-op2;...tool1-opn;...toolm-opn`\n",
+    "\n",
+    "This is transformed into the following dataframe: `bench | input | tool | lang | op | time`, where:\n",
+    "\n",
+    "- `bench` corresponds to the classification of the benchmark used in the paper (returned by the `to_bench` function);\n",
+    "- `input` corresponds to the instance of the benchmark, i.e. the input automata used for evaluation;\n",
+    "- `tool` corresponds to the name of the tool (returned by `to_tool_and_lang`);\n",
+    "- `lang` corresponds to the language of the tool (returned by `to_tool_and_lang`);\n",
+    "- `op` corresponds to the individual operation, such as intersection or union (returned by `to_operation`); and\n",
+    "- `time` corresponds to the value of the operation: either a float value; `TO` if a timeout happened; `MISSING` if the instance was missing some automata; or `ERR` if an error happened."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -266,15 +292,24 @@
     "    (\"mona\", \"noodler-union\"),\n",
     "    (\"mona\", \"noodler-compl\"),\n",
     "    (\"mata\", \"param-diff\"),\n",
+    "    (\"vata\", \"param-diff\"),\n",
     "    (\"awali\", \"param-diff\"),\n",
     "    (\"brics\", \"param-diff\"),\n",
+    "    \n",
+    "    (\"mata\", \"noodler-concat\"),\n",
+    "    (\"vata\", \"noodler-concat\"),\n",
+    "    (\"brics\", \"noodler-concat\"),\n",
+    "    (\"awali\", \"noodler-concat\"),\n",
     ")\n",
     "ignored_tools = (\n",
     "    '(py)mata', 'mona', 'automatalib-old', 'mata-old', 'automata-old', 'automata.net-old'\n",
     ")\n",
     "\n",
+    "vata_trims = 0\n",
+    "\n",
     "def to_pandas(src_dir):\n",
     "    global TIMEOUT\n",
+    "    global vata_trims\n",
     "    data = []\n",
     "    for csv_source in progressbar.progressbar(os.listdir(src_dir)):\n",
     "        if csv_source.endswith('.csv'):\n",
@@ -298,10 +333,12 @@
     "                            if (tool, bench) in ignores:\n",
     "                                continue\n",
     "                            data.append([bench, inputs, tool, lang, op, val])\n",
+    "                        \n",
     "                except StopIteration:\n",
     "                    pass\n",
     "    return pandas.DataFrame(data, columns=HEADERS)\n",
-    "df = to_pandas(DATA_SOURCE)"
+    "df = to_pandas(DATA_SOURCE)\n",
+    "    "
    ]
   },
   {
@@ -334,7 +371,7 @@
     "        return float(vals[0])\n",
     "    except:\n",
     "        print(str(vals[0]))\n",
-    "        return 0 if str(vals[0]) not in ('ERR', 'TIMEOUT') else TIMEOUT\n",
+    "        return 0 if str(vals[0]) not in ('ERR', 'TIMEOUT', 'MISSING') else TIMEOUT\n",
     "for grp, series in df.groupby(['bench', 'input', 'tool']):\n",
     "    data['bench'].append(grp[0])\n",
     "    data['input'].append(grp[1])\n",
@@ -347,7 +384,7 @@
     "        print(f\"{list(series.items())=}\")\n",
     "        print(f\"{pyco_runtime=}\")\n",
     "        assert False\n",
-    "    if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR':\n",
+    "    if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR' or pyco_runtime[0] == 'MISSING':\n",
     "        data['time'].append(TIMEOUT)\n",
     "        continue\n",
     "        \n",
@@ -408,7 +445,7 @@
     "        print(f\"{list(series.items())=}\")\n",
     "        print(f\"{pyco_runtime=}\")\n",
     "        assert False\n",
-    "    if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR':\n",
+    "    if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR' or pyco_runtime[0] == 'MISSING':\n",
     "        data['time'].append(TIMEOUT)\n",
     "        continue\n",
     "        \n",
@@ -421,6 +458,14 @@
     "df = pandas.concat([df, ddf])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "b57cb74f",
+   "metadata": {},
+   "source": [
+    "The following is a helper function used for computing the cactus plot: the times are sorted, and summed until timeouts are encountered."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -445,6 +490,14 @@
     "            yield sum"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "ab1d511f",
+   "metadata": {},
+   "source": [
+    "Setting of colours and axis style."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -927,28 +980,29 @@
     "op_list = sorted(list(set(list(df['op']))))\n",
     "print(f\"available: {op_list}\")\n",
     "op_list = [\n",
-    "    'complement',    #'fair-overall',      \n",
+    "    #'complement',    #'fair-overall',      \n",
     "    #'fairest-of-them-all',\n",
-    "    'trim', \n",
-    "    'emptiness',                     'inclusion',\n",
-    "    'concatenation', 'intersection', 'union'\n",
+    "    #'trim', \n",
+    "    'emptiness',                     \n",
+    "    'inclusion',\n",
+    "    #'concatenation', \n",
+    "    #'intersection', \n",
+    "    #'union',\n",
     "    #'determization',  \n",
     "    #'overall', \n",
     "    #'interpretation', \n",
     "    #'minterm', \n",
     "    #'parsing', 'reduce', \n",
     "    #'runtime', 'transform', \n",
     "]\n",
-    "item_no = len(op_list)\n",
-    "x_dim = item_no // 3 + 1\n",
-    "y_dim = min(item_no, 3)\n",
     "\n",
     "seaborn.set_style('white', {'axes.grid': True, 'grid.linestyle': '--', 'lines.solid_capstyle': 'butt'})\n",
     "\n",
-    "tool_other = \"awali\"\n",
+    "tool_other = \"vata\"\n",
     "\n",
     "for sum_op, sum_name in [(to_values, \"sum\")]:\n",
-    "    fig, axs = plt.subplots(len(op_list), 1, figsize=(5, len(op_list) * 5))\n",
+    "    fig, axs = plt.subplots(len(op_list), 1, figsize=(4, (len(op_list)) * 4 ))\n",
+    "    plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)\n",
     "    \n",
     "    i = 0\n",
     "    for grp in op_list:\n",
@@ -959,6 +1013,7 @@
     "        for tool, values in series.groupby('tool'):\n",
     "            values = values.sort_values(by=['bench', 'input'])\n",
     "            idata[tool] = list(sum_op(values['time'], timeout=TIMEOUT))\n",
+    "            \n",
     "        order = ['mata', 'mata-sim', 'awali', 'mona', 'vata',  'automata.net', 'brics', 'automatalib', 'fado', 'automata.py'][::-1]\n",
     "        \n",
     "        if tool_other not in idata:\n",
@@ -971,7 +1026,7 @@
     "            if key not in idata.keys():\n",
     "                continue\n",
     "            data[key] = idata[key]\n",
-    "\n",
+    "        \n",
     "        g = axs[i].scatter(\n",
     "            data[\"mata\"], data[tool_other], marker='o', s=4\n",
     "        )\n",
@@ -984,10 +1039,11 @@
     "        ax.set_ylim(0.000001,60)\n",
     "        ax.set_xlabel(\"mata\")\n",
     "        ax.set_ylabel(tool_other)\n",
-    "        ax.set_title(f\"{grp}\", weight='bold', fontsize=10)\n",
+    "        ax.set_title(f\"{grp}\", weight='bold', fontsize=12)\n",
+    "        fig.tight_layout()\n",
     "        i += 1\n",
     "\n",
-    "    save_figure(f\"scatter-plot-per-operation-{sum_name}-{grp}\")"
+    "    save_figure(f\"scatter-plot-per-operation-{tool_other}-{sum_name}-log\", ext=\".pdf\")"
    ]
   },
   {