Skip to content

Commit

Permalink
Merge pull request #19 from VeriFIT/eval-update
Browse files Browse the repository at this point in the history
Eval: update jupyter script
  • Loading branch information
vhavlena authored Aug 30, 2024
2 parents ad94605 + 0a1717d commit 6e72a16
Showing 1 changed file with 76 additions and 20 deletions.
96 changes: 76 additions & 20 deletions eval/FinalInterpretation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
"warnings.simplefilter(action='ignore', category=UserWarning)\n",
"\n",
"# Short hack for display of images in jupyter notebook\n",
"\n",
"from IPython.display import display, HTML\n",
"display(HTML(\"<style>div.output_area pre {white-space: pre;}</style>\"))"
]
Expand Down Expand Up @@ -85,7 +84,7 @@
"metadata": {},
"outputs": [],
"source": [
"DATA_SOURCE = \"../results/data/experiments-test-awali\""
"DATA_SOURCE = \"../results/data/tacas24\""
]
},
{
Expand All @@ -96,6 +95,14 @@
"## Creating DataFrame"
]
},
{
"cell_type": "markdown",
"id": "27e19cac",
"metadata": {},
"source": [
"The following functions `to_operation`, `to_tool_and_lang` and `to_bench` classify and translate particular parts of the results into the notation used in the paper (i.e., tools are referred to by their names in the paper, benchmarks by their naming in the paper, and operations are unified, since different tools use different notations for them)."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -232,14 +239,33 @@
" elif val == 'ERR':\n",
" return 'ERR'\n",
" elif val == 'MISSING':\n",
" return numpy.NAN\n",
" #return TIMEOUT\n",
" #return numpy.NAN\n",
" return TIMEOUT\n",
" elif val == 'TIMEOUT' or val == 'TO':\n",
" return TIMEOUT\n",
" print(f\"{val} unhandled\")\n",
" assert False"
]
},
{
"cell_type": "markdown",
"id": "9b8273ba",
"metadata": {},
"source": [
"Master function for creating a `pandas.DataFrame` from a directory containing `.csv` files.\n",
"\n",
"The `.csv` files have the following structure: `instance;tool1-op1;tool1-op2;...tool1-opn;...toolm-opn`\n",
"\n",
"This is transformed into the following dataframe: `bench | input | tool | lang | op | time`, where:\n",
"\n",
"- `bench` corresponds to the classification of the benchmark used in the paper (returned by the `to_bench` function);\n",
"- `input` corresponds to the instance of the benchmark, i.e. the input automata used for evaluation;\n",
"- `tool` corresponds to the name of the tool (returned by `to_tool_and_lang`);\n",
"- `lang` corresponds to the language of the tool (returned by `to_tool_and_lang`);\n",
"- `op` corresponds to the individual operation, such as intersection or union (returned by `to_operation`); and\n",
"- `time` corresponds to the value of the operation: either a float value; `TO` if a timeout happened; `MISSING` if the instance was missing some automata; or `ERR` if an error happened."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -266,15 +292,24 @@
" (\"mona\", \"noodler-union\"),\n",
" (\"mona\", \"noodler-compl\"),\n",
" (\"mata\", \"param-diff\"),\n",
" (\"vata\", \"param-diff\"),\n",
" (\"awali\", \"param-diff\"),\n",
" (\"brics\", \"param-diff\"),\n",
" \n",
" (\"mata\", \"noodler-concat\"),\n",
" (\"vata\", \"noodler-concat\"),\n",
" (\"brics\", \"noodler-concat\"),\n",
" (\"awali\", \"noodler-concat\"),\n",
")\n",
"ignored_tools = (\n",
" '(py)mata', 'mona', 'automatalib-old', 'mata-old', 'automata-old', 'automata.net-old'\n",
")\n",
"\n",
"vata_trims = 0\n",
"\n",
"def to_pandas(src_dir):\n",
" global TIMEOUT\n",
" global vata_trims\n",
" data = []\n",
" for csv_source in progressbar.progressbar(os.listdir(src_dir)):\n",
" if csv_source.endswith('.csv'):\n",
Expand All @@ -298,10 +333,12 @@
" if (tool, bench) in ignores:\n",
" continue\n",
" data.append([bench, inputs, tool, lang, op, val])\n",
" \n",
" except StopIteration:\n",
" pass\n",
" return pandas.DataFrame(data, columns=HEADERS)\n",
"df = to_pandas(DATA_SOURCE)"
"df = to_pandas(DATA_SOURCE)\n",
" "
]
},
{
Expand Down Expand Up @@ -334,7 +371,7 @@
" return float(vals[0])\n",
" except:\n",
" print(str(vals[0]))\n",
" return 0 if str(vals[0]) not in ('ERR', 'TIMEOUT') else TIMEOUT\n",
" return 0 if str(vals[0]) not in ('ERR', 'TIMEOUT', 'MISSING') else TIMEOUT\n",
"for grp, series in df.groupby(['bench', 'input', 'tool']):\n",
" data['bench'].append(grp[0])\n",
" data['input'].append(grp[1])\n",
Expand All @@ -347,7 +384,7 @@
" print(f\"{list(series.items())=}\")\n",
" print(f\"{pyco_runtime=}\")\n",
" assert False\n",
" if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR':\n",
" if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR' or pyco_runtime[0] == 'MISSING':\n",
" data['time'].append(TIMEOUT)\n",
" continue\n",
" \n",
Expand Down Expand Up @@ -408,7 +445,7 @@
" print(f\"{list(series.items())=}\")\n",
" print(f\"{pyco_runtime=}\")\n",
" assert False\n",
" if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR':\n",
" if pyco_runtime[0] == TIMEOUT or pyco_runtime[0] == 'ERR' or pyco_runtime[0] == 'MISSING':\n",
" data['time'].append(TIMEOUT)\n",
" continue\n",
" \n",
Expand All @@ -421,6 +458,14 @@
"df = pandas.concat([df, ddf])"
]
},
{
"cell_type": "markdown",
"id": "b57cb74f",
"metadata": {},
"source": [
"The following helper function is used for computing the cactus plot: the times are sorted and summed until timeouts are encountered."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -445,6 +490,14 @@
" yield sum"
]
},
{
"cell_type": "markdown",
"id": "ab1d511f",
"metadata": {},
"source": [
"Setting of colours and axis style."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -927,28 +980,29 @@
"op_list = sorted(list(set(list(df['op']))))\n",
"print(f\"available: {op_list}\")\n",
"op_list = [\n",
" 'complement', #'fair-overall', \n",
" #'complement', #'fair-overall', \n",
" #'fairest-of-them-all',\n",
" 'trim', \n",
" 'emptiness', 'inclusion',\n",
" 'concatenation', 'intersection', 'union'\n",
" #'trim', \n",
" 'emptiness', \n",
" 'inclusion',\n",
" #'concatenation', \n",
" #'intersection', \n",
" #'union',\n",
" #'determization', \n",
" #'overall', \n",
" #'interpretation', \n",
" #'minterm', \n",
" #'parsing', 'reduce', \n",
" #'runtime', 'transform', \n",
"]\n",
"item_no = len(op_list)\n",
"x_dim = item_no // 3 + 1\n",
"y_dim = min(item_no, 3)\n",
"\n",
"seaborn.set_style('white', {'axes.grid': True, 'grid.linestyle': '--', 'lines.solid_capstyle': 'butt'})\n",
"\n",
"tool_other = \"awali\"\n",
"tool_other = \"vata\"\n",
"\n",
"for sum_op, sum_name in [(to_values, \"sum\")]:\n",
" fig, axs = plt.subplots(len(op_list), 1, figsize=(5, len(op_list) * 5))\n",
" fig, axs = plt.subplots(len(op_list), 1, figsize=(4, (len(op_list)) * 4 ))\n",
" plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)\n",
" \n",
" i = 0\n",
" for grp in op_list:\n",
Expand All @@ -959,6 +1013,7 @@
" for tool, values in series.groupby('tool'):\n",
" values = values.sort_values(by=['bench', 'input'])\n",
" idata[tool] = list(sum_op(values['time'], timeout=TIMEOUT))\n",
" \n",
" order = ['mata', 'mata-sim', 'awali', 'mona', 'vata', 'automata.net', 'brics', 'automatalib', 'fado', 'automata.py'][::-1]\n",
" \n",
" if tool_other not in idata:\n",
Expand All @@ -971,7 +1026,7 @@
" if key not in idata.keys():\n",
" continue\n",
" data[key] = idata[key]\n",
"\n",
" \n",
" g = axs[i].scatter(\n",
" data[\"mata\"], data[tool_other], marker='o', s=4\n",
" )\n",
Expand All @@ -984,10 +1039,11 @@
" ax.set_ylim(0.000001,60)\n",
" ax.set_xlabel(\"mata\")\n",
" ax.set_ylabel(tool_other)\n",
" ax.set_title(f\"{grp}\", weight='bold', fontsize=10)\n",
" ax.set_title(f\"{grp}\", weight='bold', fontsize=12)\n",
" fig.tight_layout()\n",
" i += 1\n",
"\n",
" save_figure(f\"scatter-plot-per-operation-{sum_name}-{grp}\")"
" save_figure(f\"scatter-plot-per-operation-{tool_other}-{sum_name}-log\", ext=\".pdf\")"
]
},
{
Expand Down

0 comments on commit 6e72a16

Please sign in to comment.