From 00d7c71e66d03e45c235d6defca35729647c168e Mon Sep 17 00:00:00 2001 From: Filippo Airaldi Date: Thu, 4 Jul 2024 11:02:06 +0200 Subject: [PATCH] improvements to summary and timings plot --- benchmarking/plot.py | 229 ++++++++++++++++++++++++++++--------------- 1 file changed, 150 insertions(+), 79 deletions(-) diff --git a/benchmarking/plot.py b/benchmarking/plot.py index eae9591..157d496 100644 --- a/benchmarking/plot.py +++ b/benchmarking/plot.py @@ -25,6 +25,8 @@ ALPHA = 0.95 METHODS_ORDER = ["random", "ei", "myopic", "myopic-s", "ms-gh", "ms-mc"] +METHOD_PATTER = re.compile(r"ms-(mc|gh)((?:\.\d+)+)") +VALID_PATTERN = re.compile(r"[^a-zA-Z0-9]+") def _sort_method(method: str) -> int: @@ -74,6 +76,32 @@ def _compute_all_stats(row: pd.Series) -> pd.Series: ) +def official_method_name_and_type( + method: str, no_spaces: bool = False, for_filename: bool = False +) -> tuple[str, int]: + """Utility to get the official name of the method.""" + match = METHOD_PATTER.fullmatch(method) + if match is not None: # rollout/multi-step with MC/GH + sampler = match.group(1).upper() + fantasies = match.group(2).split(".")[1:] + horizon = len(fantasies) + 1 + prefix = "R" if all(f == "1" for f in fantasies) else "MS" + name = f"{prefix}-{horizon} ({sampler})" + if prefix == "R": + type_ = 0 if sampler == "MC" else 1 + else: + type_ = 2 if sampler == "MC" else 3 + else: + name = method.title() + type_ = 4 + if no_spaces: + name = name.replace(" ", r"\,") + if for_filename: + name = VALID_PATTERN.sub("", name) + assert name.isalnum(), f"Invalid file name: {name}" + return name, type_ + + def load_data( csv_filename: str, include_methods: list[str], @@ -183,73 +211,78 @@ def _compute_dispersion(row: pd.Series) -> pd.Series: return pd.Series(out) -def plot_timings( - df: pd.DataFrame, figtitle: Optional[str], single_problem: bool = False +def _compute_official_name_and_type(row: pd.Series) -> pd.Series: + """Computes the official name and type of the given row of the dataframe.""" + new_row = row.copy() + new_row["method"], new_row["type"] = official_method_name_and_type( + row["method"], no_spaces=True + ) + return new_row + + +def itertime_vs_gap( + df: pd.DataFrame, plot: bool, pgfplotstables: bool, title: Optional[str] = None ) -> None: - """Plots the average time per iteration versus the optimality gap.""" - if single_problem: - assert len(df.index.unique(level="problem")) == 1, ( - "Only one problem detected, inter-problem dispersion will be calculated" - " instead of intra-problem." - ) - data = { - "final-gap-mean": df["final-gap"], - "time-mean": df["time"].apply(partial(np.mean, axis=1), axis=1), + """Plots/saves the average time per iteration versus the optimality gap.""" + df_ = ( + df[["final-gap-mean", "time-mean"]] + .rename(columns={"final-gap-mean": "gap", "time-mean": "time"}) + .droplevel("problem") + .groupby("method", sort=False) + .aggregate(list) + .apply(_compute_dispersion, axis=1) + ) + + if plot: + fig, ax = plt.subplots(1, 1, constrained_layout=True) + opts = { + "random": {"ha": "left", "xytext": (5, 5)}, + "ei": {"ha": "left", "xytext": (5, 5)}, + "myopic": {"ha": "right", "xytext": (-5, 5)}, + "myopic-s": {"ha": "left", "xytext": (5, 5)}, + "ms": {"ha": "left", "xytext": (5, 5)}, } - df_ = pd.DataFrame(data).droplevel("problem").apply(_compute_dispersion, axis=1) - else: - df_ = ( - df[["final-gap-mean", "time-mean"]] - .rename(columns={"final-gap-mean": "gap", "time-mean": "time"}) - .droplevel("problem") - .groupby("method", sort=False) - .aggregate(list) - .apply(_compute_dispersion, axis=1) + for method, row in df_.iterrows(): + if re.fullmatch(r"ms-mc(\.1)+", method): # rollout with MC sampling + color = "C0" + elif re.fullmatch(r"ms-gh(\.1)+", method): # rollout with GH sampling + color = "C1" + elif method.startswith("ms-mc"): # multistep with MC sampling + color = "C2" + elif method.startswith("ms-gh"): # multistep with GH sampling + color = "C3" + else: # myopic strategies + color = "C4" + opt = opts["ms"] if method.startswith("ms") else opts[method.split(".")[0]] + ax.errorbar( + x=row["time"], + xerr=row["time-err"], + y=row["gap"], + yerr=row["gap-err"], + ls="none", + lw=1.5, + capsize=3, + capthick=1.5, + ecolor=color, + marker="o", + markersize=8, + markerfacecolor=color, + markeredgecolor="white", + ) + ax.annotate( + method, xy=(row["time"], row["gap"]), textcoords="offset points", **opt + ) + ax.set_xscale("log") + ax.set_xlabel("Seconds per iteration") + ax.set_ylabel("Optimality gap") + fig.suptitle(title, fontsize=12) + + if pgfplotstables: + fn = "pgfplotstables/itertime-vs-gap" + fn += f"_{title}.dat" if title is not None else ".dat" + df_.reset_index().apply(_compute_official_name_and_type, axis=1).to_string( + fn, index=False ) - fig, ax = plt.subplots(1, 1, constrained_layout=True) - options = { - "random": {"ha": "left", "xytext": (5, 5)}, - "ei": {"ha": "left", "xytext": (5, 5)}, - "myopic": {"ha": "right", "xytext": (-5, 5)}, - "myopic-s": {"ha": "left", "xytext": (5, 5)}, - "ms": {"ha": "left", "xytext": (5, 5)}, - } - for method, row in df_.iterrows(): - if re.fullmatch(r"ms-mc(\.1)+", method): # rollout with MC sampling - color = "C0" - elif re.fullmatch(r"ms-gh(\.1)+", method): # rollout with GH sampling - color = "C1" - elif method.startswith("ms-mc"): # multistep with MC sampling - color = "C2" - elif method.startswith("ms-gh"): # multistep with GH sampling - color = "C3" - else: # myopic strategies - color = "C4" - opts = ( - options["ms"] if method.startswith("ms") else options[method.split(".")[0]] - ) - ax.errorbar( - x=row["time"], - xerr=row["time-err"], - y=row["gap"], - yerr=row["gap-err"], - ls="none", - lw=1.5, - capsize=3, - capthick=1.5, - ecolor=color, - marker="o", - markersize=8, - markerfacecolor=color, - markeredgecolor="white", - ) - ax.annotate( - method, xy=(row["time"], row["gap"]), textcoords="offset points", **opts - ) - ax.set_xscale("log") - ax.set_xlabel("Seconds per iteration") - ax.set_ylabel("Optimality gap") - fig.suptitle(figtitle, fontsize=12) def _format_row( @@ -287,9 +320,11 @@ def _format_row( return strs -def summarize(df: pd.DataFrame, tabletitle: Optional[str]) -> None: - """Prints the summary of the results in the given dataframe as three tables, one - containing the (final) optimality gap, one the cumulative rewards, and the last +def summary_tables( + df: pd.DataFrame, summary: bool, pgfplotstables: bool, title: Optional[str] = None +) -> None: + """Prints/saves the summary of the results in the given dataframe as three tables, + one containing the (final) optimality gap, one the cumulative rewards, and the last the time per iteration.""" # first, build the dataframe with the statistics for gap, returns, and time @@ -337,10 +372,39 @@ def summarize(df: pd.DataFrame, tabletitle: Optional[str]) -> None: tables[2].add_row([""] * (len(field_names) - 1)) # finally, print the tables side by side - if tabletitle is not None: - print(tabletitle) - for table in tables: - print(table.get_string()) + if summary: + if title is not None: + print(title) + for table in tables: + print(table.get_string()) + + # save the first table to a latex-friendly format + if pgfplotstables: + table = tables[0].copy() + table.align = "l" + table._title = None + for row in table.rows: + if problem_name := row[0]: + row[0] = rf"\multirow{{2}}*{{{problem_name}}}" + for i, entry in enumerate(row): + match = re.search(r"0\.\d+", entry) + if match is not None: + num = f"{float(match.group()):.3f}" + if entry.startswith("\033[1;34m"): + num = rf"{{\color{{blue}}\textbf{{{num}}}}}" + elif entry.startswith("\033[35m"): + num = rf"{{\color{{purple}}\textit{{{num}}}}}" + row[i] = num + latex = table.get_string( + border=False, preserve_internal_border=True, hrules=pt.NONE + ) + latex = latex.replace("|", "&") + latex = "\n".join(line[:-1] + r"\\" for line in latex.split("\n")) + + fn = "pgfplotstables/summary" + fn += f"_{title}.tex" if title is not None else ".tex" + with open(fn, "w", encoding="utf-8") as f: + f.write(latex) if __name__ == "__main__": @@ -386,21 +450,27 @@ def summarize(df: pd.DataFrame, tabletitle: Optional[str]) -> None: ) group = parser.add_mutually_exclusive_group() group.add_argument( - "--no-plot", + "--plot", action="store_true", help="Only print the summary and do not show the plots.", ) group.add_argument( - "--no-summary", + "--summary", action="store_true", help="Only show the plot and do not print the summary.", ) + group.add_argument( + "--pgfplotstables", + action="store_true", + help="Generates the data files for PGFPLOTS.", + ) args = parser.parse_args() + fplot, fsummary, fpgfplotstables = args.plot, args.summary, args.pgfplotstables # load each result and plot include_title = len(args.filenames) > 1 for filename in args.filenames: - title = filename if include_title else None + stitle = filename if include_title else None dataframe = load_data( filename, args.include_methods, @@ -408,9 +478,10 @@ def summarize(df: pd.DataFrame, tabletitle: Optional[str]) -> None: args.exclude_methods, args.exclude_problems, ) - if not args.no_plot: - plot_converges(dataframe, title) - plot_timings(dataframe, title) - if not args.no_summary: - summarize(dataframe, title) + # if not args.no_plot: + # plot_converges(dataframe, title) + if fplot or fpgfplotstables: + itertime_vs_gap(dataframe, fplot, fpgfplotstables, stitle) + if fsummary or fpgfplotstables: + summary_tables(dataframe, fsummary, fpgfplotstables, stitle) plt.show()