From c925912a20878485e32bfb9389a2d7cb9953b22c Mon Sep 17 00:00:00 2001 From: Robin Scheibler Date: Thu, 7 May 2020 23:10:53 +0900 Subject: [PATCH] Data analysis and figures --- analysis.py | 259 ++++++++++++++++++++++++----------------- bss_speech_dataset | 2 +- dereverb_separation.py | 34 +++--- example.py | 55 +++++++-- 4 files changed, 215 insertions(+), 135 deletions(-) diff --git a/analysis.py b/analysis.py index 63f3738..2538f3c 100644 --- a/analysis.py +++ b/analysis.py @@ -50,8 +50,48 @@ def find_best_pq_sdr(df, config): # find the best variants under all conditions for bss_algo in df["bss_algo"].unique(): + # Find best parameters overall channels + df_ba = df[ + (df["proj_algo"] == "minimum_distortion") & (df["bss_algo"] == bss_algo) + ] + + pt_sdr_fix = df_ba.pivot_table( + index="q", columns="p", values="sdr", aggfunc=np.mean, + ) + + pt_sir_fix = df_ba.pivot_table( + index="q", columns="p", values="sir", aggfunc=np.mean, + ) + + strategy_fix = { + "sir": { + "val": pt_sir_fix[CLASSIC_P][CLASSIC_Q], + "pq": [CLASSIC_P, CLASSIC_Q], + }, + "sdr": { + "val": pt_sdr_fix[CLASSIC_P][CLASSIC_Q], + "pq": [CLASSIC_P, CLASSIC_Q], + }, + } + + for ip, p in enumerate(p_vals): + for q in p_vals[ip:]: + + if pt_sdr_fix[p][q] > strategy_fix["sdr"]["val"]: + strategy_fix["sdr"]["val"] = pt_sdr_fix[p][q] + strategy_fix["sdr"]["pq"] = [p, q] + + if ( + pt_sdr_fix[p][q] >= pt_sdr_fix[CLASSIC_P][CLASSIC_Q] + and pt_n_iter[p][q] <= BEST_MAX_ITER + and pt_sir_fix[p][q] > strategy_fix["sir"]["val"] + ): + strategy_fix["sir"]["val"] = pt_sir_fix[p][q] + strategy_fix["sir"]["pq"] = [p, q] + for n_chan in df["n_channels"].unique(): + # Find best parameters for this number of channels df_loc = df[ (df["proj_algo"] == "minimum_distortion") & (df["bss_algo"] == bss_algo) @@ -73,93 +113,66 @@ def find_best_pq_sdr(df, config): # TBD replace by 2.0! sdr_l2 = pt_sdr[CLASSIC_P][CLASSIC_Q] - max_sdr_lbl = "md_best_sdr" - max_sdr = sdr_l2 - max_sdr_id = [CLASSIC_P, CLASSIC_Q] - - max_sir_lbl = "md_best_sir" - max_sir = sdr_l2 - max_sir_id = [CLASSIC_P, CLASSIC_Q] - - max_n_iter_lbl = "md_best_sir_m10" - max_n_iter = sdr_l2 - max_n_iter_id = [CLASSIC_P, CLASSIC_Q] + strategies = { + "sdr": {"val": sdr_l2, "pq": [CLASSIC_P, CLASSIC_Q],}, + "sir": { + "val": pt_sir[CLASSIC_P][CLASSIC_Q], + "pq": [CLASSIC_P, CLASSIC_Q], + }, + "sir_10": { + "val": pt_sir[CLASSIC_P][CLASSIC_Q], + "pq": [CLASSIC_P, CLASSIC_Q], + }, + "sdr_fix": strategy_fix["sdr"], + "sir_fix": strategy_fix["sir"], + } for ip, p in enumerate(p_vals): for q in p_vals[ip:]: - if pt_sdr[p][q] > max_sdr: - max_sdr = pt_sdr[p][q] - max_sdr_id = [p, q] + if pt_sdr[p][q] > strategies["sdr"]["val"]: + strategies["sdr"]["val"] = pt_sdr[p][q] + strategies["sdr"]["pq"] = [p, q] - if pt_sdr[p][q] >= sdr_l2 and pt_sir[p][q] > max_sir: + if ( + pt_sdr[p][q] >= sdr_l2 + and pt_sir[p][q] > strategies["sir"]["val"] + ): + strategies["sir"]["val"] = pt_sir[p][q] + strategies["sir"]["pq"] = [p, q] max_sir = pt_sir[p][q] max_sir_id = [p, q] if ( pt_sdr[p][q] >= sdr_l2 and pt_n_iter[p][q] <= BEST_MAX_ITER - and pt_sir[p][q] > max_n_iter + and pt_sir[p][q] > strategies["sir_10"]["val"] ): - max_n_iter = pt_sir[p][q] - max_n_iter_id = [p, q] - - sub_data.append( - df_loc[ - (df_loc["proj_algo"] == "minimum_distortion") - & (df_loc["p"] == max_sdr_id[0]) - & (df_loc["q"] == max_sdr_id[1]) - ].replace({"proj_algo": {"minimum_distortion": max_sdr_lbl}}) - ) - - best_params.append( - { - "bss_algo": bss_algo, - "n_channels": n_chan, - "proj_algo": max_sdr_lbl, - "p": max_sdr_id[0], - "q": max_sdr_id[1], - "n_iter": pt_n_iter[max_sdr_id[0]][max_sdr_id[1]], - } - ) - - sub_data.append( - df_loc[ - (df_loc["proj_algo"] == "minimum_distortion") - & (df_loc["p"] == max_sir_id[0]) - & (df_loc["q"] == max_sir_id[1]) - ].replace({"proj_algo": {"minimum_distortion": max_sir_lbl}}) - ) - - best_params.append( - { - "bss_algo": bss_algo, - "n_channels": n_chan, - "proj_algo": max_sir_lbl, - "p": max_sir_id[0], - "q": max_sir_id[1], - "n_iter": pt_n_iter[max_sir_id[0]][max_sir_id[1]], - } - ) - - sub_data.append( - df_loc[ - (df_loc["proj_algo"] == "minimum_distortion") - & (df_loc["p"] == max_n_iter_id[0]) - & (df_loc["q"] == max_n_iter_id[1]) - ].replace({"proj_algo": {"minimum_distortion": max_n_iter_lbl}}) - ) - - best_params.append( - { - "bss_algo": bss_algo, - "n_channels": n_chan, - "proj_algo": max_n_iter_lbl, - "p": max_n_iter_id[0], - "q": max_n_iter_id[1], - "n_iter": pt_n_iter[max_n_iter_id[0]][max_n_iter_id[1]], - } - ) + strategies["sir_10"]["val"] = pt_sir[p][q] + strategies["sir_10"]["pq"] = [p, q] + + for label, strat in strategies.items(): + + new_label = f"gmd_{label}" + + sub_data.append( + df_loc[ + (df_loc["proj_algo"] == "minimum_distortion") + & (df_loc["p"] == strat["pq"][0]) + & (df_loc["q"] == strat["pq"][1]) + ].replace({"proj_algo": {"minimum_distortion": new_label}}) + ) + + best_params.append( + { + "bss_algo": bss_algo, + "n_channels": n_chan, + "proj_algo": new_label, + "p": strat["pq"][0], + "q": strat["pq"][1], + "n_iter": pt_n_iter[strat["pq"][0]][strat["pq"][1]], + } + ) sub_data = pd.concat(sub_data) best_params_df = pd.DataFrame(best_params) @@ -167,7 +180,7 @@ def find_best_pq_sdr(df, config): return sub_data, best_params_df -def print_table(sub_data, best_params, metrics=[]): +def print_table(sub_data, best_params, proj_algos=None, metrics=[]): """ Merge the two data frames and print the result as a nice latex table """ @@ -200,28 +213,33 @@ def print_table(sub_data, best_params, metrics=[]): # now print this into a table that we can import in latex + # do everything if not specified + if proj_algos is None: + print("yo") + proj_algos = avgmet["proj_algo"].unique() + # First row has only the name of the algorithms print(" & ", end="") - for proj_algo in avgmet["proj_algo"].unique(): + for proj_algo in proj_algos: print(f" & \\multicolumn{{3}}{{c}}{{ {proj_algo} }} ", end="") print(" \\\\") # Second row has the parameter names (algo/channels) and the metric names - print("\\text{Algo} & \\text{Channels} ", end="") + print("Algo. & Mics ", end="") for proj_algo in avgmet["proj_algo"].unique(): for metric in metrics: - print(f" & \\text{{ {metric} }}", end="") + print(f" & {metric}", end="") print(" \\\\") for bss_algo in avgmet["bss_algo"].unique(): for m, n_chan in enumerate(avgmet["n_channels"].unique()): if m == 0: - print(f"\\text{{ {bss_algo} }} & \\text{{ {n_chan} }} ", end="") + print(f" {bss_algo} & {n_chan} ", end="") else: - print(f" & \\text{{ {n_chan} }} ", end="") + print(f" & {n_chan} ", end="") - for proj_algo in avgmet["proj_algo"].unique(): + for proj_algo in proj_algos: for metric in metrics: val = avgmet[ (avgmet["bss_algo"] == bss_algo) @@ -311,34 +329,35 @@ def draw_heatmap(*args, **kwargs): ax = plt.gca() - for metric in ["sdr", "sir", "n_iter"]: - fg = sns.FacetGrid( - df, - col="n_channels", - row="bss_algo", - # row_order=algo_order[n_targets], - # margin_titles=True, - # aspect=aspect, - # height=height, - ) - fg.map_dataframe( - draw_heatmap, - "p", - "q", - metric, - # cbar=False, - # vmin=0.0, - # vmax=1.0, - # xticklabels=[1, "", "", 4, "", "", 7, "", "", 10], - # yticklabels=[10, "", "", 40, "", "", 70, "", "", 100], - # yticklabels=yticklabels, - square=True, - ) + for bss_algo in params["bss_algorithms"].keys(): + for metric in ["sdr", "sir"]: + fg = sns.FacetGrid( + df[df["bss_algo"] == bss_algo], + col="n_channels", + row="bss_algo", + # row_order=algo_order[n_targets], + # margin_titles=True, + # aspect=aspect, + # height=height, + ) + fg.map_dataframe( + draw_heatmap, + "p", + "q", + metric, + # cbar=False, + # vmin=0.0, + # vmax=1.0, + # xticklabels=[1, "", "", 4, "", "", 7, "", "", 10], + # yticklabels=[10, "", "", 40, "", "", 70, "", "", 100], + # yticklabels=yticklabels, + square=True, + ) - fg.set_titles(template=f"{metric} " + "| {row_name} | mics={col_name}") + fg.set_titles(template=f"{metric} " + "| {row_name} | mics={col_name}") - for suffix in ["png", "pdf"]: - plt.savefig(output_dir / f"heatmaps_{metric}.{suffix}") + for suffix in ["png", "pdf"]: + plt.savefig(output_dir / f"heatmaps_{bss_algo}_{metric}.{suffix}") """ Now we will plot the box plots for key algorithms @@ -346,8 +365,28 @@ def draw_heatmap(*args, **kwargs): sub_df, best_params = find_best_pq_sdr(df, params) - print_table(sub_df, best_params, metrics=["sdr", "sir"]) + print_table( + sub_df, + best_params, + metrics=["sdr", "sir"], + proj_algos=[ + "projection_back", + "minimum_distortion_l2", + "gmd_sdr", + "gmd_sir", + "gmd_sir_10", + "gmd_sdr_fix", + # "gmd_sir_fix", + ], + ) + + print("") - print_table(sub_df, best_params, metrics=["p", "q", "n_iter_med"]) + print_table( + sub_df, + best_params, + metrics=["p", "q", "n_iter_med"], + proj_algos=["gmd_sdr", "gmd_sir_10", "gmd_sdr_fix"], + ) plt.show() diff --git a/bss_speech_dataset b/bss_speech_dataset index 4bdeeb7..316e2b7 160000 --- a/bss_speech_dataset +++ b/bss_speech_dataset @@ -1 +1 @@ -Subproject commit 4bdeeb7f324a57cff97ea974105e4105fa11df9b +Subproject commit 316e2b7775443a21dd5c3370a7a34f9f14ed77dd diff --git a/dereverb_separation.py b/dereverb_separation.py index e735aaf..ff63574 100644 --- a/dereverb_separation.py +++ b/dereverb_separation.py @@ -174,20 +174,6 @@ def ilrma_t_iteration( # costOrgByFreq = log_likelihood_ilmra_t_by_frequency(y, P[:, 0, :, :], d, eps) # costOrg = np.average(costOrgByFreq) - if fixB == False: - b_temp = ilrma_t_b_iteration(y, b, v, eps) - # dを更新する - - if use_increase_constraint == True: - for freq in range(fftMax): - if costOrgByFreq[freq] > costTempByFreq[freq]: - b[freq, ...] = b_temp[freq, ...] - else: - b = b_temp - - # 時間周波数分散 - d = np.einsum("bst,fsb->fts", v, b) - if fixV == False: # y: fftMax,frameNum,micNum # b: fftMax,sourceNum,basis @@ -206,6 +192,20 @@ def ilrma_t_iteration( d = np.einsum("bst,fsb->fts", v, b) + if fixB == False: + b_temp = ilrma_t_b_iteration(y, b, v, eps) + # dを更新する + + if use_increase_constraint == True: + for freq in range(fftMax): + if costOrgByFreq[freq] > costTempByFreq[freq]: + b[freq, ...] = b_temp[freq, ...] + else: + b = b_temp + + # 時間周波数分散 + d = np.einsum("bst,fsb->fts", v, b) + # フィルタを求める。 IP1 = True IP2 = False @@ -509,12 +509,16 @@ def ilrma_t_dereverb_separation( weight = np.random.uniform(size=fftMax * source_num * nmf_basis_num) weight = np.reshape(weight, [fftMax, source_num, nmf_basis_num]) + # v: basis,sourceNum,frameNum v = np.einsum("fst,fsb->bst", v, weight) v_ave = np.mean(v, axis=2, keepdims=True) v = v / (v_ave + 1.0e-14) v = np.abs(v) + v = 0.2 * np.random.rand(nmf_basis_num, source_num, frameNum) + 0.8 - b = np.ones(shape=(fftMax, source_num, nmf_basis_num)) + # b: fftMax,sourceNum,basis + # b = np.ones(shape=(fftMax, source_num, nmf_basis_num)) + b = 0.2 * np.random.rand(fftMax, source_num, nmf_basis_num) + 0.8 W = np.zeros(shape=(fftMax, channels, (tap_num + 1) * channels), dtype=np.complex) W[:, :, :channels] = ( diff --git a/example.py b/example.py index 44f966a..cfbdbd4 100644 --- a/example.py +++ b/example.py @@ -13,6 +13,20 @@ from metrics import si_bss_eval from pyroomacoustics.transform import stft + +def auxiva_ilrma_t(X, n_iter=20, proj_back=True, auxiva_n_iter=30, **kwargs): + + Y, W = pra.bss.auxiva(X, n_iter=auxiva_n_iter, return_filters=True, proj_back=False) + + Y = ilrma_t(Y, n_iter=n_iter, proj_back=proj_back, **kwargs) + + if proj_back: + A = np.linalg.inv(W) + Y = A[None, :, 0, :] * Y + + return Y + + algorithms = { "auxiva": pra.bss.auxiva, "ilrma": pra.bss.ilrma, @@ -20,9 +34,10 @@ "fastmnmf": pra.bss.fastmnmf, "ilrma_t": ilrma_t, "kagami": kagami, + "auxiva_ilrma_t": auxiva_ilrma_t, } -dereverb_algos = ["ilrma_t", "kagami"] +dereverb_algos = ["ilrma_t", "kagami", "auxiva_ilrma_t"] DATA_DIR = Path("bss_speech_dataset/data") DATA_META = DATA_DIR / "metadata.json" @@ -73,6 +88,9 @@ rooms ), f"Room must be between 0 and {len(rooms) - 1}" + t60 = rooms[args.room]["room_params"]["t60"] + print(f"Using room {args.room} with T60={t60:.3f}") + # choose and read the audio files # the mixtures @@ -112,17 +130,29 @@ elif args.algo in dereverb_algos: if args.p is None: Y = algorithms[args.algo]( - X, n_iter=15 * args.mics, n_taps=3, n_delays=2, proj_back=True + X, + n_iter=15 * args.mics, + n_taps=3, + n_delays=2, + n_components=1, + proj_back=True, ) else: Y = algorithms[args.algo]( - X, n_iter=15 * args.mics, n_taps=3, n_delays=2, proj_back=False + X, + n_iter=15 * args.mics, + n_taps=3, + n_delays=2, + n_components=1, + proj_back=False, ) else: Y = algorithms[args.algo](X, n_iter=30, proj_back=False) t2 = time.perf_counter() + print(f"Separation time: {t2 - t1:.3f} s") + # Projection back if args.p is not None: @@ -135,6 +165,8 @@ t3 = time.perf_counter() + print(f"Proj. back time: {t3 - t2:.3f} s") + # iSTFT y = stft.synthesis(Y, args.block, hop, win=win_s) y = y[args.block - hop :] @@ -144,11 +176,18 @@ # Evaluate m = np.minimum(ref.shape[0], y.shape[0]) - # scale invaliant metric - # sdr, sir, sar, perm = si_bss_eval(ref[:m, :], y[:m, :]) + t4 = time.perf_counter() - # conventional metric - sdr, sir, sar, perm = bss_eval_sources(ref[:m, :].T, y[:m, :].T) + if args.algo in dereverb_algos: + # conventional metric + sdr, sir, sar, perm = bss_eval_sources(ref[:m, :].T, y[:m, :].T) + else: + # scale invaliant metric + sdr, sir, sar, perm = si_bss_eval(ref[:m, :], y[:m, :]) + + t5 = time.perf_counter() + + print(f"Eval. back time: {t5 - t4:.3f} s") wavfile.write("example_mix.wav", fs, mix) wavfile.write("example_ref.wav", fs, ref[:m, :]) @@ -157,5 +196,3 @@ # Reorder the signals print("SDR:", sdr) print("SIR:", sir) - print(f"Separation time: {t2 - t1:.3f} s") - print(f"Proj. back time: {t3 - t2:.3f} s")