diff --git a/ChangePDB.py b/ChangePDB.py
new file mode 100755
index 0000000..2adf933
--- /dev/null
+++ b/ChangePDB.py
@@ -0,0 +1,252 @@
+import argparse
+import sys
+import numpy as np
+import re
+import matplotlib.pyplot as plotter
+import os
+
+parser = argparse.ArgumentParser(epilog='Change PDB files, e.g. the beta field. Annie Westerlund 2017.');
+parser.add_argument('-top','--topology_file',help='Input topology file (.gro, .pdb, etc.)',type=str,default='');
+parser.add_argument('-beta','--change_beta',help='Flag for writing beta of residues (optional).',action='store_true');
+parser.add_argument('-hel_beta','--change_helix_beta',help='Flag for writing beta of helices (optional).',action='store_true');
+parser.add_argument('-rename','--rename_atoms',help='Flag for renaming residues from CHARMM-GUI to Gromacs (optional).',action='store_true');
+
+parser.add_argument('-renumber','--renumber_residues',help='Flag for renumbering residues from sequential to chain numbering. Requires the number of chains, or the first resID, to be set (optional).',action='store_true');
+parser.add_argument('-n_chains','--number_of_chains',help='Number of chains to use when renumbering the residues.',default=1);
+parser.add_argument('-first_resid','--first_resID',help='Start resID in the structure.',default=1);
+
+parser.add_argument('-f','--in_file',help='Input file.',default='');
+parser.add_argument('-f_conv','--helix_ind_conv_file',help='File containing residue numbers of helices (optional).',default=0);
+parser.add_argument('-fe','--file_end_name',type=str,help='Output file end name (optional).',default='');
+parser.add_argument('-od','--out_directory',type=str,help='The directory where data should be saved (optional).',default='');
+parser.add_argument('-o','--out_file',help='Output file name (optional).',default='structure');
+
+args = parser.parse_args();
+
+# Put / at the end of the out directory if not present. Check that the folder exists, otherwise create it.
+if args.out_directory != '':
+    if args.out_directory[-1] != '/':
+        args.out_directory += '/';
+
+    if not os.path.exists(args.out_directory):
+        os.makedirs(args.out_directory);
+
+# Change residue names
+if args.rename_atoms:
+    print('Changing residue names to match Gromacs names');
+    fID1 = open(args.topology_file,'r');
+    fID2 = open(args.out_directory + args.out_file + '_gmx_' + args.file_end_name + '.pdb','w');
+    first_line = True;
+
+    for line in fID1:
+        if line[0:3] == 'END':
+            continue;
+
+        if first_line:
+            first_line = False;
+            fID2.write(line);
+        else:
+            atom = line[13:17];
+            residue = line[17:21];
+
+            # Change the names of atoms and residues (both fields are 4 characters wide)
+            if atom == 'CL  ':
+                atom = 'CLA ';
+                residue = 'CLA ';
+            elif atom == 'K   ':
+                atom = 'POT ';
+                residue = 'POT ';
+            elif atom == 'OH2 ':
+                atom = 'OW  ';
+                residue = 'SOL ';
+            elif atom == 'H1  ':
+                atom = 'HW1 ';
+                residue = 'SOL ';
+            elif atom == 'H2  ':
+                atom = 'HW2 ';
+                residue = 'SOL ';
+
+            tmpLine = line[0:13] + atom + residue + line[21::];
+            fID2.write(tmpLine);
+
+    fID1.close();
+    fID2.close();
+
+
+# Change beta columns, general
+if args.change_beta:
+    print('Changing beta column ' + args.file_end_name);
+    values = np.loadtxt(args.in_file);
+    print('Max value: ' + str(np.max(values)));
+    print('Min value: ' + str(np.min(values)));
+
+
+    counter = 0;
+    resid_counter = -1;
+    current_resid = -1;
+    next_resid_write = -1;
+    fID1 = open(args.topology_file,'r');
+    fID2 = open(args.out_directory + args.out_file + '_beta_' + args.file_end_name + '.pdb','w');
+    first_line = True;
+
+    for line in fID1:
+        if line[0:3] == 'END':
+            continue;
+
+        if first_line:
+            first_line = False;
+            fID2.write(line);
+        else:
+            if line[0:3] == 'END' or line[0:3] == 'TER':
+                continue;
+            resid = int(line[22:26]);  # residue number, PDB columns 23-26
+
+            if resid != current_resid:
+                current_resid = resid;
+                resid_counter += 1;
+
+            if resid_counter < len(values):
+                fID2.write(line[0:60]);
+                fID2.write(' ');
+                fID2.write('%.3f' % values[resid_counter]);
+                fID2.write(line[66::]);
+            else:
+                fID2.write(line[0:60]);
+                fID2.write('  ');
+                fID2.write('0.00');
+                fID2.write(line[66::]);
+
+
+    fID1.close();
+    fID2.close();
+
+# Change beta column of helices
+if args.change_helix_beta:
+    print('Changing beta column for each helix');
+    helix_resid_convert_ind = np.loadtxt(args.helix_ind_conv_file);
+
+    values = np.loadtxt(args.in_file);
+    values -= np.min(values);
+    values /= np.max(values);
+    values = np.floor(1000*values)/100;
+
+    counter = 0;
+    resid_counter = -1;
+    current_resid = -1;
+    current_resid_min = helix_resid_convert_ind[counter,0];
+    current_resid_max = helix_resid_convert_ind[counter,1];
+    fID1 = open(args.topology_file,'r');
+    fID2 = open(args.out_directory + args.out_file + '_helix_beta_' + args.file_end_name + '.pdb','w');
+    first_line = True;
+
+    for line in fID1:
+        if line[0:3] == 'END' or line[0:3] == 'TER':
+            continue;
+
+        if first_line:
+            first_line = False;
+            fID2.write(line);
+        else:
+            resid = int(line[22:26]);
+            if resid != current_resid:
+                current_resid = resid;
+                resid_counter += 1;
+
+            if resid_counter >= current_resid_min and resid_counter <= current_resid_max:
+                fID2.write(line[0:62]);
+                fID2.write(str(values[counter]));
+                fID2.write(line[66::]);
+            else:
+                fID2.write(line[0:62]);
+                fID2.write('0.00');
+                fID2.write(line[66::]);
+
+            if resid_counter > current_resid_max:
+                counter += 1;
+                if counter < len(helix_resid_convert_ind[::,0]):
+                    current_resid_min = helix_resid_convert_ind[counter,0];
+                    current_resid_max = helix_resid_convert_ind[counter,1];
+                else:
+                    current_resid_min = -1;
+                    current_resid_max = -1;
+
+    fID1.close();
+    fID2.close();
+
+if args.renumber_residues:
+
+    chain_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+    n_chains = float(args.number_of_chains);
+    first_resid = int(args.first_resID);
+
+    fID1 = open(args.topology_file,'r');
+    fID2 = open(args.out_directory + args.out_file + '_renumbered_' + args.file_end_name + '.pdb','w');
+
+    lines = [];
+    # Collect all lines
+    for line in fID1:
+        lines.append(line);
+
+    # Count the number of residues (skip the header line)
+    counter = 0;
+    resid_counter = 0;
+    current_resid = -1;
+    first_line = True;
+
+    for i in range(1,len(lines)):
+        line = lines[i];
+
+        if line[0:3] == 'END' or line[0:3] == 'TER':
+            continue;
+
+        resid = int(line[22:26]);
+        if resid != current_resid:
+            current_resid = resid;
+            resid_counter += 1;
+
+    # Compute total number of residues per chain
+    nResiduesPerChain = np.floor(resid_counter/n_chains);
+
+    # Renumber residues
+    counter = 0;
+    resid_counter = 0;
+    current_resid = -1;
+    first_line = True;
+    new_resid = first_resid - 1;
+    chain_id = 0;
+
+    for i in range(0,len(lines)):
+        line = lines[i];
+        if line[0:3] == 'END' or line[0:3] == 'TER':
+            fID2.write(line);
+            continue;
+
+        if first_line:
+            first_line = False;
+            fID2.write(line);
+            continue;
+
+        resid = int(line[22:26]);
+        if resid != current_resid:
+            current_resid = resid;
+            resid_counter += 1;
+            new_resid += 1;
+            if resid_counter > nResiduesPerChain:
+                new_resid = first_resid;
+                resid_counter = 1;
+                chain_id += 1;
+                fID2.write('TER\n');
+
+        # Right-justify the new resID in columns 23-26 (chain ID in column 22)
+        if new_resid < 10:
+            fID2.write(line[0:21]+chain_letters[chain_id]+'   '+str(new_resid)+line[26::]);
+        elif new_resid < 100:
+            fID2.write(line[0:21]+chain_letters[chain_id]+'  '+str(new_resid)+line[26::]);
+        else:
+            fID2.write(line[0:21]+chain_letters[chain_id]+' '+str(new_resid)+line[26::]);
+
+    print(nResiduesPerChain)
+
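Note (not part of the patch): ChangePDB.py relies on the fixed-column PDB format, which is what the string slices above encode. A minimal sketch of the mapping, using a hypothetical ATOM record — Python slices are 0-based and end-exclusive, so 1-based PDB columns 61-66 become line[60:66]:

    # Hypothetical ATOM record, for illustration only
    line = "ATOM      1  OH2 TIP3    1      11.111  22.222  33.333  1.00  0.00"
    atom_name = line[13:17]    # atom-name field rewritten by -rename
    res_name = line[17:21]     # residue-name field rewritten by -rename
    res_id = int(line[22:26])  # residue sequence number used by -beta/-renumber
    beta = line[60:66]         # B-factor (beta) column rewritten by -beta
    print(atom_name, res_name, res_id, beta)
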
diff --git a/benchmarking/computing.py b/benchmarking/computing.py
index 312ae8b..e11909a 100644
--- a/benchmarking/computing.py
+++ b/benchmarking/computing.py
@@ -27,7 +27,8 @@ def compute(extractor_type,
             overwrite=False,
             accuracy_method='mse',
             displacement=1e-1,
-            visualize=True,
+            visualize=False,
+            natoms=100,
             noise_level=1e-2,  # [1e-2, 1e-2, 2e-1, 2e-1],
             output_dir="output/benchmarking/"):
     """
@@ -49,20 +50,24 @@ def compute(extractor_type,
                                                          n_iterations=n_iterations)
     n_extractors = len(extractor_names)
     for iter in range(iterations_per_model):
-        modeldir = "{output_dir}/{extractor_type}/{feature_type}/{test_model}/noise-{noise_level}/iter-{iter}/".format(
+        modeldir = "{output_dir}/{extractor_type}/{feature_type}/{test_model}/noise-{noise_level}/atoms-{natoms}/iter-{iter}/".format(
             output_dir=output_dir,
             extractor_type=extractor_type,
             feature_type=feature_type,
             test_model=test_model,
             noise_level=noise_level,
-            iter=iter)
+            natoms=natoms,
+            iter=iter
+        )
         finished_extractors = []
         for name in extractor_names:
-            if not overwrite and os.path.exists(modeldir):
+            if os.path.exists(modeldir):
                 filepath = "{}/{}/importance_per_residue.npy".format(modeldir, name)
                 existing_files = glob.glob(filepath)
-                if len(existing_files) > 0:
+                if overwrite and len(existing_files) > 0:
+                    logger.debug("File %s already exists. overwriting", existing_files[0])
+                elif len(existing_files) > 0:
                     logger.debug("File %s already exists. skipping computations", existing_files[0])
                     finished_extractors.append(name)
                 else:
@@ -70,10 +75,11 @@ def compute(extractor_type,
             else:
                 os.makedirs(modeldir)
         needs_computations = len(finished_extractors) < n_extractors
-        dg = DataGenerator(natoms=100,
+        atoms_per_cluster = max(int(natoms / 10 + 0.5), 1)
+        dg = DataGenerator(natoms=natoms,
                            nclusters=3,
-                           natoms_per_cluster=[10, 10, 10],
-                           nframes_per_cluster=1200 if needs_computations else 2,
+                           natoms_per_cluster=[atoms_per_cluster, atoms_per_cluster, atoms_per_cluster],
+                           # Faster generation for postprocessing purposes when we don't need the frames
+                           nframes_per_cluster=12 * natoms if needs_computations else 2,
                            test_model=test_model,
                            noise_natoms=None,
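For reference, the new natoms parameter scales the toy system as follows — a quick trace of the two expressions introduced above, nothing more:

    for natoms in (10, 100, 200):
        atoms_per_cluster = max(int(natoms / 10 + 0.5), 1)  # ~10% of the atoms per cluster, at least 1
        nframes_per_cluster = 12 * natoms                   # frames grow linearly with system size
        print(natoms, atoms_per_cluster, nframes_per_cluster)
    # -> (10, 1, 120), (100, 10, 1200), (200, 20, 2400); natoms=100 reproduces the
    #    old hard-coded natoms_per_cluster=[10, 10, 10] and 1200 frames per cluster.
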
diff --git a/benchmarking/configuration.py b/benchmarking/configuration.py
index bba3f80..82331f4 100644
--- a/benchmarking/configuration.py
+++ b/benchmarking/configuration.py
@@ -62,24 +62,47 @@ def create_rand_feature_extractors(extractor_kwargs):
     ]
 
 
-def create_RF_feature_extractors(extractor_kwargs, n_estimators=[10, 100, 200, 1000]):
-    extractors = []
-    for one_vs_rest in [True, False]:
-        suffix = "" if one_vs_rest else "_multiclass"
-        for nest in n_estimators:
-            extractors.append(
-                fe.RandomForestFeatureExtractor(
-                    name="{}-estimators{}".format(nest, suffix),
-                    classifier_kwargs={
-                        'n_estimators': nest
-                    },
-                    one_vs_rest=one_vs_rest,
-                    **extractor_kwargs)
-            )
-
+def _shuffle_and_shorten(extractors, random_state=6, max_number_extractors=12):
+    extractors = np.array(extractors)
+    if random_state is not None:
+        np.random.seed(random_state)
+    np.random.shuffle(extractors)
+    extractors = extractors[:max_number_extractors]
+    if random_state is not None:
+        np.random.seed()  # reset seed
     return extractors
 
 
+def create_RF_feature_extractors(extractor_kwargs,
+                                 n_estimators=[10, 100, 1000],
+                                 min_samples_leaves=[0.25, 0.1, 1],
+                                 max_depths=[None, 1, 10, 100]):
+    extractors = []
+    for one_vs_rest in [False, True]:
+        # We only consider min_samples_leaf and max_depth if we have real multiclass
+        for md in [None] if one_vs_rest else max_depths:
+            for msl in [1] if one_vs_rest else min_samples_leaves:
+                suffix = "" if one_vs_rest else "_multiclass"
+                if md is not None:  # None is the default value
+                    suffix += "_max_depth{}".format(md)
+                if msl != 1:  # 1 is the default value
+                    suffix += "_min_samples_leaf{}".format(msl)
+                for nest in n_estimators:
+                    extractors.append(
+                        fe.RandomForestFeatureExtractor(
+                            name="{}-estimators{}".format(nest, suffix),
+                            classifier_kwargs={
+                                'n_estimators': nest,
+                                'min_samples_leaf': msl,
+                                'max_depth': md
+                            },
+                            one_vs_rest=one_vs_rest,
+                            **extractor_kwargs)
+                    )
+    return _shuffle_and_shorten(extractors)
+    # return extractors
+
+
 def create_PCA_feature_extractors(extractor_kwargs,
                                   variance_cutoffs=["auto", "1_components", "2_components", 50, 100]):
     return [
         fe.PCAFeatureExtractor(
@@ -156,48 +179,59 @@ def create_MLP_feature_extractors(extractor_kwargs,
 
 def create_AE_feature_extractors(extractor_kwargs,
                                  alpha_hidden_layers=[
-                                     # (0.001, [1, ]),  # new
-                                     (0.001, [10, ]),
-                                     # (0.00001, [10, ]),  # new
+                                     (1e-2, "auto"),
+                                     (0.01, [10, ]),
+                                     (0.001, [100, ]),
+                                     (0.00001, [100, ]),
                                      (0.01, [10, 7, 5, 2, 5, 7, 10, ]),
-                                     (0.001, [10, 7, 5, 2, 5, 7, 10, ]),
-                                     (0.01, [20, 10, 7, 5, 2, 5, 7, 10, 20]),
-                                     (0.01, [20, 5, 20, ]),
-                                     # New values
-                                     # (0.0001, [8, 2, 8, ]),
-                                     # (0.0001, [16, ]),
-                                     # (0.1, [8, 2, 8, ]),
-                                     # (0.0001, [1, ]),
-
-                                     # Benchmarking layer size
-                                     # (0.0001, [10, 2, 10, ]),
-                                     # (0.0001, [100, 25, 100, ]),  # actually used in both benchmarks
-                                     # (0.0001, [100, 25, 5, 25, 100, ]),
-                                     # # benchmarking alpha
-                                     # (0.001, [100, 25, 100, ]),
-                                     # (0.01, [100, 25, 100, ]),
-                                     # (0.1, [100, 25, 100, ]),
-
-                                 ]
+
+                                 ],
+                                 batch_sizes=["auto", 10, 100],  # 1000
+                                 learning_rates=["adaptive"],
+                                 # batch_sizes=["auto"],
+                                 max_iters=[20, 200],  # 2000
+                                 # max_iters=[200],
+                                 parameter_set_default=None,  # "original",
+                                 # Used to enhance performance; just using one set of hyperparams
                                  ):
+    if parameter_set_default is not None:
+        learning_rates = ["adaptive"]
+        batch_sizes = ["auto"]
+        max_iters = [200]
+        if parameter_set_default == 'original':
+            # Default values used in the bioRxiv version
+            alpha_hidden_layers = [(0.001, [10, ]),
+                                   (0.01, [10, 7, 5, 2, 5, 7, 10, ]),
+                                   (0.001, [10, 7, 5, 2, 5, 7, 10, ]),
+                                   (0.01, [20, 10, 7, 5, 2, 5, 7, 10, 20]),
+                                   (0.01, [20, 5, 20, ]), ]
+        elif parameter_set_default == 'auto':
+            # Simple setup if you just want to run with one parameter set
+            alpha_hidden_layers = [(0.01, "auto")]
+
     feature_extractors = []
     for alpha, layers in alpha_hidden_layers:
-        name = "{}-alpha_{}-layers".format(alpha, "x".join([str(l) for l in layers]))
-        feature_extractors.append(
-            fe.MlpAeFeatureExtractor(
-                name=name,
-                classifier_kwargs={
-                    'hidden_layer_sizes': layers,
-                    'max_iter': 200,
-                    'learning_rate': 'adaptive',
-                    'alpha': alpha,
-                    'solver': "adam",
-                    'early_stopping': True,
-                    'tol': 1e-2,
-                    'warm_start': False
-                },
-                activation="logistic",
-                use_reconstruction_for_lrp=True,
-                **extractor_kwargs)
-        )
-    return feature_extractors
+        for bs in batch_sizes:
+            for lr in learning_rates:
+                for mi in max_iters:
+                    layers_str = layers if isinstance(layers, str) else "x".join([str(l) for l in layers])
+                    name = "{}-alpha_{}-layers".format(alpha, layers_str)
+                    name += "_{}-batchsize_{}-maxiter_{}-learningrate".format(bs, mi, lr)
+                    feature_extractors.append(
+                        fe.MlpAeFeatureExtractor(
+                            name=name,
+                            classifier_kwargs={
+                                'hidden_layer_sizes': layers,
+                                'max_iter': mi,
+                                'learning_rate': lr,  # not used with the adam solver
+                                'batch_size': bs,
+                                'alpha': alpha,
+                                'solver': "adam",
+                                'early_stopping': True,
+                                'tol': 1e-2,
+                                'warm_start': False
+                            },
+                            activation="logistic",
+                            use_reconstruction_for_lrp=True,
+                            **extractor_kwargs)
+                    )
+    return _shuffle_and_shorten(feature_extractors, max_number_extractors=6)
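The new _shuffle_and_shorten helper keeps these hyperparameter sweeps bounded: it shuffles the grid with a fixed seed, so the same subset is picked on every run, then restores an unseeded RNG. A standalone sketch of the same idea (a reimplementation for illustration, not the patched code itself):

    import numpy as np

    def shuffle_and_shorten(items, random_state=6, max_number=12):
        items = np.array(items)
        if random_state is not None:
            np.random.seed(random_state)  # fixed seed -> deterministic subset
        np.random.shuffle(items)
        items = items[:max_number]
        if random_state is not None:
            np.random.seed()  # re-randomize the global RNG afterwards
        return items

    print(shuffle_and_shorten(list(range(20))))  # same 12 values on every call
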
{}".format(nclusters)) + if max(natoms_per_cluster) >= natoms: + raise Exception("parameter natoms_per_cluster should be less than the number of atoms in the system") self.natoms = natoms self.nclusters = nclusters self.natoms_per_cluster = natoms_per_cluster @@ -271,9 +273,9 @@ def _to_compact_dist(self, conf): def _to_cartesian(self, conf): - if "_rot" in self.feature_type: + if "rot" in self.feature_type : conf = self._random_rotation(conf) - if "_trans" in self.feature_type: + if "trans" in self.feature_type: conf = self._random_translation(conf) feats = np.empty((self.nfeatures)) diff --git a/modules/feature_extraction/mlp_ae_feature_extractor.py b/modules/feature_extraction/mlp_ae_feature_extractor.py index 4f50408..988531d 100644 --- a/modules/feature_extraction/mlp_ae_feature_extractor.py +++ b/modules/feature_extraction/mlp_ae_feature_extractor.py @@ -27,12 +27,26 @@ def __init__(self, self.use_reconstruction_for_lrp = use_reconstruction_for_lrp logger.debug("Initializing MLP AE with the following parameters:" " use_reconstruction_for_lrp %s", use_reconstruction_for_lrp) + # If no hidden layers are specific or it is set to auto we will set this hyperparameter on the fly + self._automatic_layer_size = self.classifier_kwargs.get("hidden_layer_sizes", "auto") in [None, "auto"] def train(self, train_set, train_labels): logger.debug("Training %s with %s samples and %s features ...", self.name, train_set.shape[0], train_set.shape[1]) classifier_kwargs = self.classifier_kwargs - classifier_kwargs['hidden_layer_sizes'] = list(classifier_kwargs['hidden_layer_sizes']) + [train_set.shape[1]] + if self._automatic_layer_size: + # Use a rule of thumb with decreasing the layer half at the time + layer_sizes = [] + nfeatures = train_set.shape[1] + n_hidden_nodes = nfeatures / 2 + while n_hidden_nodes > 0 and n_hidden_nodes > nfeatures / 8: + layer_sizes.append(int(n_hidden_nodes + 0.5)) + n_hidden_nodes /= 2 + else: + layer_sizes = list(classifier_kwargs['hidden_layer_sizes']) + # add output layer same size as features + layer_sizes += [train_set.shape[1]] + classifier_kwargs['hidden_layer_sizes'] = layer_sizes classifier = sklearn.neural_network.MLPRegressor(**classifier_kwargs) classifier.fit(train_set, train_set) # note same output as input return classifier diff --git a/modules/visualization.py b/modules/visualization.py index 96fb6b7..91d05cb 100755 --- a/modules/visualization.py +++ b/modules/visualization.py @@ -363,24 +363,35 @@ def visualize(postprocessors, plt.clf() +def _sort_by_accuracy(accuracy, xlabels, postprocessors): + mean_accuracy = accuracy.mean(axis=0) + sorted_indices = [i for i, a in sorted(enumerate(mean_accuracy), key=lambda tpl: tpl[1], reverse=True)] + return accuracy[:, sorted_indices], np.array(xlabels)[sorted_indices], postprocessors[:, sorted_indices] + + def _show_performance(postprocessors, xlabels=None, title=None, filename=None, accuracy_limits=None, - supervised=False, + plot_per_state_accuracy=False, output_dir=None, - accuracy_method=None): + sort_by_accuracy=False, + accuracy_method=None, + width_factor=0.75): if len(postprocessors) == 0: return if not os.path.exists(output_dir): os.makedirs(output_dir) - nrows = 2 if supervised else 1 + + nrows = 2 if plot_per_state_accuracy else 1 fig, axs = plt.subplots(nrows, 1, sharex=True, sharey=False, squeeze=False, - figsize=(int(0.5 * postprocessors.shape[0]), 2 * nrows), + figsize=(int(width_factor * postprocessors.shape[0]), 2 * nrows), constrained_layout=True) fig.subplots_adjust(bottom=0.2) accuracy = 
diff --git a/modules/feature_extraction/mlp_ae_feature_extractor.py b/modules/feature_extraction/mlp_ae_feature_extractor.py
index 4f50408..988531d 100644
--- a/modules/feature_extraction/mlp_ae_feature_extractor.py
+++ b/modules/feature_extraction/mlp_ae_feature_extractor.py
@@ -27,12 +27,26 @@ def __init__(self,
         self.use_reconstruction_for_lrp = use_reconstruction_for_lrp
         logger.debug("Initializing MLP AE with the following parameters:"
                      " use_reconstruction_for_lrp %s", use_reconstruction_for_lrp)
+        # If no hidden layers are specified, or it is set to 'auto', we set this hyperparameter on the fly
+        self._automatic_layer_size = self.classifier_kwargs.get("hidden_layer_sizes", "auto") in [None, "auto"]
 
     def train(self, train_set, train_labels):
         logger.debug("Training %s with %s samples and %s features ...", self.name, train_set.shape[0],
                      train_set.shape[1])
         classifier_kwargs = self.classifier_kwargs
-        classifier_kwargs['hidden_layer_sizes'] = list(classifier_kwargs['hidden_layer_sizes']) + [train_set.shape[1]]
+        if self._automatic_layer_size:
+            # Rule of thumb: halve the layer size for every consecutive hidden layer
+            layer_sizes = []
+            nfeatures = train_set.shape[1]
+            n_hidden_nodes = nfeatures / 2
+            while n_hidden_nodes > 0 and n_hidden_nodes > nfeatures / 8:
+                layer_sizes.append(int(n_hidden_nodes + 0.5))
+                n_hidden_nodes /= 2
+        else:
+            layer_sizes = list(classifier_kwargs['hidden_layer_sizes'])
+        # Add an output layer of the same size as the feature input
+        layer_sizes += [train_set.shape[1]]
+        classifier_kwargs['hidden_layer_sizes'] = layer_sizes
         classifier = sklearn.neural_network.MLPRegressor(**classifier_kwargs)
         classifier.fit(train_set, train_set)  # note same output as input
         return classifier
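The "auto" branch above picks hidden-layer sizes by halving the feature count until it drops to about an eighth of the input width, then appends an output layer of the original width. Traced by hand (illustration only):

    def auto_layers(nfeatures):
        layer_sizes = []
        n_hidden_nodes = nfeatures / 2
        while n_hidden_nodes > 0 and n_hidden_nodes > nfeatures / 8:
            layer_sizes.append(int(n_hidden_nodes + 0.5))
            n_hidden_nodes /= 2
        return layer_sizes + [nfeatures]  # decoder output matches the input size

    print(auto_layers(100))  # -> [50, 25, 100]
    print(auto_layers(10))   # -> [5, 3, 10]
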
diff --git a/modules/visualization.py b/modules/visualization.py
index 96fb6b7..91d05cb 100755
--- a/modules/visualization.py
+++ b/modules/visualization.py
@@ -363,24 +363,35 @@ def visualize(postprocessors,
     plt.clf()
 
 
+def _sort_by_accuracy(accuracy, xlabels, postprocessors):
+    mean_accuracy = accuracy.mean(axis=0)
+    sorted_indices = [i for i, a in sorted(enumerate(mean_accuracy), key=lambda tpl: tpl[1], reverse=True)]
+    return accuracy[:, sorted_indices], np.array(xlabels)[sorted_indices], postprocessors[:, sorted_indices]
+
+
 def _show_performance(postprocessors,
                       xlabels=None,
                       title=None,
                       filename=None,
                       accuracy_limits=None,
-                      supervised=False,
+                      plot_per_state_accuracy=False,
                       output_dir=None,
-                      accuracy_method=None):
+                      sort_by_accuracy=False,
+                      accuracy_method=None,
+                      width_factor=0.75):
     if len(postprocessors) == 0:
         return
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
-    nrows = 2 if supervised else 1
+
+    nrows = 2 if plot_per_state_accuracy else 1
     fig, axs = plt.subplots(nrows, 1, sharex=True, sharey=False, squeeze=False,
-                            figsize=(int(0.5 * postprocessors.shape[0]), 2 * nrows),
+                            figsize=(int(width_factor * postprocessors.shape[0]), 2 * nrows),
                             constrained_layout=True)
     fig.subplots_adjust(bottom=0.2)
     accuracy = utils.to_accuracy(postprocessors)
+    if sort_by_accuracy:
+        accuracy, xlabels, postprocessors = _sort_by_accuracy(accuracy, xlabels, postprocessors)
     ax0 = axs[0, 0]
     if title is not None:
         ax0.set_title(title)
@@ -395,7 +406,7 @@ def _show_performance(postprocessors,
     elif accuracy_method == 'relevant_fraction':
         accuracy_label = "Accuracy:\nignoring irrelevant"
     ax0.set_ylabel(accuracy_label)
-    if supervised:
+    if plot_per_state_accuracy:
         # Per state
         ax0.get_shared_y_axes().join(ax0, axs[1, 0])  # share y range with regular accuracy
         axs[1, 0].boxplot(utils.to_accuracy_per_cluster(postprocessors),
@@ -434,7 +445,8 @@ def show_single_extractor_performance(postprocessors,
                       xlabels=xlabels,
                       # title=extractor_type,
                       filename=filename,
-                      supervised=postprocessors[0, 0].extractor.supervised,
+                      sort_by_accuracy=True,
+                      plot_per_state_accuracy=postprocessors[0, 0].extractor.supervised,
                       output_dir=output_dir,
                       accuracy_method=accuracy_method)
 
@@ -460,11 +472,58 @@ def show_all_extractors_performance(postprocessors,
                       # title=title,
                       filename=filename,
                       accuracy_limits=[0.2, 1.1],
-                      supervised=supervised,
+                      plot_per_state_accuracy=supervised,
                       output_dir=output_dir,
+                      width_factor=0.4,
                       accuracy_method=accuracy_method)
 
 
+def show_system_size_dependence(n_atoms,
+                                postprocessors,
+                                extractor_types,
+                                test_model=None,
+                                noise_level=None,
+                                feature_type=None,
+                                filename=None,
+                                output_dir="output/benchmarking/",
+                                accuracy_method=None):
+    if len(postprocessors) == 0:
+        return
+    if filename is None:
+        filename = "{feature_type}_{test_model}_{noise_level}noise_{natoms}atoms_{accuracy_method}_{extractor_types}.svg".format(
+            feature_type=feature_type,
+            test_model=test_model,
+            extractor_types="&".join(extractor_types),
+            noise_level=noise_level,
+            accuracy_method=accuracy_method,
+            natoms="{}to{}".format(min(n_atoms), max(n_atoms))
+        )
+    fig, axs = plt.subplots(1, 1, sharex=True, sharey=False, squeeze=False,
+                            # figsize=(int(0.5 * postprocessors.shape[0]), 2 * nrows),
+                            constrained_layout=True)
+    ax = axs[0, 0]
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    accuracy_label = "Accuracy"
+    if accuracy_method == 'mse':
+        accuracy_label = "Accuracy:\nfinding all"
+    elif accuracy_method == 'relevant_fraction':
+        accuracy_label = "Accuracy:\nignoring irrelevant"
+    ax.set_ylabel(accuracy_label)
+    ax.set_xlabel("Number of atoms in toy model")
+    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
+    for extractor_idx, extractor in enumerate(extractor_types):
+        accuracy = utils.to_accuracy(postprocessors[:, extractor_idx])
+        y = accuracy.mean(axis=1)
+        ystd = accuracy.std(axis=1)
+        ax.plot(n_atoms, y, label=extractor)
+        ax.fill_between(n_atoms, y - ystd, y + ystd, color="gray", alpha=0.2)
+    plt.tight_layout(pad=0.3)
+    plt.legend()
+    plt.savefig(output_dir + filename)
+    plt.clf()
+
+
 def _to_settings_string(extractor):
     parts = []
     if isinstance(extractor, fe.MlpFeatureExtractor):
@@ -472,11 +531,19 @@ def _to_settings_string(extractor):
         if alpha is not None:
             parts.append(utils.to_scientific_number_format(alpha))
         layers = extractor.classifier_kwargs.get('hidden_layer_sizes', (100,))
-        ls = []
-        for idx, l in enumerate(layers):
-            if idx == 0 or l < layers[idx - 1]:  # assuming decreasing layer size here, not showing decoding layers
-                ls.append(l)
-        parts.append("x".join([str(l) for l in ls]))
+        if isinstance(layers, str):
+            parts.append(layers)
+        else:
+            ls = []
+            for idx, l in enumerate(layers):
+                if idx == 0 or l < layers[idx - 1]:  # assuming decreasing layer size here, not showing decoding layers
+                    ls.append(l)
+            parts.append("x".join([str(l) for l in ls]))
+        if isinstance(extractor, fe.MlpAeFeatureExtractor):
+            batch_size = extractor.classifier_kwargs.get('batch_size', None)
+            # learning_rate = extractor.classifier_kwargs.get('learning_rate', None)  # not relevant when solver = 'adam'
+            max_iter = extractor.classifier_kwargs.get('max_iter', None)
+            parts += [str(s) for s in [batch_size, max_iter]]
     elif isinstance(extractor, fe.RbmFeatureExtractor):
         learning_rate = extractor.classifier_kwargs.get('learning_rate', None)
         if learning_rate is not None:
@@ -488,6 +555,10 @@ def _to_settings_string(extractor):
             parts.append(str(nest))
         binary = extractor.one_vs_rest
         parts.append("BC" if binary else "MC")
+        min_samples_leaf = extractor.classifier_kwargs.get('min_samples_leaf', 1)
+        max_depth = extractor.classifier_kwargs.get('max_depth', None)
+        parts.append("{}".format(min_samples_leaf))
+        parts.append("{}".format(max_depth if max_depth is not None else "-"))
     elif isinstance(extractor, fe.PCAFeatureExtractor):
         cutoff = extractor.variance_cutoff
         if isinstance(cutoff, int) or isinstance(cutoff, float):
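The new _sort_by_accuracy helper orders extractors by their mean accuracy across iterations so the box plots read best-to-worst. On toy numbers (illustration only):

    import numpy as np

    accuracy = np.array([[0.5, 0.9, 0.7],
                         [0.3, 0.8, 0.6]])  # rows: iterations, columns: extractors
    mean_accuracy = accuracy.mean(axis=0)   # -> [0.4, 0.85, 0.65]
    order = [i for i, a in sorted(enumerate(mean_accuracy), key=lambda t: t[1], reverse=True)]
    print(order)               # -> [1, 2, 0]: best extractor first
    print(accuracy[:, order])  # columns reordered the same way, as are xlabels/postprocessors
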
diff --git a/run_benchmarks.py b/run_benchmarks.py
deleted file mode 100644
index 1e8dc4f..0000000
--- a/run_benchmarks.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-import logging
-import sys
-
-logging.basicConfig(
-    stream=sys.stdout,
-    level=logging.DEBUG,
-    format='%(asctime)s %(name)s-%(levelname)s: %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S')
-import matplotlib as mpl
-
-mpl.use('Agg')  # TO DISABLE GUI. USEFUL WHEN RUNNING ON CLUSTER WITHOUT X SERVER
-import argparse
-import numpy as np
-from benchmarking import computing
-from modules import visualization, utils
-
-logger = logging.getLogger("benchmarking")
-
-
-def _fix_extractor_type(extractor_types):
-    extractor_types = utils.make_list(extractor_types)
-    if len(extractor_types) == 1:
-        et = extractor_types[0]
-        if et == "supervised":
-            return ["KL", "RF", "MLP", "RAND"]
-        elif et == "unsupervised":
-            return ["PCA", "RBM", "AE", "RAND"]
-        elif et == "all":
-            return ["PCA", "RBM", "AE", "KL", "RF", "MLP", "RAND"]
-    return extractor_types
-
-
-def create_argparser():
-    _bool_lambda = lambda x: (str(x).lower() == 'true')
-    parser = argparse.ArgumentParser(
-        epilog='Benchmarking for demystifying')
-    parser.add_argument('--extractor_type', nargs='+', help='list of extractor types (MLP, KL, PCA, ..)', type=str,
-                        required=True)
-    parser.add_argument('--output_dir', type=str, help='Root directory for output files',
-                        default="output/benchmarking/")
-    parser.add_argument('--test_model', nargs='+', type=str, help='Toy model displacement: linear or non-linear',
-                        default="linear")
-    parser.add_argument('--feature_type', nargs='+',
-                        type=str, help='Toy model feature type: cartesian_rot, inv-dist, etc.',
-                        default="cartesian_rot")
-    parser.add_argument('--noise_level', nargs='+', type=float,
-                        help='Strength of noise added to atomic coordinates at each frame',
-                        default=1e-2)
-    parser.add_argument('--displacement', type=float, help='Strength of displacement for important atoms', default=1e-1)
-    parser.add_argument('--overwrite', type=_bool_lambda,
-                        help='Overwrite existing results with new (if set to False no new computations will be performed)',
-                        default=False)
-    parser.add_argument('--visualize', type=_bool_lambda, help='Generate output figures', default=True)
-    parser.add_argument('--iterations_per_model', type=int, help='', default=10)
-    parser.add_argument('--accuracy_method', nargs='+', type=str, help='', default='mse')
-
-    return parser
-
-
-def do_run(args, extractor_types, noise_level, test_model, feature_type, accuracy_method):
-    visualize = args.visualize
-    output_dir = args.output_dir
-    fig_filename = "{feature_type}_{test_model}_{noise_level}noise_{accuracy_method}.svg".format(
-        feature_type=feature_type,
-        test_model=test_model,
-        noise_level=noise_level,
-        accuracy_method=accuracy_method)
-    best_processors = []
-    for et in extractor_types:
-        try:
-            postprocessors = computing.compute(extractor_type=et,
-                                               output_dir=output_dir,
-                                               feature_type=feature_type,
-                                               overwrite=args.overwrite,
-                                               accuracy_method=accuracy_method,
-                                               iterations_per_model=args.iterations_per_model,
-                                               noise_level=noise_level,
-                                               visualize=visualize,
-                                               test_model=test_model)
-            if visualize:
-                visualization.show_single_extractor_performance(postprocessors=postprocessors,
-                                                                extractor_type=et,
-                                                                filename=fig_filename,
-                                                                output_dir=output_dir,
-                                                                accuracy_method=accuracy_method)
-            best_processors.append(utils.find_best(postprocessors))
-        except Exception as ex:
-            logger.exception(ex)
-            logger.warn("Failed for extractor %s ", et)
-            raise ex
-    if visualize:
-        fig_filename = fig_filename.replace(".svg", "_{}.svg".format("-".join(extractor_types)))
-        visualization.show_all_extractors_performance(np.array(best_processors),
-                                                      extractor_types,
-                                                      feature_type=feature_type,
-                                                      filename=fig_filename,
-                                                      output_dir=output_dir,
-                                                      accuracy_method=accuracy_method)
-
-
-def run_all(args):
-    extractor_types = _fix_extractor_type(args.extractor_type)
-    for noise_level in utils.make_list(args.noise_level):
-        for test_model in utils.make_list(args.test_model):
-            for feature_type in utils.make_list(args.feature_type):
-                for accuracy_method in utils.make_list(args.accuracy_method):
-                    do_run(args, extractor_types, noise_level, test_model, feature_type, accuracy_method)
-
-
-if __name__ == "__main__":
-    parser = create_argparser()
-    args = parser.parse_args()
-    logger.info("Starting script run_benchmarks with arguments %s", args)
-    run_all(args)
-    logger.info("Done!")