Update readme file

anuprulez · Oct 15, 2019 · 6e91571 · 6e91571
1 parent f8c3d2a
commit 6e91571
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -129,48 +129,18 @@ In the set of training sequences, each one can have many labels (or categories)
 
 ## Accuracy on test data
 
-### Comparison of different networks (Network with deep layers (DNN), Convolutional (CNN) and Recurrent (RNN) networks)
+### Precision comparison of different networks (Dense neural network (DNN), Convolutional (CNN) and Recurrent (RNN/GRU) neural networks)
 
 <p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/precision.png">
+  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/tool_recommendation_release_19_09/plots/precision.png">
 </p>
 
-### Comparison of two different networks (CNN and RNN)
+### Comparison of usage frequencies of predicted tools across different networks (DNN, CNN and RNN/GRU)
 
 <p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/precision_cnn_rnn.png">
+  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/tool_recommendation_release_19_09/plots/usage.png">
 </p>
 
-The plot above shows precision computed over training epochs on test data. The test data makes `20%` of the complete dataset (sequences of tools). 
-
-### Comparison of different networks (Network with deep layers (DNN), Convolutional (CNN) and Recurrent (RNN) networks)
-
-<p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/loss.png">
-</p>
-
-### Comparison of two different networks (CNN and RNN)
-
-<p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/loss_cnn_rnn.png">
-</p>
-
-The plot above shows the cross-entropy loss over training epochs.
-
-### Comparison of different networks (Network with deep layers (DNN), Convolutional (CNN) and Recurrent (RNN) networks)
-
-<p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/usage.png">
-</p>
-
-### Comparison of two different networks (CNN and RNN)
-
-<p align="center">
-  <img src="https://raw.githubusercontent.com/anuprulez/similar_galaxy_workflow/release_tool_recommendation_v_03_19/images/usage_cnn_rnn.png">
-</p>
-
-The plot above shows the increase of mean usage over training epochs. As the precision improves, tools with higher usage are predicted.
-
 
 ## Literature:
 - [LSTM](http://colah.github.io/posts/2015-08-Understanding-LSTMs/)

diff --git a/output_files/paper_plots_dense_cnn_rnn.py b/output_files/paper_plots_dense_cnn_rnn.py
@@ -28,7 +28,7 @@
 
 loss_ylim = (0.0, 1.0)
 usage_ylim = (2.5, 5.0)
-precision_ylim = (0.75, 1.0)
+precision_ylim = (0.95, 1.0)
 gs = gridspec.GridSpec(3,2)
 leg_loc = 3
 leg_size = 16
@@ -306,19 +306,7 @@ def plot_path_size_distribution(x_val, title, xlabel, ylabel, xlabels):
     plt.grid(True)
     plt.show()
 
-#plot_path_size_distribution(sorted_key_values, 'Data distribution', 'Number of tools in paths', 'Number of paths', sizes)
-
-all_approaches_path = ['cnn_bc/', 'cnn_custom_loss/', 'rnn_bc/', 'rnn_custom_loss/']
-
-titles = ['(a) CNN', '(b) CNN (weighted loss)', '(c) GRU', '(d) GRU with weighted loss']
-
-
-#assemble_loss()
-#plt.show()
-#assemble_usage()
-#plt.show()
-#assemble_accuracy()
-#plt.show()
+#plot_path_size_distribution(sorted_key_values, 'Data distribution', 'Number of tools in paths', 'Number of paths', sizes)'''
 
 ################################################################ Tool usage
 
@@ -327,18 +315,18 @@ def plot_path_size_distribution(x_val, title, xlabel, ylabel, xlabels):
 import numpy as np
 import collections
 
-import plotly
-import plotly.graph_objs as go
-from plotly import tools
-import plotly.io as pio
+#import plotly
+#import plotly.graph_objs as go
+#from plotly import tools
+#import plotly.io as pio
 from matplotlib import pyplot as plt
 
 def format_tool_id(tool_link):
         tool_id_split = tool_link.split( "/" )
         tool_id = tool_id_split[ -2 ] if len( tool_id_split ) > 1 else tool_link
         return tool_id
 
-tool_usage_file = "../data/tool_usage/tool-popularity.tsv"
+tool_usage_file = "../data/tool-popularity-19-09.tsv"
 cutoff_date = '2017-12-01'
 tool_usage_dict = dict()
 tool_list = list()
@@ -373,7 +361,7 @@ def format_tool_id(tool_link):
 colors = ['r', 'b', 'g', 'c']
 tool_names = ['Cut1', 'cufflinks', 'bowtie2', 'DatamashOps']
 legends_tools = ['Tool B', 'Tool C', 'Tool D', 'Tool E']
-xticks = ['Jan, 2018', 'Feb, 2018', 'Mar, 2018', 'Apr, 2018', 'May, 2018', 'Jun, 2018', 'Jul, 2018', 'Aug, 2018', 'Sep, 2018', 'Oct, 2018', 'Nov, 2018', 'Dec, 2018', 'Jan, 2019', 'Feb, 2019' ]
+xticks = ['Jan, 2018', '', 'Mar, 2018', '', 'May, 2018', '', 'Jul, 2018', '', 'Sep, 2018', '', 'Nov, 2018', '', 'Jan, 2019', '', 'Mar, 2019', '', 'May, 2019', '', 'Jul, 2019', '', 'Sep, 2019' ]
 
 def plot_tool_usage(tool_names):
     plt.figure(figsize=(12, 12))
@@ -397,5 +385,5 @@ def plot_tool_usage(tool_names):
     plt.show()
 
 
-plot_tool_usage(tool_names)'''
+plot_tool_usage(tool_names)
 
diff --git a/plots/usage_frequency.png b/plots/usage_frequency.png