diff --git a/.gitignore b/.gitignore
index 48d267266c87cf..0f8b77d39a77ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ vendor
*.swp
.jekyll-metadata
.jekyll-cache
+plantuml.jar
diff --git a/CONTRIBUTORS.yaml b/CONTRIBUTORS.yaml
index 639fa6dc2eeb1d..c2c2d7105f0ba5 100644
--- a/CONTRIBUTORS.yaml
+++ b/CONTRIBUTORS.yaml
@@ -138,7 +138,7 @@ cganote:
name: Carrie Ganote
chrisbarnettster:
- name: Christopher Barnettster
+ name: Christopher Barnett
gitter: chrisbarnettster
daltis20012009:
@@ -205,7 +205,11 @@ fidelram:
name: Fidel Ramirez
foellmelanie:
- name: Melanie Foell
+ name: Melanie Föll
+ email: melanie.foell@mol-med.uni-freiburg.de
+ twitter: MCFoell
+ gitter: foellmelanie
+ orcid: 0000-0002-1887-7543
fpsom:
name: Fotis E. Psomopoulos
@@ -346,7 +350,7 @@ marziacremona:
email: mac78@psu.edu
matthias313:
- name: Matthias
+ name: Matthias Fahrner
mblue9:
name: Maria Doyle
@@ -402,6 +406,10 @@ NickSto:
name: Nick Stoler
twitter: NickStoler
+nomadscientist:
+ name: Wendi Bacon
+ email: wendi@ebi.ac.uk
+
nsoranzo:
name: Nicola Soranzo
gitter: nsoranzo
@@ -496,8 +504,8 @@ subinamehta:
linkedin: subinamehta
gitter: subinamehta
orcid: 0000-0001-9818-0537
-
-
+
+
tealiie:
thomasmanke:
@@ -540,7 +548,7 @@ tklingstrom:
twitter: tklingstrom
gitter: tklingstrom
orcid: 0000-0002-9504-1352
-
+
valentinmarcon:
name: Valentin Marcon
diff --git a/Makefile b/Makefile
index f7ad178eba97a8..b6fe85f1127621 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ endif
default: help
install-conda: ## install Miniconda
- curl $(MINICONDA_URL) -o miniconda.sh
+ curl -L $(MINICONDA_URL) -o miniconda.sh
bash miniconda.sh -b
.PHONY: install-conda
diff --git a/_config.yml b/_config.yml
index 3b83c4a3a69945..fd51752fedf9f5 100644
--- a/_config.yml
+++ b/_config.yml
@@ -79,80 +79,76 @@ scholar:
# Icon tag config
icon-tag:
- # in tutorials
- question: fa-question-circle
- solution: fa-eye
- hands_on: fa-pencil
- comment: fa-commenting-o
- tip: fa-lightbulb-o
- objectives: fa-bullseye
- requirements: fa-check-circle
- time: fa-hourglass-end
- keypoints: fa-key
- tool: fa-wrench
- workflow: fa-project-diagram
- feedback: fa-comments-o
- congratulations: fa-thumbs-up
- trophy: fa-trophy
- warning: fa-warning
- details: fa-info-circle
- exchange: fa-exchange
- wf-input: fa-chevron-circle-right
- sticky-note: fa-sticky-note
- new-history: fa-plus
- param-file: fa-file-o
- param-files: fa-files-o
- param-collection: fa-folder-o
- param-text: fa-pencil
- param-check: fa-check-square-o
- param-select: fa-filter
- param-repeat: fa-plus-square-o
- galaxy-eye: fa-eye
- galaxy-info: fa-info-circle
- galaxy-gear: fa-cog
- galaxy-history: fa-archive
- galaxy-library: fa-folder
- galaxy-pencil: fa-pencil
- galaxy-refresh: fa-refresh
- galaxy-barchart: fa-bar-chart
- galaxy-cross: fa-times
- galaxy-columns: fa-columns
- galaxy-tags: fa-tags
- galaxy-selector: fa-check-square-o
- galaxy-upload: fa fa-upload
- galaxy-chart-select-data: fa fa-database
- galaxy-save: fa fa-save
- galaxy-scratchbook: fa fa-th
- galaxy-dropdown: fa-caret-down
- workflow-runtime-toggle: fa-caret-square-o-up
- galaxy-wf-connection: fa-arrows-h
- galaxy-wf-new: fa-plus
- search: fa fa-search
code-in: far fa-keyboard
code-out: fas laptop-code
-
- # menus, links
- zenodo_link: fa-files-o
- tutorial: fa-laptop
- slides: fa-slideshare
- interactive_tour: fa-magic
- workflow: fa-share-alt
- topic: fa-folder-o
- instances: fa-globe
- docker_image: fa-ship
- galaxy_instance: fa-external-link
- last_modification: fa-calendar
- references: fa-bookmark
- gitter: fa-comments
- help: fa-life-ring
- github: fa-github
- email: fa-envelope-o
- twitter: fa-twitter
- linkedin: fa-linkedin
- orcid: ai-orcid
- curriculum: fa-graduation-cap
- level: fa-graduation-cap
- hall-of-fame: fa-users
+ comment: far fa-comment-dots
+ congratulations: far fa-thumbs-up
+ curriculum: fas fa-graduation-cap
+ details: fas fa-info-circle
+ docker_image: fab fa-docker
+ email: far fa-envelope
+ exchange: fas fa-exchange-alt
+ feedback: far fa-comments
+ galaxy-barchart: fas fa-chart-bar
+ galaxy-chart-select-data: fas fa-database
+ galaxy-columns: fas fa-columns
+ galaxy-cross: fas fa-times
+ galaxy-dropdown: fas fa-caret-down
+ galaxy-eye: far fa-eye
+ galaxy-gear: fas fa-cog
+ galaxy-history: fas fa-columns
+ galaxy-info: fas fa-info-circle
+ galaxy-library: far fa-folder
+ galaxy-pencil: fas fa-pencil-alt
+ galaxy-refresh: fas fa-sync-alt
+ galaxy-save: far fa-save
+ galaxy-scratchbook: fas fa-th
+ galaxy-selector: far fa-check-square
+ galaxy-tags: fas fa-tags
+ galaxy-upload: fas fa-upload
+ galaxy-wf-connection: fas fa-arrows-alt-h
+ galaxy-wf-new: fas fa-plus
+ galaxy_instance: fas fa-external-link-alt
+ github: fab fa-github
+ gitter: fab fa-gitter
+ hall-of-fame: fas fa-users
+ hands_on: fas fa-pencil-alt
+ help: far fa-question-circle
+ instances: fas fa-globe
+ interactive_tour: fas fa-magic
+ keypoints: fas fa-key
+ last_modification: far fa-calendar
+ level: fas fa-graduation-cap
+ linkedin: fab fa-linkedin
+ new-history: fas fa-plus
+ objectives: fas fa-bullseye
+ orcid: fab fa-orcid
+ param-check: far fa-check-square
+ param-collection: far fa-folder
+ param-file: far fa-file
+ param-files: far fa-copy
+ param-repeat: far fa-plus-square
+ param-select: fas fa-filter
+ param-text: fas fa-pencil-alt
+ question: far fa-question-circle
+ references: far fa-bookmark
+ requirements: fas fa-check-circle
+ search: fas fa-search
+ slides: fab fa-slideshare
+ solution: far fa-eye
+ sticky-note: fas fa-sticky-note
+ time: fas fa-hourglass-half
+ tip: far fa-lightbulb
+ tool: fas fa-wrench
+ topic: far fa-folder
+ trophy: fas fa-trophy
+ tutorial: fas fa-laptop
+ twitter: fab fa-twitter
+ warning: fas fa-exclamation-triangle
+ wf-input: fas fa-chevron-circle-right
+ workflow-runtime-toggle: far fa-caret-square-up
+ workflow: fas fa-share-alt
+ zenodo_link: far fa-copy
# To exclude in _site
exclude:
diff --git a/_layouts/base.html b/_layouts/base.html
index 2e453b2ca3f1b9..5bd50245eb3c62 100644
--- a/_layouts/base.html
+++ b/_layouts/base.html
@@ -13,7 +13,7 @@
-
+
diff --git a/_layouts/base_slides.html b/_layouts/base_slides.html
index 9977a0fe059e79..09fd8f148b087f 100644
--- a/_layouts/base_slides.html
+++ b/_layouts/base_slides.html
@@ -16,7 +16,7 @@
{% include _includes/analytics.html %}
{% endif %}
-
+
diff --git a/_plugins/jekyll-icon-tag.rb b/_plugins/jekyll-icon-tag.rb
index 88c8c813cfe924..0a37a48a9f791e 100644
--- a/_plugins/jekyll-icon-tag.rb
+++ b/_plugins/jekyll-icon-tag.rb
@@ -18,7 +18,7 @@ def render(context)
end
if icon.start_with?("fa")
- %Q(#{@text})
+ %Q(#{@text})
elsif icon.start_with?("ai")
%Q(#{@text})
end
diff --git a/assets/css/font-awesome.css b/assets/css/font-awesome.css
deleted file mode 100644
index a0b879fa0006d7..00000000000000
--- a/assets/css/font-awesome.css
+++ /dev/null
@@ -1,2199 +0,0 @@
-/*!
- * Font Awesome 4.6.3 by @davegandy - http://fontawesome.io - @fontawesome
- * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License)
- */
-/* FONT PATH
- * -------------------------- */
-@font-face {
- font-family: 'FontAwesome';
- src: url('../fonts/fontawesome-webfont.eot?v=4.6.3');
- src: url('../fonts/fontawesome-webfont.eot?#iefix&v=4.6.3') format('embedded-opentype'), url('../fonts/fontawesome-webfont.woff2?v=4.6.3') format('woff2'), url('../fonts/fontawesome-webfont.woff?v=4.6.3') format('woff'), url('../fonts/fontawesome-webfont.ttf?v=4.6.3') format('truetype'), url('../fonts/fontawesome-webfont.svg?v=4.6.3#fontawesomeregular') format('svg');
- font-weight: normal;
- font-style: normal;
-}
-.fa {
- display: inline-block;
- font: normal normal normal 14px/1 FontAwesome;
- font-size: inherit;
- text-rendering: auto;
- -webkit-font-smoothing: antialiased;
- -moz-osx-font-smoothing: grayscale;
-}
-/* makes the font 33% larger relative to the icon container */
-.fa-lg {
- font-size: 1.33333333em;
- line-height: 0.75em;
- vertical-align: -15%;
-}
-.fa-2x {
- font-size: 2em;
-}
-.fa-3x {
- font-size: 3em;
-}
-.fa-4x {
- font-size: 4em;
-}
-.fa-5x {
- font-size: 5em;
-}
-.fa-fw {
- width: 1.28571429em;
- text-align: center;
-}
-.fa-ul {
- padding-left: 0;
- margin-left: 2.14285714em;
- list-style-type: none;
-}
-.fa-ul > li {
- position: relative;
-}
-.fa-li {
- position: absolute;
- left: -2.14285714em;
- width: 2.14285714em;
- top: 0.14285714em;
- text-align: center;
-}
-.fa-li.fa-lg {
- left: -1.85714286em;
-}
-.fa-border {
- padding: .2em .25em .15em;
- border: solid 0.08em #eeeeee;
- border-radius: .1em;
-}
-.fa-pull-left {
- float: left;
-}
-.fa-pull-right {
- float: right;
-}
-.fa.fa-pull-left {
- margin-right: .3em;
-}
-.fa.fa-pull-right {
- margin-left: .3em;
-}
-/* Deprecated as of 4.4.0 */
-.pull-right {
- float: right;
-}
-.pull-left {
- float: left;
-}
-.fa.pull-left {
- margin-right: .3em;
-}
-.fa.pull-right {
- margin-left: .3em;
-}
-.fa-spin {
- -webkit-animation: fa-spin 2s infinite linear;
- animation: fa-spin 2s infinite linear;
-}
-.fa-pulse {
- -webkit-animation: fa-spin 1s infinite steps(8);
- animation: fa-spin 1s infinite steps(8);
-}
-@-webkit-keyframes fa-spin {
- 0% {
- -webkit-transform: rotate(0deg);
- transform: rotate(0deg);
- }
- 100% {
- -webkit-transform: rotate(359deg);
- transform: rotate(359deg);
- }
-}
-@keyframes fa-spin {
- 0% {
- -webkit-transform: rotate(0deg);
- transform: rotate(0deg);
- }
- 100% {
- -webkit-transform: rotate(359deg);
- transform: rotate(359deg);
- }
-}
-.fa-rotate-90 {
- -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";
- -webkit-transform: rotate(90deg);
- -ms-transform: rotate(90deg);
- transform: rotate(90deg);
-}
-.fa-rotate-180 {
- -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";
- -webkit-transform: rotate(180deg);
- -ms-transform: rotate(180deg);
- transform: rotate(180deg);
-}
-.fa-rotate-270 {
- -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";
- -webkit-transform: rotate(270deg);
- -ms-transform: rotate(270deg);
- transform: rotate(270deg);
-}
-.fa-flip-horizontal {
- -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";
- -webkit-transform: scale(-1, 1);
- -ms-transform: scale(-1, 1);
- transform: scale(-1, 1);
-}
-.fa-flip-vertical {
- -ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";
- -webkit-transform: scale(1, -1);
- -ms-transform: scale(1, -1);
- transform: scale(1, -1);
-}
-:root .fa-rotate-90,
-:root .fa-rotate-180,
-:root .fa-rotate-270,
-:root .fa-flip-horizontal,
-:root .fa-flip-vertical {
- filter: none;
-}
-.fa-stack {
- position: relative;
- display: inline-block;
- width: 2em;
- height: 2em;
- line-height: 2em;
- vertical-align: middle;
-}
-.fa-stack-1x,
-.fa-stack-2x {
- position: absolute;
- left: 0;
- width: 100%;
- text-align: center;
-}
-.fa-stack-1x {
- line-height: inherit;
-}
-.fa-stack-2x {
- font-size: 2em;
-}
-.fa-inverse {
- color: #ffffff;
-}
-/* Font Awesome uses the Unicode Private Use Area (PUA) to ensure screen
- readers do not read off random characters that represent icons */
-.fa-glass:before {
- content: "\f000";
-}
-.fa-music:before {
- content: "\f001";
-}
-.fa-search:before {
- content: "\f002";
-}
-.fa-envelope-o:before {
- content: "\f003";
-}
-.fa-heart:before {
- content: "\f004";
-}
-.fa-star:before {
- content: "\f005";
-}
-.fa-star-o:before {
- content: "\f006";
-}
-.fa-user:before {
- content: "\f007";
-}
-.fa-film:before {
- content: "\f008";
-}
-.fa-th-large:before {
- content: "\f009";
-}
-.fa-th:before {
- content: "\f00a";
-}
-.fa-th-list:before {
- content: "\f00b";
-}
-.fa-check:before {
- content: "\f00c";
-}
-.fa-remove:before,
-.fa-close:before,
-.fa-times:before {
- content: "\f00d";
-}
-.fa-search-plus:before {
- content: "\f00e";
-}
-.fa-search-minus:before {
- content: "\f010";
-}
-.fa-power-off:before {
- content: "\f011";
-}
-.fa-signal:before {
- content: "\f012";
-}
-.fa-gear:before,
-.fa-cog:before {
- content: "\f013";
-}
-.fa-trash-o:before {
- content: "\f014";
-}
-.fa-home:before {
- content: "\f015";
-}
-.fa-file-o:before {
- content: "\f016";
-}
-.fa-clock-o:before {
- content: "\f017";
-}
-.fa-road:before {
- content: "\f018";
-}
-.fa-download:before {
- content: "\f019";
-}
-.fa-arrow-circle-o-down:before {
- content: "\f01a";
-}
-.fa-arrow-circle-o-up:before {
- content: "\f01b";
-}
-.fa-inbox:before {
- content: "\f01c";
-}
-.fa-play-circle-o:before {
- content: "\f01d";
-}
-.fa-rotate-right:before,
-.fa-repeat:before {
- content: "\f01e";
-}
-.fa-refresh:before {
- content: "\f021";
-}
-.fa-list-alt:before {
- content: "\f022";
-}
-.fa-lock:before {
- content: "\f023";
-}
-.fa-flag:before {
- content: "\f024";
-}
-.fa-headphones:before {
- content: "\f025";
-}
-.fa-volume-off:before {
- content: "\f026";
-}
-.fa-volume-down:before {
- content: "\f027";
-}
-.fa-volume-up:before {
- content: "\f028";
-}
-.fa-qrcode:before {
- content: "\f029";
-}
-.fa-barcode:before {
- content: "\f02a";
-}
-.fa-tag:before {
- content: "\f02b";
-}
-.fa-tags:before {
- content: "\f02c";
-}
-.fa-book:before {
- content: "\f02d";
-}
-.fa-bookmark:before {
- content: "\f02e";
-}
-.fa-print:before {
- content: "\f02f";
-}
-.fa-camera:before {
- content: "\f030";
-}
-.fa-font:before {
- content: "\f031";
-}
-.fa-bold:before {
- content: "\f032";
-}
-.fa-italic:before {
- content: "\f033";
-}
-.fa-text-height:before {
- content: "\f034";
-}
-.fa-text-width:before {
- content: "\f035";
-}
-.fa-align-left:before {
- content: "\f036";
-}
-.fa-align-center:before {
- content: "\f037";
-}
-.fa-align-right:before {
- content: "\f038";
-}
-.fa-align-justify:before {
- content: "\f039";
-}
-.fa-list:before {
- content: "\f03a";
-}
-.fa-dedent:before,
-.fa-outdent:before {
- content: "\f03b";
-}
-.fa-indent:before {
- content: "\f03c";
-}
-.fa-video-camera:before {
- content: "\f03d";
-}
-.fa-photo:before,
-.fa-image:before,
-.fa-picture-o:before {
- content: "\f03e";
-}
-.fa-pencil:before {
- content: "\f040";
-}
-.fa-map-marker:before {
- content: "\f041";
-}
-.fa-adjust:before {
- content: "\f042";
-}
-.fa-tint:before {
- content: "\f043";
-}
-.fa-edit:before,
-.fa-pencil-square-o:before {
- content: "\f044";
-}
-.fa-share-square-o:before {
- content: "\f045";
-}
-.fa-check-square-o:before {
- content: "\f046";
-}
-.fa-arrows:before {
- content: "\f047";
-}
-.fa-step-backward:before {
- content: "\f048";
-}
-.fa-fast-backward:before {
- content: "\f049";
-}
-.fa-backward:before {
- content: "\f04a";
-}
-.fa-play:before {
- content: "\f04b";
-}
-.fa-pause:before {
- content: "\f04c";
-}
-.fa-stop:before {
- content: "\f04d";
-}
-.fa-forward:before {
- content: "\f04e";
-}
-.fa-fast-forward:before {
- content: "\f050";
-}
-.fa-step-forward:before {
- content: "\f051";
-}
-.fa-eject:before {
- content: "\f052";
-}
-.fa-chevron-left:before {
- content: "\f053";
-}
-.fa-chevron-right:before {
- content: "\f054";
-}
-.fa-plus-circle:before {
- content: "\f055";
-}
-.fa-minus-circle:before {
- content: "\f056";
-}
-.fa-times-circle:before {
- content: "\f057";
-}
-.fa-check-circle:before {
- content: "\f058";
-}
-.fa-question-circle:before {
- content: "\f059";
-}
-.fa-info-circle:before {
- content: "\f05a";
-}
-.fa-crosshairs:before {
- content: "\f05b";
-}
-.fa-times-circle-o:before {
- content: "\f05c";
-}
-.fa-check-circle-o:before {
- content: "\f05d";
-}
-.fa-ban:before {
- content: "\f05e";
-}
-.fa-arrow-left:before {
- content: "\f060";
-}
-.fa-arrow-right:before {
- content: "\f061";
-}
-.fa-arrow-up:before {
- content: "\f062";
-}
-.fa-arrow-down:before {
- content: "\f063";
-}
-.fa-mail-forward:before,
-.fa-share:before {
- content: "\f064";
-}
-.fa-expand:before {
- content: "\f065";
-}
-.fa-compress:before {
- content: "\f066";
-}
-.fa-plus:before {
- content: "\f067";
-}
-.fa-minus:before {
- content: "\f068";
-}
-.fa-asterisk:before {
- content: "\f069";
-}
-.fa-exclamation-circle:before {
- content: "\f06a";
-}
-.fa-gift:before {
- content: "\f06b";
-}
-.fa-leaf:before {
- content: "\f06c";
-}
-.fa-fire:before {
- content: "\f06d";
-}
-.fa-eye:before {
- content: "\f06e";
-}
-.fa-eye-slash:before {
- content: "\f070";
-}
-.fa-warning:before,
-.fa-exclamation-triangle:before {
- content: "\f071";
-}
-.fa-plane:before {
- content: "\f072";
-}
-.fa-calendar:before {
- content: "\f073";
-}
-.fa-random:before {
- content: "\f074";
-}
-.fa-comment:before {
- content: "\f075";
-}
-.fa-magnet:before {
- content: "\f076";
-}
-.fa-chevron-up:before {
- content: "\f077";
-}
-.fa-chevron-down:before {
- content: "\f078";
-}
-.fa-retweet:before {
- content: "\f079";
-}
-.fa-shopping-cart:before {
- content: "\f07a";
-}
-.fa-folder:before {
- content: "\f07b";
-}
-.fa-folder-open:before {
- content: "\f07c";
-}
-.fa-arrows-v:before {
- content: "\f07d";
-}
-.fa-arrows-h:before {
- content: "\f07e";
-}
-.fa-bar-chart-o:before,
-.fa-bar-chart:before {
- content: "\f080";
-}
-.fa-twitter-square:before {
- content: "\f081";
-}
-.fa-facebook-square:before {
- content: "\f082";
-}
-.fa-camera-retro:before {
- content: "\f083";
-}
-.fa-key:before {
- content: "\f084";
-}
-.fa-gears:before,
-.fa-cogs:before {
- content: "\f085";
-}
-.fa-comments:before {
- content: "\f086";
-}
-.fa-thumbs-o-up:before {
- content: "\f087";
-}
-.fa-thumbs-o-down:before {
- content: "\f088";
-}
-.fa-star-half:before {
- content: "\f089";
-}
-.fa-heart-o:before {
- content: "\f08a";
-}
-.fa-sign-out:before {
- content: "\f08b";
-}
-.fa-linkedin-square:before {
- content: "\f08c";
-}
-.fa-thumb-tack:before {
- content: "\f08d";
-}
-.fa-external-link:before {
- content: "\f08e";
-}
-.fa-sign-in:before {
- content: "\f090";
-}
-.fa-trophy:before {
- content: "\f091";
-}
-.fa-github-square:before {
- content: "\f092";
-}
-.fa-upload:before {
- content: "\f093";
-}
-.fa-lemon-o:before {
- content: "\f094";
-}
-.fa-phone:before {
- content: "\f095";
-}
-.fa-square-o:before {
- content: "\f096";
-}
-.fa-bookmark-o:before {
- content: "\f097";
-}
-.fa-phone-square:before {
- content: "\f098";
-}
-.fa-twitter:before {
- content: "\f099";
-}
-.fa-facebook-f:before,
-.fa-facebook:before {
- content: "\f09a";
-}
-.fa-github:before {
- content: "\f09b";
-}
-.fa-unlock:before {
- content: "\f09c";
-}
-.fa-credit-card:before {
- content: "\f09d";
-}
-.fa-feed:before,
-.fa-rss:before {
- content: "\f09e";
-}
-.fa-hdd-o:before {
- content: "\f0a0";
-}
-.fa-bullhorn:before {
- content: "\f0a1";
-}
-.fa-bell:before {
- content: "\f0f3";
-}
-.fa-certificate:before {
- content: "\f0a3";
-}
-.fa-hand-o-right:before {
- content: "\f0a4";
-}
-.fa-hand-o-left:before {
- content: "\f0a5";
-}
-.fa-hand-o-up:before {
- content: "\f0a6";
-}
-.fa-hand-o-down:before {
- content: "\f0a7";
-}
-.fa-arrow-circle-left:before {
- content: "\f0a8";
-}
-.fa-arrow-circle-right:before {
- content: "\f0a9";
-}
-.fa-arrow-circle-up:before {
- content: "\f0aa";
-}
-.fa-arrow-circle-down:before {
- content: "\f0ab";
-}
-.fa-globe:before {
- content: "\f0ac";
-}
-.fa-wrench:before {
- content: "\f0ad";
-}
-.fa-tasks:before {
- content: "\f0ae";
-}
-.fa-filter:before {
- content: "\f0b0";
-}
-.fa-briefcase:before {
- content: "\f0b1";
-}
-.fa-arrows-alt:before {
- content: "\f0b2";
-}
-.fa-group:before,
-.fa-users:before {
- content: "\f0c0";
-}
-.fa-chain:before,
-.fa-link:before {
- content: "\f0c1";
-}
-.fa-cloud:before {
- content: "\f0c2";
-}
-.fa-flask:before {
- content: "\f0c3";
-}
-.fa-cut:before,
-.fa-scissors:before {
- content: "\f0c4";
-}
-.fa-copy:before,
-.fa-files-o:before {
- content: "\f0c5";
-}
-.fa-paperclip:before {
- content: "\f0c6";
-}
-.fa-save:before,
-.fa-floppy-o:before {
- content: "\f0c7";
-}
-.fa-square:before {
- content: "\f0c8";
-}
-.fa-navicon:before,
-.fa-reorder:before,
-.fa-bars:before {
- content: "\f0c9";
-}
-.fa-list-ul:before {
- content: "\f0ca";
-}
-.fa-list-ol:before {
- content: "\f0cb";
-}
-.fa-strikethrough:before {
- content: "\f0cc";
-}
-.fa-underline:before {
- content: "\f0cd";
-}
-.fa-table:before {
- content: "\f0ce";
-}
-.fa-magic:before {
- content: "\f0d0";
-}
-.fa-truck:before {
- content: "\f0d1";
-}
-.fa-pinterest:before {
- content: "\f0d2";
-}
-.fa-pinterest-square:before {
- content: "\f0d3";
-}
-.fa-google-plus-square:before {
- content: "\f0d4";
-}
-.fa-google-plus:before {
- content: "\f0d5";
-}
-.fa-money:before {
- content: "\f0d6";
-}
-.fa-caret-down:before {
- content: "\f0d7";
-}
-.fa-caret-up:before {
- content: "\f0d8";
-}
-.fa-caret-left:before {
- content: "\f0d9";
-}
-.fa-caret-right:before {
- content: "\f0da";
-}
-.fa-columns:before {
- content: "\f0db";
-}
-.fa-unsorted:before,
-.fa-sort:before {
- content: "\f0dc";
-}
-.fa-sort-down:before,
-.fa-sort-desc:before {
- content: "\f0dd";
-}
-.fa-sort-up:before,
-.fa-sort-asc:before {
- content: "\f0de";
-}
-.fa-envelope:before {
- content: "\f0e0";
-}
-.fa-linkedin:before {
- content: "\f0e1";
-}
-.fa-rotate-left:before,
-.fa-undo:before {
- content: "\f0e2";
-}
-.fa-legal:before,
-.fa-gavel:before {
- content: "\f0e3";
-}
-.fa-dashboard:before,
-.fa-tachometer:before {
- content: "\f0e4";
-}
-.fa-comment-o:before {
- content: "\f0e5";
-}
-.fa-comments-o:before {
- content: "\f0e6";
-}
-.fa-flash:before,
-.fa-bolt:before {
- content: "\f0e7";
-}
-.fa-sitemap:before {
- content: "\f0e8";
-}
-.fa-umbrella:before {
- content: "\f0e9";
-}
-.fa-paste:before,
-.fa-clipboard:before {
- content: "\f0ea";
-}
-.fa-lightbulb-o:before {
- content: "\f0eb";
-}
-.fa-exchange:before {
- content: "\f0ec";
-}
-.fa-cloud-download:before {
- content: "\f0ed";
-}
-.fa-cloud-upload:before {
- content: "\f0ee";
-}
-.fa-user-md:before {
- content: "\f0f0";
-}
-.fa-stethoscope:before {
- content: "\f0f1";
-}
-.fa-suitcase:before {
- content: "\f0f2";
-}
-.fa-bell-o:before {
- content: "\f0a2";
-}
-.fa-coffee:before {
- content: "\f0f4";
-}
-.fa-cutlery:before {
- content: "\f0f5";
-}
-.fa-file-text-o:before {
- content: "\f0f6";
-}
-.fa-building-o:before {
- content: "\f0f7";
-}
-.fa-hospital-o:before {
- content: "\f0f8";
-}
-.fa-ambulance:before {
- content: "\f0f9";
-}
-.fa-medkit:before {
- content: "\f0fa";
-}
-.fa-fighter-jet:before {
- content: "\f0fb";
-}
-.fa-beer:before {
- content: "\f0fc";
-}
-.fa-h-square:before {
- content: "\f0fd";
-}
-.fa-plus-square:before {
- content: "\f0fe";
-}
-.fa-angle-double-left:before {
- content: "\f100";
-}
-.fa-angle-double-right:before {
- content: "\f101";
-}
-.fa-angle-double-up:before {
- content: "\f102";
-}
-.fa-angle-double-down:before {
- content: "\f103";
-}
-.fa-angle-left:before {
- content: "\f104";
-}
-.fa-angle-right:before {
- content: "\f105";
-}
-.fa-angle-up:before {
- content: "\f106";
-}
-.fa-angle-down:before {
- content: "\f107";
-}
-.fa-desktop:before {
- content: "\f108";
-}
-.fa-laptop:before {
- content: "\f109";
-}
-.fa-tablet:before {
- content: "\f10a";
-}
-.fa-mobile-phone:before,
-.fa-mobile:before {
- content: "\f10b";
-}
-.fa-circle-o:before {
- content: "\f10c";
-}
-.fa-quote-left:before {
- content: "\f10d";
-}
-.fa-quote-right:before {
- content: "\f10e";
-}
-.fa-spinner:before {
- content: "\f110";
-}
-.fa-circle:before {
- content: "\f111";
-}
-.fa-mail-reply:before,
-.fa-reply:before {
- content: "\f112";
-}
-.fa-github-alt:before {
- content: "\f113";
-}
-.fa-folder-o:before {
- content: "\f114";
-}
-.fa-folder-open-o:before {
- content: "\f115";
-}
-.fa-smile-o:before {
- content: "\f118";
-}
-.fa-frown-o:before {
- content: "\f119";
-}
-.fa-meh-o:before {
- content: "\f11a";
-}
-.fa-gamepad:before {
- content: "\f11b";
-}
-.fa-keyboard-o:before {
- content: "\f11c";
-}
-.fa-flag-o:before {
- content: "\f11d";
-}
-.fa-flag-checkered:before {
- content: "\f11e";
-}
-.fa-terminal:before {
- content: "\f120";
-}
-.fa-code:before {
- content: "\f121";
-}
-.fa-mail-reply-all:before,
-.fa-reply-all:before {
- content: "\f122";
-}
-.fa-star-half-empty:before,
-.fa-star-half-full:before,
-.fa-star-half-o:before {
- content: "\f123";
-}
-.fa-location-arrow:before {
- content: "\f124";
-}
-.fa-crop:before {
- content: "\f125";
-}
-.fa-code-fork:before {
- content: "\f126";
-}
-.fa-unlink:before,
-.fa-chain-broken:before {
- content: "\f127";
-}
-.fa-question:before {
- content: "\f128";
-}
-.fa-info:before {
- content: "\f129";
-}
-.fa-exclamation:before {
- content: "\f12a";
-}
-.fa-superscript:before {
- content: "\f12b";
-}
-.fa-subscript:before {
- content: "\f12c";
-}
-.fa-eraser:before {
- content: "\f12d";
-}
-.fa-puzzle-piece:before {
- content: "\f12e";
-}
-.fa-microphone:before {
- content: "\f130";
-}
-.fa-microphone-slash:before {
- content: "\f131";
-}
-.fa-shield:before {
- content: "\f132";
-}
-.fa-calendar-o:before {
- content: "\f133";
-}
-.fa-fire-extinguisher:before {
- content: "\f134";
-}
-.fa-rocket:before {
- content: "\f135";
-}
-.fa-maxcdn:before {
- content: "\f136";
-}
-.fa-chevron-circle-left:before {
- content: "\f137";
-}
-.fa-chevron-circle-right:before {
- content: "\f138";
-}
-.fa-chevron-circle-up:before {
- content: "\f139";
-}
-.fa-chevron-circle-down:before {
- content: "\f13a";
-}
-.fa-html5:before {
- content: "\f13b";
-}
-.fa-css3:before {
- content: "\f13c";
-}
-.fa-anchor:before {
- content: "\f13d";
-}
-.fa-unlock-alt:before {
- content: "\f13e";
-}
-.fa-bullseye:before {
- content: "\f140";
-}
-.fa-ellipsis-h:before {
- content: "\f141";
-}
-.fa-ellipsis-v:before {
- content: "\f142";
-}
-.fa-rss-square:before {
- content: "\f143";
-}
-.fa-play-circle:before {
- content: "\f144";
-}
-.fa-ticket:before {
- content: "\f145";
-}
-.fa-minus-square:before {
- content: "\f146";
-}
-.fa-minus-square-o:before {
- content: "\f147";
-}
-.fa-level-up:before {
- content: "\f148";
-}
-.fa-level-down:before {
- content: "\f149";
-}
-.fa-check-square:before {
- content: "\f14a";
-}
-.fa-pencil-square:before {
- content: "\f14b";
-}
-.fa-external-link-square:before {
- content: "\f14c";
-}
-.fa-share-square:before {
- content: "\f14d";
-}
-.fa-compass:before {
- content: "\f14e";
-}
-.fa-toggle-down:before,
-.fa-caret-square-o-down:before {
- content: "\f150";
-}
-.fa-toggle-up:before,
-.fa-caret-square-o-up:before {
- content: "\f151";
-}
-.fa-toggle-right:before,
-.fa-caret-square-o-right:before {
- content: "\f152";
-}
-.fa-euro:before,
-.fa-eur:before {
- content: "\f153";
-}
-.fa-gbp:before {
- content: "\f154";
-}
-.fa-dollar:before,
-.fa-usd:before {
- content: "\f155";
-}
-.fa-rupee:before,
-.fa-inr:before {
- content: "\f156";
-}
-.fa-cny:before,
-.fa-rmb:before,
-.fa-yen:before,
-.fa-jpy:before {
- content: "\f157";
-}
-.fa-ruble:before,
-.fa-rouble:before,
-.fa-rub:before {
- content: "\f158";
-}
-.fa-won:before,
-.fa-krw:before {
- content: "\f159";
-}
-.fa-bitcoin:before,
-.fa-btc:before {
- content: "\f15a";
-}
-.fa-file:before {
- content: "\f15b";
-}
-.fa-file-text:before {
- content: "\f15c";
-}
-.fa-sort-alpha-asc:before {
- content: "\f15d";
-}
-.fa-sort-alpha-desc:before {
- content: "\f15e";
-}
-.fa-sort-amount-asc:before {
- content: "\f160";
-}
-.fa-sort-amount-desc:before {
- content: "\f161";
-}
-.fa-sort-numeric-asc:before {
- content: "\f162";
-}
-.fa-sort-numeric-desc:before {
- content: "\f163";
-}
-.fa-thumbs-up:before {
- content: "\f164";
-}
-.fa-thumbs-down:before {
- content: "\f165";
-}
-.fa-youtube-square:before {
- content: "\f166";
-}
-.fa-youtube:before {
- content: "\f167";
-}
-.fa-xing:before {
- content: "\f168";
-}
-.fa-xing-square:before {
- content: "\f169";
-}
-.fa-youtube-play:before {
- content: "\f16a";
-}
-.fa-dropbox:before {
- content: "\f16b";
-}
-.fa-stack-overflow:before {
- content: "\f16c";
-}
-.fa-instagram:before {
- content: "\f16d";
-}
-.fa-flickr:before {
- content: "\f16e";
-}
-.fa-adn:before {
- content: "\f170";
-}
-.fa-bitbucket:before {
- content: "\f171";
-}
-.fa-bitbucket-square:before {
- content: "\f172";
-}
-.fa-tumblr:before {
- content: "\f173";
-}
-.fa-tumblr-square:before {
- content: "\f174";
-}
-.fa-long-arrow-down:before {
- content: "\f175";
-}
-.fa-long-arrow-up:before {
- content: "\f176";
-}
-.fa-long-arrow-left:before {
- content: "\f177";
-}
-.fa-long-arrow-right:before {
- content: "\f178";
-}
-.fa-apple:before {
- content: "\f179";
-}
-.fa-windows:before {
- content: "\f17a";
-}
-.fa-android:before {
- content: "\f17b";
-}
-.fa-linux:before {
- content: "\f17c";
-}
-.fa-dribbble:before {
- content: "\f17d";
-}
-.fa-skype:before {
- content: "\f17e";
-}
-.fa-foursquare:before {
- content: "\f180";
-}
-.fa-trello:before {
- content: "\f181";
-}
-.fa-female:before {
- content: "\f182";
-}
-.fa-male:before {
- content: "\f183";
-}
-.fa-gittip:before,
-.fa-gratipay:before {
- content: "\f184";
-}
-.fa-sun-o:before {
- content: "\f185";
-}
-.fa-moon-o:before {
- content: "\f186";
-}
-.fa-archive:before {
- content: "\f187";
-}
-.fa-bug:before {
- content: "\f188";
-}
-.fa-vk:before {
- content: "\f189";
-}
-.fa-weibo:before {
- content: "\f18a";
-}
-.fa-renren:before {
- content: "\f18b";
-}
-.fa-pagelines:before {
- content: "\f18c";
-}
-.fa-stack-exchange:before {
- content: "\f18d";
-}
-.fa-arrow-circle-o-right:before {
- content: "\f18e";
-}
-.fa-arrow-circle-o-left:before {
- content: "\f190";
-}
-.fa-toggle-left:before,
-.fa-caret-square-o-left:before {
- content: "\f191";
-}
-.fa-dot-circle-o:before {
- content: "\f192";
-}
-.fa-wheelchair:before {
- content: "\f193";
-}
-.fa-vimeo-square:before {
- content: "\f194";
-}
-.fa-turkish-lira:before,
-.fa-try:before {
- content: "\f195";
-}
-.fa-plus-square-o:before {
- content: "\f196";
-}
-.fa-space-shuttle:before {
- content: "\f197";
-}
-.fa-slack:before {
- content: "\f198";
-}
-.fa-envelope-square:before {
- content: "\f199";
-}
-.fa-wordpress:before {
- content: "\f19a";
-}
-.fa-openid:before {
- content: "\f19b";
-}
-.fa-institution:before,
-.fa-bank:before,
-.fa-university:before {
- content: "\f19c";
-}
-.fa-mortar-board:before,
-.fa-graduation-cap:before {
- content: "\f19d";
-}
-.fa-yahoo:before {
- content: "\f19e";
-}
-.fa-google:before {
- content: "\f1a0";
-}
-.fa-reddit:before {
- content: "\f1a1";
-}
-.fa-reddit-square:before {
- content: "\f1a2";
-}
-.fa-stumbleupon-circle:before {
- content: "\f1a3";
-}
-.fa-stumbleupon:before {
- content: "\f1a4";
-}
-.fa-delicious:before {
- content: "\f1a5";
-}
-.fa-digg:before {
- content: "\f1a6";
-}
-.fa-pied-piper-pp:before {
- content: "\f1a7";
-}
-.fa-pied-piper-alt:before {
- content: "\f1a8";
-}
-.fa-drupal:before {
- content: "\f1a9";
-}
-.fa-joomla:before {
- content: "\f1aa";
-}
-.fa-language:before {
- content: "\f1ab";
-}
-.fa-fax:before {
- content: "\f1ac";
-}
-.fa-building:before {
- content: "\f1ad";
-}
-.fa-child:before {
- content: "\f1ae";
-}
-.fa-paw:before {
- content: "\f1b0";
-}
-.fa-spoon:before {
- content: "\f1b1";
-}
-.fa-cube:before {
- content: "\f1b2";
-}
-.fa-cubes:before {
- content: "\f1b3";
-}
-.fa-behance:before {
- content: "\f1b4";
-}
-.fa-behance-square:before {
- content: "\f1b5";
-}
-.fa-steam:before {
- content: "\f1b6";
-}
-.fa-steam-square:before {
- content: "\f1b7";
-}
-.fa-recycle:before {
- content: "\f1b8";
-}
-.fa-automobile:before,
-.fa-car:before {
- content: "\f1b9";
-}
-.fa-cab:before,
-.fa-taxi:before {
- content: "\f1ba";
-}
-.fa-tree:before {
- content: "\f1bb";
-}
-.fa-spotify:before {
- content: "\f1bc";
-}
-.fa-deviantart:before {
- content: "\f1bd";
-}
-.fa-soundcloud:before {
- content: "\f1be";
-}
-.fa-database:before {
- content: "\f1c0";
-}
-.fa-file-pdf-o:before {
- content: "\f1c1";
-}
-.fa-file-word-o:before {
- content: "\f1c2";
-}
-.fa-file-excel-o:before {
- content: "\f1c3";
-}
-.fa-file-powerpoint-o:before {
- content: "\f1c4";
-}
-.fa-file-photo-o:before,
-.fa-file-picture-o:before,
-.fa-file-image-o:before {
- content: "\f1c5";
-}
-.fa-file-zip-o:before,
-.fa-file-archive-o:before {
- content: "\f1c6";
-}
-.fa-file-sound-o:before,
-.fa-file-audio-o:before {
- content: "\f1c7";
-}
-.fa-file-movie-o:before,
-.fa-file-video-o:before {
- content: "\f1c8";
-}
-.fa-file-code-o:before {
- content: "\f1c9";
-}
-.fa-vine:before {
- content: "\f1ca";
-}
-.fa-codepen:before {
- content: "\f1cb";
-}
-.fa-jsfiddle:before {
- content: "\f1cc";
-}
-.fa-life-bouy:before,
-.fa-life-buoy:before,
-.fa-life-saver:before,
-.fa-support:before,
-.fa-life-ring:before {
- content: "\f1cd";
-}
-.fa-circle-o-notch:before {
- content: "\f1ce";
-}
-.fa-ra:before,
-.fa-resistance:before,
-.fa-rebel:before {
- content: "\f1d0";
-}
-.fa-ge:before,
-.fa-empire:before {
- content: "\f1d1";
-}
-.fa-git-square:before {
- content: "\f1d2";
-}
-.fa-git:before {
- content: "\f1d3";
-}
-.fa-y-combinator-square:before,
-.fa-yc-square:before,
-.fa-hacker-news:before {
- content: "\f1d4";
-}
-.fa-tencent-weibo:before {
- content: "\f1d5";
-}
-.fa-qq:before {
- content: "\f1d6";
-}
-.fa-wechat:before,
-.fa-weixin:before {
- content: "\f1d7";
-}
-.fa-send:before,
-.fa-paper-plane:before {
- content: "\f1d8";
-}
-.fa-send-o:before,
-.fa-paper-plane-o:before {
- content: "\f1d9";
-}
-.fa-history:before {
- content: "\f1da";
-}
-.fa-circle-thin:before {
- content: "\f1db";
-}
-.fa-header:before {
- content: "\f1dc";
-}
-.fa-paragraph:before {
- content: "\f1dd";
-}
-.fa-sliders:before {
- content: "\f1de";
-}
-.fa-share-alt:before {
- content: "\f1e0";
-}
-.fa-share-alt-square:before {
- content: "\f1e1";
-}
-.fa-bomb:before {
- content: "\f1e2";
-}
-.fa-soccer-ball-o:before,
-.fa-futbol-o:before {
- content: "\f1e3";
-}
-.fa-tty:before {
- content: "\f1e4";
-}
-.fa-binoculars:before {
- content: "\f1e5";
-}
-.fa-plug:before {
- content: "\f1e6";
-}
-.fa-slideshare:before {
- content: "\f1e7";
-}
-.fa-twitch:before {
- content: "\f1e8";
-}
-.fa-yelp:before {
- content: "\f1e9";
-}
-.fa-newspaper-o:before {
- content: "\f1ea";
-}
-.fa-wifi:before {
- content: "\f1eb";
-}
-.fa-calculator:before {
- content: "\f1ec";
-}
-.fa-paypal:before {
- content: "\f1ed";
-}
-.fa-google-wallet:before {
- content: "\f1ee";
-}
-.fa-cc-visa:before {
- content: "\f1f0";
-}
-.fa-cc-mastercard:before {
- content: "\f1f1";
-}
-.fa-cc-discover:before {
- content: "\f1f2";
-}
-.fa-cc-amex:before {
- content: "\f1f3";
-}
-.fa-cc-paypal:before {
- content: "\f1f4";
-}
-.fa-cc-stripe:before {
- content: "\f1f5";
-}
-.fa-bell-slash:before {
- content: "\f1f6";
-}
-.fa-bell-slash-o:before {
- content: "\f1f7";
-}
-.fa-trash:before {
- content: "\f1f8";
-}
-.fa-copyright:before {
- content: "\f1f9";
-}
-.fa-at:before {
- content: "\f1fa";
-}
-.fa-eyedropper:before {
- content: "\f1fb";
-}
-.fa-paint-brush:before {
- content: "\f1fc";
-}
-.fa-birthday-cake:before {
- content: "\f1fd";
-}
-.fa-area-chart:before {
- content: "\f1fe";
-}
-.fa-pie-chart:before {
- content: "\f200";
-}
-.fa-line-chart:before {
- content: "\f201";
-}
-.fa-lastfm:before {
- content: "\f202";
-}
-.fa-lastfm-square:before {
- content: "\f203";
-}
-.fa-toggle-off:before {
- content: "\f204";
-}
-.fa-toggle-on:before {
- content: "\f205";
-}
-.fa-bicycle:before {
- content: "\f206";
-}
-.fa-bus:before {
- content: "\f207";
-}
-.fa-ioxhost:before {
- content: "\f208";
-}
-.fa-angellist:before {
- content: "\f209";
-}
-.fa-cc:before {
- content: "\f20a";
-}
-.fa-shekel:before,
-.fa-sheqel:before,
-.fa-ils:before {
- content: "\f20b";
-}
-.fa-meanpath:before {
- content: "\f20c";
-}
-.fa-buysellads:before {
- content: "\f20d";
-}
-.fa-connectdevelop:before {
- content: "\f20e";
-}
-.fa-dashcube:before {
- content: "\f210";
-}
-.fa-forumbee:before {
- content: "\f211";
-}
-.fa-leanpub:before {
- content: "\f212";
-}
-.fa-sellsy:before {
- content: "\f213";
-}
-.fa-shirtsinbulk:before {
- content: "\f214";
-}
-.fa-simplybuilt:before {
- content: "\f215";
-}
-.fa-skyatlas:before {
- content: "\f216";
-}
-.fa-cart-plus:before {
- content: "\f217";
-}
-.fa-cart-arrow-down:before {
- content: "\f218";
-}
-.fa-diamond:before {
- content: "\f219";
-}
-.fa-ship:before {
- content: "\f21a";
-}
-.fa-user-secret:before {
- content: "\f21b";
-}
-.fa-motorcycle:before {
- content: "\f21c";
-}
-.fa-street-view:before {
- content: "\f21d";
-}
-.fa-heartbeat:before {
- content: "\f21e";
-}
-.fa-venus:before {
- content: "\f221";
-}
-.fa-mars:before {
- content: "\f222";
-}
-.fa-mercury:before {
- content: "\f223";
-}
-.fa-intersex:before,
-.fa-transgender:before {
- content: "\f224";
-}
-.fa-transgender-alt:before {
- content: "\f225";
-}
-.fa-venus-double:before {
- content: "\f226";
-}
-.fa-mars-double:before {
- content: "\f227";
-}
-.fa-venus-mars:before {
- content: "\f228";
-}
-.fa-mars-stroke:before {
- content: "\f229";
-}
-.fa-mars-stroke-v:before {
- content: "\f22a";
-}
-.fa-mars-stroke-h:before {
- content: "\f22b";
-}
-.fa-neuter:before {
- content: "\f22c";
-}
-.fa-genderless:before {
- content: "\f22d";
-}
-.fa-facebook-official:before {
- content: "\f230";
-}
-.fa-pinterest-p:before {
- content: "\f231";
-}
-.fa-whatsapp:before {
- content: "\f232";
-}
-.fa-server:before {
- content: "\f233";
-}
-.fa-user-plus:before {
- content: "\f234";
-}
-.fa-user-times:before {
- content: "\f235";
-}
-.fa-hotel:before,
-.fa-bed:before {
- content: "\f236";
-}
-.fa-viacoin:before {
- content: "\f237";
-}
-.fa-train:before {
- content: "\f238";
-}
-.fa-subway:before {
- content: "\f239";
-}
-.fa-medium:before {
- content: "\f23a";
-}
-.fa-yc:before,
-.fa-y-combinator:before {
- content: "\f23b";
-}
-.fa-optin-monster:before {
- content: "\f23c";
-}
-.fa-opencart:before {
- content: "\f23d";
-}
-.fa-expeditedssl:before {
- content: "\f23e";
-}
-.fa-battery-4:before,
-.fa-battery-full:before {
- content: "\f240";
-}
-.fa-battery-3:before,
-.fa-battery-three-quarters:before {
- content: "\f241";
-}
-.fa-battery-2:before,
-.fa-battery-half:before {
- content: "\f242";
-}
-.fa-battery-1:before,
-.fa-battery-quarter:before {
- content: "\f243";
-}
-.fa-battery-0:before,
-.fa-battery-empty:before {
- content: "\f244";
-}
-.fa-mouse-pointer:before {
- content: "\f245";
-}
-.fa-i-cursor:before {
- content: "\f246";
-}
-.fa-object-group:before {
- content: "\f247";
-}
-.fa-object-ungroup:before {
- content: "\f248";
-}
-.fa-sticky-note:before {
- content: "\f249";
-}
-.fa-sticky-note-o:before {
- content: "\f24a";
-}
-.fa-cc-jcb:before {
- content: "\f24b";
-}
-.fa-cc-diners-club:before {
- content: "\f24c";
-}
-.fa-clone:before {
- content: "\f24d";
-}
-.fa-balance-scale:before {
- content: "\f24e";
-}
-.fa-hourglass-o:before {
- content: "\f250";
-}
-.fa-hourglass-1:before,
-.fa-hourglass-start:before {
- content: "\f251";
-}
-.fa-hourglass-2:before,
-.fa-hourglass-half:before {
- content: "\f252";
-}
-.fa-hourglass-3:before,
-.fa-hourglass-end:before {
- content: "\f253";
-}
-.fa-hourglass:before {
- content: "\f254";
-}
-.fa-hand-grab-o:before,
-.fa-hand-rock-o:before {
- content: "\f255";
-}
-.fa-hand-stop-o:before,
-.fa-hand-paper-o:before {
- content: "\f256";
-}
-.fa-hand-scissors-o:before {
- content: "\f257";
-}
-.fa-hand-lizard-o:before {
- content: "\f258";
-}
-.fa-hand-spock-o:before {
- content: "\f259";
-}
-.fa-hand-pointer-o:before {
- content: "\f25a";
-}
-.fa-hand-peace-o:before {
- content: "\f25b";
-}
-.fa-trademark:before {
- content: "\f25c";
-}
-.fa-registered:before {
- content: "\f25d";
-}
-.fa-creative-commons:before {
- content: "\f25e";
-}
-.fa-gg:before {
- content: "\f260";
-}
-.fa-gg-circle:before {
- content: "\f261";
-}
-.fa-tripadvisor:before {
- content: "\f262";
-}
-.fa-odnoklassniki:before {
- content: "\f263";
-}
-.fa-odnoklassniki-square:before {
- content: "\f264";
-}
-.fa-get-pocket:before {
- content: "\f265";
-}
-.fa-wikipedia-w:before {
- content: "\f266";
-}
-.fa-safari:before {
- content: "\f267";
-}
-.fa-chrome:before {
- content: "\f268";
-}
-.fa-firefox:before {
- content: "\f269";
-}
-.fa-opera:before {
- content: "\f26a";
-}
-.fa-internet-explorer:before {
- content: "\f26b";
-}
-.fa-tv:before,
-.fa-television:before {
- content: "\f26c";
-}
-.fa-contao:before {
- content: "\f26d";
-}
-.fa-500px:before {
- content: "\f26e";
-}
-.fa-amazon:before {
- content: "\f270";
-}
-.fa-calendar-plus-o:before {
- content: "\f271";
-}
-.fa-calendar-minus-o:before {
- content: "\f272";
-}
-.fa-calendar-times-o:before {
- content: "\f273";
-}
-.fa-calendar-check-o:before {
- content: "\f274";
-}
-.fa-industry:before {
- content: "\f275";
-}
-.fa-map-pin:before {
- content: "\f276";
-}
-.fa-map-signs:before {
- content: "\f277";
-}
-.fa-map-o:before {
- content: "\f278";
-}
-.fa-map:before {
- content: "\f279";
-}
-.fa-commenting:before {
- content: "\f27a";
-}
-.fa-commenting-o:before {
- content: "\f27b";
-}
-.fa-houzz:before {
- content: "\f27c";
-}
-.fa-vimeo:before {
- content: "\f27d";
-}
-.fa-black-tie:before {
- content: "\f27e";
-}
-.fa-fonticons:before {
- content: "\f280";
-}
-.fa-reddit-alien:before {
- content: "\f281";
-}
-.fa-edge:before {
- content: "\f282";
-}
-.fa-credit-card-alt:before {
- content: "\f283";
-}
-.fa-codiepie:before {
- content: "\f284";
-}
-.fa-modx:before {
- content: "\f285";
-}
-.fa-fort-awesome:before {
- content: "\f286";
-}
-.fa-usb:before {
- content: "\f287";
-}
-.fa-product-hunt:before {
- content: "\f288";
-}
-.fa-mixcloud:before {
- content: "\f289";
-}
-.fa-scribd:before {
- content: "\f28a";
-}
-.fa-pause-circle:before {
- content: "\f28b";
-}
-.fa-pause-circle-o:before {
- content: "\f28c";
-}
-.fa-stop-circle:before {
- content: "\f28d";
-}
-.fa-stop-circle-o:before {
- content: "\f28e";
-}
-.fa-shopping-bag:before {
- content: "\f290";
-}
-.fa-shopping-basket:before {
- content: "\f291";
-}
-.fa-hashtag:before {
- content: "\f292";
-}
-.fa-bluetooth:before {
- content: "\f293";
-}
-.fa-bluetooth-b:before {
- content: "\f294";
-}
-.fa-percent:before {
- content: "\f295";
-}
-.fa-gitlab:before {
- content: "\f296";
-}
-.fa-wpbeginner:before {
- content: "\f297";
-}
-.fa-wpforms:before {
- content: "\f298";
-}
-.fa-envira:before {
- content: "\f299";
-}
-.fa-universal-access:before {
- content: "\f29a";
-}
-.fa-wheelchair-alt:before {
- content: "\f29b";
-}
-.fa-question-circle-o:before {
- content: "\f29c";
-}
-.fa-blind:before {
- content: "\f29d";
-}
-.fa-audio-description:before {
- content: "\f29e";
-}
-.fa-volume-control-phone:before {
- content: "\f2a0";
-}
-.fa-braille:before {
- content: "\f2a1";
-}
-.fa-assistive-listening-systems:before {
- content: "\f2a2";
-}
-.fa-asl-interpreting:before,
-.fa-american-sign-language-interpreting:before {
- content: "\f2a3";
-}
-.fa-deafness:before,
-.fa-hard-of-hearing:before,
-.fa-deaf:before {
- content: "\f2a4";
-}
-.fa-glide:before {
- content: "\f2a5";
-}
-.fa-glide-g:before {
- content: "\f2a6";
-}
-.fa-signing:before,
-.fa-sign-language:before {
- content: "\f2a7";
-}
-.fa-low-vision:before {
- content: "\f2a8";
-}
-.fa-viadeo:before {
- content: "\f2a9";
-}
-.fa-viadeo-square:before {
- content: "\f2aa";
-}
-.fa-snapchat:before {
- content: "\f2ab";
-}
-.fa-snapchat-ghost:before {
- content: "\f2ac";
-}
-.fa-snapchat-square:before {
- content: "\f2ad";
-}
-.fa-pied-piper:before {
- content: "\f2ae";
-}
-.fa-first-order:before {
- content: "\f2b0";
-}
-.fa-yoast:before {
- content: "\f2b1";
-}
-.fa-themeisle:before {
- content: "\f2b2";
-}
-.fa-google-plus-circle:before,
-.fa-google-plus-official:before {
- content: "\f2b3";
-}
-.fa-fa:before,
-.fa-font-awesome:before {
- content: "\f2b4";
-}
-.sr-only {
- position: absolute;
- width: 1px;
- height: 1px;
- padding: 0;
- margin: -1px;
- overflow: hidden;
- clip: rect(0, 0, 0, 0);
- border: 0;
-}
-.sr-only-focusable:active,
-.sr-only-focusable:focus {
- position: static;
- width: auto;
- height: auto;
- margin: 0;
- overflow: visible;
- clip: auto;
-}
diff --git a/assets/css/slides.css b/assets/css/slides.css
index 70b373eeafad23..1a24449f7a01e5 100644
--- a/assets/css/slides.css
+++ b/assets/css/slides.css
@@ -296,6 +296,14 @@ th {
font-size: 30px;
}
+.enlarge200 {
+ font-size: 200%;
+}
+
+.enlarge200 .remark-code {
+ font-size: 36px;
+}
+
.strike {
text-decoration: line-through;
}
diff --git a/bin/validate-workflow-tags.sh b/bin/validate-workflow-tags.sh
index 961bad154f848f..8eea5024c15ed3 100755
--- a/bin/validate-workflow-tags.sh
+++ b/bin/validate-workflow-tags.sh
@@ -7,33 +7,36 @@ function tester {
import sys
import json
+problems = 0
+output = ["-----------------------ERROR-----------------------------"]
with open("$1") as json_file:
data = json.load(json_file)
- # Checking for 'tags' in workflow
- if 'tags' not in data or "$2" not in data['tags']:
- sys.stderr.write("-------------------------------------------\n")
- sys.stderr.write(
- "Workflow {} has no corresponding 'tags' attribute. Please add:\n".format(data['name']))
- sys.stderr.write('"tags": [' + "\n\t" + '"' + "$2" + '"' + "\n]\n")
- sys.exit(False)
+ # Checking for 'tags' in workflow if topic is known
+ if 'tags' not in data or not data['tags'] or "$2" not in data['tags']:
+ problems += 1
+ output.append(
+ "{}. The 'tags' attribute is missing. Please add:".format(str(problems), data['name']))
+ output.append('"tags": [' + "\n\t" + '"' + "$2" + '"' + "\n]")
# Checking for 'annotation' in workflow
- elif 'annotation' not in data or not data['annotation']:
- sys.stderr.write("-------------------------------------------\n")
- sys.stderr.write(
- "Workflow {} has no corresponding 'annotation' attribute. Please add: \n".format(data['name']))
- sys.stderr.write('"annotation": "
"' + "\n")
- sys.exit(False)
+ if 'annotation' not in data or not data['annotation']:
+ problems += 1
+ output.append(
+ "{}. The 'annotation' attribute is missing. Please add:".format(str(problems)))
+ output.append('"annotation": ""')
# Checking if there are tools used from the testtoolshed
- else:
- for stepnr, step in data['steps'].items():
- if step['tool_id'] and step['type'] == 'tool' and 'testtoolshed.g2.bx.psu.edu' in step['tool_id']:
- sys.stderr.write("-------------------------------------------\n")
- sys.stderr.write("Workflow {} has a tool from the testtoolshed in step {}.\n".format(
- data['name'], str(stepnr)))
- sys.exit(False)
- sys.exit(True)
+ for stepnr, step in data['steps'].items():
+ if step['tool_id'] and step['type'] == 'tool' and 'testtoolshed.g2.bx.psu.edu' in step['tool_id']:
+ problems += 1
+ output.append("{}. Step {} has a tool from the testtoolshed.".format(str(problems), str(stepnr)))
+
+ if problems:
+ output.insert(1, "Workflow '{}' has {} problem(s) because:".format(data['name'], str(problems)))
+ output.append("---------------------------------------------------------\n")
+ sys.stderr.write("\n".join(output))
+ sys.exit(False)
+ sys.exit(True)
END
}
@@ -50,7 +53,6 @@ do
if tester $w $topic;
then
- echo "-------------Invalid workflow--------------"
exit_with=1
fi
done
@@ -58,4 +60,4 @@ do
done
done
-exit $exit_with
+exit $exit_with
\ No newline at end of file
diff --git a/snippets/admin-testing.md b/snippets/admin-testing.md
new file mode 100644
index 00000000000000..0aee04a2a7ceb9
--- /dev/null
+++ b/snippets/admin-testing.md
@@ -0,0 +1,9 @@
+> ### {% icon tip %} Operating System Compatibility
+>
+> These Ansible roles and training materials were last tested on CentOS 7 and Ubuntu 18.04, but will probably work on other RHEL and Debian variants.
+>
+> The roles that are used in these trainings are currently used by `usegalaxy.*` and other servers to maintain their infrastructure. ([US](https://github.com/galaxyproject/infrastructure-playbook/), [EU](https://github.com/usegalaxy-eu/infrastructure-playbook); both are running CentOS 7)
+>
+> If you have an issue running these trainings on your OS flavour, please report [the issue](https://github.com/galaxyproject/training-material/issues/new) in the training material and we can see if it is possible to solve.
+>
+{: .tip}
\ No newline at end of file
diff --git a/topics/admin/tutorials/ansible-galaxy/tutorial.md b/topics/admin/tutorials/ansible-galaxy/tutorial.md
index b3e2d4d4da9bc8..0b51351bf05536 100644
--- a/topics/admin/tutorials/ansible-galaxy/tutorial.md
+++ b/topics/admin/tutorials/ansible-galaxy/tutorial.md
@@ -47,6 +47,7 @@ We want to give you a comprehensive understanding of how the Galaxy installation
>
{: .agenda}
+{% include snippets/admin-testing.md %}
# Playbook Overview
diff --git a/topics/admin/tutorials/ansible/tutorial.md b/topics/admin/tutorials/ansible/tutorial.md
index 5c7df1d3a109b5..a96a8c077c9341 100644
--- a/topics/admin/tutorials/ansible/tutorial.md
+++ b/topics/admin/tutorials/ansible/tutorial.md
@@ -37,6 +37,7 @@ This will be a very practical training with emphasis on looking at examples from
>
{: .agenda}
+{% include snippets/admin-testing.md %}
# What is Ansible?
diff --git a/topics/assembly/tutorials/assembly-with-preprocessing/tutorial.md b/topics/assembly/tutorials/assembly-with-preprocessing/tutorial.md
new file mode 100644
index 00000000000000..7fe7aef5c37e83
--- /dev/null
+++ b/topics/assembly/tutorials/assembly-with-preprocessing/tutorial.md
@@ -0,0 +1,922 @@
+---
+layout: tutorial_hands_on
+
+title: "Unicycler assembly of SARS-CoV-2 genome with preprocessing to remove human genome reads"
+zenodo_link: "https://doi.org/10.5281/zenodo.3732358"
+questions:
+ - How can a genome of interest be assembled against a background of contaminating reads from other genomes?
+ - How can sequencing data from public sources be turned into assembly-ready polished datasets?
+objectives:
+ - Obtain viral (SARS-CoV-2) sequencing data with contaminating human reads from public sources
+ - Organize the data into collections and check its quality
+ - Detect and remove human reads
+ - Assemble retained reads and explore the results
+time_estimation: "4h" # plus additional time for (optional) NCBI SRA downloads
+level: Intermediate
+key_points:
+ - Certain types of NGS samples can be heavily contaminated with sequences from other genomes
+ - Reads from known/expected contaminating sources can be identified by mapping to the respective genomes
+ - After mapping, use filtering tools to remove identified contaminating reads, and use conversion tools to convert remaining mapped reads back into raw sequenced reads expected by most downstream tools
+requirements:
+ -
+ type: "internal"
+ topic_name: assembly
+ tutorials:
+ - unicycler-assembly
+ -
+ type: "internal"
+ topic_name: sequence-analysis
+ tutorials:
+ - mapping
+ -
+ type: "internal"
+ topic_name: galaxy-data-manipulation
+ tutorials:
+ - collections
+tags:
+ - covid19
+contributors:
+ - wm75
+
+---
+
+# Introduction
+{:.no_toc}
+
+In some research or clinical contexts it is not possible, or very hard, to
+purify DNA/RNA for sequencing from just the specimen of interest.
+Instead you will isolate DNA that is contaminated, sometimes heavily, with
+DNA/RNA of a different origin.
+This is the case for example with microbiome samples, which typically display
+considerable contamination with host DNA, or with samples of body fluids for
+pathogen detection. Such contamination can pose an issue with certain types of
+analyses, in particular with genome assembly.
+
+This tutorial guides you through the preprocessing of sequencing data of
+bronchoalveolar lavage fluid (BALF) samples obtained from early COVID-19
+patients in China. Since such samples are expected to be contaminated
+significantly with human sequenced reads, the goal is to enrich the data for
+SARS-CoV-2 reads by identifying and discarding reads of human origin before
+trying to assemble the viral genome sequence.
+
+> ### {% icon comment %} The usegalaxy.* COVID-19 analysis project
+> This tutorial uses the same data as, and recapitulates to a large extent, the
+> [Pre-processing](https://covid19.galaxyproject.org/genomics/1-PreProcessing/)
+> and [Assembly](https://covid19.galaxyproject.org/genomics/2-Assembly/) steps
+> of the [Genomics](https://covid19.galaxyproject.org/genomics/) section of
+> [covid19.galaxyproject.org](https://covid19.galaxyproject.org/).
+>
+{: .comment}
+
+> ### Agenda
+>
+> In this tutorial, we will deal with:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Get data
+
+We are going to create an assembly of the SARS-CoV-2 genome based on mixed
+short-reads (Illumina) and long-reads (Nanopore) data from a total of six
+different samples, all of which are publicly accessible through the NCBI
+and EBI short-reads archives (SRAs).
+
+Since automated data downloads from SRAs can be unreliable at times and could
+get broken by changes to the download interface on the NCBI/EBI side, this
+tutorial offers two ways to access the sequenced reads input data:
+
+1. Direct download from the NCBI SRA based on accession numbers and using the
+ dedicated **Faster Download** tool
+
+ Use this method if it works and is fast enough for you, and if you are
+ interested in learning to obtain short-reads data directly from NCBI, in
+ general.
+
+2. Download of the same data deposited as a copy at [Zenodo](https://zenodo.org/record/3732359)
+ This method uses Galaxy's generic data import functionality, and should be
+ very reliable and faster than the download from NCBI.
+ It also showcases **rule-based** uploads and demonstrates how they can be
+ used to download several datasets and to arrange them into easy to handle
+ data structures at the same time.
+
+ > ### {% icon details %} Rule-based uploads
+ > In this tutorial you will only use the features of Galaxy's rule-based
+ > uploader that are required to get the input data ready for our analysis,
+ > and we will not explain those features in much detail.
+ >
+ > If, after this first taste, you are interested in a thorough introduction
+ > we recommend the advanced tutorial
+ > [Collections: Rule Based Uploader](../../../galaxy-data-manipulation/tutorials/upload-rules/tutorial.html).
+ {: .details}
+
+ Use this method if the direct download from the NCBI SRA does not work, or
+ is too slow for your time frame, or if you are interested in advanced use
+ of Galaxy's data import functionality.
+
+The corresponding two step-by-step instructions below have been crafted to
+produce identically arranged data structures in your history so all subsequent
+steps are independent of the data source you choose.
+
+## Get data from NCBI SRA
+
+> ### {% icon hands_on %} Hands-on: Data upload to Galaxy from NCBI SRA
+>
+> 1. Create a new history for this tutorial and give it a proper name
+>
+> {% include snippets/create_new_history.md %}
+> {% include snippets/rename_history.md %}
+>
+> 2. Create a new dataset listing the SRA accession numbers of the Illumina paired-end input data for this tutorial:
+>
+> ```
+> SRR10903401
+> SRR10903402
+> SRR10971381
+> ```
+>
+> call it, *e.g.*, `Illumina accessions` and set its datatype to `tabular`.
+>
+> {% include snippets/create_new_file.md format="tabular" %}
+>
+> 3. Create another new dataset listing the SRA accession numbers of the Nanopore input data for this tutorial:
+>
+> ```
+> SRR10948550
+> SRR10948474
+> SRR10902284
+> ```
+>
+> call it, *e.g.*, `Nanopore accessions` and set its datatype to `tabular`.
+>
+> 4. Add `#illumina`/`#nanopore` tags to the datasets
+>
+> > ### {% icon comment %} Name tags in the analysis
+> > We are going to treat the Illumina- and the Nanopore-sequenced data
+> > separately in this tutorial up to the actual genome assembly step.
+> >
+> > To make it easier to keep track of the two branches of the analysis, we
+> > recommend the use of Galaxy's dataset **name tags**.
+> > A name tag will automatically propagate to any new dataset derived
+> > from the tagged dataset.
+> {: .comment}
+>
+> You can create a name tag by attaching a tag starting with `#` to any
+> dataset.
+>
+> Name tags are meant to help you identify the origin of datasets quickly.
+> Feel free to either use the suggested names above or choose ones you like.
+>
+> {% include snippets/add_tag.md %}
+>
+> 4. Retrieve the Illumina reads data from NCBI:
+>
+> Run **Faster Download and Extract Reads in FASTQ** {% icon tool %} with
+> the following parameter settings:
+> - *"select input type"*: `List of SRA accession, one per line`
+> - {% icon param-file %} *"sra accession list"*:
+> the `Illumina accessions` dataset created above
+> - in *"Advanced Options"*
+> - *"Select how to split the spots"*: `--split-3`
+>
+> The tool run should generate four new items in your history - three
+> collections and one *log* dataset with a summary of what happened.
+>
+> Since all three datasets that we tried to retrieve contain only
+> paired-end reads, only the `Pair-end data` collection is expected to
+> contain downloaded data. Click on the other two collections to verify that
+> they are empty, then delete them from your history (since the collections
+> do not contain any datasets, it does not matter which delete option -
+> "Collection Only", "Delete Datasets" or "Permanently Delete Datasets" -
+> you choose when prompted).
+>
+> 5. Retrieve the Nanopore reads data from NCBI:
+>
+> Run **Faster Download and Extract Reads in FASTQ** {% icon tool %} with
+> the following parameter settings:
+> - *"select input type"*: `List of SRA accession, one per line`
+> - {% icon param-file %} *"sra accession list"*:
+> the `Nanopore accessions` dataset created above
+> - in *"Advanced Options"*
+> - *"Select how to split the spots"*: `--split-3`
+>
+> As in the previous step, the tool run should generate four new items in
+> your history.
+>
+> Since all three datasets that we tried to retrieve in this run contain
+> only single-end reads, only the `Single-end data` collection is expected to
+> contain downloaded data this time. Click on the other two collections to
+> verify that they are empty, then delete them from your history.
+>
+{: .hands_on}
+
+
+## Get data from Zenodo
+
+> ### {% icon hands_on %} Hands-on: Data upload to Galaxy from Zenodo
+>
+> 1. Create a new history for this tutorial and give it a proper name
+>
+> {% include snippets/create_new_history.md %}
+> {% include snippets/rename_history.md %}
+>
+> 2. Import Illumina-sequenced reads data from [Zenodo](https://zenodo.org/record/3732359)
+>
+> The Zenodo links for the data are these:
+> ```
+> https://zenodo.org/record/3732359/files/SRR10903401_r1.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10903401_r2.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10903402_r1.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10903402_r2.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10971381_r1.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10971381_r2.fq.gz
+> ```
+>
+> To upload these data to your Galaxy history in structured form:
+>
+> - Copy the above list of links
+> - Open the Galaxy Upload Manager
+> ({% icon galaxy-upload %} on the top right of the tool panel)
+> - In the pop-up window, switch to the **Rule-based** tab and select
+> - *"Upload data as:"*: `Collection(s)`
+> - *"Load tabular data from:"*: `Pasted Table`
+> - Paste the copied links into the text field
+> - Click **Build**
+> - In the next screen, select
+> - {% icon param-repeat %} *"Column"*: `Using a Regular expression`
+> - *"From Column"*: `A`
+> - {% icon param-check %} *"Create columns matching expression groups."*
+> - *"Regular Expression"*: `.+/(SRR\d+)_r(\d).fq.gz`
+> - *"Number of Groups"*: `2`
+> - Click **Apply**
+> - {% icon param-repeat %} *"Rules"*: `Add / Modify Column Definitions`
+> - {% icon param-repeat %} *"Add Definition"*: `URL`
+> - *"URL"*: `A`
+> - {% icon param-repeat %} *"Add Definition"*: `List Identifier(s)`
+> - *"List Identifier(s)"*: `B`
+> - {% icon param-repeat %} *"Add Definition"*: `Paired-end Indicator`
+> - *"Paired-end Indicator"*: `C`
+> - Click **Apply**
+> - *"Type"*: `fastqsanger.gz`
+> - *"Name"*: `Illumina PE data` (or similar)
+> - *"Add nametag for name:"* {% icon param-check %}
+>
+> > ### {% icon comment %} Name tags in the analysis
+> > We are going to treat the Illumina- and the Nanopore-sequenced data
+> > separately in this tutorial up to the actual genome assembly step.
+> >
+> > To make it easier to keep track of the two branches of the analysis, we
+> > recommend the use of Galaxy's dataset **name tags**.
+> > A name tag will automatically propagate to any new dataset derived
+> > from the tagged dataset.
+> {: .comment}
+>
+> Checking this option tells Galaxy to reuse the collection name above
+> as a name tag on the collection.
+>
+> - Click **Upload**
+>
+> 3. Import Nanopore-sequenced reads data from [Zenodo](https://zenodo.org/record/3732359)
+>
+> ```
+> https://zenodo.org/record/3732359/files/SRR10902284_ONT.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10948474_ONT.fq.gz
+> https://zenodo.org/record/3732359/files/SRR10948550_ONT.fq.gz
+> ```
+>
+> Again, we want to upload this data to our history in structured form.
+> To do so:
+>
+> - Copy the above list of links
+> - Open the Galaxy Upload Manager
+> ({% icon galaxy-upload %} on the top right of the tool panel)
+> - In the pop-up window, switch to the **Rule-based** tab and select
+> - *"Upload data as:"*: `Collection(s)`
+> - *"Load tabular data from:"*: `Pasted Table`
+> - Paste the copied links into the text field
+> - Click **Build**
+> - In the next screen, select
+> - {% icon param-repeat %} *"Column"*: `Using a Regular expression`
+> - *"From Column"*: `A`
+> - {% icon param-check %} *"Create columns matching expression groups."*
+> - *"Regular Expression"*: `.+/(SRR\d+)_ONT.fq.gz`
+> - *"Number of Groups"*: `1`
+> - Click **Apply**
+> - {% icon param-repeat %} *"Rules"*: `Add / Modify Column Definitions`
+> - {% icon param-repeat %} *"Add Definition"*: `URL`
+> - *"URL"*: `A`
+> - {% icon param-repeat %} *"Add Definition"*: `List Identifier(s)`
+> - *"List Identifier(s)"*: `B`
+> - Click **Apply**
+> - *"Type"*: `fastqsanger.gz`
+> - *"Name"*: `Nanopore data` (or similar)
+> - *"Add nametag for name:"* {% icon param-check %}
+> - Click **Upload**
+>
+{: .hands_on}
+
+
+# Read trimming and quality control
+
+In the end, we do not want to base our mapping on low-quality reads that may
+cause misassembly of fragments or introduce sequencing artefacts into the final
+assembled sequence. Since quality control, read filtering and read trimming are
+all quite fast and computationally cheap operations compared to the read
+mapping that we will use to identify and eliminate human reads, it is best to
+perform these steps up front.
+
+Due to their very different nature, however, Illumina- and Nanopore-sequenced
+reads should be treated rather differently.
+
+## Trimming and filtering of Illumina reads
+
+Galaxy offers a panel of different NGS reads trimming/filtering tools. Here,
+we use **fastp** {% icon tool %}, which is straightforward to configure,
+and when combined with **MultiQC** {% icon tool %}, enables nice and
+easy-to-interpret visualizations of the effects of preprocessing, in particular
+for multiple samples.
+
+In the following, we configure the tool to retain reads only if at most 20% of
+their bases have a Phred-scaled quality below 20 and if their length in bases after
+trimming of adapter sequences (which the tool auto-detects for us) is at least
+50.
+
+The JSON-formatted report produced by the tool can serve as input to
+**MultiQC** {% icon tool %} for a direct visual comparison of key quality
+metrics for all samples before and after preprocessing.
+
+> ### {% icon hands_on %} Hands-on: Reads preprocessing and quality reporting
+> 1. **fastp** {% icon tool %} with the following parameters
+> - *"Single-end or paired reads"*: `Paired Collection`
+> - *"Select paired collection(s)"*: the collection of Illumina-sequenced
+> reads as produced in the *Get Data* section
+> - in *"Filter Options"*
+> - in *"Quality filtering options"*
+> - *"Disable quality filtering"*: `No`
+> - *"Qualified quality phred"*: `20`
+> - *"Unqualified percent limit"*: `20`
+> - in *"Length filtering options"*
+> - *"Disable length filtering"*: `No`
+> - *"Length required"*: `50`
+> - in *"Output Options"*
+> - *"Output HTML report"*: `No`
+> - *"Output JSON report"*: `Yes`
+>
+> The tool run produces two collections - one with the actual preprocessed
+> reads of all input samples, another one with a JSON-formatted report of
+> the processing for every sample.
+>
+> 2. **MultiQC** {% icon tool %} with the following parameters
+> - {% icon param-repeat %} *"Results"*
+> - *"Which tool was used generate logs?"*: `fastp`
+> - {% icon param-collection %} *"Output of fastp"*: the collection of
+> JSON-formatted reports, second collection produced by **fastp**
+> {% icon tool %}
+>
+> This tool run generates a single output with the combined quality reports
+> for all samples before and after processing with **fastp** {% icon tool %}.
+>
+{: .hands_on}
+
+## Quality control of Nanopore reads
+
+Nanopore-sequenced reads differ greatly in length from one another and are, on
+average, of relatively low quality (in particular when compared to
+Illumina-sequenced reads). These properties make it challenging to preprocess
+them with standard tools. Quality assessment tools calibrated to work well with
+Illumina-sequenced reads are less useful for Nanopore-sequenced reads, too, for
+the same reasons. Here we restrict ourselves to a simple quality check with
+**NanoPlot** {% icon tool %}, a dedicated QC tool for Nanopore-sequenced reads.
+
+> ### {% icon hands_on %} Hands-on: Checking the quality of Nanopore reads with NanoPlot
+> 1. **NanoPlot** {% icon tool %} with the following parameters
+> - *"Select multifile mode"*: `batch`
+> - *"Type of the file(s) to work on"*: `fastq`
+> - {% icon param-collection %} *"files"*: the collection of
+> Nanopore-sequenced reads as produced in the *Get Data* section
+> - in *"Options for filtering or transforming input prior to plotting"*
+> - *"Logarithmic scaling of lengths in plots."*: `Yes`
+>
+> > ### {% icon question %} Questions
+> >
+> > 1. Looking at the three generated quality reports, which of the three
+> > samples seems to be of better quality overall than the other two,
+> > and what are some criteria that support this conclusion?
+> >
+> > > ### {% icon solution %} Solution
+> > > 1. Sample `SRR10948474` has the best overall quality.
+> > >
+> > > It has both higher average read length and quality than the other
+> > > two (see the *Summary statistics* table), has a distribution of
+> > > read qualities that peaks around intermediate quality scores (not
+> > > low ones as for the other samples), and contains some extra-long
+> > > (though at least partly rather low-quality) reads. Those last two
+> > > points become most obvious when looking at the *Read lengths vs
+> > > Average read quality* plots.
+> > {: .solution }
+> {: .question}
+>
+{: .hands_on}
+
+# Subtraction of reads mapping to the human reference genome
+
+## Mapping of Illumina reads
+
+In this tutorial, we are using **Bowtie2** for mapping our short-reads data to
+the human genome. *BWA-MEM* would be a good alternative for mapping the
+150-nucleotide reads (see the QC report above) from our samples.
+
+According to its authors, the *Minimap2* aligner, which we will be using for
+mapping the Nanopore-sequenced data in the next step, is supposed to outcompete
+*Bowtie2* and *BWA-MEM* in terms of speed even for Illumina-sequenced reads
+of length > 100 nts, but we opt for the conservative approach of using a
+widely-used, well-tested tool here.
+
+> ### {% icon hands_on %} Hands-on: Mapping with Bowtie2
+> 1. **Bowtie2** {% icon tool %} with the following parameters
+> - *"Is this single or paired library"*: `Paired-end Dataset Collection`
+> - *"FASTQ Paired Dataset"*: the collection of preprocessed
+> Illumina-sequenced reads, output of **fastp** {% icon tool %}
+> - *"Write unaligned reads (in fastq format) to separate file(s)"*: `No`
+>
+> Activating this option may seem attractive since the unaligned reads
+> are what we are interested in, but filtering for those reads with
+> a dedicated tool in a separate step allows us to filter on the
+> properties of the read pairs instead of those of individual reads.
+>
+> - *"Write aligned reads (in fastq format) to separate file(s)"*: `No`
+> - *"Do you want to set paired-end options?"*: `No`
+>
+> - *"Will you select a reference genome from your history or use a built-in index?"*:
+> `Use a built-in genome index`
+> - *"Select reference genome"*: `Human (Homo sapiens): hg38 Full`
+> - *"Set read groups information?"*: `Do not set`
+> - *"Select analysis mode"*: `Default setting only`
+> - *"Do you want to tweak SAM/BAM Options?"*: `No`
+> - *"Save the bowtie2 mapping statistics to the history"*: `Yes`
+>
+> The tool run should produce two collections of output datasets. One with
+> the actual mapped reads of the three samples and one with the corresponding
+> mapping statistics for each sample, which we want to have a brief look at
+> next.
+>
+> 2. Inspect the `mapping stats` of each sample by clicking on the corresponding collection, then on the {% icon galaxy-eye %} (eye) icon of each individual sample data
+>
+> > ### {% icon question %} Questions
+> >
+> > 1. What percentage of reads of each sample has been aligned to the `hg38` reference genome?
+> > 2. Which sample is the least contaminated with human reads?
+> > 3. Which sample contains the highest amount of SARS-CoV-2 reads?
+> >
+> > > ### {% icon solution %} Solution
+> > > 1. The samples have between 13% and 21% of reads aligned to `hg38`.
+> > > The information can be found on the last line of output for each
+> > > sample.
+> > > 2. Sample `SRR10971381` is the least contaminated with just above 13%
+> > > of human reads
+> > > 3. You cannot answer this question with this data. While `SRR10971381`
+> > > shows the lowest relative contamination with human reads, that
+> > > does not mean that all other reads are from SARS-CoV-2. They could
+> > > come from other species (*e.g.*, bacteria or other viruses)
+> > > contained in this BALF sample.
+> > {: .solution }
+> {: .question}
+>
+{: .hands_on}
+
+## Mapping of Nanopore reads
+
+For the mapping of the Nanopore-sequenced data we are using the **Minimap2**
+aligner, which is particularly efficient for mapping long reads.
+
+> ### {% icon hands_on %} Hands-on: Nanopore reads mapping
+>
+> 1. **Map with minimap2** {% icon tool %} with the following parameters
+> - *"Will you select a reference genome from your history or use a built-in index?"*:
+> `Use a built-in genome index`
+> - *"Using reference genome"*: `Human Dec. 2013 (GRCh38/hg38) (hg38)`
+>
+> - *"Single or Paired-end reads"*: `Single`
+> - {% icon param-collection %} *"Select fastq dataset"*: the collection
+> of Nanopore-sequenced reads as set up in the *Get Data* section
+>
+> - *"Select analysis mode (sets default)"*: `Oxford Nanopore read to reference mapping. ...`
+>
+> This tool run produces one collection with the actual mapped reads for
+> each Nanopore-sequenced sample. Unlike **Bowtie2** it does not have an
+> option to output mapping statistics directly. However, we can generate
+> that information through an extra step.
+>
+> 2. **Samtools stats** {% icon tool %} with the following parameters
+> - {% icon param-collection %} *"BAM file"*: the collection of mapped
+> Nanopore-sequenced reads, output of **Map with minimap2** {% icon tool %} (step 1)
+> - *"Output"*: `Separate datasets for each statistic`
+> - *"Desired output files"*
+> - {% icon param-check %} *"Summary numbers"*
+>
+> These simple summary stats correspond approximately to the
+> statistics generated by **Bowtie2** and are enough for our purpose.
+>
+> > ### {% icon comment %} Mapping stats for Nanopore-sequenced long reads
+> >
+> > Since, unlike Illumina-generated reads, Nanopore-sequenced reads can
+> > have very different lengths, it makes limited sense to calculate a
+> > ratio of mapped to overall reads for them.
+> >
+> > Instead, `bases mapped` / `total length` should give a more reliable
+> > estimate of which fraction of the data is due to human genome sequence.
+> >
+> > Try to calculate this ratio for the three samples on your own!
+> {: .comment}
+>
+{: .hands_on}
+
+## Human reads subtraction
+
+At this point you should have two collections of mapped reads - one holding the
+mapping results obtained with **Bowtie2** of the three Illumina-sequenced
+samples, the other one holding the **minimap2** output for the three
+Nanopore-sequenced samples.
+
+Next, we are going to filter the data from both collections to retain only
+those reads that were *not* mapped to the human genome, *i.e.*, those of
+potential viral origin.
+
+> ### {% icon hands_on %} Hands-on: Mapped reads filtering
+>
+> 1. **Samtools view** {% icon tool %} to filter the Illumina-sequenced reads mapped with Bowtie2:
+> - {% icon param-collection %} *"SAM/BAM/CRAM data set"*: the collection of
+> mapped Illumina-sequenced reads, output of **Bowtie2** {% icon tool %}
+> - *"What would you like to look at?"*: `A filtered/subsampled selection of reads`
+> - in *"Configure filters"*
+> - *"Require that these flags are set"*: `Read is unmapped` and
+> `Mate is unmapped`
+>
+> - *"What would you like to have reported?"*: `All reads retained after filtering and subsampling`
+> - *"Produce extra dataset with dropped reads?"*: `No`
+> - *"Output format"*: `BAM (-b)`
+>
+> 2. **Samtools view** {% icon tool %} to filter the Nanopore-sequenced reads mapped with minimap2:
+> - {% icon param-collection %} *"SAM/BAM/CRAM data set"*: the collection of
+> mapped Nanopore-sequenced reads, output of **minimap2** {% icon tool %}
+> - *"What would you like to look at?"*: `A filtered/subsampled selection of reads`
+> - In *"Configure filters"*
+> - *"Require that these flags are set"*: `Read is unmapped`
+>
+> - *"What would you like to have reported?"*: `All reads retained after filtering and subsampling`
+> - *"Produce extra dataset with dropped reads?"*: `No`
+> - *"Output format"*: `BAM (-b)`
+>
+> 3. (Optional) Remove the database `hg38` attribute from the output files
+>
+> > ### {% icon details %} Why do this?
+> > When we ran the **Bowtie2** {% icon tool %} and **minimap2**
+> > {% icon tool %} mappers before, these tools set the *database* attribute
+> > on their outputs to `hg38` to indicate that the mapped reads in these
+> > outputs have been mapped against this version of the human reference
+> > genome - an important piece of information for further work with those
+> > mapped reads.
+> >
+> > In steps 1 and 2 above, however, we have eliminated all mapped reads so
+> > the `database: hg38` info is misleading from this point onward in the
+> > analysis. While not directly harmful, it is best practice to remove this
+> > metadata now.
+> {: .details}
+>
+> For the outputs of step 1 and step 2 above, reset the database/build
+> (dbkey) to `unspecified (?)`.
+>
+> {% include snippets/change_dbkey.md dbkey="unspecified (?)" %}
+>
+{: .hands_on}
+
+
+# Format conversion of remaining reads
+
+## Conversion to fastq format
+
+Assembly tools, typically, expect their input data to be fastq-formatted, but
+what we have after mapping and filtering is data in BAM format. Hence, we need
+to convert the retained Illumina- and Nanopore-sequenced reads back into their
+original format before proceeding to assembly.
+
+> ### {% icon hands_on %} Hands-on: BAM to fastq format conversion
+>
+> 1. **Samtools fastx** {% icon tool %} to convert the filtered Illumina-sequenced reads to fastq format
+> - {% icon param-collection %} *"BAM or SAM file to convert"*: the collection of
+> filtered Illumina-sequenced reads, output of first **Samtools view** {% icon tool %} run
+> - *"Output format"*: `compressed FASTQ`
+> - *"outputs"*: `READ1` and `READ2`
+>
+> 2. **Samtools fastx** {% icon tool %} to convert the filtered Nanopore-sequenced reads to fastq format
+> - {% icon param-collection %} *"BAM or SAM file to convert"*: the collection of
+> filtered Nanopore-sequenced reads, output of second **Samtools view** {% icon tool %} run
+> - *"Output format"*: `compressed FASTQ`
+> - *"outputs"*: `unspecific`
+>
+{: .hands_on}
+
+## Optional: Rearrange the filtered data into the original nested data structure
+
+If you compare the outputs of the last step to the input data we started out
+with, you will notice that the Illumina-sequenced data is arranged differently
+now than initially. It is arranged now into separate collections of forward and
+reverse reads, whereas we started with a single nested collection of the data.
+
+We can easily cast the data back into its original structure with one of
+Galaxy's collection manipulation tools, but note that we will not use the
+resulting nested collection for this tutorial because the **Unicycler** tool
+for assembling the reads would not be able to handle the nested data correctly.
+
+Thus, the following just serves as an illustration and is entirely optional.
+
+> ### {% icon hands_on %} Hands-on: Arrange two list collections into a list of pairs
+>
+> 1. **Zip Collection** {% icon tool %} with the following parameters
+> - {% icon param-collection %} *"Input Dataset (Forward)"*: the collection
+> of filtered Illumina-sequenced forward reads in fastq format,
+> first output of first **Samtools fastx** {% icon tool %} run
+> - {% icon param-collection %} *"Input Dataset (Reverse)"*: the collection
+> of filtered Illumina-sequenced reverse reads in fastq format,
+> second output of first **Samtools fastx** {% icon tool %} run
+>
+{: .hands_on}
+
+## Merging of reads with collection operations
+
+To merge reads from several samples into a combined final assembly, we need to
+pass the data to **Unicycler** {% icon tool %} in partially merged form. The
+forward and reverse reads of paired-end data should be kept separate, and so
+should short and long reads. However, the tool has no option to combine data
+from individual samples, so we need to merge the forward, reverse, and the long
+reads data, respectively, across samples. Conveniently for us, the outputs of
+the earlier **Samtools fastx** {% icon tool %} runs have already returned the
+data structured into three corresponding collections for us.
+
+> ### {% icon hands_on %} Hands-on: Collapsing each collection into a single dataset
+>
+> 1. **Collapse Collection** {% icon tool %} of Illumina-sequenced *forward* reads
+> - {% icon param-collection %} *"Collection of files to collapse into single dataset"*:
+> the collection of filtered Illumina-sequenced forward reads in fastq format,
+> first output of first **Samtools fastx** {% icon tool %} run
+> - *"Keep one header line"*: `No`
+> - *"Prepend File name"*: `No`
+>
+> 2. **Collapse Collection** {% icon tool %} of Illumina-sequenced *reverse* reads
+> - {% icon param-collection %} *"Collection of files to collapse into single dataset"*:
+> the collection of filtered Illumina-sequenced reverse reads in fastq format,
+> second output of first **Samtools fastx** {% icon tool %} run
+> - *"Keep one header line"*: `No`
+> - *"Prepend File name"*: `No`
+>
+> 3. **Collapse Collection** {% icon tool %} of Nanopore-sequenced reads
+> - {% icon param-collection %} *"Collection of files to collapse into single dataset"*:
+> the collection of filtered Nanopore-sequenced reads in fastq format,
+> output of second **Samtools fastx** {% icon tool %} run
+> - *"Keep one header line"*: `No`
+> - *"Prepend File name"*: `No`
+>
+{: .hands_on}
+
+
+# SARS-CoV-2 genome assembly
+
+## Optional: Subsampling of reads
+
+The actual assembly of the sequenced reads represents the real bottleneck in
+this tutorial. Assembly of the full set of sequences can easily take 10 hours
+and would best be conducted overnight.
+
+If you do not have that much time, you should downsample the Illumina-sequenced
+combined reads now, which will reduce the time required to finish the
+subsequent assembly step to approximately 1-2 hours.
+
+> ### {% icon comment %} If you are in a hurry
+> The downsampling parameters below have been chosen to have minimal impact on
+> the assembly results. Further speed-ups are certainly possible, but will
+> likely lead to poor assembly outcomes.
+{: .comment}
+
+> ### {% icon hands_on %} Hands-on: Subsampling of paired-end short-reads data
+>
+> 1. **seqtk_sample** {% icon tool %} with the following parameters
+> - {% icon param-files %} *"Input FASTA/Q file"*: The two datasets with the
+> combined Illumina-sequenced forward and reverse reads, outputs of the
+> first and the second run of **Collapse Collection**
+> - *"RNG seed"*: 4
+> - *"Subsample (decimal fraction or number)"*: 0.1
+>
+{: .hands_on}
+
+## Create assembly
+
+> ### {% icon hands_on %} Hands-on: Assembly of the SARS-CoV-2 genome
+>
+> 1. **Create assemblies with Unicycler** {% icon tool %} with the following parameters
+> - *"Paired or Single end data?"*: `Paired`
+> - {% icon param-file %} *"Select first set of reads"*: the combined
+> Illumina-sequenced forward reads from all samples, output of the
+> first **Collapse Collection** {% icon tool %} run (or first output of
+> **seqtk_sample** {% icon tool %} for a subsample-based assembly)
+> - {% icon param-file %} *"Select second set of reads"*: the combined
+> Illumina-sequenced reverse reads from all samples, output of the
+> second **Collapse Collection** {% icon tool %} run (or second output
+> of **seqtk_sample** {% icon tool %} for a subsample-based assembly)
+>
+> - {% icon param-file %} *"Select long reads. If there are no long reads, leave this empty"*:
+> Nanopore-sequenced reads from all samples, output of the third
+> **Collapse Collection** {% icon tool %} run
+> - *"Select Bridging mode"*: `Normal (moderate contig size and misassembly rate)`
+> - *"Exclude contigs from the FASTA file which are shorter than this length (bp)"*:
+> `100`
+> - *"The expected number of linear (i.e. non-circular) sequences in the assembly"*:
+> `1`
+>
+{: .hands_on}
+
+## Explore assembly
+
+The **Unicycler** {% icon tool %} run above should produce two output datasets:
+
+- a final assembly in FASTA format
+- an assembly graph
+
+Of these, the assembly graph is more information-rich because it not only
+contains the sequences of *all* assembled fragments (including the ones shorter
+than the threshold length defined for inclusion of the fragments into the FASTA
+output), but also indicates the relative average coverage of the fragments by
+sequenced reads and how some of the fragments could potentially be bridged
+after resolving ambiguities manually.
+
+### Assembly inspection with Bandage
+
+On the downside, the assembly graph format takes some getting used to before
+you can make sense out of the information it provides.
+
+This issue can be alleviated through the use of **Bandage**, a package for
+exploring assembly graphs through summary reports and visualizations of their
+contents.
+
+> ### {% icon hands_on %} Hands-on: Assembly stats and visualization with Bandage
+>
+> 1. **Bandage Info** {% icon tool %} with the following parameters
+> - {% icon param-file %} *"Graphical Fragment Assembly"*: the assembly graph dataset produced by
+> **Unicycler**
+> - *"Output the information in a single tab-delimited line starting with the graph file"*:
+> `No`
+>
+> 2. **Bandage Image** {% icon tool %} with the following parameters
+> - {% icon param-file %} *"Graphical Fragment Assembly"*: the assembly graph dataset produced by
+> **Unicycler**
+> - *"Node name labels?"*: `Yes`
+> - *"Node length labels?"*: `Yes`
+>
+{: .hands_on}
+
+Let us inspect the summary report produced by **Bandage Info** {% icon tool %}
+first:
+
+You may be rather disappointed by the large *percentage of dead ends* in the
+assembly graph (in general, lower is better here), and by the correspondingly
+large *node count*. After all, should the viral sequence not be encoded on a
+single small contig (a quick check at
+[Wikipedia](https://en.wikipedia.org/wiki/Coronavirus#Genome) reveals that
+coronaviruses have genomes in the size range of 30kb)?
+
+On the other hand, the report lists a *Longest node* of 29768 bp of assembled
+sequence, which is suspiciously close to the expected genome size, alongside a
+much larger *Estimated sequence length*.
+
+Next, take a look at the assembly graph visualization generated by **Bandage
+Image** {% icon tool %} to see if that tells us more:
+
+Indeed, this output shows that **Unicycler** {% icon tool %} managed to
+assemble a good number of contigs of moderate size, then had trouble with a
+number of really small fragments that it could only assemble with lots of
+ambiguities (leading to that ugly clutter of nodes in the top row of the
+image). Those small fragments will probably be hard to make sense of, but the
+manageable list of moderate-size contigs (nodes 1-23, 25, 26) is encouraging.
+
+Of these, node 1 is the longest node mentioned in the report with a size close
+to our expectations.
+
+Node 2 looks peculiar since **Unicycler** claims it is circular, while
+Coronavirus genomes are known to be linear.
+
+### Check origin of assembled sequences with BLAST
+
+While we could view the actual contents of the assembly graph output of
+**Unicycler** {% icon tool %} and extract node sequences of interest from it
+(the longest node and that circular one could be a start), things are much
+easier if we work with the FASTA output of **Unicycler** instead.
+
+From the visualization with **Bandage Image** {% icon tool %} we know that the
+separately assembled nodes are all longer than 1000 bp. We can extract those
+sequences based on the length threshold in Galaxy, then BLAST all retained
+sequences in one go.
+
+> ### {% icon hands_on %} Hands-on: Filter FASTA sequences by their length
+>
+> 1. **Filter sequences by length** {% icon tool %} with the following parameters
+> - {% icon param-file %} *"Fasta file"*: the FASTA output produced by
+> **Unicycler**
+> - *"Minimal length"*: `1000`
+>
+> This outputs a new FASTA dataset with only the sequences satisfying our
+> length threshold.
+>
+> > ### {% icon tip %} Apply length filters after instead of during assembly
+> > You may have noted that in the **Unicycler** {% icon tool %} run we kept
+> > the tool's *"Exclude contigs from the FASTA file which are shorter than
+> > this length (bp)"* option at its default value of `100` instead of using
+> > the 1,000 bp threshold there directly to save a step in the analysis.
+> >
+> > The reason we did this is that normally you will not know the exact length
+> > threshold you want until *after* having explored the generated assembly.
+> >
+> > Length-filtering some FASTA sequences is a trivial process that takes very
+> > little time, but you would not want to rerun an hours-long assembly job
+> > just because you accidentally stripped some interesting assembled sequences
+> > from the output.
+> {: .tip}
+>
+{: .hands_on}
+
+> ### {% icon hands_on %} Hands-on: NCBI BLAST of multiple contigs
+>
+> 1. View the output of **Filter sequences by length** {% icon tool %} by
+> clicking the {% icon galaxy-eye %} (eye) icon attached to that dataset.
+> 2. Click into the middle panel, which should now display the content of the
+> dataset, select all sequences by pressing Ctrl+A,
+> then copy the selection to the clipboard with Ctrl+C
+> 3. Head over to the
+> [NCBI BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=MegaBlast&PROGRAM=blastn)
+> service, paste the copied content into the `Enter Query Sequence` text
+> box and click `BLAST` at the bottom of the form.
+> 4. Wait for the *BLAST* run to finish.
+> 5. On the results page, look for the drop-down menu next to `Results for`.
+> It lets you toggle the BLAST hits list further down to include only the
+> matches to individual sequences from your multi-sequence query.
+>
+{: .hands_on}
+
+Now take a bit of time to explore the BLAST hits uncovered for some of your
+assembled nodes. Pay attention, specifically, to the node with the longest
+sequence (node #1) and the circular node #2, but also investigate the results
+for a few others.
+
+> ### {% icon question %} Question
+>
+> 1. Which genome is represented by node #1?
+> 2. Which genome corresponds to node #2? Does this finding remind you of
+> something you have learnt before?
+> 3. What do most other node sequences have in common? Do these additional
+> findings make sense?
+>
+> > ### {% icon solution %} Solution
+> > 1. The sequence of node #1 is the assembled SARS-CoV-2 sequence we are
+> > looking for. It is a perfect match to various SARS-CoV-2 genome
+> > sequences found in Genbank over the entire assembled length, and we have
+> > been able to assemble almost the entire genome even from the subsampled
+> > sequencing data.
+> > 2. The circular sequence of node #2 corresponds to the 5,386 bp genome of
+> > bacteriophage phiX174. As explained in the more general
+> > [Unicycler assembly](../unicycler-assembly/tutorial.html) tutorial,
+> > this genome is often used as a spike-in in Illumina sequencing.
+> > Finding the complete sequence here is, thus, another indication that
+> > our analysis worked and produced meaningful results.
+> > 3. Almost all other assembled sequences appear to represent parts of
+> > bacterial genomes. The only exceptions are the node #10 sequence, for
+> > which no significant BLAST hits could be found, and the node #11
+> > sequence, which represents a small stretch of left-over human genomic
+> > DNA, which seems to have survived our subtraction approach.
+> >
+> > What all the bacterial genomes have in common is that they represent
+> > genera of bacteria that are known to colonize the oral cavity and
+> > mucosa. Since all samples in this analysis are BALF samples, the presence
+> > of DNA from such bacteria should not be surprising. In addition, some
+> > members of the identified genera are known as opportunistic pathogens.
+> > In particular, members of the genus *Prevotella* can infect the
+> > respiratory tract and contribute to inflammation under anaerobic
+> > conditions caused by primary infections. Hence, an alternative
+> > explanation for the presence of some of these sequences in the samples
+> > might be that the corresponding bacteria contributed to the clinical
+> > picture of some of the Covid-19 patients they were obtained from.
+> {: .solution}
+>
+{: .question}
+
+# Conclusion
+{:.no_toc}
+
+The power of modern genome assembly tools is remarkable, and so is their
+robustness in the face of data of metagenomic nature. Assembling reads derived
+from a virus and a good handful of copurified bacteria back into separate
+contigs is a challenging task, which Unicycler solved without major issues!
+
+Nevertheless, good quality assemblies still rely on proper preprocessing and
+filtering to reduce the number of misassembly events and ambiguous assemblies,
+and the incorporation of sequencing errors into the final assembly.
+
diff --git a/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing.ga b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing.ga
new file mode 100644
index 00000000000000..e2de007d945975
--- /dev/null
+++ b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing.ga
@@ -0,0 +1,1088 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Assembly with preprocessing",
+ "format-version": "0.1",
+ "name": "assembly_with_preprocessing",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [],
+ "label": "Collection of paired short-reads data",
+ "name": "Input dataset collection",
+ "outputs": [],
+ "position": {
+ "left": 268.13330078125,
+ "top": 341.41668701171875
+ },
+ "tool_id": null,
+ "tool_state": "{\"collection_type\": \"list:paired\"}",
+ "tool_version": null,
+ "type": "data_collection_input",
+ "uuid": "3e7dab04-77db-4780-b1dd-c724b80a8b7b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "daba9931-3837-42b5-a93f-e2e51f5f667a"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [],
+ "label": "Collection of long-reads data",
+ "name": "Input dataset collection",
+ "outputs": [],
+ "position": {
+ "left": 267.13330078125,
+ "top": 756.4166717529297
+ },
+ "tool_id": null,
+ "tool_state": "{\"collection_type\": \"list\"}",
+ "tool_version": null,
+ "type": "data_collection_input",
+ "uuid": "bd1f5561-95a1-4fb5-af1e-f3d4cbb40639",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "7146e49b-ab4b-44a8-ac54-e98539fecb97"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "input": {
+ "id": 7,
+ "output_name": "outputsam"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools fastx",
+ "outputs": [
+ {
+ "name": "nonspecific",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1333.63330078125,
+ "top": 852.4166717529297
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a8d69aee190e",
+ "name": "samtools_fastx",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"copy_arb_tags\": \"\\\"\\\"\", \"__page__\": null, \"output_fmt_cond\": \"{\\\"__current_case__\\\": 2, \\\"default_quality\\\": \\\"\\\", \\\"ilumina_casava\\\": \\\"false\\\", \\\"output_fmt_select\\\": \\\"fastqsanger.gz\\\", \\\"output_quality\\\": \\\"false\\\"}\", \"idxout_cond\": \"{\\\"__current_case__\\\": 0, \\\"idxout_select\\\": \\\"no\\\"}\", \"read_numbering\": \"\\\"\\\"\", \"exclusive_filter\": \"null\", \"exclusive_filter_all\": \"null\", \"inclusive_filter\": \"null\", \"outputs\": \"[\\\"r0\\\"]\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"copy_tags\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "31e2b4e8-7631-46c0-8382-b805cee2b739",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "nonspecific",
+ "uuid": "cfbf839f-53f0-4a82-8966-2bdb6919de5e"
+ }
+ ]
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "input": {
+ "id": 9,
+ "output_name": "outputsam"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools fastx",
+ "outputs": [
+ {
+ "name": "forward",
+ "type": "fasta"
+ },
+ {
+ "name": "reverse",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1533.63330078125,
+ "top": 638.4166717529297
+ },
+ "post_job_actions": {
+ "HideDatasetActionforward": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "forward"
+ },
+ "HideDatasetActionreverse": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "reverse"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a8d69aee190e",
+ "name": "samtools_fastx",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"copy_arb_tags\": \"\\\"\\\"\", \"__page__\": null, \"output_fmt_cond\": \"{\\\"__current_case__\\\": 2, \\\"default_quality\\\": \\\"\\\", \\\"ilumina_casava\\\": \\\"false\\\", \\\"output_fmt_select\\\": \\\"fastqsanger.gz\\\", \\\"output_quality\\\": \\\"false\\\"}\", \"idxout_cond\": \"{\\\"__current_case__\\\": 0, \\\"idxout_select\\\": \\\"no\\\"}\", \"read_numbering\": \"\\\"\\\"\", \"exclusive_filter\": \"null\", \"exclusive_filter_all\": \"null\", \"inclusive_filter\": \"null\", \"outputs\": \"[\\\"r1\\\", \\\"r2\\\"]\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"copy_tags\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "a9623672-277b-4b9f-aa00-fd88cf5e55a1",
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input_list": {
+ "id": 10,
+ "output_name": "nonspecific"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1715.63330078125,
+ "top": 1015.4166870117188
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "423fc320-f6b5-45fe-aabd-4046ff3cf54d",
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "__ZIP_COLLECTION__",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input_forward": {
+ "id": 11,
+ "output_name": "forward"
+ },
+ "input_reverse": {
+ "id": 11,
+ "output_name": "reverse"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Zip Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1754.63330078125,
+ "top": 477.41668701171875
+ },
+ "post_job_actions": {},
+ "tool_id": "__ZIP_COLLECTION__",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_reverse\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"input_forward\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "9ba89e81-9c53-4f5b-9d61-42319b7d574c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "ebc8f563-c45e-4e17-a541-b1275e7f61ad"
+ }
+ ]
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "input_list": {
+ "id": 11,
+ "output_name": "forward"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1837.6333312988281,
+ "top": 710.4166717529297
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "6b7925f4-4706-4983-8840-0323858b6978",
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input_list": {
+ "id": 11,
+ "output_name": "reverse"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1808.6333312988281,
+ "top": 860.4166717529297
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "f53e47f8-969b-4948-b608-7fed0e95680d",
+ "workflow_outputs": []
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "in_file": {
+ "id": 14,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool seqtk_sample",
+ "name": "in_file"
+ }
+ ],
+ "label": null,
+ "name": "seqtk_sample",
+ "outputs": [
+ {
+ "name": "default",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2129.4166717529297,
+ "top": 711.4166717529297
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "tool_shed_repository": {
+ "changeset_revision": "58c8ece95b53",
+ "name": "seqtk",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"s\": \"\\\"4\\\"\", \"in_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"subsample_size\": \"\\\"0.1\\\"\", \"advanced\": \"{\\\"single_pass_mode\\\": \\\"false\\\"}\"}",
+ "tool_version": "1.3.2",
+ "type": "tool",
+ "uuid": "4f7c9133-a664-46dc-9583-c9980d5d1011",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "default",
+ "uuid": "be52344e-6874-431c-8d86-9d0e9371aed9"
+ }
+ ]
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "in_file": {
+ "id": 15,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool seqtk_sample",
+ "name": "in_file"
+ }
+ ],
+ "label": null,
+ "name": "seqtk_sample",
+ "outputs": [
+ {
+ "name": "default",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2116.4166717529297,
+ "top": 859.4166717529297
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "tool_shed_repository": {
+ "changeset_revision": "58c8ece95b53",
+ "name": "seqtk",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"s\": \"\\\"4\\\"\", \"in_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"subsample_size\": \"\\\"0.1\\\"\", \"advanced\": \"{\\\"single_pass_mode\\\": \\\"false\\\"}\"}",
+ "tool_version": "1.3.2",
+ "type": "tool",
+ "uuid": "79064891-aa79-42e3-b848-7a6240022acc",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "default",
+ "uuid": "3e196eb1-8ba1-4be5-90d0-c5b8d3df4ff0"
+ }
+ ]
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "long": {
+ "id": 12,
+ "output_name": "output"
+ },
+ "paired_unpaired|fastq_input1": {
+ "id": 16,
+ "output_name": "default"
+ },
+ "paired_unpaired|fastq_input2": {
+ "id": 17,
+ "output_name": "default"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "lr_align"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "long"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "paired_unpaired"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "paired_unpaired"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "rotation"
+ }
+ ],
+ "label": null,
+ "name": "Create assemblies with Unicycler",
+ "outputs": [
+ {
+ "name": "assembly_graph",
+ "type": "tabular"
+ },
+ {
+ "name": "assembly",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 2365.633331298828,
+ "top": 706.4166717529297
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "tool_shed_repository": {
+ "changeset_revision": "88c240872a65",
+ "name": "unicycler",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"linear_seqs\": \"\\\"1\\\"\", \"spades\": \"{\\\"depth_filter\\\": \\\"0.25\\\", \\\"kmer_count\\\": \\\"10\\\", \\\"kmers\\\": \\\"\\\", \\\"largest_component\\\": \\\"false\\\", \\\"max_kmer_frac\\\": \\\"0.95\\\", \\\"min_kmer_frac\\\": \\\"0.2\\\", \\\"no_correct\\\": \\\"false\\\"}\", \"graph_clean\": \"{\\\"min_component_size\\\": \\\"1000\\\", \\\"min_dead_end_size\\\": \\\"1000\\\"}\", \"__page__\": null, \"min_anchor_seg_len\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"lr_align\": \"{\\\"contamination\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"low_score\\\": \\\"\\\", \\\"scores\\\": \\\"3,-6,-5,-2\\\"}\", \"long\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"paired_unpaired\": \"{\\\"__current_case__\\\": 0, \\\"fastq_input1\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"fastq_input2\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"fastq_input_selector\\\": \\\"paired\\\"}\", \"min_fasta_length\": \"\\\"100\\\"\", \"mode\": \"\\\"normal\\\"\", \"rotation\": \"{\\\"no_rotate\\\": \\\"false\\\", \\\"start_gene_cov\\\": \\\"95.0\\\", \\\"start_gene_id\\\": \\\"90.0\\\", \\\"start_genes\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"pilon\": \"{\\\"min_polish_size\\\": \\\"1000\\\", \\\"no_pilon\\\": \\\"false\\\"}\"}",
+ "tool_version": "0.4.8.0",
+ "type": "tool",
+ "uuid": "851c6be6-c202-450a-9cd3-19e205dcc496",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "assembly",
+ "uuid": "702dd61e-6072-49d9-a502-44e0a23f5a98"
+ },
+ {
+ "label": null,
+ "output_name": "assembly_graph",
+ "uuid": "3285cc00-6b39-4f3d-87a5-87abd483ede6"
+ }
+ ]
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_info/0.8.1+galaxy1",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "input_file": {
+ "id": 18,
+ "output_name": "assembly_graph"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bandage Info",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 2665.633331298828,
+ "top": 811.4166717529297
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_info/0.8.1+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b2860df42e16",
+ "name": "bandage",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"tsv\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "0.8.1+galaxy1",
+ "type": "tool",
+ "uuid": "4aa230bd-3269-41c3-81f5-9085fcc4001d",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "cf5eab6a-97ae-430e-a27a-6ecc33d4b8c0"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.19.5+galaxy1",
+ "errors": null,
+ "id": 2,
+ "input_connections": {
+ "single_paired|paired_input": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": "fastp: Trimmed Illumina Reads",
+ "name": "fastp",
+ "outputs": [
+ {
+ "name": "output_paired_coll",
+ "type": "input"
+ },
+ {
+ "name": "report_html",
+ "type": "html"
+ },
+ {
+ "name": "report_json",
+ "type": "json"
+ }
+ ],
+ "position": {
+ "left": 585,
+ "top": 365.04998779296875
+ },
+ "post_job_actions": {
+ "HideDatasetActionreport_html": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "report_html"
+ },
+ "HideDatasetActionreport_json": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "report_json"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.19.5+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "1d8fe9bc4cb0",
+ "name": "fastp",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output_options\": \"{\\\"report_html\\\": \\\"true\\\", \\\"report_json\\\": \\\"true\\\"}\", \"single_paired\": \"{\\\"__current_case__\\\": 2, \\\"adapter_trimming_options\\\": {\\\"adapter_sequence1\\\": \\\"\\\", \\\"adapter_sequence2\\\": \\\"\\\", \\\"disable_adapter_trimming\\\": \\\"false\\\"}, \\\"global_trimming_options\\\": {\\\"trim_front1\\\": \\\"\\\", \\\"trim_front2\\\": \\\"\\\", \\\"trim_tail1\\\": \\\"\\\", \\\"trim_tail2\\\": \\\"\\\"}, \\\"paired_input\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"single_paired_selector\\\": \\\"paired_collection\\\"}\", \"read_mod_options\": \"{\\\"base_correction_options\\\": {\\\"correction\\\": \\\"false\\\"}, \\\"cutting_by_quality_options\\\": {\\\"cut_by_quality3\\\": \\\"false\\\", \\\"cut_by_quality5\\\": \\\"false\\\", \\\"cut_mean_quality\\\": \\\"\\\", \\\"cut_window_size\\\": \\\"\\\"}, \\\"polyg_tail_trimming\\\": {\\\"__current_case__\\\": 1, \\\"poly_g_min_len\\\": \\\"\\\", \\\"trimming_select\\\": \\\"\\\"}, \\\"polyx_tail_trimming\\\": {\\\"__current_case__\\\": 1, \\\"polyx_trimming_select\\\": \\\"\\\"}, \\\"umi_processing\\\": {\\\"umi\\\": \\\"false\\\", \\\"umi_len\\\": \\\"\\\", \\\"umi_loc\\\": \\\"\\\", \\\"umi_prefix\\\": \\\"\\\"}}\", \"overrepresented_sequence_analysis\": \"{\\\"overrepresentation_analysis\\\": \\\"false\\\", \\\"overrepresentation_sampling\\\": \\\"\\\"}\", \"filter_options\": \"{\\\"length_filtering_options\\\": {\\\"disable_length_filtering\\\": \\\"false\\\", \\\"length_required\\\": \\\"50\\\"}, \\\"low_complexity_filter\\\": {\\\"complexity_threshold\\\": \\\"\\\", \\\"enable_low_complexity_filter\\\": \\\"false\\\"}, \\\"quality_filtering_options\\\": {\\\"disable_quality_filtering\\\": \\\"false\\\", \\\"n_base_limit\\\": \\\"\\\", \\\"qualified_quality_phred\\\": \\\"20\\\", \\\"unqualified_percent_limit\\\": \\\"20\\\"}}\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.19.5+galaxy1",
+ "type": "tool",
+ "uuid": "7508c22b-dc6a-4071-8dd0-d5616718e62b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_paired_coll",
+ "uuid": "a7be7eab-3e80-4be6-b1a4-ab303c314096"
+ }
+ ]
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_filter_by_length/fasta_filter_by_length/1.2",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "input": {
+ "id": 18,
+ "output_name": "assembly"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Filter sequences by length",
+ "name": "input"
+ }
+ ],
+ "label": null,
+ "name": "Filter sequences by length",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 2574.4166870117188,
+ "top": 1254.4166870117188
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_filter_by_length/fasta_filter_by_length/1.2",
+ "tool_shed_repository": {
+ "changeset_revision": "8cacfcf96a52",
+ "name": "fasta_filter_by_length",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"max_length\": \"\\\"0\\\"\", \"min_length\": \"\\\"1000\\\"\"}",
+ "tool_version": "1.2",
+ "type": "tool",
+ "uuid": "3cbd67e0-3e55-4726-8dbe-0c61aec4d293",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "e4143f49-239f-4266-ba8d-e6299a331403"
+ }
+ ]
+ },
+ "21": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/0.8.1+galaxy2",
+ "errors": null,
+ "id": 21,
+ "input_connections": {
+ "input_file": {
+ "id": 18,
+ "output_name": "assembly_graph"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Bandage Image",
+ "name": "input_file"
+ }
+ ],
+ "label": null,
+ "name": "Bandage Image",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "jpg"
+ }
+ ],
+ "position": {
+ "left": 2667.4166870117188,
+ "top": 1052.4166870117188
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/0.8.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "b2860df42e16",
+ "name": "bandage",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"input_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"lengths\": \"\\\"true\\\"\", \"output_format\": \"\\\"jpg\\\"\", \"height\": \"\\\"1000\\\"\", \"width\": \"\\\"\\\"\", \"names\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.8.1+galaxy2",
+ "type": "tool",
+ "uuid": "ef2c6a46-8421-499e-a840-8dd03483c5c9",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "5e535f98-b740-44ec-aba1-9d9090acb89a"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.25.0+galaxy1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "mode|reads|files": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "NanoPlot",
+ "outputs": [
+ {
+ "name": "output_html",
+ "type": "html"
+ },
+ {
+ "name": "nanostats",
+ "type": "txt"
+ },
+ {
+ "name": "nanostats_post_filtering",
+ "type": "txt"
+ },
+ {
+ "name": "read_length",
+ "type": "png"
+ },
+ {
+ "name": "log_read_length",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 630,
+ "top": 650.4666595458984
+ },
+ "post_job_actions": {
+ "HideDatasetActionlog_read_length": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log_read_length"
+ },
+ "HideDatasetActionnanostats": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "nanostats"
+ },
+ "HideDatasetActionnanostats_post_filtering": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "nanostats_post_filtering"
+ },
+ "HideDatasetActionread_length": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "read_length"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.25.0+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "645159bcee2d",
+ "name": "nanoplot",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"filter\": \"{\\\"alength\\\": \\\"false\\\", \\\"barcoded\\\": \\\"false\\\", \\\"downsample\\\": \\\"\\\", \\\"drop_outliers\\\": \\\"false\\\", \\\"loglength\\\": \\\"true\\\", \\\"maxlength\\\": \\\"\\\", \\\"minlength\\\": \\\"\\\", \\\"minqual\\\": \\\"\\\", \\\"percentqual\\\": \\\"false\\\", \\\"readtype\\\": null}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 0, \\\"choice\\\": \\\"batch\\\", \\\"reads\\\": {\\\"__current_case__\\\": 0, \\\"files\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"type\\\": \\\"fastq\\\"}}\", \"customization\": \"{\\\"N50\\\": \\\"false\\\", \\\"color\\\": null, \\\"format\\\": \\\"png\\\", \\\"noN50\\\": \\\"false\\\", \\\"plots\\\": null}\"}",
+ "tool_version": "1.25.0+galaxy1",
+ "type": "tool",
+ "uuid": "9b1d43ac-82e3-463f-9c88-08c8826a62c9",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_html",
+ "uuid": "cd8570c7-02db-479b-81a9-cd0d26643f71"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.17+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "fastq_input|fastq_input1": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Map with minimap2",
+ "outputs": [
+ {
+ "name": "alignment_output",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 709.199951171875,
+ "top": 1131.9666748046875
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.17+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "53c0b7a1a0c3",
+ "name": "minimap2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"alignment_options\": \"{\\\"-O2\\\": \\\"\\\", \\\"A\\\": \\\"\\\", \\\"B\\\": \\\"\\\", \\\"E\\\": \\\"\\\", \\\"E2\\\": \\\"\\\", \\\"O\\\": \\\"\\\", \\\"s\\\": \\\"\\\", \\\"u\\\": null, \\\"z\\\": \\\"\\\"}\", \"analysis_type_selector\": \"\\\"splice\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"io_options\": \"{\\\"K\\\": \\\"\\\", \\\"L\\\": \\\"false\\\", \\\"Q\\\": \\\"false\\\", \\\"Y\\\": \\\"false\\\", \\\"c\\\": \\\"false\\\", \\\"cs\\\": null, \\\"eqx\\\": \\\"false\\\", \\\"output_format\\\": \\\"BAM\\\"}\", \"mapping_options\": \"{\\\"F\\\": \\\"\\\", \\\"G\\\": \\\"\\\", \\\"N\\\": \\\"\\\", \\\"X\\\": \\\"false\\\", \\\"f\\\": \\\"\\\", \\\"g\\\": \\\"\\\", \\\"m\\\": \\\"\\\", \\\"min_occ_floor\\\": \\\"\\\", \\\"n\\\": \\\"\\\", \\\"p\\\": \\\"\\\", \\\"r\\\": \\\"\\\"}\", \"fastq_input\": \"{\\\"__current_case__\\\": 1, \\\"fastq_input1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"fastq_input_selector\\\": \\\"single\\\"}\", \"reference_source\": \"{\\\"__current_case__\\\": 0, \\\"ref_file\\\": \\\"hg38\\\", \\\"reference_source_selector\\\": \\\"cached\\\"}\", \"indexing_options\": \"{\\\"H\\\": \\\"false\\\", \\\"I\\\": \\\"\\\", \\\"k\\\": \\\"\\\", \\\"w\\\": \\\"\\\"}\"}",
+ "tool_version": "2.17+galaxy1",
+ "type": "tool",
+ "uuid": "098d2283-1218-489b-9882-8822e79d9444",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "alignment_output",
+ "uuid": "9972015d-4bdc-440f-b8b6-84e92a64ef8f"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "results_0|software_cond|input": {
+ "id": 2,
+ "output_name": "report_json"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MultiQC",
+ "outputs": [
+ {
+ "name": "stats",
+ "type": "input"
+ },
+ {
+ "name": "html_report",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 938.699951171875,
+ "top": 250
+ },
+ "post_job_actions": {
+ "HideDatasetActionstats": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "stats"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "tool_shed_repository": {
+ "changeset_revision": "b2f1f75d49c4",
+ "name": "multiqc",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"title\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"results\": \"[{\\\"__index__\\\": 0, \\\"software_cond\\\": {\\\"__current_case__\\\": 7, \\\"input\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"software\\\": \\\"fastp\\\"}}]\", \"saveLog\": \"\\\"false\\\"\"}",
+ "tool_version": "1.7",
+ "type": "tool",
+ "uuid": "73566697-4a46-4d35-b8e2-61770acc62c0",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_report",
+ "uuid": "15f42a94-2c97-4932-8e3f-f26a72b58be4"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3+galaxy0",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "library|input_1": {
+ "id": 2,
+ "output_name": "output_paired_coll"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bowtie2",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "bam"
+ },
+ {
+ "name": "mapping_stats",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 913.9166259765625,
+ "top": 564.6333312988281
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "749c918495f7",
+ "name": "bowtie2",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"sam_options\": \"{\\\"__current_case__\\\": 1, \\\"sam_options_selector\\\": \\\"no\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"library\": \"{\\\"__current_case__\\\": 2, \\\"aligned_file\\\": \\\"false\\\", \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"paired_options\\\": {\\\"__current_case__\\\": 1, \\\"paired_options_selector\\\": \\\"no\\\"}, \\\"type\\\": \\\"paired_collection\\\", \\\"unaligned_file\\\": \\\"false\\\"}\", \"reference_genome\": \"{\\\"__current_case__\\\": 0, \\\"index\\\": \\\"hg38full\\\", \\\"source\\\": \\\"indexed\\\"}\", \"rg\": \"{\\\"__current_case__\\\": 3, \\\"rg_selector\\\": \\\"do_not_set\\\"}\", \"save_mapping_stats\": \"\\\"true\\\"\", \"analysis_type\": \"{\\\"__current_case__\\\": 0, \\\"analysis_type_selector\\\": \\\"simple\\\", \\\"presets\\\": \\\"no_presets\\\"}\"}",
+ "tool_version": "2.3.4.3+galaxy0",
+ "type": "tool",
+ "uuid": "43331d2c-7116-410e-a6bd-7e7520d93a52",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "eeae40f6-f1b2-4c56-9860-ac7b8eb1dec0"
+ },
+ {
+ "label": null,
+ "output_name": "mapping_stats",
+ "uuid": "c567226e-efaf-4188-bdec-3ba20d0733b0"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input": {
+ "id": 4,
+ "output_name": "alignment_output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools view",
+ "outputs": [
+ {
+ "name": "outputsam",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 954.63330078125,
+ "top": 961.4166717529297
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutputsam": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outputsam"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b01db2684fa5",
+ "name": "samtools_view",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 1, \\\"filter_config\\\": {\\\"cigarcons\\\": \\\"\\\", \\\"cond_region\\\": {\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}, \\\"cond_rg\\\": {\\\"__current_case__\\\": 0, \\\"select_rg\\\": \\\"no\\\"}, \\\"exclusive_filter\\\": null, \\\"exclusive_filter_all\\\": null, \\\"inclusive_filter\\\": [\\\"4\\\"], \\\"library\\\": \\\"\\\", \\\"quality\\\": \\\"\\\"}, \\\"output_options\\\": {\\\"__current_case__\\\": 0, \\\"adv_output\\\": {\\\"collapsecigar\\\": \\\"false\\\", \\\"readtags\\\": []}, \\\"complementary_output\\\": \\\"false\\\", \\\"output_format\\\": {\\\"__current_case__\\\": 1, \\\"fmtopt\\\": \\\"-b\\\", \\\"oformat\\\": \\\"bam\\\"}, \\\"reads_report_type\\\": \\\"retained\\\"}, \\\"outtype\\\": \\\"selected_reads\\\", \\\"subsample_config\\\": {\\\"subsampling_mode\\\": {\\\"__current_case__\\\": 0, \\\"factor\\\": \\\"1.0\\\", \\\"seed\\\": \\\"\\\", \\\"select_subsample\\\": \\\"fraction\\\"}}}\", \"__page__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "22379db6-5fca-40fb-9ce9-d59a1336c679",
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input": {
+ "id": 4,
+ "output_name": "alignment_output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools stats",
+ "outputs": [
+ {
+ "name": "output_collection",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1022.4166259765625,
+ "top": 1254.4166870117188
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "145f6d74ff5e",
+ "name": "samtools_stats",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"coverage_cond\": \"{\\\"__current_case__\\\": 0, \\\"coverage_select\\\": \\\"no\\\"}\", \"gc_depth\": \"\\\"\\\"\", \"cov_threshold\": \"\\\"\\\"\", \"most_inserts\": \"\\\"\\\"\", \"cond_region\": \"{\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}\", \"split_output_cond\": \"{\\\"__current_case__\\\": 1, \\\"generate_tables\\\": [\\\"SN\\\"], \\\"split_output_selector\\\": \\\"yes\\\"}\", \"read_length\": \"\\\"\\\"\", \"trim_quality\": \"\\\"\\\"\", \"remove_overlaps\": \"\\\"false\\\"\", \"filter_by_flags\": \"{\\\"__current_case__\\\": 1, \\\"filter_flags\\\": \\\"nofilter\\\"}\", \"sparse\": \"\\\"false\\\"\", \"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"insert_size\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"remove_dups\": \"\\\"false\\\"\"}",
+ "tool_version": "2.0.2+galaxy2",
+ "type": "tool",
+ "uuid": "15f6252f-79d4-4319-855d-09665deac6a3",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_collection",
+ "uuid": "faed6306-eed6-40bd-ac9f-9fb3a4e980ea"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input": {
+ "id": 6,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools view",
+ "outputs": [
+ {
+ "name": "outputsam",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1221.63330078125,
+ "top": 531.4166717529297
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutputsam": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outputsam"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b01db2684fa5",
+ "name": "samtools_view",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 1, \\\"filter_config\\\": {\\\"cigarcons\\\": \\\"\\\", \\\"cond_region\\\": {\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}, \\\"cond_rg\\\": {\\\"__current_case__\\\": 0, \\\"select_rg\\\": \\\"no\\\"}, \\\"exclusive_filter\\\": null, \\\"exclusive_filter_all\\\": null, \\\"inclusive_filter\\\": [\\\"4\\\", \\\"8\\\"], \\\"library\\\": \\\"\\\", \\\"quality\\\": \\\"\\\"}, \\\"output_options\\\": {\\\"__current_case__\\\": 0, \\\"adv_output\\\": {\\\"collapsecigar\\\": \\\"false\\\", \\\"readtags\\\": []}, \\\"complementary_output\\\": \\\"false\\\", \\\"output_format\\\": {\\\"__current_case__\\\": 1, \\\"fmtopt\\\": \\\"-b\\\", \\\"oformat\\\": \\\"bam\\\"}, \\\"reads_report_type\\\": \\\"retained\\\"}, \\\"outtype\\\": \\\"selected_reads\\\", \\\"subsample_config\\\": {\\\"subsampling_mode\\\": {\\\"__current_case__\\\": 0, \\\"factor\\\": \\\"1.0\\\", \\\"seed\\\": \\\"\\\", \\\"select_subsample\\\": \\\"fraction\\\"}}}\", \"__page__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "cca12f9c-c2c5-4238-b396-fc8f1fdd9ad7",
+ "workflow_outputs": []
+ }
+ },
+ "tags": [
+ "GTN",
+ "assembly"
+ ],
+ "uuid": "4041a0cb-dfb6-439e-9ca9-e36bf9a0cc88",
+ "version": 5
+}
diff --git a/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing_and_sra_download.ga b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing_and_sra_download.ga
new file mode 100644
index 00000000000000..856d711c005231
--- /dev/null
+++ b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/assembly_with_preprocessing_and_sra_download.ga
@@ -0,0 +1,1228 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Assembly with preprocessing and SRA download",
+ "format-version": "0.1",
+ "name": "assembly_with_preprocessing_and_sra_download",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [],
+ "label": "List of Illumina accessions",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 257.683349609375,
+ "top": 215.9166259765625
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "1a61a3b3-4515-47a1-8ee5-fb79f05a8dfa",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "ea340c2d-d9e8-4ea5-bbc0-7ee7d92a57bf"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [],
+ "label": "List of ONT accessions",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 264.9833984375,
+ "top": 522.7166442871094
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "0988bf07-dc80-4d7b-a62f-8e5e852b0982",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "fb053f14-71fc-45ac-83ea-a308a5c22918"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "input": {
+ "id": 6,
+ "output_name": "alignment_output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools stats",
+ "outputs": [
+ {
+ "name": "output_collection",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1328.4166259765625,
+ "top": 1239.4166259765625
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "145f6d74ff5e",
+ "name": "samtools_stats",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"coverage_cond\": \"{\\\"__current_case__\\\": 0, \\\"coverage_select\\\": \\\"no\\\"}\", \"gc_depth\": \"\\\"\\\"\", \"cov_threshold\": \"\\\"\\\"\", \"most_inserts\": \"\\\"\\\"\", \"cond_region\": \"{\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}\", \"split_output_cond\": \"{\\\"__current_case__\\\": 1, \\\"generate_tables\\\": [\\\"SN\\\"], \\\"split_output_selector\\\": \\\"yes\\\"}\", \"read_length\": \"\\\"\\\"\", \"trim_quality\": \"\\\"\\\"\", \"remove_overlaps\": \"\\\"false\\\"\", \"filter_by_flags\": \"{\\\"__current_case__\\\": 1, \\\"filter_flags\\\": \\\"nofilter\\\"}\", \"sparse\": \"\\\"false\\\"\", \"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"insert_size\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"remove_dups\": \"\\\"false\\\"\"}",
+ "tool_version": "2.0.2+galaxy2",
+ "type": "tool",
+ "uuid": "ae176340-d7da-4c2e-851b-e6129a0c222d",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_collection",
+ "uuid": "8652ea8a-35f5-4531-8b89-88bca189ab9f"
+ }
+ ]
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "input": {
+ "id": 8,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools view",
+ "outputs": [
+ {
+ "name": "outputsam",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1516.4166259765625,
+ "top": 531.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutputsam": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outputsam"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b01db2684fa5",
+ "name": "samtools_view",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 1, \\\"filter_config\\\": {\\\"cigarcons\\\": \\\"\\\", \\\"cond_region\\\": {\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}, \\\"cond_rg\\\": {\\\"__current_case__\\\": 0, \\\"select_rg\\\": \\\"no\\\"}, \\\"exclusive_filter\\\": null, \\\"exclusive_filter_all\\\": null, \\\"inclusive_filter\\\": [\\\"4\\\", \\\"8\\\"], \\\"library\\\": \\\"\\\", \\\"quality\\\": \\\"\\\"}, \\\"output_options\\\": {\\\"__current_case__\\\": 0, \\\"adv_output\\\": {\\\"collapsecigar\\\": \\\"false\\\", \\\"readtags\\\": []}, \\\"complementary_output\\\": \\\"false\\\", \\\"output_format\\\": {\\\"__current_case__\\\": 1, \\\"fmtopt\\\": \\\"-b\\\", \\\"oformat\\\": \\\"bam\\\"}, \\\"reads_report_type\\\": \\\"retained\\\"}, \\\"outtype\\\": \\\"selected_reads\\\", \\\"subsample_config\\\": {\\\"subsampling_mode\\\": {\\\"__current_case__\\\": 0, \\\"factor\\\": \\\"1.0\\\", \\\"seed\\\": \\\"\\\", \\\"select_subsample\\\": \\\"fraction\\\"}}}\", \"__page__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "cca12f9c-c2c5-4238-b396-fc8f1fdd9ad7",
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input": {
+ "id": 9,
+ "output_name": "outputsam"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools fastx",
+ "outputs": [
+ {
+ "name": "nonspecific",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1628.4166259765625,
+ "top": 852.4166412353516
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a8d69aee190e",
+ "name": "samtools_fastx",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"copy_arb_tags\": \"\\\"\\\"\", \"__page__\": null, \"output_fmt_cond\": \"{\\\"__current_case__\\\": 2, \\\"default_quality\\\": \\\"\\\", \\\"ilumina_casava\\\": \\\"false\\\", \\\"output_fmt_select\\\": \\\"fastqsanger.gz\\\", \\\"output_quality\\\": \\\"false\\\"}\", \"idxout_cond\": \"{\\\"__current_case__\\\": 0, \\\"idxout_select\\\": \\\"no\\\"}\", \"read_numbering\": \"\\\"\\\"\", \"exclusive_filter\": \"null\", \"exclusive_filter_all\": \"null\", \"inclusive_filter\": \"null\", \"outputs\": \"[\\\"r0\\\"]\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"copy_tags\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "31e2b4e8-7631-46c0-8382-b805cee2b739",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "nonspecific",
+ "uuid": "cfbf839f-53f0-4a82-8966-2bdb6919de5e"
+ }
+ ]
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input": {
+ "id": 11,
+ "output_name": "outputsam"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools fastx",
+ "outputs": [
+ {
+ "name": "forward",
+ "type": "fasta"
+ },
+ {
+ "name": "reverse",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1828.4166870117188,
+ "top": 638.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionforward": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "forward"
+ },
+ "HideDatasetActionreverse": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "reverse"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_fastx/samtools_fastx/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a8d69aee190e",
+ "name": "samtools_fastx",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"copy_arb_tags\": \"\\\"\\\"\", \"__page__\": null, \"output_fmt_cond\": \"{\\\"__current_case__\\\": 2, \\\"default_quality\\\": \\\"\\\", \\\"ilumina_casava\\\": \\\"false\\\", \\\"output_fmt_select\\\": \\\"fastqsanger.gz\\\", \\\"output_quality\\\": \\\"false\\\"}\", \"idxout_cond\": \"{\\\"__current_case__\\\": 0, \\\"idxout_select\\\": \\\"no\\\"}\", \"read_numbering\": \"\\\"\\\"\", \"exclusive_filter\": \"null\", \"exclusive_filter_all\": \"null\", \"inclusive_filter\": \"null\", \"outputs\": \"[\\\"r1\\\", \\\"r2\\\"]\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"copy_tags\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "a9623672-277b-4b9f-aa00-fd88cf5e55a1",
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "input_list": {
+ "id": 12,
+ "output_name": "nonspecific"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2010.4166870117188,
+ "top": 1015.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "423fc320-f6b5-45fe-aabd-4046ff3cf54d",
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "__ZIP_COLLECTION__",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input_forward": {
+ "id": 13,
+ "output_name": "forward"
+ },
+ "input_reverse": {
+ "id": 13,
+ "output_name": "reverse"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Zip Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2049.4166870117188,
+ "top": 477.4166259765625
+ },
+ "post_job_actions": {},
+ "tool_id": "__ZIP_COLLECTION__",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_reverse\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"input_forward\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "9ba89e81-9c53-4f5b-9d61-42319b7d574c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "ebc8f563-c45e-4e17-a541-b1275e7f61ad"
+ }
+ ]
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input_list": {
+ "id": 13,
+ "output_name": "forward"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2132.4166870117188,
+ "top": 710.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "6b7925f4-4706-4983-8840-0323858b6978",
+ "workflow_outputs": []
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "input_list": {
+ "id": 13,
+ "output_name": "reverse"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Collapse Collection",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2103.4166870117188,
+ "top": 860.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_shed_repository": {
+ "changeset_revision": "830961c48e42",
+ "name": "collapse_collections",
+ "owner": "nml",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"one_header\": \"\\\"false\\\"\", \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\"}",
+ "tool_version": "4.2",
+ "type": "tool",
+ "uuid": "f53e47f8-969b-4948-b608-7fed0e95680d",
+ "workflow_outputs": []
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "in_file": {
+ "id": 16,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool seqtk_sample",
+ "name": "in_file"
+ }
+ ],
+ "label": null,
+ "name": "seqtk_sample",
+ "outputs": [
+ {
+ "name": "default",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2438.0833282470703,
+ "top": 688.6833190917969
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "tool_shed_repository": {
+ "changeset_revision": "58c8ece95b53",
+ "name": "seqtk",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"s\": \"\\\"4\\\"\", \"in_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"subsample_size\": \"\\\"0.1\\\"\", \"advanced\": \"{\\\"single_pass_mode\\\": \\\"false\\\"}\"}",
+ "tool_version": "1.3.2",
+ "type": "tool",
+ "uuid": "b4e40092-e076-4eb0-af30-c8c47885fefa",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "default",
+ "uuid": "cf3eb8ef-fce5-42af-a14d-60ff915fbff3"
+ }
+ ]
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "in_file": {
+ "id": 17,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool seqtk_sample",
+ "name": "in_file"
+ }
+ ],
+ "label": null,
+ "name": "seqtk_sample",
+ "outputs": [
+ {
+ "name": "default",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2413.0833129882812,
+ "top": 864.6833190917969
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seqtk/seqtk_sample/1.3.2",
+ "tool_shed_repository": {
+ "changeset_revision": "58c8ece95b53",
+ "name": "seqtk",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"s\": \"\\\"4\\\"\", \"in_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"subsample_size\": \"\\\"0.1\\\"\", \"advanced\": \"{\\\"single_pass_mode\\\": \\\"false\\\"}\"}",
+ "tool_version": "1.3.2",
+ "type": "tool",
+ "uuid": "1ef9ca9e-e447-4933-8871-c0e8e9a953c4",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "default",
+ "uuid": "bf722cb7-0b06-42ed-8fbc-9dc3896e9340"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/2.10.4+galaxy1",
+ "errors": null,
+ "id": 2,
+ "input_connections": {
+ "input|file_list": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": "Illumina data",
+ "name": "Faster Download and Extract Reads in FASTQ",
+ "outputs": [
+ {
+ "name": "list_paired",
+ "type": "input"
+ },
+ {
+ "name": "output_collection",
+ "type": "input"
+ },
+ {
+ "name": "output_collection_other",
+ "type": "input"
+ },
+ {
+ "name": "log",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 511.783447265625,
+ "top": 273.1666259765625
+ },
+ "post_job_actions": {
+ "HideDatasetActionlog": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log"
+ },
+ "HideDatasetActionoutput_collection": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_collection"
+ },
+ "HideDatasetActionoutput_collection_other": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_collection_other"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/2.10.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "aad3885b3216",
+ "name": "sra_tools",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"minlen\\\": \\\"\\\", \\\"skip_technical\\\": \\\"true\\\", \\\"split\\\": \\\"--split-3\\\"}\", \"input\": \"{\\\"__current_case__\\\": 2, \\\"file_list\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_select\\\": \\\"file_list\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}",
+ "tool_version": "2.10.4+galaxy1",
+ "type": "tool",
+ "uuid": "b097b5cb-44a8-4881-9ba0-a089ef1b4ad6",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "list_paired",
+ "uuid": "c6b4e7f2-5cf4-4051-afd1-a110381c4679"
+ }
+ ]
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "long": {
+ "id": 14,
+ "output_name": "output"
+ },
+ "paired_unpaired|fastq_input1": {
+ "id": 18,
+ "output_name": "default"
+ },
+ "paired_unpaired|fastq_input2": {
+ "id": 19,
+ "output_name": "default"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "lr_align"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "long"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "paired_unpaired"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "paired_unpaired"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "rotation"
+ }
+ ],
+ "label": null,
+ "name": "Create assemblies with Unicycler",
+ "outputs": [
+ {
+ "name": "assembly_graph",
+ "type": "tabular"
+ },
+ {
+ "name": "assembly",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 2673.4166717529297,
+ "top": 699.4166412353516
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "tool_shed_repository": {
+ "changeset_revision": "88c240872a65",
+ "name": "unicycler",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"linear_seqs\": \"\\\"1\\\"\", \"spades\": \"{\\\"depth_filter\\\": \\\"0.25\\\", \\\"kmer_count\\\": \\\"10\\\", \\\"kmers\\\": \\\"\\\", \\\"largest_component\\\": \\\"false\\\", \\\"max_kmer_frac\\\": \\\"0.95\\\", \\\"min_kmer_frac\\\": \\\"0.2\\\", \\\"no_correct\\\": \\\"false\\\"}\", \"graph_clean\": \"{\\\"min_component_size\\\": \\\"1000\\\", \\\"min_dead_end_size\\\": \\\"1000\\\"}\", \"__page__\": null, \"min_anchor_seg_len\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"lr_align\": \"{\\\"contamination\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"low_score\\\": \\\"\\\", \\\"scores\\\": \\\"3,-6,-5,-2\\\"}\", \"long\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"paired_unpaired\": \"{\\\"__current_case__\\\": 0, \\\"fastq_input1\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"fastq_input2\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"fastq_input_selector\\\": \\\"paired\\\"}\", \"min_fasta_length\": \"\\\"100\\\"\", \"mode\": \"\\\"normal\\\"\", \"rotation\": \"{\\\"no_rotate\\\": \\\"false\\\", \\\"start_gene_cov\\\": \\\"95.0\\\", \\\"start_gene_id\\\": \\\"90.0\\\", \\\"start_genes\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"pilon\": \"{\\\"min_polish_size\\\": \\\"1000\\\", \\\"no_pilon\\\": \\\"false\\\"}\"}",
+ "tool_version": "0.4.8.0",
+ "type": "tool",
+ "uuid": "851c6be6-c202-450a-9cd3-19e205dcc496",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "assembly",
+ "uuid": "17700863-7cc5-4a67-ac0f-aa5f7b258d62"
+ },
+ {
+ "label": null,
+ "output_name": "assembly_graph",
+ "uuid": "3285cc00-6b39-4f3d-87a5-87abd483ede6"
+ }
+ ]
+ },
+ "21": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_info/0.8.1+galaxy1",
+ "errors": null,
+ "id": 21,
+ "input_connections": {
+ "input_file": {
+ "id": 20,
+ "output_name": "assembly_graph"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bandage Info",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 2977.4166870117188,
+ "top": 803.4166412353516
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_info/0.8.1+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b2860df42e16",
+ "name": "bandage",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"tsv\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "0.8.1+galaxy1",
+ "type": "tool",
+ "uuid": "4aa230bd-3269-41c3-81f5-9085fcc4001d",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "cf5eab6a-97ae-430e-a27a-6ecc33d4b8c0"
+ }
+ ]
+ },
+ "22": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/0.8.1+galaxy2",
+ "errors": null,
+ "id": 22,
+ "input_connections": {
+ "input_file": {
+ "id": 20,
+ "output_name": "assembly_graph"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Bandage Image",
+ "name": "input_file"
+ }
+ ],
+ "label": null,
+ "name": "Bandage Image",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "jpg"
+ }
+ ],
+ "position": {
+ "left": 2975.4166870117188,
+ "top": 1017.4166412353516
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bandage/bandage_image/0.8.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "b2860df42e16",
+ "name": "bandage",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"input_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"lengths\": \"\\\"true\\\"\", \"output_format\": \"\\\"jpg\\\"\", \"height\": \"\\\"1000\\\"\", \"width\": \"\\\"\\\"\", \"names\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.8.1+galaxy2",
+ "type": "tool",
+ "uuid": "7683e21a-0fe1-4871-b044-7e74321fe44b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "12e0cc76-0374-4ce8-b882-f5f21a942ad1"
+ }
+ ]
+ },
+ "23": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_filter_by_length/fasta_filter_by_length/1.2",
+ "errors": null,
+ "id": 23,
+ "input_connections": {
+ "input": {
+ "id": 20,
+ "output_name": "assembly"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Filter sequences by length",
+ "name": "input"
+ }
+ ],
+ "label": null,
+ "name": "Filter sequences by length",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 2978.4166870117188,
+ "top": 1220.4166259765625
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_filter_by_length/fasta_filter_by_length/1.2",
+ "tool_shed_repository": {
+ "changeset_revision": "8cacfcf96a52",
+ "name": "fasta_filter_by_length",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"max_length\": \"\\\"0\\\"\", \"min_length\": \"\\\"1000\\\"\"}",
+ "tool_version": "1.2",
+ "type": "tool",
+ "uuid": "ef08963b-e96b-4841-b1fd-02056f6ab1f2",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "637c866b-dc17-4c32-aa1e-5207ae2c79a0"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/2.10.4+galaxy1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input|file_list": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": "ONT data",
+ "name": "Faster Download and Extract Reads in FASTQ",
+ "outputs": [
+ {
+ "name": "list_paired",
+ "type": "input"
+ },
+ {
+ "name": "output_collection",
+ "type": "input"
+ },
+ {
+ "name": "output_collection_other",
+ "type": "input"
+ },
+ {
+ "name": "log",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 516.13330078125,
+ "top": 540.7166442871094
+ },
+ "post_job_actions": {
+ "HideDatasetActionlist_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "list_paired"
+ },
+ "HideDatasetActionlog": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log"
+ },
+ "HideDatasetActionoutput_collection_other": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_collection_other"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/2.10.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "aad3885b3216",
+ "name": "sra_tools",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"minlen\\\": \\\"\\\", \\\"skip_technical\\\": \\\"true\\\", \\\"split\\\": \\\"--split-3\\\"}\", \"input\": \"{\\\"__current_case__\\\": 2, \\\"file_list\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_select\\\": \\\"file_list\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}",
+ "tool_version": "2.10.4+galaxy1",
+ "type": "tool",
+ "uuid": "2ea0a236-4ecc-43b4-a815-40f37bf1bd59",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_collection",
+ "uuid": "9cd44dd5-4375-4c34-9edc-804544192c34"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.19.5+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "single_paired|paired_input": {
+ "id": 2,
+ "output_name": "list_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool fastp",
+ "name": "single_paired"
+ }
+ ],
+ "label": "fastp: Trimmed Illumina Reads",
+ "name": "fastp",
+ "outputs": [
+ {
+ "name": "output_paired_coll",
+ "type": "input"
+ },
+ {
+ "name": "report_json",
+ "type": "json"
+ }
+ ],
+ "position": {
+ "left": 879.7833251953125,
+ "top": 365.04998779296875
+ },
+ "post_job_actions": {
+ "HideDatasetActionreport_json": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "report_json"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.19.5+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "1d8fe9bc4cb0",
+ "name": "fastp",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output_options\": \"{\\\"report_html\\\": \\\"false\\\", \\\"report_json\\\": \\\"true\\\"}\", \"single_paired\": \"{\\\"__current_case__\\\": 2, \\\"adapter_trimming_options\\\": {\\\"adapter_sequence1\\\": \\\"\\\", \\\"adapter_sequence2\\\": \\\"\\\", \\\"disable_adapter_trimming\\\": \\\"false\\\"}, \\\"global_trimming_options\\\": {\\\"trim_front1\\\": \\\"\\\", \\\"trim_front2\\\": \\\"\\\", \\\"trim_tail1\\\": \\\"\\\", \\\"trim_tail2\\\": \\\"\\\"}, \\\"paired_input\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"single_paired_selector\\\": \\\"paired_collection\\\"}\", \"read_mod_options\": \"{\\\"base_correction_options\\\": {\\\"correction\\\": \\\"false\\\"}, \\\"cutting_by_quality_options\\\": {\\\"cut_by_quality3\\\": \\\"false\\\", \\\"cut_by_quality5\\\": \\\"false\\\", \\\"cut_mean_quality\\\": \\\"\\\", \\\"cut_window_size\\\": \\\"\\\"}, \\\"polyg_tail_trimming\\\": {\\\"__current_case__\\\": 1, \\\"poly_g_min_len\\\": \\\"\\\", \\\"trimming_select\\\": \\\"\\\"}, \\\"polyx_tail_trimming\\\": {\\\"__current_case__\\\": 1, \\\"polyx_trimming_select\\\": \\\"\\\"}, \\\"umi_processing\\\": {\\\"umi\\\": \\\"false\\\", \\\"umi_len\\\": \\\"\\\", \\\"umi_loc\\\": \\\"\\\", \\\"umi_prefix\\\": \\\"\\\"}}\", \"overrepresented_sequence_analysis\": \"{\\\"overrepresentation_analysis\\\": \\\"false\\\", \\\"overrepresentation_sampling\\\": \\\"\\\"}\", \"filter_options\": \"{\\\"length_filtering_options\\\": {\\\"disable_length_filtering\\\": \\\"false\\\", \\\"length_required\\\": \\\"50\\\"}, \\\"low_complexity_filter\\\": {\\\"complexity_threshold\\\": \\\"\\\", \\\"enable_low_complexity_filter\\\": \\\"false\\\"}, \\\"quality_filtering_options\\\": {\\\"disable_quality_filtering\\\": \\\"false\\\", \\\"n_base_limit\\\": \\\"\\\", \\\"qualified_quality_phred\\\": \\\"20\\\", \\\"unqualified_percent_limit\\\": \\\"20\\\"}}\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.19.5+galaxy1",
+ "type": "tool",
+ "uuid": "7508c22b-dc6a-4071-8dd0-d5616718e62b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_paired_coll",
+ "uuid": "a7be7eab-3e80-4be6-b1a4-ab303c314096"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.25.0+galaxy1",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "mode|reads|files": {
+ "id": 3,
+ "output_name": "output_collection"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "NanoPlot",
+ "outputs": [
+ {
+ "name": "output_html",
+ "type": "html"
+ },
+ {
+ "name": "nanostats",
+ "type": "txt"
+ },
+ {
+ "name": "nanostats_post_filtering",
+ "type": "txt"
+ },
+ {
+ "name": "read_length",
+ "type": "png"
+ },
+ {
+ "name": "log_read_length",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 924.7833251953125,
+ "top": 650.4666442871094
+ },
+ "post_job_actions": {
+ "HideDatasetActionlog_read_length": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log_read_length"
+ },
+ "HideDatasetActionnanostats": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "nanostats"
+ },
+ "HideDatasetActionnanostats_post_filtering": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "nanostats_post_filtering"
+ },
+ "HideDatasetActionread_length": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "read_length"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.25.0+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "645159bcee2d",
+ "name": "nanoplot",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"filter\": \"{\\\"alength\\\": \\\"false\\\", \\\"barcoded\\\": \\\"false\\\", \\\"downsample\\\": \\\"\\\", \\\"drop_outliers\\\": \\\"false\\\", \\\"loglength\\\": \\\"true\\\", \\\"maxlength\\\": \\\"\\\", \\\"minlength\\\": \\\"\\\", \\\"minqual\\\": \\\"\\\", \\\"percentqual\\\": \\\"false\\\", \\\"readtype\\\": null}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 0, \\\"choice\\\": \\\"batch\\\", \\\"reads\\\": {\\\"__current_case__\\\": 0, \\\"files\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"type\\\": \\\"fastq\\\"}}\", \"customization\": \"{\\\"N50\\\": \\\"false\\\", \\\"color\\\": null, \\\"format\\\": \\\"png\\\", \\\"noN50\\\": \\\"false\\\", \\\"plots\\\": null}\"}",
+ "tool_version": "1.25.0+galaxy1",
+ "type": "tool",
+ "uuid": "9b1d43ac-82e3-463f-9c88-08c8826a62c9",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_html",
+ "uuid": "cd8570c7-02db-479b-81a9-cd0d26643f71"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.17+galaxy1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "fastq_input|fastq_input1": {
+ "id": 3,
+ "output_name": "output_collection"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Map with minimap2",
+ "outputs": [
+ {
+ "name": "alignment_output",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1003.9833984375,
+ "top": 1131.9666442871094
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.17+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "53c0b7a1a0c3",
+ "name": "minimap2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"alignment_options\": \"{\\\"-O2\\\": \\\"\\\", \\\"A\\\": \\\"\\\", \\\"B\\\": \\\"\\\", \\\"E\\\": \\\"\\\", \\\"E2\\\": \\\"\\\", \\\"O\\\": \\\"\\\", \\\"s\\\": \\\"\\\", \\\"u\\\": null, \\\"z\\\": \\\"\\\"}\", \"analysis_type_selector\": \"\\\"splice\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"io_options\": \"{\\\"K\\\": \\\"\\\", \\\"L\\\": \\\"false\\\", \\\"Q\\\": \\\"false\\\", \\\"Y\\\": \\\"false\\\", \\\"c\\\": \\\"false\\\", \\\"cs\\\": null, \\\"eqx\\\": \\\"false\\\", \\\"output_format\\\": \\\"BAM\\\"}\", \"mapping_options\": \"{\\\"F\\\": \\\"\\\", \\\"G\\\": \\\"\\\", \\\"N\\\": \\\"\\\", \\\"X\\\": \\\"false\\\", \\\"f\\\": \\\"\\\", \\\"g\\\": \\\"\\\", \\\"m\\\": \\\"\\\", \\\"min_occ_floor\\\": \\\"\\\", \\\"n\\\": \\\"\\\", \\\"p\\\": \\\"\\\", \\\"r\\\": \\\"\\\"}\", \"fastq_input\": \"{\\\"__current_case__\\\": 1, \\\"fastq_input1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"fastq_input_selector\\\": \\\"single\\\"}\", \"reference_source\": \"{\\\"__current_case__\\\": 0, \\\"ref_file\\\": \\\"hg38\\\", \\\"reference_source_selector\\\": \\\"cached\\\"}\", \"indexing_options\": \"{\\\"H\\\": \\\"false\\\", \\\"I\\\": \\\"\\\", \\\"k\\\": \\\"\\\", \\\"w\\\": \\\"\\\"}\"}",
+ "tool_version": "2.17+galaxy1",
+ "type": "tool",
+ "uuid": "098d2283-1218-489b-9882-8822e79d9444",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "alignment_output",
+ "uuid": "9972015d-4bdc-440f-b8b6-84e92a64ef8f"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "results_0|software_cond|input": {
+ "id": 4,
+ "output_name": "report_json"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MultiQC",
+ "outputs": [
+ {
+ "name": "stats",
+ "type": "input"
+ },
+ {
+ "name": "html_report",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 1152.4833984375,
+ "top": 200
+ },
+ "post_job_actions": {
+ "HideDatasetActionstats": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "stats"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "tool_shed_repository": {
+ "changeset_revision": "b2f1f75d49c4",
+ "name": "multiqc",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"title\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"results\": \"[{\\\"__index__\\\": 0, \\\"software_cond\\\": {\\\"__current_case__\\\": 7, \\\"input\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"software\\\": \\\"fastp\\\"}}]\", \"saveLog\": \"\\\"false\\\"\"}",
+ "tool_version": "1.7",
+ "type": "tool",
+ "uuid": "73566697-4a46-4d35-b8e2-61770acc62c0",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_report",
+ "uuid": "15f42a94-2c97-4932-8e3f-f26a72b58be4"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3+galaxy0",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "library|input_1": {
+ "id": 4,
+ "output_name": "output_paired_coll"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bowtie2",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "bam"
+ },
+ {
+ "name": "mapping_stats",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 1208.699951171875,
+ "top": 564.6333160400391
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "749c918495f7",
+ "name": "bowtie2",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"sam_options\": \"{\\\"__current_case__\\\": 1, \\\"sam_options_selector\\\": \\\"no\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"library\": \"{\\\"__current_case__\\\": 2, \\\"aligned_file\\\": \\\"false\\\", \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"paired_options\\\": {\\\"__current_case__\\\": 1, \\\"paired_options_selector\\\": \\\"no\\\"}, \\\"type\\\": \\\"paired_collection\\\", \\\"unaligned_file\\\": \\\"false\\\"}\", \"reference_genome\": \"{\\\"__current_case__\\\": 0, \\\"index\\\": \\\"hg38full\\\", \\\"source\\\": \\\"indexed\\\"}\", \"rg\": \"{\\\"__current_case__\\\": 3, \\\"rg_selector\\\": \\\"do_not_set\\\"}\", \"save_mapping_stats\": \"\\\"true\\\"\", \"analysis_type\": \"{\\\"__current_case__\\\": 0, \\\"analysis_type_selector\\\": \\\"simple\\\", \\\"presets\\\": \\\"no_presets\\\"}\"}",
+ "tool_version": "2.3.4.3+galaxy0",
+ "type": "tool",
+ "uuid": "43331d2c-7116-410e-a6bd-7e7520d93a52",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "eeae40f6-f1b2-4c56-9860-ac7b8eb1dec0"
+ },
+ {
+ "label": null,
+ "output_name": "mapping_stats",
+ "uuid": "c567226e-efaf-4188-bdec-3ba20d0733b0"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input": {
+ "id": 6,
+ "output_name": "alignment_output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools view",
+ "outputs": [
+ {
+ "name": "outputsam",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1249.4166259765625,
+ "top": 961.4166412353516
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutputsam": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outputsam"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.9+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "b01db2684fa5",
+ "name": "samtools_view",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"addref_cond\": \"{\\\"__current_case__\\\": 0, \\\"addref_select\\\": \\\"no\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"mode\": \"{\\\"__current_case__\\\": 1, \\\"filter_config\\\": {\\\"cigarcons\\\": \\\"\\\", \\\"cond_region\\\": {\\\"__current_case__\\\": 0, \\\"select_region\\\": \\\"no\\\"}, \\\"cond_rg\\\": {\\\"__current_case__\\\": 0, \\\"select_rg\\\": \\\"no\\\"}, \\\"exclusive_filter\\\": null, \\\"exclusive_filter_all\\\": null, \\\"inclusive_filter\\\": [\\\"4\\\"], \\\"library\\\": \\\"\\\", \\\"quality\\\": \\\"\\\"}, \\\"output_options\\\": {\\\"__current_case__\\\": 0, \\\"adv_output\\\": {\\\"collapsecigar\\\": \\\"false\\\", \\\"readtags\\\": []}, \\\"complementary_output\\\": \\\"false\\\", \\\"output_format\\\": {\\\"__current_case__\\\": 1, \\\"fmtopt\\\": \\\"-b\\\", \\\"oformat\\\": \\\"bam\\\"}, \\\"reads_report_type\\\": \\\"retained\\\"}, \\\"outtype\\\": \\\"selected_reads\\\", \\\"subsample_config\\\": {\\\"subsampling_mode\\\": {\\\"__current_case__\\\": 0, \\\"factor\\\": \\\"1.0\\\", \\\"seed\\\": \\\"\\\", \\\"select_subsample\\\": \\\"fraction\\\"}}}\", \"__page__\": null}",
+ "tool_version": "1.9+galaxy1",
+ "type": "tool",
+ "uuid": "22379db6-5fca-40fb-9ce9-d59a1336c679",
+ "workflow_outputs": []
+ }
+ },
+ "tags": [
+ "GTN",
+ "assembly"
+ ],
+ "uuid": "d3a0ed14-e8eb-4c2a-b76f-20af5e91c46b",
+ "version": 9
+}
diff --git a/topics/assembly/tutorials/assembly-with-preprocessing/workflows/index.md b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/assembly/tutorials/assembly-with-preprocessing/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/assembly/tutorials/general-introduction/workflows/assembly-general-introduction.ga b/topics/assembly/tutorials/general-introduction/workflows/assembly-general-introduction.ga
index af93fd8d312224..ba703195904cdf 100644
--- a/topics/assembly/tutorials/general-introduction/workflows/assembly-general-introduction.ga
+++ b/topics/assembly/tutorials/general-introduction/workflows/assembly-general-introduction.ga
@@ -1,203 +1,546 @@
{
- "a_galaxy_workflow": "true",
- "annotation": "Introduction to Genome Assembly",
- "format-version": "0.1",
- "name": "Assembly General Intro",
- "steps": {
- "0": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 0,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "mutant_R1.fastq"
+ "a_galaxy_workflow": "true",
+ "annotation": "tutorial",
+ "format-version": "0.1",
+ "name": "Intro to Genome Assembly",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "mutant_R1.fastq"
+ }
+ ],
+ "label": "mutant_R1.fastq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 241,
+ "top": 367.6875
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "ae357211-b491-4cab-98c2-11da142da53b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "6082dd9a-a97c-4f0e-a6b6-cf6d93bc1a56"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "mutant_R2.fastq"
+ }
+ ],
+ "label": "mutant_R2.fastq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 197,
+ "top": 523.203125
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "b614685b-d77b-4cb4-a1cf-745302274379",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "9476849b-1cd4-41b8-91b9-72acbe6927e2"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Genome file"
+ }
+ ],
+ "label": "Genome file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 984,
+ "top": 651.75
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "ee2978d6-4c6d-456d-9be0-66d0edb629e5",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "6463d497-a5ad-4475-ae92-186dfe76c6bd"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input_file": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 618.890625,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "3a5a44fd-e16b-4069-82f3-2cd7eabada8b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "22a51e6e-2e5f-4415-a127-e0b1c179a1b1"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "51ec8c05-eb47-4359-9120-0eca1a6d5c2f"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 621.625,
+ "top": 481.03125
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "7ac25d0a-b294-4c4b-8b24-b5ed56f2f758",
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_paired_end_interlacer/fastq_paired_end_interlacer/1.2.0.1+galaxy0",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "reads|input1_file": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "reads|input2_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FASTQ interlacer",
+ "outputs": [
+ {
+ "name": "outfile_pairs",
+ "type": "input"
+ },
+ {
+ "name": "outfile_singles",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 825
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_paired_end_interlacer/fastq_paired_end_interlacer/1.2.0.1+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "2ccb8dcabddc",
+ "name": "fastq_paired_end_interlacer",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"reads\": {\"input2_file\": {\"__class__\": \"ConnectedValue\"}, \"input1_file\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 0, \"reads_selector\": \"paired\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "af9b6352-c51e-4f0c-83f6-4f5b9272985a",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile_pairs",
+ "uuid": "225edf5f-3b90-4f84-a38a-5c786000a3fc"
+ },
+ {
+ "label": null,
+ "output_name": "outfile_singles",
+ "uuid": "dae90093-ae96-4bb2-824d-c92b61585096"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "results_0|software_cond|output_0|input": {
+ "id": 3,
+ "output_name": "text_file"
+ },
+ "results_0|software_cond|output_1|input": {
+ "id": 4,
+ "output_name": "text_file"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MultiQC",
+ "outputs": [
+ {
+ "name": "stats",
+ "type": "input"
+ },
+ {
+ "name": "html_report",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 963.890625,
+ "top": 340.6875
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "tool_shed_repository": {
+ "changeset_revision": "b2f1f75d49c4",
+ "name": "multiqc",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"comment\": \"\", \"__page__\": null, \"title\": \"\", \"__rerun_remap_job_id__\": null, \"results\": [{\"__index__\": 0, \"software_cond\": {\"output\": [{\"__index__\": 0, \"type\": \"data\", \"input\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"type\": \"data\", \"input\": {\"__class__\": \"ConnectedValue\"}}], \"__current_case__\": 8, \"software\": \"fastqc\"}}], \"saveLog\": \"false\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "95440a74-31d3-4b9c-8c8f-4e91fd54cf01",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "stats",
+ "uuid": "85e3704a-859d-4657-b3d7-9b67e4981d21"
+ },
+ {
+ "label": null,
+ "output_name": "html_report",
+ "uuid": "fab7e06a-604e-4432-b0ed-b63157a8643b"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/velvet/velveth/1.2.10.1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "inputs_0|input": {
+ "id": 5,
+ "output_name": "outfile_pairs"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "velveth",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "velvet"
+ }
+ ],
+ "position": {
+ "left": 763,
+ "top": 877
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/velvet/velveth/1.2.10.1",
+ "tool_shed_repository": {
+ "changeset_revision": "5da9a0e2fb2d",
+ "name": "velvet",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"hash_length\": \"29\", \"__rerun_remap_job_id__\": null, \"strand_specific\": \"false\", \"inputs\": [{\"__index__\": 0, \"read_type\": \"-shortPaired\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"file_format\": \"-fastq\"}]}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "f98f0453-b473-4407-82da-8348aff285dd",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "607a214d-9137-4e73-9b73-954c79678937"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/velvet/velvetg/1.2.10.1",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input": {
+ "id": 7,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "velvetg",
+ "outputs": [
+ {
+ "name": "stats",
+ "type": "tabular"
+ },
+ {
+ "name": "contigs",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1018,
+ "top": 882
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/velvet/velvetg/1.2.10.1",
+ "tool_shed_repository": {
+ "changeset_revision": "5da9a0e2fb2d",
+ "name": "velvet",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"last_graph\": {\"generate_graph\": \"no\", \"__current_case__\": 0}, \"generate_amos\": {\"afg\": \"no\", \"__current_case__\": 0}, \"reads\": {\"paired\": \"yes\", \"options\": {\"__current_case__\": 0, \"advanced\": \"no\"}, \"__current_case__\": 1, \"ins_length\": \"-1\"}, \"unused_reads\": {\"generate_unused\": \"no\", \"__current_case__\": 0}, \"coverage\": {\"cutoff\": \"none\", \"__current_case__\": 0}, \"expected\": {\"__current_case__\": 0, \"coverage\": \"none\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"read_trkg\": \"false\", \"contig_lgth\": {\"__current_case__\": 0, \"use_contig_lgth\": \"no\"}}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "c34bb33c-0e8a-4698-a370-f8d8f96f0921",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "contigs",
+ "uuid": "b0cf3779-6199-42c9-963f-a733559f90bb"
+ },
+ {
+ "label": null,
+ "output_name": "stats",
+ "uuid": "54926701-ccae-4143-b649-b7e08e1c6a8e"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy0",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "assembly|ref|r": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "in|inputs": {
+ "id": 8,
+ "output_name": "contigs"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Quast",
+ "outputs": [
+ {
+ "name": "quast_tabular",
+ "type": "tabular"
+ },
+ {
+ "name": "report_html",
+ "type": "html"
+ },
+ {
+ "name": "report_pdf",
+ "type": "pdf"
+ },
+ {
+ "name": "log",
+ "type": "txt"
+ },
+ {
+ "name": "mis_ass",
+ "type": "tabular"
+ },
+ {
+ "name": "unalign",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 1390.5,
+ "top": 594.25
+ },
+ "post_job_actions": {
+ "HideDatasetActionlog": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log"
+ },
+ "HideDatasetActionmis_ass": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "mis_ass"
+ },
+ "HideDatasetActionquast_tabular": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "quast_tabular"
+ },
+ "HideDatasetActionunalign": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "unalign"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "59db8ea8c845",
+ "name": "quast",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"scaffold_gap_max_size\": \"1000\", \"skip_unaligned_mis_contigs\": \"true\", \"assembly\": {\"ref\": {\"operons\": {\"__class__\": \"RuntimeValue\"}, \"r\": {\"__class__\": \"ConnectedValue\"}, \"use_ref\": \"true\", \"features\": {\"__class__\": \"RuntimeValue\"}, \"__current_case__\": 0}, \"orga_type\": \"\", \"type\": \"genome\", \"__current_case__\": 0}, \"circos\": \"false\", \"contig_thresholds\": \"0,1000\", \"__page__\": null, \"strict_NA\": \"false\", \"__rerun_remap_job_id__\": null, \"genes\": {\"rna_finding\": \"false\", \"gene_finding\": {\"tool\": \"none\", \"__current_case__\": 0}, \"conserved_genes_finding\": \"false\"}, \"unaligned_part_size\": \"500\", \"al\": {\"upper_bound_assembly\": \"false\", \"min_identity\": \"95.0\", \"ambiguity_score\": \"0.99\", \"ambiguity_usage\": \"one\", \"upper_bound_min_con\": \"2\", \"fragmented\": \"false\", \"fragmented_max_indent\": \"50\", \"use_all_alignments\": \"false\", \"min_alignment\": \"65\"}, \"extensive_mis_size\": \"1000\", \"large\": \"false\", \"in\": {\"inputs\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"custom\": \"false\"}, \"k_mer\": {\"k_mer_stats\": \"\", \"__current_case__\": 1}, \"split_scaffolds\": \"false\", \"min_contig\": \"500\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "80922e53-cb63-4bba-8fd6-d53e48b17327",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "report_html",
+ "uuid": "0babe959-4d63-4838-9e18-4526b3dd0db5"
+ },
+ {
+ "label": null,
+ "output_name": "report_pdf",
+ "uuid": "f41c43c4-763c-42bc-b36e-9ea76e7752c2"
+ }
+ ]
}
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 10
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"mutant_R1.fastq\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "d8f71ebc-9556-4abf-b3a4-31f42fa703fc",
- "workflow_outputs": []
- },
- "1": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 1,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "mutant_R2.fastq"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 130
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"mutant_R2.fastq\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "a33fa5e6-b342-40d3-9dcc-d74939ccf70f",
- "workflow_outputs": []
- },
- "2": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69",
- "errors": null,
- "id": 2,
- "input_connections": {
- "input_file": {
- "id": 0,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.69",
- "tool_shed_repository": {
- "changeset_revision": "9337dd1fbc66",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"contaminants\": \"null\", \"chromInfo\": \"\\\"/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
- "tool_version": "0.69",
- "type": "tool",
- "uuid": "cba4f7f3-6e0d-4b7f-902c-dce13b1a7b62",
- "workflow_outputs": []
- },
- "3": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/simon-gladman/velvetoptimiser/velvet/0.1.0",
- "errors": null,
- "id": 3,
- "input_connections": {
- "files_0|paired_type|input1": {
- "id": 0,
- "output_name": "output"
- },
- "files_0|paired_type|input2": {
- "id": 1,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "velvet",
- "outputs": [
- {
- "name": "contigs",
- "type": "fasta"
- },
- {
- "name": "stats",
- "type": "tabular"
- },
- {
- "name": "graph",
- "type": "tabular"
- },
- {
- "name": "log",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 230,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/simon-gladman/velvetoptimiser/velvet/0.1.0",
- "tool_shed_repository": {
- "changeset_revision": "d81360ea69d8",
- "name": "velvetoptimiser",
- "owner": "simon-gladman",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"files\": \"[{\\\"__index__\\\": 0, \\\"filetype\\\": \\\"fastq\\\", \\\"paired_type\\\": {\\\"input2\\\": null, \\\"paired_type_selector\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 0}}]\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"kmer\": \"\\\"29\\\"\"}",
- "tool_version": "0.1.0",
- "type": "tool",
- "uuid": "8b992d49-f94d-47a5-8030-51d31fcc9ffc",
- "workflow_outputs": []
- },
- "4": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/simon-gladman/fasta_stats/fasta-stats/1.0.0",
- "errors": null,
- "id": 4,
- "input_connections": {
- "dataset": {
- "id": 3,
- "output_name": "contigs"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Fasta Statistics",
- "outputs": [
- {
- "name": "stats",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 450,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/simon-gladman/fasta_stats/fasta-stats/1.0.0",
- "tool_shed_repository": {
- "changeset_revision": "20ca2574216a",
- "name": "fasta_stats",
- "owner": "simon-gladman",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dataset\": \"null\"}",
- "tool_version": "1.0.0",
- "type": "tool",
- "uuid": "ca342af6-786b-4a91-9f81-13ed8e6f9a72",
- "workflow_outputs": [
- {
- "output_name": "stats",
- "label": "fasta_stats_tabular"
- }
- ]
- }
- },
- "uuid": "89e97456-31e7-40b2-b16e-f94e3c93c407",
- "tags": [
- "assembly"
- ]
+ },
+ "tags": [
+ "assembly"
+ ],
+ "uuid": "d631b74f-8a83-4f06-97a2-d37a34eb3736",
+ "version": 3
}
\ No newline at end of file
diff --git a/topics/assembly/tutorials/unicycler-assembly/workflows/unicycler.ga b/topics/assembly/tutorials/unicycler-assembly/workflows/unicycler.ga
index ae87df392ba4ca..5536985e0b0dd9 100644
--- a/topics/assembly/tutorials/unicycler-assembly/workflows/unicycler.ga
+++ b/topics/assembly/tutorials/unicycler-assembly/workflows/unicycler.ga
@@ -1,438 +1,590 @@
{
- "uuid": "2fa1768f-116d-4695-83c5-6a07eff7be33",
- "tags": [
- "assembly"
- ],
- "format-version": "0.1",
- "name": "Unicycler Training",
- "steps": {
- "0": {
- "tool_id": null,
- "tool_version": null,
- "outputs": [],
- "workflow_outputs": [],
- "input_connections": {},
- "tool_state": "{\"name\": \"forward_reads\"}",
- "id": 0,
- "uuid": "1e8ef78c-1cb2-4376-9fc5-3f9d47acfe69",
- "errors": null,
- "name": "Input dataset",
- "label": "Forward reads",
- "inputs": [
- {
- "name": "forward_reads",
- "description": ""
+ "a_galaxy_workflow": "true",
+ "annotation": "Unicycler Assembly",
+ "format-version": "0.1",
+ "name": "Unicycler training",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Forward reads"
+ }
+ ],
+ "label": "Forward reads",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 230.140625,
+ "top": 404.015625
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "5152d41f-3523-496e-afa2-8d4b0af89b2a",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "e14afa81-ce6c-4577-b17e-90ef2b75c885"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Reverse Reads"
+ }
+ ],
+ "label": "Reverse Reads",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 187.359375,
+ "top": 656.765625
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "25209075-9248-4e70-9396-87e6e28995e1",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "4cade8d2-87ae-4bae-8795-b1f64c5c3cab"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Long Reads"
+ }
+ ],
+ "label": "Long Reads",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 186.65625,
+ "top": 803.734375
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "c1cfa6a5-2c17-4cd7-b582-ce85ff552b67",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "bf7b5353-30b6-471b-91b7-5fd2143bf981"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input_file": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 415.015625,
+ "top": 199.984375
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "d0e16801-e8f6-4889-9502-c57aa6b1a436",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "3c8f9841-bf87-4c4b-9af2-115a0b84f13f"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "5c63bd29-f9c1-423b-ad6e-4b944e32c6ac"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 415.390625,
+ "top": 410.765625
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "1f890a7b-848b-40e0-987b-26450c9f5203",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "0dee124c-9984-4c90-9a76-353ffbcea854"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "f80ae006-d3ee-4794-bfe1-b656aea05823"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "long": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "paired_unpaired|fastq_input1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "paired_unpaired|fastq_input2": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "lr_align"
+ },
+ {
+ "description": "runtime parameter for tool Create assemblies with Unicycler",
+ "name": "rotation"
+ }
+ ],
+ "label": null,
+ "name": "Create assemblies with Unicycler",
+ "outputs": [
+ {
+ "name": "assembly_graph",
+ "type": "tabular"
+ },
+ {
+ "name": "assembly",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 397.15625,
+ "top": 636.984375
+ },
+ "post_job_actions": {
+ "HideDatasetActionassembly_graph": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "assembly_graph"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0",
+ "tool_shed_repository": {
+ "changeset_revision": "88c240872a65",
+ "name": "unicycler",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"linear_seqs\": \"0\", \"spades\": {\"no_correct\": \"false\", \"kmer_count\": \"10\", \"depth_filter\": \"0.25\", \"kmers\": \"\", \"min_kmer_frac\": \"0.2\", \"max_kmer_frac\": \"0.95\", \"largest_component\": \"false\"}, \"graph_clean\": {\"min_dead_end_size\": \"1000\", \"min_component_size\": \"1000\"}, \"__page__\": null, \"min_anchor_seg_len\": \"\", \"__rerun_remap_job_id__\": null, \"lr_align\": {\"low_score\": \"\", \"scores\": \"\", \"contamination\": {\"__class__\": \"RuntimeValue\"}}, \"long\": {\"__class__\": \"ConnectedValue\"}, \"paired_unpaired\": {\"fastq_input2\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 0, \"fastq_input_selector\": \"paired\", \"fastq_input1\": {\"__class__\": \"ConnectedValue\"}}, \"min_fasta_length\": \"100\", \"mode\": \"normal\", \"rotation\": {\"start_genes\": {\"__class__\": \"RuntimeValue\"}, \"start_gene_cov\": \"95.0\", \"no_rotate\": \"false\", \"start_gene_id\": \"90.0\"}, \"pilon\": {\"no_pilon\": \"false\", \"min_polish_size\": \"1000\"}}",
+ "tool_version": "0.4.8.0",
+ "type": "tool",
+ "uuid": "98050cb0-ca52-4a9d-a1c3-cd301f36356c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "assembly",
+ "uuid": "c35c6740-11f7-4ca3-8b93-a1fe502b8719"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "results_0|software_cond|output_0|input": {
+ "id": 3,
+ "output_name": "text_file"
+ },
+ "results_0|software_cond|output_1|input": {
+ "id": 4,
+ "output_name": "text_file"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MultiQC",
+ "outputs": [
+ {
+ "name": "stats",
+ "type": "input"
+ },
+ {
+ "name": "html_report",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 650.71875,
+ "top": 251.3125
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.7",
+ "tool_shed_repository": {
+ "changeset_revision": "b2f1f75d49c4",
+ "name": "multiqc",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"comment\": \"\", \"__page__\": null, \"title\": \"\", \"__rerun_remap_job_id__\": null, \"results\": [{\"__index__\": 0, \"software_cond\": {\"output\": [{\"__index__\": 0, \"type\": \"data\", \"input\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"type\": \"data\", \"input\": {\"__class__\": \"ConnectedValue\"}}], \"__current_case__\": 8, \"software\": \"fastqc\"}}], \"saveLog\": \"false\"}",
+ "tool_version": "1.7",
+ "type": "tool",
+ "uuid": "3277000f-cee0-4279-80d3-0b266694cce2",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "stats",
+ "uuid": "3bfec2db-625d-4aa5-9016-e16fe27bf466"
+ },
+ {
+ "label": null,
+ "output_name": "html_report",
+ "uuid": "6f40035d-5c8d-4e18-82ea-0093237d250f"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy0",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "in|inputs": {
+ "id": 5,
+ "output_name": "assembly"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Quast",
+ "outputs": [
+ {
+ "name": "quast_tabular",
+ "type": "tabular"
+ },
+ {
+ "name": "report_html",
+ "type": "html"
+ },
+ {
+ "name": "report_pdf",
+ "type": "pdf"
+ },
+ {
+ "name": "log",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 648.875,
+ "top": 416.5
+ },
+ "post_job_actions": {
+ "HideDatasetActionlog": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "log"
+ },
+ "HideDatasetActionquast_tabular": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "quast_tabular"
+ },
+ "HideDatasetActionreport_pdf": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "report_pdf"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "59db8ea8c845",
+ "name": "quast",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"scaffold_gap_max_size\": \"1000\", \"skip_unaligned_mis_contigs\": \"true\", \"assembly\": {\"ref\": {\"use_ref\": \"false\", \"est_ref_size\": \"\", \"__current_case__\": 1}, \"orga_type\": \"\", \"type\": \"genome\", \"__current_case__\": 0}, \"circos\": \"false\", \"contig_thresholds\": \"0,1000\", \"__page__\": null, \"strict_NA\": \"false\", \"__rerun_remap_job_id__\": null, \"genes\": {\"rna_finding\": \"false\", \"gene_finding\": {\"tool\": \"none\", \"__current_case__\": 0}, \"conserved_genes_finding\": \"false\"}, \"unaligned_part_size\": \"500\", \"al\": {\"upper_bound_assembly\": \"false\", \"min_identity\": \"95.0\", \"ambiguity_score\": \"0.99\", \"ambiguity_usage\": \"one\", \"upper_bound_min_con\": \"2\", \"fragmented\": \"false\", \"fragmented_max_indent\": \"50\", \"use_all_alignments\": \"false\", \"min_alignment\": \"65\"}, \"extensive_mis_size\": \"1000\", \"large\": \"false\", \"in\": {\"inputs\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"custom\": \"false\"}, \"k_mer\": {\"k_mer_stats\": \"\", \"__current_case__\": 1}, \"split_scaffolds\": \"false\", \"min_contig\": \"500\"}",
+ "tool_version": "5.0.2+galaxy0",
+ "type": "tool",
+ "uuid": "1bb7b9fa-3e76-48b6-aa08-1169710715c5",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "quast_tsv",
+ "uuid": "22d4aad1-6145-4209-8261-8dadf4fd4c02"
+ },
+ {
+ "label": null,
+ "output_name": "report_html",
+ "uuid": "0de88512-45db-4372-9457-94f136a77267"
+ },
+ {
+ "label": null,
+ "output_name": "icarus",
+ "uuid": "84990237-682a-4800-9ce3-dfeb83c7042d"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.14.5",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input": {
+ "id": 5,
+ "output_name": "assembly"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Prokka",
+ "name": "proteins"
+ }
+ ],
+ "label": null,
+ "name": "Prokka",
+ "outputs": [
+ {
+ "name": "out_gff",
+ "type": "gff"
+ },
+ {
+ "name": "out_gbk",
+ "type": "txt"
+ },
+ {
+ "name": "out_fna",
+ "type": "fasta"
+ },
+ {
+ "name": "out_faa",
+ "type": "fasta"
+ },
+ {
+ "name": "out_ffn",
+ "type": "fasta"
+ },
+ {
+ "name": "out_sqn",
+ "type": "asn1"
+ },
+ {
+ "name": "out_fsa",
+ "type": "fasta"
+ },
+ {
+ "name": "out_tbl",
+ "type": "txt"
+ },
+ {
+ "name": "out_err",
+ "type": "txt"
+ },
+ {
+ "name": "out_txt",
+ "type": "txt"
+ },
+ {
+ "name": "out_log",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 647.125,
+ "top": 603.75
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.14.5",
+ "tool_shed_repository": {
+ "changeset_revision": "bf68eb663bc3",
+ "name": "prokka",
+ "owner": "crs4",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"strain\": \"C-1\", \"increment\": \"10\", \"species\": \"Coli\", \"__page__\": null, \"usegenus\": \"true\", \"evalue\": \"1e-06\", \"locustag\": \"PROKKA\", \"fast\": \"false\", \"compliant\": {\"mincontig\": \"200\", \"__current_case__\": 0, \"compliant_select\": \"no\", \"addgenes\": \"false\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"kingdom\": {\"gcode\": \"11\", \"kingdom_select\": \"Bacteria\", \"__current_case__\": 1}, \"metagenome\": \"false\", \"centre\": \"\", \"outputs\": [\"gff\", \"gbk\", \"fna\", \"faa\", \"ffn\", \"sqn\", \"fsa\", \"tbl\", \"err\", \"txt\"], \"gffver\": \"3\", \"norrna\": \"false\", \"proteins\": {\"__class__\": \"RuntimeValue\"}, \"notrna\": \"false\", \"plasmid\": \"\", \"rfam\": \"false\", \"genus\": \"Escherichia\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.14.5",
+ "type": "tool",
+ "uuid": "dd9a67df-68a8-423b-90d2-2167393949e6",
+ "workflow_outputs": [
+ {
+ "label": "tbl",
+ "output_name": "out_tbl",
+ "uuid": "0d6b33c9-b105-4997-9045-b86e52f00156"
+ },
+ {
+ "label": null,
+ "output_name": "out_txt",
+ "uuid": "4b7ef57d-3075-41cf-bc75-a5fc4aa8c942"
+ },
+ {
+ "label": "gbk",
+ "output_name": "out_gbk",
+ "uuid": "8e08b54d-87e1-4269-9b96-8ebf1a9bb4ab"
+ },
+ {
+ "label": null,
+ "output_name": "out_err",
+ "uuid": "d2cd11ae-12f9-4123-bfde-119fa409da73"
+ },
+ {
+ "label": "ffn",
+ "output_name": "out_ffn",
+ "uuid": "65e4ddf9-f5bc-4078-8b4e-37b332e12ece"
+ },
+ {
+ "label": null,
+ "output_name": "out_fsa",
+ "uuid": "14a0d804-36f3-488f-a85d-4d18ff95d56f"
+ },
+ {
+ "label": null,
+ "output_name": "out_fna",
+ "uuid": "d494a946-ff3c-4900-a3f5-573ff1540ea3"
+ },
+ {
+ "label": null,
+ "output_name": "out_log",
+ "uuid": "80ed53bf-7d76-4de2-85b5-7dc487922e0e"
+ },
+ {
+ "label": null,
+ "output_name": "out_faa",
+ "uuid": "249632ad-2412-4010-a703-aa8faeecc538"
+ },
+ {
+ "label": null,
+ "output_name": "out_sqn",
+ "uuid": "175956d0-42a8-4270-b976-3eefef7384ab"
+ },
+ {
+ "label": null,
+ "output_name": "out_gff",
+ "uuid": "0e2ffd13-e194-4693-9adf-54e561fd24e5"
+ }
+ ]
}
- ],
- "position": {
- "top": 346,
- "left": 160.98333740234375
- },
- "annotation": "",
- "content_id": null,
- "type": "data_input"
- },
- "1": {
- "tool_id": null,
- "tool_version": null,
- "outputs": [],
- "workflow_outputs": [],
- "input_connections": {},
- "tool_state": "{\"name\": \"reverse_reads\"}",
- "id": 1,
- "uuid": "251d7a96-2b0d-4b8c-bf34-d24b65b32b41",
- "errors": null,
- "name": "Input dataset",
- "label": "Reverse Reads",
- "inputs": [
- {
- "name": "reverse_reads",
- "description": ""
- }
- ],
- "position": {
- "top": 683,
- "left": 103.9666748046875
- },
- "annotation": "",
- "content_id": null,
- "type": "data_input"
- },
- "2": {
- "tool_id": null,
- "tool_version": null,
- "outputs": [],
- "workflow_outputs": [],
- "input_connections": {},
- "tool_state": "{\"name\": \"long_reads\"}",
- "id": 2,
- "uuid": "80a96186-0c6f-448e-a8cd-11f18e04629d",
- "errors": null,
- "name": "Input dataset",
- "label": "Long Reads",
- "inputs": [
- {
- "name": "long_reads",
- "description": ""
- }
- ],
- "position": {
- "top": 878.9833374023438,
- "left": 103
- },
- "annotation": "",
- "content_id": null,
- "type": "data_input"
- },
- "3": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.71",
- "tool_version": "0.71",
- "outputs": [
- {
- "type": "html",
- "name": "html_file"
- },
- {
- "type": "txt",
- "name": "text_file"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input_file": {
- "output_name": "output",
- "id": 0
- }
- },
- "tool_state": "{\"__page__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}",
- "id": 3,
- "tool_shed_repository": {
- "owner": "devteam",
- "changeset_revision": "ff9530579d1f",
- "name": "fastqc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "ca4cf888-bf14-4cc9-9bc1-202b3ed818d0",
- "errors": null,
- "name": "FastQC",
- "post_job_actions": {},
- "label": null,
- "inputs": [
- {
- "name": "contaminants",
- "description": "runtime parameter for tool FastQC"
- },
- {
- "name": "limits",
- "description": "runtime parameter for tool FastQC"
- },
- {
- "name": "input_file",
- "description": "runtime parameter for tool FastQC"
- }
- ],
- "position": {
- "top": 271.31666564941406,
- "left": 684.8333129882812
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.71",
- "type": "tool"
- },
- "4": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.71",
- "tool_version": "0.71",
- "outputs": [
- {
- "type": "html",
- "name": "html_file"
- },
- {
- "type": "txt",
- "name": "text_file"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input_file": {
- "output_name": "output",
- "id": 1
- }
- },
- "tool_state": "{\"__page__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}",
- "id": 4,
- "tool_shed_repository": {
- "owner": "devteam",
- "changeset_revision": "ff9530579d1f",
- "name": "fastqc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "988ebf23-99af-4b76-a476-28b58f505f61",
- "errors": null,
- "name": "FastQC",
- "post_job_actions": {},
- "label": null,
- "inputs": [
- {
- "name": "contaminants",
- "description": "runtime parameter for tool FastQC"
- },
- {
- "name": "limits",
- "description": "runtime parameter for tool FastQC"
- },
- {
- "name": "input_file",
- "description": "runtime parameter for tool FastQC"
- }
- ],
- "position": {
- "top": 603,
- "left": 339.98333740234375
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.71",
- "type": "tool"
- },
- "5": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.2.0",
- "tool_version": "0.2.0",
- "outputs": [
- {
- "type": "txt",
- "name": "assembly_grapth"
- },
- {
- "type": "fasta",
- "name": "assembly"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "paired_unpaired|fastq_input2": {
- "output_name": "output",
- "id": 1
- },
- "paired_unpaired|fastq_input1": {
- "output_name": "output",
- "id": 0
- },
- "long_reads": {
- "output_name": "output",
- "id": 2
- }
- },
- "tool_state": "{\"__page__\": null, \"spades\": \"{\\\"min_kmer_frac\\\": \\\"\\\", \\\"max_kmer_frac\\\": \\\"\\\", \\\"kmer_count\\\": \\\"\\\"}\", \"graph_clean\": \"{\\\"min_dead_end_size\\\": \\\"\\\", \\\"min_component_size\\\": \\\"\\\"}\", \"uc_opt\": \"{\\\"no_correct\\\": \\\"false\\\", \\\"min_fasta_length\\\": \\\"\\\", \\\"mode\\\": \\\"normal\\\", \\\"no_pilon\\\": \\\"false\\\", \\\"no_rotate\\\": \\\"false\\\", \\\"lin_seq\\\": \\\"\\\"}\", \"__rerun_remap_job_id__\": null, \"lr_align\": \"{\\\"low_score\\\": \\\"\\\", \\\"scores\\\": \\\"\\\", \\\"contamination_fasta\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"paired_unpaired\": \"{\\\"fastq_input2\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"__current_case__\\\": 0, \\\"fastq_input_selector\\\": \\\"paired\\\", \\\"fastq_input1\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"__job_resource\": \"{\\\"__current_case__\\\": 0, \\\"__job_resource__select\\\": \\\"no\\\"}\", \"rotation\": \"{\\\"start_gene_id\\\": \\\"\\\", \\\"start_gene_cov\\\": \\\"\\\", \\\"start_genes\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"chromInfo\": \"\\\"/galaxy-repl/localdata/chrom/?.len\\\"\", \"long_reads\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"pilon\": \"{\\\"min_polish_size\\\": \\\"\\\"}\"}",
- "id": 5,
- "tool_shed_repository": {
- "owner": "iuc",
- "changeset_revision": "e9c1cdb9f9dc",
- "name": "unicycler",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "f469e49a-5729-4dbd-8f08-f5be3fca6191",
- "errors": null,
- "name": "Create assemblies with Unicycler",
- "post_job_actions": {},
- "label": null,
- "inputs": [
- {
- "name": "lr_align",
- "description": "runtime parameter for tool Create assemblies with Unicycler"
- },
- {
- "name": "paired_unpaired",
- "description": "runtime parameter for tool Create assemblies with Unicycler"
- },
- {
- "name": "paired_unpaired",
- "description": "runtime parameter for tool Create assemblies with Unicycler"
- },
- {
- "name": "rotation",
- "description": "runtime parameter for tool Create assemblies with Unicycler"
- },
- {
- "name": "long_reads",
- "description": "runtime parameter for tool Create assemblies with Unicycler"
- }
- ],
- "position": {
- "top": 609.9833374023438,
- "left": 725
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.2.0",
- "type": "tool"
- },
- "6": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/4.1.1",
- "tool_version": "4.1.1",
- "outputs": [
- {
- "type": "txt",
- "name": "quast_out"
- },
- {
- "type": "tsv",
- "name": "quast_tsv"
- },
- {
- "type": "tex",
- "name": "quast_tex"
- },
- {
- "type": "html",
- "name": "icarus"
- },
- {
- "type": "html",
- "name": "report_html"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "files_0|input": {
- "output_name": "assembly",
- "id": 5
- }
- },
- "tool_state": "{\"files\": \"[{\\\"type_file\\\": \\\"scaffold\\\", \\\"__index__\\\": 0, \\\"input\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}]\", \"input_operon\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__page__\": null, \"threshold_contig\": \"\\\"0,1000\\\"\", \"__rerun_remap_job_id__\": null, \"annot\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_size\": \"\\\"\\\"\", \"input_ref\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"chromInfo\": \"\\\"/galaxy-repl/localdata/chrom/?.len\\\"\", \"min_contig\": \"\\\"500\\\"\", \"gene_selection\": \"\\\"prokaryotes\\\"\"}",
- "id": 6,
- "tool_shed_repository": {
- "owner": "iuc",
- "changeset_revision": "2f581f956e1c",
- "name": "quast",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "f20468eb-8292-4273-ac9e-b0d9c09b9e2e",
- "errors": null,
- "name": "Quast",
- "post_job_actions": {},
- "label": null,
- "inputs": [
- {
- "name": "input_operon",
- "description": "runtime parameter for tool Quast"
- },
- {
- "name": "annot",
- "description": "runtime parameter for tool Quast"
- },
- {
- "name": "input_ref",
- "description": "runtime parameter for tool Quast"
- }
- ],
- "position": {
- "top": 200,
- "left": 1145.9833984375
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/4.1.1",
- "type": "tool"
- },
- "7": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.12.0",
- "tool_version": "1.12.0",
- "outputs": [
- {
- "type": "gff",
- "name": "out_gff"
- },
- {
- "type": "txt",
- "name": "out_gbk"
- },
- {
- "type": "fasta",
- "name": "out_fna"
- },
- {
- "type": "fasta",
- "name": "out_faa"
- },
- {
- "type": "fasta",
- "name": "out_ffn"
- },
- {
- "type": "asn1",
- "name": "out_sqn"
- },
- {
- "type": "fasta",
- "name": "out_fsa"
- },
- {
- "type": "txt",
- "name": "out_tbl"
- },
- {
- "type": "txt",
- "name": "out_err"
- },
- {
- "type": "txt",
- "name": "out_txt"
- },
- {
- "type": "txt",
- "name": "out_log"
- }
- ],
- "workflow_outputs": [
- {
- "output_name": "out_ffn",
- "label": "ffn"
- },
- {
- "output_name": "out_gbk",
- "label": "gbk"
- },
- {
- "output_name": "out_tbl",
- "label": "tbl"
- }
- ],
- "input_connections": {
- "input": {
- "output_name": "assembly",
- "id": 5
- }
- },
- "tool_state": "{\"strain\": \"\\\"C\\\"\", \"increment\": \"\\\"10\\\"\", \"species\": \"\\\"Coli\\\"\", \"__page__\": null, \"usegenus\": \"\\\"true\\\"\", \"evalue\": \"\\\"1e-06\\\"\", \"compliant\": \"{\\\"mincontig\\\": \\\"200\\\", \\\"__current_case__\\\": 0, \\\"compliant_select\\\": \\\"no\\\", \\\"addgenes\\\": \\\"false\\\"}\", \"fast\": \"\\\"false\\\"\", \"__job_resource\": \"{\\\"__current_case__\\\": 0, \\\"__job_resource__select\\\": \\\"no\\\"}\", \"locustag\": \"\\\"PROKKA\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"kingdom\": \"{\\\"gcode\\\": \\\"11\\\", \\\"kingdom_select\\\": \\\"Bacteria\\\", \\\"__current_case__\\\": 1}\", \"metagenome\": \"\\\"false\\\"\", \"centre\": \"\\\"\\\"\", \"outputs\": \"[\\\"gff\\\", \\\"gbk\\\", \\\"fna\\\", \\\"faa\\\", \\\"ffn\\\", \\\"sqn\\\", \\\"fsa\\\", \\\"tbl\\\", \\\"err\\\", \\\"txt\\\"]\", \"gffver\": \"\\\"3\\\"\", \"norrna\": \"\\\"false\\\"\", \"proteins\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"notrna\": \"\\\"false\\\"\", \"plasmid\": \"\\\"\\\"\", \"rfam\": \"\\\"false\\\"\", \"genus\": \"\\\"Escherichia\\\"\", \"chromInfo\": \"\\\"/galaxy-repl/localdata/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "id": 7,
- "tool_shed_repository": {
- "owner": "crs4",
- "changeset_revision": "a17498c603ec",
- "name": "prokka",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "9a6e8f35-351c-42c4-bbe0-9a9da64ae1a8",
- "errors": null,
- "name": "Prokka",
- "post_job_actions": {},
- "label": null,
- "inputs": [
- {
- "name": "input",
- "description": "runtime parameter for tool Prokka"
- },
- {
- "name": "proteins",
- "description": "runtime parameter for tool Prokka"
- }
- ],
- "position": {
- "top": 555,
- "left": 1100.9833984375
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.12.0",
- "type": "tool"
- }
- },
- "annotation": "Unicycler Assembly",
- "a_galaxy_workflow": "true"
-}
\ No newline at end of file
+ },
+ "tags": [
+ "assembly"
+ ],
+ "uuid": "732443e4-0655-4fde-b0a7-770d97f608f5",
+ "version": 1
+}
diff --git a/topics/climate/images/climate_stripes_temperature_Paris.png b/topics/climate/images/climate_stripes_temperature_Paris.png
new file mode 100644
index 00000000000000..96a0e7be8c4198
Binary files /dev/null and b/topics/climate/images/climate_stripes_temperature_Paris.png differ
diff --git a/topics/climate/images/rh_ecv_july_1980_2018.png b/topics/climate/images/rh_ecv_july_1980_2018.png
new file mode 100644
index 00000000000000..07ddc156a3b59a
Binary files /dev/null and b/topics/climate/images/rh_ecv_july_1980_2018.png differ
diff --git a/topics/climate/images/weather_versus_climate.png b/topics/climate/images/weather_versus_climate.png
new file mode 100755
index 00000000000000..33f668949f1cec
Binary files /dev/null and b/topics/climate/images/weather_versus_climate.png differ
diff --git a/topics/climate/images/yearly_mean_temperature_Paris.png b/topics/climate/images/yearly_mean_temperature_Paris.png
new file mode 100644
index 00000000000000..995ba0354673e4
Binary files /dev/null and b/topics/climate/images/yearly_mean_temperature_Paris.png differ
diff --git a/topics/climate/metadata.yaml b/topics/climate/metadata.yaml
index 5b8888a8bbdac0..274a48bcbd475e 100644
--- a/topics/climate/metadata.yaml
+++ b/topics/climate/metadata.yaml
@@ -1,6 +1,5 @@
---
name: climate
-enable: false
type: use
title: Climate
summary: Learn to analyze climate data through Galaxy.
diff --git a/topics/climate/tutorials/climate-101/tutorial.md b/topics/climate/tutorials/climate-101/tutorial.md
new file mode 100644
index 00000000000000..eed1ca0972a3d6
--- /dev/null
+++ b/topics/climate/tutorials/climate-101/tutorial.md
@@ -0,0 +1,411 @@
+---
+layout: tutorial_hands_on
+title: Getting your hands-on climate data
+zenodo_link: 'https://doi.org/10.5281/zenodo.3776499'
+questions:
+- What is climate?
+- What type of data is available?
+objectives:
+- Learn about the terminology
+- Learn about the different sources of climate data
+- Learn about climate observations, reanalysis, climate predictions and climate projections
+time_estimation: 1H
+key_points:
+- Weather versus Climate
+- Essential Climate Variables
+- Observations, reanalysis, predictions and projections.
+contributors:
+- annefou
+
+---
+
+
+# Introduction
+{:.no_toc}
+
+> ### {% icon comment %} Comment
+>
+> This tutorial is significantly based on [Getting your hands-on Climate data](https://nordicesmhub.github.io/climate-data-tutorial/).
+>
+{: .comment}
+
+The practical aims at familiarizing you with Climate Science and the terminology used by climate scientists. The target audience is not a climate scientist but
+anyone interested in learning about climate.
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+> ### {% icon comment %} Background
+>
+> [European Copernicus Climate Change Service (C3S)](https://climate.copernicus.eu/) provides authoritative information about the past, present
+> and future climate. C3S is one of the many services provided by Copernicus, the European Union's Earth Observation Programme, looking
+> at our planet and its environment for the ultimate benefit of all European citizens.
+> The C3S [Climate Data Store (CDS)](https://cds.climate.copernicus.eu/#!/home) provides a single point of access to a wide range of
+> quality-assured climate datasets distributed in the cloud.
+> Access to the CDS data is open, free and unrestricted.
+> We will be using freely available datasets from the CDS, including
+> observations, historical climate data records, estimates of Essential Climate Variables (ECVs) derived from Earth observations,
+> global and regional climate reanalyses of past observations, seasonal forecasts and climate projections.
+{: .comment}
+
+For the purpose of this tutorial, sample datasets have been created from data downloaded from [C3S](https://climate.copernicus.eu/) through
+[Copernicus Climate Data Store](https://cds.climate.copernicus.eu/#!/home):
+- [E-OBS daily gridded meteorological data for Europe from 1950 to present derived from in-situ observations](https://cds.climate.copernicus.eu/cdsapp#!/dataset/insitu-gridded-observations-europe?tab=overview)
+- [Essential climate variables for assessment of climate variability from 1979 to present](https://cds.climate.copernicus.eu/cdsapp#!/dataset/ecv-for-climate-change?tab=overview)
+
+To reduce the volume of data, the data resolution (in space and/or time) has been significantly reduced and/or data has been selected on sample locations (Paris, Oslo and
+Freiburg). The data format may also have been changed (for instance to tabular) to ease processing.
+
+## Get data
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial. If you are not inspired, you can name it *climate101*.
+> {% include snippets/create_new_history.md %}
+> 2. Import the files from [Zenodo](https://doi.org/10.5281/zenodo.3776499) or from the shared data library
+>
+> ```
+> https://zenodo.org/record/3776500/files/tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv
+> https://zenodo.org/record/3776500/files/ts_cities.csv
+> ```
+>
+> {% include snippets/import_via_link.md %}
+> {% include snippets/import_from_data_library.md %}
+>
+> 3. Check that the datatype is **csv**
+>
+> {% include snippets/change_datatype.md datatype="csv" %}
+>
+> 4. Add a tag to the dataset corresponding to `copernicus`
+>
+> {% include snippets/add_tag.md %}
+>
+{: .hands_on}
+
+# What is climate?
+
+According to [wikipedia](https://en.wikipedia.org/wiki/Climate),
+Climate is defined as the average state of everyday weather conditions **over a period of 30 years**. It is measured by assessing
+the patterns of variation in temperature, humidity, atmospheric pressure, wind, precipitation, atmospheric particle count and
+other meteorological variables in a given region over a long period of time (usually 20 or 30 years).
+Climate differs from weather, in that weather only describes the short-term conditions of these variables in a given region.
+
+
+## Climate versus Weather
+
+Quantities that climate scientists are interested in are similar to those used to assess the weather (temperature, precipitation, etc.).
+But there is a big difference between climate and weather: **weather** varies from hour to hour and from day to day whereas **climate**
+is defined as the average of weather over several decades or longer.
+
+The figure below shows a woman walking her dog and we can use it to make an analogy to illustrate the difference between weather and climate.
+If you focus your attention on the dog, you can see that it is all over the place, sometimes upwards, sometimes downwards: this can represent the weather and its
+variability. The dog (weather) is not following a fully random pattern and varies around a main direction (trend) that is given by the woman: the woman is representing
+the climate and gives us an indication of where both the woman and dog are likely to be in the future.
+
+ ![Illustrate the difference between weather and climate](../../images/weather_versus_climate.png "Weather versus Climate")
+
+*Source: [Animated short introduction to statistics in climate research](https://youtu.be/e0vj-0imOLw) from Norwegian infotainment program Siffer. Produced by TeddyTV for NRK. Animation by Ole Christoffer Haga*
+
+
+You can also watch an animated illustration of the difference between climate and weather:
+
+
+
+
+# What is the weather like in Paris?
+
+In order to answer this question, we are going to inspect and visualize the dataset `tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv` using simple galaxy tools.
+
+> ### {% icon hands_on %} Hands-on: Daily temperature time series
+> > ### {% icon comment %} Tip: search for the tool
+> >
+> > Many different tools can be used to answer the questions. Here we give you some guidelines to help you choose.
+> > Use the **tools search box** at the top of the tool panel to find **Select lines that match an expression** {% icon tool %} and **Datamash** {% icon tool %}.
+> {: .tip}
+>
+> > ### {% icon question %} Questions
+> >
+> > 1. What was the average temperature in Paris on the 14th of July 2003?
+> > 2. What are the minimum and maximum temperatures in Paris?
+> > 3. On which date did the minimum temperature occur?
+> > 4. On which date did the maximum temperature occur?
+> >
+> > > ### {% icon solution %} Solution
+> > > 1. The average temperature in Paris on the 14th of July 2003 was 26.73 degrees Celsius. It can be found by using **Select lines that match an expression** {% icon tool %} with parameter **"the pattern"** set to 2003-07-14.
+> > > 2. The minimum temperature in Paris is -11.6799995 degrees Celsius and the maximum temperature in Paris is 33.579998 degrees Celsius. To find out, you can use **Datamash** {% icon tool %} with the following parameters:
+> > > - {% icon param-file %} *"Input tabular dataset"*: `tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv`
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `Yes`
+> > > - "Print all fields from input file": `No`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `minimum`
+> > > - *"On column"*: `c2`
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `maximum`
+> > > - *"On column"*: `c2`
+> > >
+> > > 3. The minimum temperature (-11.6799995 degrees Celsius) was observed on January 16 1985.
+> > > You can use different Galaxy tools to find out the solution and here we show you how to use **Datamash** {% icon tool %} with the following parameters:
+> > > - {% icon param-file %} *"Input tabular dataset"*: `tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv`
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `Yes`
+> > > - "Print all fields from input file": `Yes`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `minimum`
+> > > - *"On column"*: `c2`
+> > >
+> > > 4. The maximum temperature (33.579998 degrees Celsius) was observed on July 25 2019. For the **maximum**, repeat **Datamash** {% icon tool %} with the following parameters:
+> > >
+> > > - {% icon param-file %} *"Input tabular dataset"*: `tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv`
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `Yes`
+> > > - "Print all fields from input file": `Yes`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `maximum`
+> > > - *"On column"*: `c2`
+> > {: .solution}
+> {: .question}
+>
+{: .hands_on}
+
+# What is the climate in Paris?
+
+To get some information about the (past and current) climate in Paris, we will first look at monthly averages.
+
+## Seasonality
+
+> ### {% icon hands_on %} Hands-on: What is the monthly climatological temperature in Paris?
+>
+> To answer this question, we will compute the global average temperatures over the entire period from 1950 to 2019 for each month (January, February, etc.). Indeed,
+> this period of time is sufficiently long for computing monthly climatological temperature (more than 30 years).
+> > ### {% icon question %} Questions
+> >
+> > 1. What is the warmest summer month, i.e. among June, July and August (JJA), in Paris?
+> > 2. What is the coolest winter month, i.e. among December, January and February (DJF), in Paris?
+> >
+> > > ### {% icon solution %} Solution
+> > > 1. The warmest summer month in Paris is July (19.921018171429 degrees Celsius). However, it is interesting to note that in our dataset we see very little difference in the mean temperature between July and August.
+> > > 2. The coolest winter month in Paris is January (4.4669169722484 degrees Celsius).
+> > >
+> > > Below, we show you how we found these results.
+> > > We will first split all the dates (first column) from YYYY-MM-DD (where YYYY is the year, MM the month and DD the day) into three columns: one for the year, one for the month and one for the day. Use **Text reformatting with awk** {% icon tool %} with parameters:
+> > > - **File to process**: `tg_ens_mean_0.1deg_reg_v20.0e_Paris_daily.csv`
+> > > - **AWK Program**: `gsub(/-/,"\t",$1){$1=$1} {print}`
+> > >
+> > > Rename the resulting file to `split_dates_Paris.csv`.
+> > >
+> > > Then use **Datamash** {% icon tool %} with the following parameters:
+> > > - {% icon param-file %} *"Input tabular dataset"*: `split_dates_Paris.csv`
+> > > - *"Group by fields"*: 2
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `No`
+> > > - "Print all fields from input file": `No`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `Mean`
+> > > - *"On column"*: `c4`
+> > >
+> > > Rename the resulting file to `climatology_Paris.csv`.
+> > > Then use again **Datamash** to get the month where the minimum and maximum temperatures are found:
+> > > - {% icon param-file %} *"Input tabular dataset"*: `climatology_Paris.csv`
+> > > - *"Group by fields"*:
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `No`
+> > > - "Print all fields from input file": `Yes`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `minimum`
+> > > - *"On column"*: `c2`
+> > >
+> > > Look at the resulting file and the first field will give you the month (01 i.e. January) where the minimum temperature is found.
+> > >
+> > > For the **maximum**, repeat **Datamash** {% icon tool %} with the following parameters:
+> > > - {% icon param-file %} *"Input tabular dataset"*: `climatology_Paris.csv`
+> > > - *"Group by fields"*: ``
+> > > - *"Input file has a header line"*: `Yes`
+> > > - *"Print header line"*: `No`
+> > > - "Print all fields from input file": `Yes`
+> > > - In *"Operation to perform on each group"*:
+> > > - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> > > - *"Type"*: `maximum`
+> > > - *"On column"*: `c2`
+> > >
+> > > The result is in the first column of the resulting file which indicates `07` i.e. July.
+> > >
+> > > Please note that you may use other Galaxy tools to reach the same results.
+> > > Results can be slightly different when using different sources of climate information. However, you will always observe the same pattern, i.e. cool months in winter and warm months in summer. We can also clearly see that Paris has a mild climate with, on average, no extreme temperatures.
+> > {: .solution}
+> {: .question}
+>
+{: .hands_on}
+
+> ### {% icon tip %} Tip: Using existing climatologies
+>
+> In this tutorial, we manually compute the monthly climatological temperatures to explain the algorithm behind them.
+> However, many data providers have pre-computed climatologies that can be directly downloaded. For instance, on the [CDS](https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset), climatologies are provided for [Essential climate variables for assessment of climate variability from 1979 to present](https://cds.climate.copernicus.eu/cdsapp#!/dataset/ecv-for-climate-change?tab=overview).
+{: .tip}
+
+
+## Yearly average
+
+> ### {% icon hands_on %} Hands-on: What is the trend (cooling/warming) in the climate for Paris between 1950 and 2019?
+>
+> To answer this question, we will compute the yearly mean of the temperature in Paris and visualize it.
+>
+> 1. Use **Datamash** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: `split_dates_Paris.csv`
+> - *"Group by fields"*: 1
+> - *"Input file has a header line"*: `Yes`
+> - *"Print header line"*: `No`
+> - "Print all fields from input file": `No`
+> - *"Sort input"*: `Yes`
+> - In *"Operation to perform on each group"*:
+> - {% icon param-repeat %} *"Insert Operation to perform on each group"*
+> - *"Type"*: `Mean`
+> - *"On column"*: `c4`
+>
+> 2. Rename the resulting file to `yearly_mean_Paris.csv`.
+>
+> 3. To make a plot, you can use **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - *"Input in tabular format"*: `yearly_mean_Paris.csv`
+> - *"Column to plot on x-axis"*: 1
+> - *"Column to plot on y-axis"*: 2
+> - *"Plot title"*: Yearly mean temperature in Paris from 1950 to 2019
+> - *"Label for x axis"*: Year (YYYY)
+> - *"Label for y axis"*: Temperature (degrees celcius)
+> - And finally in `Advanced Options` change `Type of plot` to **Points and Lines**.
+>
+> 4. **View** {% icon galaxy-eye%} the resulting plot:
+>
+> ![Yearly mean temperature in Paris](../../images/yearly_mean_temperature_Paris.png)
+>
+> > ### {% icon question %} Questions
+> >
+> > Can we easily observe a trend?
+> >
+> > > ### {% icon solution %} Solution
+> > >
+> > > The plot clearly shows a slight increase in the yearly mean temperature between 1950 and 2019. Even though it looks no more than a few degrees Celsius, it is
+> > > quite significant.
+> > >
+> > {: .solution}
+> {: .question}
+>
+{: .hands_on}
+
+## Anomalies
+
+In climate change studies, temperature **anomalies** are more important than **absolute** temperature. A temperature anomaly is the difference from an average, or baseline,
+temperature. The baseline temperature is typically computed by averaging 30 or more years of temperature data. A *positive anomaly* indicates the observed temperature was *warmer* than the baseline, while a *negative anomaly* indicates the observed temperature was *cooler* than the baseline.
+
+> ### {% icon hands_on %} Hands-on: Climate stripes for Paris
+> Computing temperature anomalies is out of scope of this tutorial and we will therefore use pre-computed temperature anomalies `ts_cities.csv`.
+> A simple way to visualize anomalies and highlight cooling/warming over the years, is to use **climate stripes from timeseries** {% icon tool %} with the following parameters:
+> - *"timeseries to plot"*: `ts_cities.csv`
+> - *"column name to use for plotting"*: `tg_anomalies_paris`
+> - *"plot title"*: `Climate stripes for Paris (1950-2019)`
+>
+> **View** {% icon galaxy-eye%} the resulting plot:
+>
+> ![Climate stripes in Paris](../../images/climate_stripes_temperature_Paris.png)
+>
+> > ### {% icon question %} Question: do you observe a warming or cooling between 1950 and 2019?
+> >
+> > > ### {% icon solution %} Solution
+> > >
+> > > The climate stripes clearly show a warming between 1950 and 2019.
+> > {: .solution}
+> {: .question}
+>
+{: .hands_on}
+
+> ### {% icon tip %} Tip: Copernicus Climate Bulletin
+>
+> [Copernicus Climate Bulletins](https://climate.copernicus.eu/climate-bulletins) present the current condition of the climate using key climate change indicators.
+> They also provide data, analysis of the maps and guidance on how they are produced. Datasets for temperature anomalies can be found and are
+> regularly updated (with recent dates). For instance, in March 2020, the corresponding dataset can be found [here](https://climate.copernicus.eu/sites/default/files/2020-04/ts_1month_anomaly_Global_ea_2t_202003_v01.csv).
+>
+{: .tip}
+
+# Climate variables
+
+*Temperature* is often the first variable that comes to mind when we talk about climate. However, it is insufficient to fully characterize the climate, and scientists have agreed on a number of variables to systematically observe Earth's changing climate.
+
+That is what we call *Essential Climate Variables*.
+
+## Essential Climate Variables
+
+The [Global Climate Observing System](https://gcos.wmo.int/) (GCOS) and its GCOS expert panels maintain definitions of [Essential Climate Variables](https://gcos.wmo.int/en/essential-climate-variables) (ECVs).
+
+GCOS is co-sponsored by the [World Meteorological Organization](https://public.wmo.int/en) (WMO), the [Intergovernmental Oceanographic Commission of the United Nations Educational, Scientific and Cultural Organization](http://www.ioc-unesco.org/) (IOC-UNESCO), the [United Nations Environment Programme](https://www.unenvironment.org/) (UN Environment), and the [International Science Council](https://council.science/) (ISC). It regularly assesses the status of global climate observations of the atmosphere, land and ocean and produces guidance for its improvement.
+
+At the moment, there are [54 ECVs](https://gcos.wmo.int/en/essential-climate-variables).
+
+*Source: [https://gcos.wmo.int/en/essential-climate-variables](https://gcos.wmo.int/en/essential-climate-variables)*
+
+> ### {% icon hands_on %} Hands-on: Essential Climate Variables
+>
+> We will look at the [Water Vapor Essential Climate Variable ](https://gcos.wmo.int/en/essential-climate-variables/surface-vapour/):
+> *The humidity of air near the surface of the Earth affects the comfort and health of humans, livestock and wildlife, the swarming behaviour of insects and the occurrence of plant disease. The humidity of air near the surface affects evaporation and the strength of the hydrological and energy cycles. Evaporation from the surface of the earth is the source of water in the atmosphere and so is responsible for important feedbacks in the climate system due to clouds and radiation.*
+>
+> 1. **Copernicus Essential Climate Variables** {% icon tool %} with the following parameters:
+> - *"Variable(s)"*: surface_air_relative_humidity
+> - *"Select type of data"*: Monthly mean
+> - *"Select year(s)"*: `1980` and `2018`
+> - *"Select month"*: `July`
+> Rename the resulting file to `rh_mean_july_1980_2018.nc`
+> 2. **map plot gridded (lat/lon) netCDF data** {% icon tool %} with the following parameters:
+> - *"input with geographical coordinates (netCDF format)"*: `rh_mean_july_1980_2018.nc`
+> - *"variable name as given in the netCDF file"*: `tp`
+> - And finally in `Advanced Options` change:
+> - *"multiple times"*: `Yes`
+> - *"comma separated list of indexes for fields to plot"*: 0,1
+> - *"number of rows for subplot grid"*: 2
+> - *"subplot title (repeated on each subplot)"*: relative humidity in percentage
+> - *"colormap"*: PiYG
+> 3. **View** {% icon galaxy-eye%} the resulting plot:
+>
+> ![Relative humidity in July 1980 and July 2018](../../images/rh_ecv_july_1980_2018.png)
+>
+>
+> > ### {% icon question %} Relative humidity
+> >
+> > 1. Do you observe any significant changes in relative humidity in France between July 1980 and July 2018?
+> > 2. Do we have sufficient information to make any conclusions on the change in climate?
+> >
+> > > ### {% icon solution %} Solution
+> > > 1. We can see significant changes on the plot over France. The relative humidity of air near the surface of the Earth is lower in July 2018 than in July 1980.
+> > > 2. We do not have sufficient information to draw any conclusions about the change in climate. In our analysis, we only used two different months (July 1980 and July 2018) and can only discuss the average changes in weather during these two periods (July 1980 and July 2018). We learnt that to draw any conclusions on the climate, we would need to make statistics over a long period of time e.g. we would need to download about 30 years of data and for instance compute anomalies in relative humidity to check if there is any trend. These aspects will be discussed further in other Galaxy tutorials.
+> > >
+> > {: .solution}
+> {: .question}
+>
+{: .hands_on}
+
+
+## Past, present and future climate?
+
+
+When we talk about climate data, the type of data can vary significantly. We have very few actual observations at the scale of climate, and they usually do not cover a large area. In addition to observations, we can make use of:
+- Re-analyses where observations and numerical modelling are combined together.
+- Climate models.
+
+Observations and re-analyses provide information about the past and current climate while climate models can provide past, current and future climate information.
+When it comes to future climate, we usually need to make some assumptions (such as how much CO2 emissions, etc.) and simulate different scenarios e.g. we run climate models using different assumptions and look at future trends under each of these scenarios: this is what we call **climate projections**. Climate projections will be discussed in a separate Galaxy tutorial.
+
+
+# Conclusion
+{:.no_toc}
+
+
+We have learnt to differentiate climate from weather and got an overview of the terminology used by climate scientists to identify the
+various sources of climate data.
diff --git a/topics/climate/tutorials/panoply/tutorial.md b/topics/climate/tutorials/panoply/tutorial.md
index 4712c1aae25dd3..97a51f9989fccd 100644
--- a/topics/climate/tutorials/panoply/tutorial.md
+++ b/topics/climate/tutorials/panoply/tutorial.md
@@ -1,6 +1,5 @@
---
layout: tutorial_hands_on
-enable: false
title: Visualize Climate data with Panoply netCDF viewer
zenodo_link: 'https://doi.org/10.5281/zenodo.3695482'
questions:
@@ -84,9 +83,17 @@ In this tutorial, we will be using data from the [Copernicus Climate Data Store]
>
> 3. Check that the datatype is **netcdf**
>
+> The files you uploaded are in netcdf format. In Galaxy, datatypes are, by default, automatically guessed. Here, as netcdf is a derivative of the h5 format, Galaxy automatically assigns the h5 datatype to netcdf files. To cope with that, you can change the datatype manually once the datasets are uploaded (as shown below), or you can directly specify the datatype in the upload tool form so that Galaxy will not try to guess it automatically.
+>
> {% include snippets/change_datatype.md datatype="datatypes" %}
>
-> 4. Add a tag to the dataset corresponding to `copernicus`
+> 4. Rename Datasets
+>
+> As "https://zenodo.org/record/3697454/files/ecv_1979.nc" is not a beautiful name and can give errors for some tools, it is good practice to change the dataset name to something more meaningful, for example by removing `https://zenodo.org/record/3697454/files/` to obtain `ecv_1979.nc` and `ecv_2018.nc`, respectively.
+>
+> {% include snippets/rename_dataset.md %}
+>
+> 5. Add a tag to the dataset corresponding to `copernicus`
>
> {% include snippets/add_tag.md %}
>
@@ -101,7 +108,7 @@ In this tutorial, we will be using data from the [Copernicus Climate Data Store]
> Panoply is available as a Galaxy interactive environment and may not be available on all Galaxy servers.
>
> > ### {% icon tip %} Tip: Launch Panoply in Galaxy
-> > Currently Panoply in Galaxy is available on [Live.useGalaxy.eu](https://live.usegalaxy.eu)
+> > Currently, Panoply in Galaxy is available on the useGalaxy.eu instance, in the "Interactive tools" section of the tool panel or, like all interactive tools, from the dedicated useGalaxy.eu subdomain: [Live.useGalaxy.eu](https://live.usegalaxy.eu)
> >
> > 1. Open the Panoply tool {% icon tool %} by clicking [here](https://live.usegalaxy.eu/?tool_id=interactive_tool_panoply){:target="_blank"}
> > 2. Check **ecv_1979.nc** dataset selected in the netcdf input field
@@ -118,11 +125,13 @@ In this tutorial, we will be using data from the [Copernicus Climate Data Store]
> ### {% icon hands_on %} Hands-on: Inspect dataset
>
-> 1. Inspect the sea ice area fraction (**siconc**)
+> 1. Inspect dataset content
+>
+> Here you can look at the dataset (ecv_1979.nc) and related variables (`latitude`, `longitude`, `number`, `siconc`, etc)
>
> > ### {% icon question %} Question
> >
-> > what is the unit of this variable?
+> > What is the unit of the `sea ice area fraction` variable (**siconc**)?
> >
> > > ### {% icon solution %} Solution
> > >
diff --git a/topics/computational-chemistry/images/empot.png b/topics/computational-chemistry/images/empot.png
new file mode 100644
index 00000000000000..88ddd05bc48091
Binary files /dev/null and b/topics/computational-chemistry/images/empot.png differ
diff --git a/topics/computational-chemistry/images/hsp90lig.png b/topics/computational-chemistry/images/hsp90lig.png
new file mode 100644
index 00000000000000..66786f2060de84
Binary files /dev/null and b/topics/computational-chemistry/images/hsp90lig.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_pc1_hsp90.gif b/topics/computational-chemistry/images/htmd_analysis_pc1_hsp90.gif
new file mode 100644
index 00000000000000..7821e9e390a229
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_pc1_hsp90.gif differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_pca.png b/topics/computational-chemistry/images/htmd_analysis_pca.png
new file mode 100644
index 00000000000000..604ff65df502fb
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_pca.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_rmsd1_histo.png b/topics/computational-chemistry/images/htmd_analysis_rmsd1_histo.png
new file mode 100644
index 00000000000000..02470a68acc13d
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_rmsd1_histo.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_rmsd1_series.png b/topics/computational-chemistry/images/htmd_analysis_rmsd1_series.png
new file mode 100644
index 00000000000000..9003b5d66fcca4
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_rmsd1_series.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_rmsd2_histo.png b/topics/computational-chemistry/images/htmd_analysis_rmsd2_histo.png
new file mode 100644
index 00000000000000..9f6e681ddb3e2a
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_rmsd2_histo.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_rmsd2_series.png b/topics/computational-chemistry/images/htmd_analysis_rmsd2_series.png
new file mode 100644
index 00000000000000..92f89e928795d2
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_rmsd2_series.png differ
diff --git a/topics/computational-chemistry/images/htmd_analysis_rmsf.png b/topics/computational-chemistry/images/htmd_analysis_rmsf.png
new file mode 100644
index 00000000000000..0a8b9ca7103f65
Binary files /dev/null and b/topics/computational-chemistry/images/htmd_analysis_rmsf.png differ
diff --git a/topics/computational-chemistry/images/mpro.png b/topics/computational-chemistry/images/mpro.png
new file mode 100644
index 00000000000000..509cb6ce180402
Binary files /dev/null and b/topics/computational-chemistry/images/mpro.png differ
diff --git a/topics/computational-chemistry/tutorials/cheminformatics/tutorial.md b/topics/computational-chemistry/tutorials/cheminformatics/tutorial.md
index 3a77807aa006db..e16421c09656b6 100644
--- a/topics/computational-chemistry/tutorials/cheminformatics/tutorial.md
+++ b/topics/computational-chemistry/tutorials/cheminformatics/tutorial.md
@@ -163,6 +163,13 @@ We will generate our compound library by searching ChEMBL for compounds which ha
There are some other tools available, which will not be used in this tutorial, which help to develop a more focused compound library. For example, the 'Natural product likeness calculator' and 'Drug-likeness' tools assign a score to compounds based on how similar they are to typical natural products and drugs respectively, which could then be used to filter the library. If you are interested, you can try testing them out on the library just generated.
+> ### {% icon tip %} Tip: Generating a compound library
+> If you try using this tutorial using your own data, you might encounter some issues. Important things to remember:
+> * If you encounter an error, check the SMILES file only has a single column. Additional columns can be removed using the 'Cut' tool.
+> * If the output file is empty, it may be that the ChEMBL database doesn't have any compounds similar to the input. Consider lowering the Tanimoto coefficient to 70 if this is the case and removing filters (including the Lipinski RO5 filter). If this doesn't help, you will have to use another source of chemical data (e.g. PubChem).
+> * Finally, please remember this step is totally optional if you already have a list of compounds for docking (in SMILES or another format). In this case you can upload them to Galaxy and continue with the next step.
+{: .tip}
+
> ### {% icon details %} What are SMILES and SDF formats?
>
> SMILES and SD-files both represent chemical structures. A SMILES file represents the 2D structure of a molecule as a chemical graph. In other words, it states only the atoms and the connectivity between them. An example of a SMILES string (taken from the ligand in the PDB file) is `c1c2OCCOc2ccc1c1c(C)[nH]nc1c1cc(CC)c(O)cc1O`. For more information on how the notation works, please consult the [OpenSMILES specification](http://opensmiles.org/opensmiles.html) or the description provided by [Wikipedia](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system). A more comprehensive alternative to the SMILES system is the International Chemical Identifier (InChI).
diff --git a/topics/computational-chemistry/tutorials/cheminformatics/_workflows/_main_workflow._ga b/topics/computational-chemistry/tutorials/cheminformatics/workflows/main_workflow.ga
similarity index 100%
rename from topics/computational-chemistry/tutorials/cheminformatics/_workflows/_main_workflow._ga
rename to topics/computational-chemistry/tutorials/cheminformatics/workflows/main_workflow.ga
diff --git a/topics/computational-chemistry/tutorials/covid19-docking/tutorial.bib b/topics/computational-chemistry/tutorials/covid19-docking/tutorial.bib
new file mode 100644
index 00000000000000..aeeaacd61ee518
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/covid19-docking/tutorial.bib
@@ -0,0 +1,103 @@
+
+# This is the bibliography file for your tutorial.
+#
+# To add bibliography (bibtex) entries here, follow these steps:
+# 1) Find the DOI for the article you want to cite
+# 2) Go to https://doi2bib.org and fill in the DOI
+# 3) Copy the resulting bibtex entry into this file
+#
+# To cite the example below, in your tutorial.md file
+# use {% cite Batut2018 %}
+
+@article{Batut2018,
+ doi = {10.1016/j.cels.2018.05.012},
+ url = {https://doi.org/10.1016/j.cels.2018.05.012},
+ year = {2018},
+ month = jun,
+ publisher = {Elsevier {BV}},
+ volume = {6},
+ number = {6},
+ pages = {752--758.e1},
+ author = {B{\'{e}}r{\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and
+ Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\'{e}}guen and Martin {\v{C}}ech and John Chilton
+ and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and
+ Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\`{e}}re and
+ Gildas Le Corguill{\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\'{i}}rez and Devon Ryan and
+ Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and
+ Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\"{o}rn Gr\"{u}ning},
+ title = {Community-Driven Data Analysis Training for Biology},
+ journal = {Cell Systems}
+}
+
+@article{OBoyle2011,
+ doi = {10.1186/1758-2946-3-33},
+ url = {https://doi.org/10.1186/1758-2946-3-33},
+ year = {2011},
+ month = oct,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {3},
+ number = {1},
+ author = {Noel M O{\textquotesingle}Boyle and Michael Banck and Craig A James and Chris Morley and Tim Vandermeersch and Geoffrey R Hutchison},
+ title = {Open Babel: An open chemical toolbox},
+ journal = {Journal of Cheminformatics}
+}
+
+@article{rdock,
+ author = {Ruiz-Carmona, Sergio AND Alvarez-Garcia, Daniel AND Foloppe, Nicolas AND Garmendia-Doval, A. Beatriz AND Juhos, Szilveszter AND Schmidtke, Peter AND Barril, Xavier AND Hubbard, Roderick E. AND Morley, S. David},
+ journal = {PLOS Computational Biology},
+ publisher = {Public Library of Science},
+ title = {rDock: A Fast, Versatile and Open Source Program for Docking Ligands to Proteins and Nucleic Acids},
+ year = {2014},
+ month = {04},
+ volume = {10},
+ url = {https://doi.org/10.1371/journal.pcbi.1003571},
+ pages = {1-7},
+ number = {4},
+ doi = {10.1371/journal.pcbi.1003571}
+}
+
+@article{transfs,
+ author = {Scantlebury, Jack et al.},
+ publisher = {Biorxiv},
+ title = {Dataset Augmentation Allows Deep Learning-Based Virtual Screening To Better Generalise To Unseen Target Classes, And Highlight Important Binding Interactions},
+ year = {2020},
+ url = {https://www.biorxiv.org/content/10.1101/2020.03.06.979625v1},
+}
+
+@article{sucos,
+ author = {Leong, Susan et al.},
+ publisher = {Chemrxiv},
+ title = {SuCOS is Better than RMSD for Evaluating Fragment Elaboration and Docking Poses},
+ year = {2019},
+ url = {https://chemrxiv.org/articles/SuCOS_is_Better_than_RMSD_for_Evaluating_Fragment_Elaboration_and_Docking_Poses/8100203/1},
+ doi = {10.26434/chemrxiv.8100203.v1}
+}
+
+@article{Ropp2019,
+ doi = {10.1186/s13321-019-0336-9},
+ url = {https://doi.org/10.1186/s13321-019-0336-9},
+ year = {2019},
+ month = feb,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {11},
+ number = {1},
+ author = {Patrick J. Ropp and Jesse C. Kaminsky and Sara Yablonski and Jacob D. Durrant},
+ title = {Dimorphite-{DL}: an open-source program for enumerating the ionization states of drug-like small molecules},
+ journal = {Journal of Cheminformatics}
+}
+
+@article{ngl,
+ author = {Rose, Alexander S and Bradley, Anthony R and Valasatava, Yana and Duarte, Jose M and Prlić, Andreas and Rose, Peter W},
+ title = "{NGL viewer: web-based molecular graphics for large complexes}",
+ journal = {Bioinformatics},
+ volume = {34},
+ number = {21},
+ pages = {3755-3758},
+ year = {2018},
+ month = {05},
+ abstract = "{The interactive visualization of very large macromolecular complexes on the web is becoming a challenging problem as experimental techniques advance at an unprecedented rate and deliver structures of increasing size.We have tackled this problem by developing highly memory-efficient and scalable extensions for the NGL WebGL-based molecular viewer and by using Macromolecular Transmission Format (MMTF), a binary and compressed MMTF. These enable NGL to download and render molecular complexes with millions of atoms interactively on desktop computers and smartphones alike, making it a tool of choice for web-based molecular visualization in research and education.The source code is freely available under the MIT license at github.com/arose/ngl and distributed on NPM (npmjs.com/package/ngl). MMTF-JavaScript encoders and decoders are available at github.com/rcsb/mmtf-javascript.}",
+ issn = {1367-4803},
+ doi = {10.1093/bioinformatics/bty419},
+ url = {https://doi.org/10.1093/bioinformatics/bty419},
+ eprint = {https://academic.oup.com/bioinformatics/article-pdf/34/21/3755/26147052/bty419.pdf},
+}
\ No newline at end of file
diff --git a/topics/computational-chemistry/tutorials/covid19-docking/tutorial.md b/topics/computational-chemistry/tutorials/covid19-docking/tutorial.md
new file mode 100644
index 00000000000000..d1b46abdfacbb7
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/covid19-docking/tutorial.md
@@ -0,0 +1,347 @@
+---
+layout: tutorial_hands_on
+
+title: Virtual screening of the SARS-CoV-2 main protease with rDock and pose scoring
+level: Intermediate
+zenodo_link: 'https://zenodo.org/record/3730474'
+questions:
+- How can candidate ligands be generated and docked to a protein in Galaxy?
+- How can the poses of the docked ligands be evaluated?
+- How can a workflow for drug virtual screening be constructed in Galaxy?
+objectives:
+- Understand how Galaxy was used to perform docking and pose scoring on the SARS-CoV-2 main protease (MPro).
+- Replicate the study on a (very) small scale
+- Gain familiarity with the docking and scoring techniques involved.
+time_estimation: 2H # Just 1 week (if you have 5000 CPUs) ;)
+key_points:
+- Galaxy can support large, rapid studies in computational chemistry
+- Protein-ligand docking contributes to the discovery of new drugs
+requirements:
+ -
+ type: "internal"
+ topic_name: computational-chemistry
+ tutorials:
+ - cheminformatics
+tags:
+- covid19
+contributors:
+- simonbray
+
+---
+
+# Introduction
+{:.no_toc}
+
+This tutorial provides a companion to the work performed in March 2020 by InformaticsMatters, the Diamond Light Source, and the European Galaxy Team to perform virtual screening on candidate ligands for the SARS-CoV-2 main protease (MPro). This work is described [here](https://covid19.galaxyproject.org/cheminformatics).
+
+In this tutorial, you will perform protein-ligand docking to MPro using rDock ({% cite rdock %}) and score the results using two different methods. The same tools will be used as in the original study, but with a smaller dataset.
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Background
+
+Early in March 2020, the Diamond Light Source completed a successful fragment screen on MPro, which provided 55 fragment hits (see their [press release](https://www.diamond.ac.uk/covid-19/for-scientists/Main-protease-structure-and-XChem.html)). In an effort to identify candidate molecules for binding, InformaticsMatters, the XChem group and the European Galaxy team joined forces to construct and execute a Galaxy workflow for performing and evaluating molecular docking on a massive scale.
+
+An initial list of 41,000 candidate molecules was assembled by using the Fragalysis fragment network to elaborate from the initial fragment hits, as described [here](https://diamondlightsource.atlassian.net/wiki/spaces/FRAG/pages/8323192/The+Astex+Fragment+network). These were used as inputs for the docking and scoring workflow. The workflow consists of the following steps, each of which was carried out using tools installed on the European Galaxy server:
+1. Charge enumeration of the 41,000 candidate molecules selected based on the fragment hits.
+2. Generation of 3D conformations based on SMILES strings of the candidate molecules.
+3. Docking of molecules into each of the MPro structures using rDock.
+4. Evaluation of the docking poses using TransFS, a deep learning approach ({% cite transfs %}) developed by the XChem group and collaborators, and SuCOS scoring ({% cite sucos %}), which compares the poses with the structures of the original fragment hits.
+
+The original study required almost 20 years of CPU time, not counting GPU resources consumed. This is obviously not reproducible as a tutorial. Therefore, we will repeat the workflow with a small library of just 100 molecules, on a single MPro fragment structure. Links will be provided to original Galaxy histories, with notes to explain where and why things were done differently to the tutorial.
+
+![MPro structure, with a fragment bound]({% link topics/computational-chemistry/images/mpro.png %} "Structure of MPro, with a fragment bound. Click to view in NGL. ({% cite ngl %})")
+
+# Get data
+
+We require three datasets for the simulation and analysis:
+1. A list of 100 ligand candidates. These are the molecules which will be docked into the protein binding site.
+2. A PDB file of the receptor MPro protein (without ligand or solvent).
+3. A list of fragment hits (17 in total) in SDF format.
+
+> ### {% icon details %} Differences with the original study
+>
+> Of the initial 55 fragment hits, 17 were chosen for further study. From these, 41,587 compounds were generated using the Fragalysis fragment network for further study. The 100 compounds used in the tutorial are taken from this list.
+>
+>
+> Starting data is available from this Galaxy history: [https://usegalaxy.eu/u/sbray/h/mpro-raw-data](https://usegalaxy.eu/u/sbray/h/mpro-raw-data).
+>
+>
+> This history contains 103 files. One of these (`Initial candidates for docking`) contains the 41k candidate compounds in SMILES format. The remaining 102 files (all with names beginning with `Mpro-x...`) provide structural information on the fragment hits - 6 files per hit (hence 17 x 6 = 102).
+>
+>
+> The identity of the files is as follows:
+>
+> - the `*_0.mol` files contain the fragment structure in mol format.
+> - the `*_0.pdb` files contain the fragment structure in pdb format.
+> - the `*_0_apo.pdb` files contain the protein with solvent, but without ligand
+> - the `*_0_apo-desolv.pdb` files contain the protein without either solvent or ligand
+> - the `*_0_apo-solv.pdb` files contain only solvent
+> - the `*_0_bound.pdb` files contain everything (protein, ligand and solvent)
+>
+> The PDB file of the receptor that we are using is `Mpro-x0195_0_apo-desolv.pdb`. In other words, the structure is derived from just one fragment hit. In the original study, however, all compounds were docked against all of the fragment hit structures.
+{: .details}
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial
+> 2. Import the files from [Zenodo](https://zenodo.org/record/3730474):
+>
+> ```
+> https://zenodo.org/record/3730474/files/candidates.smi
+> https://zenodo.org/record/3730474/files/Mpro-x0195_0_apo-desolv.pdb
+> https://zenodo.org/record/3730474/files/hits.sdf
+> ```
+>
+> {% include snippets/import_via_link.md %}
+>
+> 3. Rename the datasets `Candidates SMILES`, `Receptor PDB` and `Hits SDF` respectively.
+> 4. Check that the datatypes (`smi`, `pdb`, and `sdf` respectively) are correct. In particular, check the `Candidates SMILES` file, as the SMILES datatype is not detected automatically by Galaxy.
+>
+> {% include snippets/change_datatype.md datatype="datatypes" %}
+{: .hands_on}
+
+# Preparation for docking
+
+Before docking, the candidate ligands need to be prepared with the following steps: 1) charge enumeration, 2) generation of three-dimensional structures, and 3) splitting the SD-file into a collection.
+
+> ### {% icon details %} Differences with the original study
+>
+> This stage is carried out as described here, except of course with the full set of 42,000 compounds. See [here](https://covid19.galaxyproject.org/cheminformatics/1-DockingPrep/) for more details.
+{: .details}
+
+## Charge enumeration
+
+Many of the compounds may contain functional groups which can exist in multiple charge states, and this will affect the quality of binding to the receptor dramatically. Therefore, we perform 'charge enumeration', which means that we generate all charge forms of the compounds within a certain pH range.
+
+> ### {% icon hands_on %} Hands-on: Charge enumeration
+>
+> 1. **Enumerate changes** {% icon tool %} with the following parameters:
+> - *"Input molecule data"*: `Candidates SMILES`
+> - *"Minimum pH"*: `4.4`
+> - *"Maximum pH"*: `10.4`
+> 2. Rename the output file `Enumerated candidates SMILES`.
+>
+> > ### {% icon comment %} Comment
+> >
+> > The **Enumerate charges** {% icon tool %} tool is based on the Dimorphite-DL program. ({% cite Ropp2019 %})
+> {: .comment}
+>
+{: .hands_on}
+
+The output is another SMILES file, with several hundred entries.
+
+## Generate three-dimensional conformations
+
+So far our list of enumerated candidate compounds is still in SMILES format; we need to produce three-dimensional structures in SDF format for docking. This can be done with the **Compound conversion** {% icon tool %} tool.
+
+If you are not familiar with SMILES and SDF formats, consult the introductory [protein-ligand docking tutorial](../cheminformatics/tutorial.html) for more details.
+
+> ### {% icon hands_on %} Hands-on: Convert to SDF format
+>
+> 1. **Compound conversion** {% icon tool %} with the following parameters:
+> - *"Molecular input file"*: `Enumerated candidates SMILES` dataset.
+> - *"Output format"*: `MDL MOL format (sdf, mol)`
+> - *"Generate 3D coordinates"*: `Yes`
+> 2. Rename the output file `Enumerated candidates SDF`.
+>
+> > ### {% icon comment %} Comment
+> >
+> > The **Compound conversion** {% icon tool %} tool is based on the OpenBabel toolkit. ({% cite OBoyle2011 %})
+> {: .comment}
+>
+{: .hands_on}
+
+## Splitting the SD-file into a collection
+
+The next stage is to split the SD-file with the candidate ligands into a set of smaller SD-files.
+
+
+> ### {% icon question %} Questions
+>
+> Why is splitting the file necessary?
+>
+> > ### {% icon solution %} Solution
+> >
+> > The rDock tool performs one docking at a time (more technically: the task is not parallelized, as it uses only a single CPU). Therefore, splitting the large SD-file into many small files allows the work to be carried out by multiple Galaxy jobs in parallel, so it completes faster.
+> >
+> > In the original study, this kind of parallelization was absolutely essential because of the enormous dataset; at some points, there were 5,000 docking jobs running concurrently on the European Galaxy server. Even on the much smaller scale of this tutorial, we can speed things up considerably using this trick.
+> >
+> {: .solution}
+{: .question}
+
+> ### {% icon hands_on %} Hands-on: Split the SD-files
+>
+> 1. **Split file to dataset collection** {% icon tool %} with the following parameters:
+> - *"Select the file type to split"*: `SD-files`
+> - *"SD-file to split"*: `Enumerated candidates SDF`
+> - *"Specify number of output files or number of records per file?"*: `Number of output files`
+> - *"Number of new files"*: `10`
+> - *"Method to allocate records to new files"*: `Alternate output files`
+>
+>
+{: .hands_on}
+
+# Active site preparation
+
+The active site also needs to be prepared for docking, using the following steps: 1) conversion to MOL2 format, and 2) generation of the active site using the **rbcavity** {% icon tool %} tool.
+
+> ### {% icon details %} Differences with the original study
+>
+> This stage was carried out as described here. However, it was repeated for each of the fragment hit structures, not just the `Mpro-x0195_0_apo-desolv.pdb` file used here. See [here](https://covid19.galaxyproject.org/cheminformatics/2-ActiveSitePrep/) for more details.
+{: .details}
+
+## Convert protein structure to MOL2 format
+
+The receptor file we are using is in PDB format, but the rDock tool we use for docking requires an input in MOL2 format. Therefore, we first convert the file.
+
+> ### {% icon hands_on %} Hands-on: Conversion to MOL2 format
+>
+> 1. **Compound conversion** {% icon tool %} with the following parameters:
+> - *"Molecular input file"*: `Receptor PDB` dataset.
+> - *"Output format"*: `Sybyl Mol2 format (mol2)`
+> 2. Rename the output file `Receptor MOL2`.
+>
+>
+{: .hands_on}
+
+## Generate Frankenstein ligand
+
+For docking with rDock, a file needs to be created defining the active site. This requires two input files - one for the protein and one for the ligand. We want an active site generation that takes into account the features of all 17 fragments, and therefore need to generate a 'Frankenstein ligand' which possesses the properties of all the fragments. A very simple Galaxy tool is available for this.
+
+> ### {% icon question %} Questions
+>
+> What is a 'Frankenstein ligand' and why do we need it?
+>
+> > ### {% icon solution %} Solution
+> >
+> > The Frankenstein ligand combines the atoms of all the fragments and therefore occupies the entire space of the binding site. Therefore, it is the best choice for cavity definition. See the [information provided by InformaticsMatters](https://www.informaticsmatters.com/blog/2018/11/23/cavities-and-frankenstein-molecules.html) for more details.
+> >
+> {: .solution}
+>
+{: .question}
+
+> ### {% icon hands_on %} Hands-on: Generate Frankenstein ligand
+>
+> 1. **Create Frankenstein ligand** {% icon tool %} with the following parameters:
+> - *"Input file"*: `Hits SDF`
+> 2. Rename the file to `Frankenstein SDF`.
+>
+{: .hands_on}
+
+## Generate active site definition
+
+The active site can now be generated using the **rbcavity** {% icon tool %} tool, which requires the receptor in MOL2 format as input as well as a single reference ligand in Mol/SDF format. We use the Frankenstein ligand as the reference.
+
+> ### {% icon hands_on %} Hands-on: Active site preparation
+>
+> 1. **rDock cavity definition** {% icon tool %} with the following parameters:
+> - *"Receptor"*: `Receptor MOL2`
+> - *"Reference ligand"*: `Frankenstein SDF`
+> - *"Mapper sphere radius"*: `3.0`
+> - *"Mapper small sphere radius"*: `1.0`
+> - *"Mapper minimum volume"*: `100`
+> - *"Mapper volume increment"*: `0`
+> - *"Mapper grid step"*: `0.5`
+> - *"Cavity weight"*: `1.0`
+>
+> 2. Rename the output file `Active site`.
+> > ### {% icon comment %} Comment
+> >
+> > The meanings of these parameters are too complex to go into in this tutorial. If you are interested, see the [rDock documentation](http://rdock.sourceforge.net/wp-content/uploads/2015/08/rDock_User_Guide.pdf) for more details.
+> {: .comment}
+>
+{: .hands_on}
+
+# Docking and scoring
+
+Docking and scoring are now performed, using the following steps: 1) docking using rDock, 2) recombining the results into a single SDF file, 3) TransFS scoring, and 4) SuCOS scoring.
+
+> ### {% icon details %} Differences with the original study
+>
+> This section in the original study differed from this tutorial in the following ways:
+> 1. Docking was performed on over 100,000 enumerated candidates, rather than the 300 used here.
+> 2. 25 different poses were generated per candidate, rather than 5, as in this tutorial.
+> 3. Because of the large number of poses to score (more than a million), the scoring steps were parallelized by splitting into collections. This is skipped in the tutorial.
+> 4. The entire process was repeated 17 times, using a different fragment hit as the receptor structure each time.
+> See [here](https://covid19.galaxyproject.org/cheminformatics/3-Docking/) and [here](https://covid19.galaxyproject.org/cheminformatics/4-Scoring/) for more details. A full list of Galaxy histories generated is listed [here](https://covid19.galaxyproject.org/cheminformatics/Histories/).
+{: .details}
+
+## Docking with rDock
+
+> ### {% icon hands_on %} Hands-on: Docking
+>
+> 1. **rDock docking** {% icon tool %} with the following parameters:
+> - *"Receptor"*: `Receptor MOL2`
+> - *"Active site"*: `Active site`
+> - *"Ligands"*: `Split file` collection
+> - *"Number of dockings"*: `5`
+> - *"Number of best poses"*: `5`
+>
+> > ### {% icon comment %} Comment
+> >
+> > For more information about docking, check out the [introductory tutorial](../cheminformatics/tutorial.html). It uses a different tool, AutoDock Vina, rather than rDock, but the general principles are the same.
+> {: .comment}
+>
+{: .hands_on}
+
+## Collapse collection to a single file
+
+Having created a collection to parallelize the docking procedure, we can now recombine the results to a single file.
+
+> ### {% icon hands_on %} Hands-on: Collapse collection
+>
+> 1. **Collapse Collection** {% icon tool %} with the following parameters:
+> - *"Collection of files to collapse into single dataset"*: Output of docking step
+> 2. Rename to `Docked poses SDF`.
+>
+{: .hands_on}
+
+The output file should contain around 1,900 docked poses in SDF format.
+
+## Pose scoring with TransFS
+
+In this step, we carry out scoring of the poses using TransFS. This is a deep learning approach developed at the University of Oxford, employing augmentation of training data with incorrectly docked ligands to prompt the model to learn from protein-ligand interactions. ({% cite transfs %})
+
+The TransFS scoring returns a value (saved as a property of each pose in the SDF file) between 0 (poor) and 1 (good).
+
+> ### {% icon hands_on %} Hands-on: TransFS scoring
+>
+> 1. **XChem TransFS pose scoring** {% icon tool %} with the following parameters:
+> - *"Receptor"*: `Receptor PDB`
+> - *"Ligands"*: `Docked poses SDF`
+> - *"Distance to waters"*: `2`
+>
+{: .hands_on}
+
+## Pose scoring with SuCOS
+
+This step involves scoring of the poses from each molecule against the original fragment screening hit ligands using the SuCOS MAX shape and feature overlay algorithm. ({% cite sucos %}) The conformation and position of the poses are compared to known structures (i.e. the fragment hits) to determine a score.
+
+SuCOS scoring returns a value (saved as a property of each pose in the SDF file) between 0 (poor) and 1 (good).
+
+> ### {% icon hands_on %} Hands-on: SuCOS scoring
+>
+> 1. **Max SuCOS score** {% icon tool %} with the following parameters:
+> - *"Ligands to be scored"*: Output of the TransFS step
+> - *"Set of clusters to score against"*: `Hits SDF`
+> 2. Rename the output file to `Scored poses`.
+{: .hands_on}
+
+# Compound selection
+
+The aim of the original study was to select 500 candidate molecules for synthesis and experimental study. In order to do this, the data for all fragment hits had to be combined (i.e. so that each compound was assigned the lowest score from all the fragments). The resulting table was then compared with a list of compounds available from [Enamine](https://enamine.net/) and [Chemspace](https://chem-space.com/) and the 500 highest scoring matching compounds were selected for purchase.
+
+This step is skipped in the tutorial, as only 100 compounds were tested, using only a single fragment hit structure. If you want, though, check out the [history](https://usegalaxy.eu/u/timdudgeon/h/top-500-enamine--chemspace-bb) and [workflow](https://usegalaxy.eu/u/timdudgeon/w/filter-results) used.
+
+# Conclusion
+{:.no_toc}
+
+This tutorial guided you through docking and scoring of a small set of compounds to the MPro protein. Hopefully, you have a better understanding of how docking can be practically used, as well as how the original study was performed.
diff --git a/topics/computational-chemistry/tutorials/covid19-docking/workflows/index.md b/topics/computational-chemistry/tutorials/covid19-docking/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/covid19-docking/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/computational-chemistry/tutorials/covid19-docking/workflows/workflow.ga b/topics/computational-chemistry/tutorials/covid19-docking/workflows/workflow.ga
new file mode 100644
index 00000000000000..47f232724aaa51
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/covid19-docking/workflows/workflow.ga
@@ -0,0 +1,669 @@
+{
+ "uuid":"1c4462a7-0da3-40cb-be85-15a74be5d8ec",
+ "tags":[
+ "computational-chemistry"
+ ],
+ "format-version":"0.1",
+ "name":"Virtual screening of the SARS-CoV-2 main protease with rDock and pose scoring (imported from uploaded file)",
+ "version":3,
+ "steps":{
+ "0":{
+ "tool_id":null,
+ "tool_version":null,
+ "outputs":[
+
+ ],
+ "workflow_outputs":[
+ {
+ "output_name":"output",
+ "uuid":"cdab037e-aee1-49b8-a998-cd9fe7ddcdb7",
+ "label":null
+ }
+ ],
+ "input_connections":{
+
+ },
+ "tool_state":"{\"name\": \"Candidates\"}",
+ "id":0,
+ "uuid":"47bf8c53-ae1e-4af6-bece-6adfd74b627c",
+ "errors":null,
+ "name":"Input dataset",
+ "label":"Candidates",
+ "inputs":[
+ {
+ "name":"Candidates",
+ "description":""
+ }
+ ],
+ "position":{
+ "top":200,
+ "left":218
+ },
+ "annotation":"",
+ "content_id":null,
+ "type":"data_input"
+ },
+ "1":{
+ "tool_id":null,
+ "tool_version":null,
+ "outputs":[
+
+ ],
+ "workflow_outputs":[
+ {
+ "output_name":"output",
+ "uuid":"1fd1c624-37c6-4874-b863-a40eac5e4994",
+ "label":null
+ }
+ ],
+ "input_connections":{
+
+ },
+ "tool_state":"{\"name\": \"Mpro-x0195_0_apo-desolv.pdb\"}",
+ "id":1,
+ "uuid":"156f4aeb-d160-456a-974e-7884b79cb83a",
+ "errors":null,
+ "name":"Input dataset",
+ "label":"Mpro-x0195_0_apo-desolv.pdb",
+ "inputs":[
+ {
+ "name":"Mpro-x0195_0_apo-desolv.pdb",
+ "description":""
+ }
+ ],
+ "position":{
+ "top":289.76666259765625,
+ "left":218
+ },
+ "annotation":"",
+ "content_id":null,
+ "type":"data_input"
+ },
+ "2":{
+ "tool_id":null,
+ "tool_version":null,
+ "outputs":[
+
+ ],
+ "workflow_outputs":[
+ {
+ "output_name":"output",
+ "uuid":"1c8fb3cc-52ad-4f62-a00d-80b14b2c1d4a",
+ "label":null
+ }
+ ],
+ "input_connections":{
+
+ },
+ "tool_state":"{\"name\": \"hits.sdf\"}",
+ "id":2,
+ "uuid":"dd2d3bc0-8138-4e66-b2d9-e5e90e27b659",
+ "errors":null,
+ "name":"Input dataset",
+ "label":"hits.sdf",
+ "inputs":[
+ {
+ "name":"hits.sdf",
+ "description":""
+ }
+ ],
+ "position":{
+ "top":400.5333251953125,
+ "left":218
+ },
+ "annotation":"",
+ "content_id":null,
+ "type":"data_input"
+ },
+ "3":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/enumerate_charges/enumerate_charges/0.1",
+ "tool_version":"0.1",
+ "outputs":[
+ {
+ "type":"smi",
+ "name":"output"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "input":{
+ "output_name":"output",
+ "id":0
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"min_ph\": \"\\\"4.4\\\"\", \"max_ph\": \"\\\"10.4\\\"\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id":3,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"2a868592ebcb",
+ "name":"enumerate_charges",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"73172813-0c22-4d96-846e-c5dbe6702fce",
+ "errors":null,
+ "name":"Enumerate changes",
+ "post_job_actions":{
+ "HideDatasetActionoutput":{
+ "output_name":"output",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":478.76666259765625
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/enumerate_charges/enumerate_charges/0.1",
+ "type":"tool"
+ },
+ "4":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.2.0",
+ "tool_version":"2.4.2.2.0",
+ "outputs":[
+ {
+ "type":"text",
+ "name":"outfile"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "infile":{
+ "output_name":"output",
+ "id":1
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"dative_bonds\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"appendtotitle\": \"\\\"\\\"\", \"remove_h\": \"\\\"false\\\"\", \"oformat\": \"{\\\"__current_case__\\\": 41, \\\"gen2d\\\": \\\"false\\\", \\\"gen3d\\\": \\\"false\\\", \\\"mol2_ignore_res\\\": \\\"false\\\", \\\"oformat_opts_selector\\\": \\\"mol2\\\"}\", \"ph\": \"\\\"-1.0\\\"\", \"unique\": \"{\\\"__current_case__\\\": 0, \\\"unique_opts_selector\\\": \\\"\\\"}\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"split\": \"\\\"false\\\"\"}",
+ "id":4,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"b59c91adeac1",
+ "name":"openbabel_compound_convert",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"af4592e4-64e6-40ed-bd1f-9494dcdc7652",
+ "errors":null,
+ "name":"Compound conversion",
+ "post_job_actions":{
+ "HideDatasetActionoutfile":{
+ "output_name":"outfile",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":321.76666259765625,
+ "left":478.76666259765625
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.2.0",
+ "type":"tool"
+ },
+ "5":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/ctb_frankenstein_ligand/ctb_frankenstein_ligand/0.1.1",
+ "tool_version":"0.1.1",
+ "outputs":[
+ {
+ "type":"sdf",
+ "name":"outfile"
+ }
+ ],
+ "workflow_outputs":[
+ {
+ "output_name":"outfile",
+ "uuid":"53c557af-d5b9-4033-be54-7c197591e750",
+ "label":null
+ }
+ ],
+ "input_connections":{
+ "infile":{
+ "output_name":"output",
+ "id":2
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}",
+ "id":5,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"8e214e52e461",
+ "name":"ctb_frankenstein_ligand",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"081abe35-347a-4244-99ae-ec35329e3de0",
+ "errors":null,
+ "name":"Create Frankenstein ligand",
+ "post_job_actions":{
+
+ },
+ "label":null,
+ "inputs":[
+ {
+ "name":"infile",
+ "description":"runtime parameter for tool Create Frankenstein ligand"
+ }
+ ],
+ "position":{
+ "top":483.5333251953125,
+ "left":478.76666259765625
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/ctb_frankenstein_ligand/ctb_frankenstein_ligand/0.1.1",
+ "type":"tool"
+ },
+ "6":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.2.0",
+ "tool_version":"2.4.2.2.0",
+ "outputs":[
+ {
+ "type":"text",
+ "name":"outfile"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "infile":{
+ "output_name":"output",
+ "id":3
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"dative_bonds\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"appendtotitle\": \"\\\"\\\"\", \"remove_h\": \"\\\"false\\\"\", \"oformat\": \"{\\\"__current_case__\\\": 58, \\\"gen2d\\\": \\\"false\\\", \\\"gen3d\\\": \\\"true\\\", \\\"oformat_opts_selector\\\": \\\"sdf\\\", \\\"sdf_alias_out\\\": \\\"false\\\", \\\"sdf_exp_h\\\": \\\"false\\\", \\\"sdf_no_prop\\\": \\\"false\\\", \\\"sdf_wedge_bonds\\\": \\\"false\\\"}\", \"ph\": \"\\\"-1.0\\\"\", \"unique\": \"{\\\"__current_case__\\\": 0, \\\"unique_opts_selector\\\": \\\"\\\"}\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"split\": \"\\\"false\\\"\"}",
+ "id":6,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"b59c91adeac1",
+ "name":"openbabel_compound_convert",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"70d0c83d-9255-46e4-95f3-beaa4622f25e",
+ "errors":null,
+ "name":"Compound conversion",
+ "post_job_actions":{
+ "HideDatasetActionoutfile":{
+ "output_name":"outfile",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":806.5333251953125
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.2.0",
+ "type":"tool"
+ },
+ "7":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/rdock_rbcavity/rdock_rbcavity/0.1",
+ "tool_version":"0.1",
+ "outputs":[
+ {
+ "type":"rdock_as",
+ "name":"activesite"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "receptor":{
+ "output_name":"outfile",
+ "id":4
+ },
+ "ligand":{
+ "output_name":"outfile",
+ "id":5
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"ligand\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"weight\": \"\\\"1.0\\\"\", \"vol_incr\": \"\\\"0.0\\\"\", \"__rerun_remap_job_id__\": null, \"min_volume\": \"\\\"100\\\"\", \"gridstep\": \"\\\"0.5\\\"\", \"sphere\": \"\\\"1.0\\\"\", \"radius\": \"\\\"3.0\\\"\", \"receptor\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id":7,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"744a777e9f90",
+ "name":"rdock_rbcavity",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"f2b86fbe-cbbf-46a8-b588-9f227d83320a",
+ "errors":null,
+ "name":"rDock cavity definition",
+ "post_job_actions":{
+ "HideDatasetActionactivesite":{
+ "output_name":"activesite",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":361.76666259765625,
+ "left":806.5333251953125
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/rdock_rbcavity/rdock_rbcavity/0.1",
+ "type":"tool"
+ },
+ "8":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.4.0",
+ "tool_version":"0.4.0",
+ "outputs":[
+ {
+ "type":"input",
+ "name":"list_output_sdf"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "split_parms|input":{
+ "output_name":"outfile",
+ "id":6
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"split_parms\": \"{\\\"__current_case__\\\": 4, \\\"input\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"newfilenames\\\": \\\"split_file\\\", \\\"select_allocate\\\": {\\\"__current_case__\\\": 2, \\\"allocate\\\": \\\"byrow\\\"}, \\\"select_ftype\\\": \\\"sdf\\\", \\\"select_mode\\\": {\\\"__current_case__\\\": 1, \\\"mode\\\": \\\"numnew\\\", \\\"numnew\\\": \\\"10\\\"}}\"}",
+ "id":8,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"e77b954f0da5",
+ "name":"split_file_to_collection",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"ffb7a033-9c35-4276-82f0-49e5e0e19eb9",
+ "errors":null,
+ "name":"Split file",
+ "post_job_actions":{
+ "HideDatasetActionlist_output_sdf":{
+ "output_name":"list_output_sdf",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":1134.2999877929688
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.4.0",
+ "type":"tool"
+ },
+ "9":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/rdock_rbdock/rdock_rbdock/0.1.4",
+ "tool_version":"0.1.4",
+ "outputs":[
+ {
+ "type":"sdf",
+ "name":"output"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "receptor":{
+ "output_name":"outfile",
+ "id":4
+ },
+ "ligands":{
+ "output_name":"list_output_sdf",
+ "id":8
+ },
+ "active_site":{
+ "output_name":"activesite",
+ "id":7
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"name\": \"\\\"false\\\"\", \"active_site\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"filter\": \"{\\\"__current_case__\\\": 1, \\\"filter_select\\\": \\\"no_filter\\\"}\", \"num\": \"\\\"5\\\"\", \"custom_rec\": \"{\\\"__current_case__\\\": 1, \\\"custom_rec_select\\\": \\\"false\\\", \\\"flex\\\": \\\"3.0\\\"}\", \"receptor\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"seed\": \"\\\"\\\"\", \"ligands\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "id":9,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"a22969b08177",
+ "name":"rdock_rbdock",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"23c7a051-7e65-48f6-929e-c562c4dbe1cb",
+ "errors":null,
+ "name":"rDock docking",
+ "post_job_actions":{
+ "HideDatasetActionoutput":{
+ "output_name":"output",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":1413.066650390625
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/rdock_rbdock/rdock_rbdock/0.1.4",
+ "type":"tool"
+ },
+ "10":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "tool_version":"4.2",
+ "outputs":[
+ {
+ "type":"input",
+ "name":"output"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "input_list":{
+ "output_name":"output",
+ "id":9
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"one_header\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"filename\": \"{\\\"__current_case__\\\": 1, \\\"add_name\\\": \\\"false\\\"}\", \"input_list\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id":10,
+ "tool_shed_repository":{
+ "owner":"nml",
+ "changeset_revision":"830961c48e42",
+ "name":"collapse_collections",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"bd5a919a-4c2c-437a-bdd7-e023347ed0f3",
+ "errors":null,
+ "name":"Collapse Collection",
+ "post_job_actions":{
+ "HideDatasetActionoutput":{
+ "output_name":"output",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":1710.8333740234375
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/4.2",
+ "type":"tool"
+ },
+ "11":{
+ "tool_id":"xchem_pose_scoring",
+ "tool_version":"0.2.0",
+ "outputs":[
+ {
+ "type":"sdf",
+ "name":"output"
+ },
+ {
+ "type":"txt",
+ "name":"predictions"
+ },
+ {
+ "type":"txt",
+ "name":"inputs_types"
+ },
+ {
+ "type":"tar",
+ "name":"output_receptors"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "receptor":{
+ "output_name":"output",
+ "id":1
+ },
+ "ligands":{
+ "output_name":"output",
+ "id":10
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"distance\": \"\\\"2.0\\\"\", \"__rerun_remap_job_id__\": null, \"receptor\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"ligands\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"mock\": \"\\\"\\\"\"}",
+ "id":11,
+ "uuid":"5064b254-b27e-4f1f-a019-ed4a6b066d59",
+ "errors":null,
+ "name":"XChem TransFS pose scoring",
+ "post_job_actions":{
+ "HideDatasetActionpredictions":{
+ "output_name":"predictions",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ },
+ "HideDatasetActionoutput_receptors":{
+ "output_name":"output_receptors",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ },
+ "HideDatasetActionoutput":{
+ "output_name":"output",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ },
+ "HideDatasetActioninputs_types":{
+ "output_name":"inputs_types",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":2038.5999755859375
+ },
+ "annotation":"",
+ "content_id":"xchem_pose_scoring",
+ "type":"tool"
+ },
+ "12":{
+ "tool_id":"toolshed.g2.bx.psu.edu/repos/bgruening/sucos_max_score/sucos_max_score/0.2.3",
+ "tool_version":"0.2.3",
+ "outputs":[
+ {
+ "type":"sdf",
+ "name":"output"
+ }
+ ],
+ "workflow_outputs":[
+
+ ],
+ "input_connections":{
+ "input":{
+ "output_name":"output",
+ "id":11
+ },
+ "clusters":{
+ "output_name":"output",
+ "id":2
+ }
+ },
+ "tool_state":"{\"__page__\": null, \"filter_value\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"filter_field\": \"\\\"\\\"\", \"clusters\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "id":12,
+ "tool_shed_repository":{
+ "owner":"bgruening",
+ "changeset_revision":"d4c67ced6abc",
+ "name":"sucos_max_score",
+ "tool_shed":"toolshed.g2.bx.psu.edu"
+ },
+ "uuid":"9c8db659-3123-40fa-8ae9-a4b9b9751e14",
+ "errors":null,
+ "name":"Max SuCOS score",
+ "post_job_actions":{
+ "HideDatasetActionoutput":{
+ "output_name":"output",
+ "action_type":"HideDatasetAction",
+ "action_arguments":{
+
+ }
+ }
+ },
+ "label":null,
+ "inputs":[
+
+ ],
+ "position":{
+ "top":200,
+ "left":2366.36669921875
+ },
+ "annotation":"",
+ "content_id":"toolshed.g2.bx.psu.edu/repos/bgruening/sucos_max_score/sucos_max_score/0.2.3",
+ "type":"tool"
+ }
+ },
+ "annotation":"Virtual screening of the SARS-CoV-2 main protease with rDock and pose scoring",
+ "a_galaxy_workflow":"true"
+}
diff --git a/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.bib b/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.bib
new file mode 100644
index 00000000000000..063ae02b5299b2
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.bib
@@ -0,0 +1,764 @@
+
+# This is the bibliography file for your tutorial.
+#
+# To add bibliography (bibtex) entries here, follow these steps:
+# 1) Find the DOI for the article you want to cite
+# 2) Go to https://doi2bib.org and fill in the DOI
+# 3) Copy the resulting bibtex entry into this file
+#
+# To cite the example below, in your tutorial.md file
+# use {% cite Batut2018 %}
+
+% bmc_article.bib
+%
+% An example of bibtex entries.
+% Entries taken from BMC instructions for authors page.
+
+% uncomment next line to make author-year bibliography
+% @settings{label, options="nameyear"}
+
+@article{blank,
+ author = {},
+ title = {},
+ journal = {},
+ year = {},
+ month = {},
+ volume= {},
+ number= {},
+ pages = {},
+ note = {}
+}
+
+% Article within a journal
+@ARTICLE{koon,
+ author = {Koonin, E V and Altschul, S F and P Bork},
+ title = {BRCA1 protein products: functional motifs},
+ journal = {Nat Genet},
+ year = {1996},
+ volume = {13},
+ pages = {266-267}
+}
+
+% Article within a journal supplement
+@ARTICLE{oreg,
+ author = {Orengo, C A and Bray, J E and Hubbard,
+ T and LoConte, L and Sillitoe, I},
+ title = {Analysis and assessment of ab initio
+ three-dimensional prediction, secondary
+ structure, and contacts prediction},
+ journal = {Proteins},
+ year = {1999},
+ volume = {Suppl 3},
+ pages = {149-170}
+}
+
+% In press article
+@inpress{khar,
+ author = {Kharitonov, S A and Barnes, P J},
+ title = {Clinical aspects of exhaled nitric oxide},
+ journal = {Eur Respir J},
+ note = {in press}
+}
+
+%
+% Published abstract
+%
+@ARTICLE{zvai,
+ author = {Zvaifler, N J and Burger, J A and Marinova-Mutafchieva,
+ L and Taylor, P and Maini, R N},
+ title = {Mesenchymal cells, stromal derived factor-1 and
+ rheumatoid arthritis [abstract]},
+ journal = {Arthritis Rheum},
+ year = {1999},
+ volume = {42},
+ pages = {s250},
+}
+
+
+%
+% Article within conference proceedings
+%
+@Inproceedings{xjon,
+ author = {X Jones},
+ title = {Zeolites and synthetic mechanisms},
+ booktitle = {Proceedings of the First National Conference on
+ Porous Sieves: 27-30 June 1996; Baltimore},
+ year = {1996},
+ editor = {Y Smith},
+ pages = {16-27},
+ organization = {Stoneham: Butterworth-Heinemann}
+}
+
+%%%%%%%%
+% Book chapter, or article within a book
+%
+@incollection{schn,
+ author = {E Schnepf},
+ title = {From prey via endosymbiont to plastids:
+ comparative studies in dinoflagellates},
+ booktitle = {Origins of Plastids},
+ editor = {R A Lewin},
+ publisher = {Chapman and Hall},
+ pages = {53-76},
+ year = {1993},
+ address = {New York},
+ volume = {2},
+ edition = {2nd}
+}
+
+%%%%%%%%
+% Whole issue of journal
+%
+@wholejournal{pond,
+ editor = {B Ponder and S Johnston and L Chodosh},
+ title = {Innovative oncology},
+ journal = {Breast Cancer Res},
+ year = {1998},
+ volume= {10},
+ pages = {1-72}
+}
+
+
+%%%%%%%%
+% Whole conference proceedings
+%
+@proceedings{smith,
+ editor = {Y Smith},
+ title = {Proceedings of the First National Conference
+ on Porous Sieves: 27-30 June 1996; Baltimore},
+ year = 1996,
+ address= {Stoneham},
+ publisher = {Butterworth-Heinemann},
+}
+
+
+%%%%%%%%
+% Complete book
+%
+@book{marg,
+ author = {L Margulis},
+ title = {Origin of Eukaryotic Cells},
+ publisher = {Yale University Press},
+ year = {1970},
+ address = {New Haven}
+}
+
+
+
+%%%%%%%%
+% Monograph or book in series
+%
+@incollection{hunn,
+ author = {G W Hunninghake and J E Gadek},
+ title = {The alveolar macrophage},
+ booktitle = {Cultured Human Cells and Tissues},
+ publisher = {Academic Press},
+ year = {1995},
+ pages = {54-56},
+ editor = {T J R Harris},
+ address = {New York},
+ note = {Stoner G (Series Editor): Methods and Perspectives in Cell Biology, vol 1}
+}
+
+
+%%%%%%%%
+% Book with institutional author
+@manual{advi,
+ title = {Annual Report},
+ organization = {Advisory Committee on Genetic Modification},
+ address = {London},
+ year = {1999}
+}
+
+
+%%%%%%%%
+% PHD Thesis
+%
+@phdthesis{koha,
+ author = {R Kohavi},
+ title = {Wrappers for performance enhancement and
+ obvious decision graphs},
+ school = {Stanford University, Computer Science Department},
+ year = {1995}
+}
+
+%%%%%%%%
+% Webpage Link / URL
+%
+
+
+
+@article{Batut2018,
+ doi = {10.1016/j.cels.2018.05.012},
+ url = {https://doi.org/10.1016/j.cels.2018.05.012},
+ year = {2018},
+ month = jun,
+ publisher = {Elsevier {BV}},
+ volume = {6},
+ number = {6},
+ pages = {752--758.e1},
+ author = {{Batut et al.}},
+ title = {Community-Driven Data Analysis Training for Biology},
+ journal = {Cell Systems}
+}
+
+
+@article{michaudagrawal_mdanalysis_2011,
+ title = {{MDAnalysis}: {A} toolkit for the analysis of molecular dynamics simulations},
+ volume = {32},
+ copyright = {Copyright © 2011 Wiley Periodicals, Inc.},
+ issn = {1096-987X},
+ shorttitle = {{MDAnalysis}},
+ url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.21787},
+ doi = {10.1002/jcc.21787},
+ abstract = {MDAnalysis is an object-oriented library for structural and temporal analysis of molecular dynamics (MD) simulation trajectories and individual protein structures. It is written in the Python language with some performance-critical code in C. It uses the powerful NumPy package to expose trajectory data as fast and efficient NumPy arrays. It has been tested on systems of millions of particles. Many common file formats of simulation packages including CHARMM, Gromacs, Amber, and NAMD and the Protein Data Bank format can be read and written. Atoms can be selected with a syntax similar to CHARMM's powerful selection commands. MDAnalysis enables both novice and experienced programmers to rapidly write their own analytical tools and access data stored in trajectories in an easily accessible manner that facilitates interactive explorative analysis. MDAnalysis has been tested on and works for most Unix-based platforms such as Linux and Mac OS X. It is freely available under the GNU General Public License from http://mdanalysis.googlecode.com. © 2011 Wiley Periodicals, Inc. J Comput Chem 2011},
+ language = {en},
+ number = {10},
+ urldate = {2020-04-17},
+ journal = {Journal of Computational Chemistry},
+ author = {Michaud‐Agrawal, Naveen and Denning, Elizabeth J. and Woolf, Thomas B. and Beckstein, Oliver},
+ year = {2011},
+ keywords = {analysis, membrane systems, molecular dynamics simulations, object-oriented design, proteins, Python programming language, software},
+ pages = {2319--2327},
+}
+
+@article{mcgibbon_mdtraj_2015,
+ title = {{MDTraj}: {A} {Modern} {Open} {Library} for the {Analysis} of {Molecular} {Dynamics} {Trajectories}},
+ volume = {109},
+ issn = {0006-3495},
+ shorttitle = {{MDTraj}},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4623899/},
+ doi = {10.1016/j.bpj.2015.08.015},
+ abstract = {As molecular dynamics (MD) simulations continue to evolve into powerful computational tools for studying complex biomolecular systems, the necessity of flexible and easy-to-use software tools for the analysis of these simulations is growing. We have developed MDTraj, a modern, lightweight, and fast software package for analyzing MD simulations. MDTraj reads and writes trajectory data in a wide variety of commonly used formats. It provides a large number of trajectory analysis capabilities including minimal root-mean-square-deviation calculations, secondary structure assignment, and the extraction of common order parameters. The package has a strong focus on interoperability with the wider scientific Python ecosystem, bridging the gap between MD data and the rapidly growing collection of industry-standard statistical analysis and visualization tools in Python. MDTraj is a powerful and user-friendly software package that simplifies the analysis of MD data and connects these datasets with the modern interactive data science software ecosystem in Python.},
+ number = {8},
+ urldate = {2020-04-17},
+ journal = {Biophysical Journal},
+ author = {McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.},
+ month = oct,
+ year = {2015},
+ pmcid = {PMC4623899},
+ pages = {1528--1532},
+}
+
+@article{skjaerven_integrating_2014,
+ title = {Integrating protein structural dynamics and evolutionary analysis with {Bio3D}},
+ volume = {15},
+ issn = {1471-2105},
+ url = {https://doi.org/10.1186/s12859-014-0399-6},
+ doi = {10.1186/s12859-014-0399-6},
+ abstract = {Popular bioinformatics approaches for studying protein functional dynamics include comparisons of crystallographic structures, molecular dynamics simulations and normal mode analysis. However, determining how observed displacements and predicted motions from these traditionally separate analyses relate to each other, as well as to the evolution of sequence, structure and function within large protein families, remains a considerable challenge. This is in part due to the general lack of tools that integrate information of molecular structure, dynamics and evolution.},
+ number = {1},
+ urldate = {2020-04-17},
+ journal = {BMC Bioinformatics},
+ author = {Skjærven, Lars and Yao, Xin-Qiu and Scarabelli, Guido and Grant, Barry J.},
+ month = dec,
+ year = {2014},
+ pages = {399},
+}
+
+@article{senapathi_biomolecular_2019,
+ title = {Biomolecular {Reaction} and {Interaction} {Dynamics} {Global} {Environment} ({BRIDGE})},
+ volume = {35},
+ issn = {1367-4803},
+ url = {https://academic.oup.com/bioinformatics/article/35/18/3508/5317160},
+ doi = {10.1093/bioinformatics/btz107},
+ abstract = {Motivation: The pathway from genomics through proteomics and onto a molecular description of biochemical processes makes the discovery of drugs and bio},
+ language = {en},
+ number = {18},
+ urldate = {2020-04-17},
+ journal = {Bioinformatics},
+ author = {Senapathi, Tharindu and Bray, Simon and Barnett, Christopher B. and Grüning, Björn and Naidoo, Kevin J.},
+ month = sep,
+ year = {2019},
+ note = {Publisher: Oxford Academic},
+ pages = {3508--3509},
+}
+
+@article{abraham_gromacs_2015,
+ title = {{GROMACS}: {High} performance molecular simulations through multi-level parallelism from laptops to supercomputers},
+ volume = {1-2},
+ issn = {2352-7110},
+ shorttitle = {{GROMACS}},
+ url = {http://www.sciencedirect.com/science/article/pii/S2352711015000059},
+ doi = {10.1016/j.softx.2015.06.001},
+ abstract = {GROMACS is one of the most widely used open-source and free software codes in chemistry, used primarily for dynamical simulations of biomolecules. It provides a rich set of calculation types, preparation and analysis tools. Several advanced techniques for free-energy calculations are supported. In version 5, it reaches new performance heights, through several new and enhanced parallelization algorithms. These work on every level; SIMD registers inside cores, multithreading, heterogeneous CPU–GPU acceleration, state-of-the-art 3D domain decomposition, and ensemble-level parallelization through built-in replica exchange and the separate Copernicus framework. The latest best-in-class compressed trajectory storage format is supported.},
+ language = {en},
+ urldate = {2020-04-17},
+ journal = {SoftwareX},
+ author = {Abraham, Mark James and Murtola, Teemu and Schulz, Roland and Páll, Szilárd and Smith, Jeremy C. and Hess, Berk and Lindahl, Erik},
+ month = sep,
+ year = {2015},
+ keywords = {Free energy, GPU, Molecular dynamics, SIMD},
+ pages = {19--25},
+}
+
+
+@article{afgan_galaxy_2018,
+ title = {The {Galaxy} platform for accessible, reproducible and collaborative biomedical analyses: 2018 update},
+ volume = {46},
+ issn = {0305-1048},
+ shorttitle = {The {Galaxy} platform for accessible, reproducible and collaborative biomedical analyses},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6030816/},
+ doi = {10.1093/nar/gky379},
+ abstract = {Galaxy (homepage: https://galaxyproject.org, main public server: https://usegalaxy.org) is a web-based scientific analysis platform used by tens of thousands of scientists across the world to analyze large biomedical datasets such as those found in genomics, proteomics, metabolomics and imaging. Started in 2005, Galaxy continues to focus on three key challenges of data-driven biomedical science: making analyses accessible to all researchers, ensuring analyses are completely reproducible, and making it simple to communicate analyses so that they can be reused and extended. During the last two years, the Galaxy team and the open-source community around Galaxy have made substantial improvements to Galaxy's core framework, user interface, tools, and training materials. Framework and user interface improvements now enable Galaxy to be used for analyzing tens of thousands of datasets, and {\textgreater}5500 tools are now available from the Galaxy ToolShed. The Galaxy community has led an effort to create numerous high-quality tutorials focused on common types of genomic analyses. The Galaxy developer and user communities continue to grow and be integral to Galaxy's development. The number of Galaxy public servers, developers contributing to the Galaxy framework and its tools, and users of the main Galaxy server have all increased substantially.},
+ number = {Web Server issue},
+ urldate = {2020-04-17},
+ journal = {Nucleic Acids Research},
+ author = {Afgan, Enis and Baker, Dannon and Batut, Bérénice and van den Beek, Marius and Bouvier, Dave and Čech, Martin and Chilton, John and Clements, Dave and Coraor, Nate and Grüning, Björn A and Guerler, Aysam and Hillman-Jackson, Jennifer and Hiltemann, Saskia and Jalili, Vahid and Rasche, Helena and Soranzo, Nicola and Goecks, Jeremy and Taylor, James and Nekrutenko, Anton and Blankenberg, Daniel},
+ month = jul,
+ year = {2018},
+ pmcid = {PMC6030816},
+ pages = {W537--W544},
+}
+
+@article{hump_vmd_1996,
+ author={William Humphrey and Andrew Dalke and Klaus Schulten},
+ title={{VMD} -- {V}isual {M}olecular {D}ynamics},
+ journal={Journal of Molecular Graphics},
+ year=1996,
+ volume=14,
+ pages={33-38},
+ note={},
+ tbstatus={Published.},
+ techrep={},
+ tbreference={222}
+}
+
+@article{miller_mmpbsa,
+author = {Miller, Bill R. and McGee, T. Dwight and Swails, Jason M. and Homeyer, Nadine and Gohlke, Holger and Roitberg, Adrian E.},
+title = {MMPBSA.py: An Efficient Program for End-State Free Energy Calculations},
+journal = {Journal of Chemical Theory and Computation},
+volume = {8},
+number = {9},
+pages = {3314-3321},
+year = {2012},
+doi = {10.1021/ct300418h},
+URL = {
+ https://doi.org/10.1021/ct300418h
+ },
+eprint = {
+ https://doi.org/10.1021/ct300418h
+ }
+}
+
+@article{sloggett_bioblend,
+ author = {Sloggett, Clare and Goonasekera, Nuwan and Afgan, Enis},
+ title = "{BioBlend: automating pipeline analyses within Galaxy and CloudMan}",
+ journal = {Bioinformatics},
+ volume = {29},
+ number = {13},
+ pages = {1685-1686},
+ year = {2013},
+ month = {04},
+ abstract = "{Summary: We present BioBlend, a unified API in a high-level language (python) that wraps the functionality of Galaxy and CloudMan APIs. BioBlend makes it easy for bioinformaticians to automate end-to-end large data analysis, from scratch, in a way that is highly accessible to collaborators, by allowing them to both provide the required infrastructure and automate complex analyses over large datasets within the familiar Galaxy environment. Availability and implementation: http://bioblend.readthedocs.org/. Automated installation of BioBlend is available via PyPI (e.g. pip install bioblend). Alternatively, the source code is available from the GitHub repository (https://github.com/afgane/bioblend) under the MIT open source license. The library has been tested and is working on Linux, Macintosh and Windows-based systems. Contact: enis.afgan@unimelb.edu.au}",
+ issn = {1367-4803},
+ doi = {10.1093/bioinformatics/btt199},
+ url = {https://doi.org/10.1093/bioinformatics/btt199},
+}
+
+@article{Schuetz2018,
+ doi = {10.1021/acs.jcim.8b00614},
+ url = {https://doi.org/10.1021/acs.jcim.8b00614},
+ year = {2018},
+ month = nov,
+ publisher = {American Chemical Society ({ACS})},
+ volume = {59},
+ number = {1},
+ pages = {535--549},
+ author = {Doris A. Schuetz and Mattia Bernetti and Martina Bertazzo and Djordje Musil and Hans-Michael Eggenweiler and Maurizio Recanatini and Matteo Masetti and Gerhard F. Ecker and Andrea Cavalli},
+ title = {Predicting Residence Time and Drug Unbinding Pathway through Scaled Molecular Dynamics},
+ journal = {Journal of Chemical Information and Modeling}
+}
+
+@article{Pearl2006,
+ doi = {10.1146/annurev.biochem.75.103004.142738},
+ url = {https://doi.org/10.1146/annurev.biochem.75.103004.142738},
+ year = {2006},
+ month = jun,
+ publisher = {Annual Reviews},
+ volume = {75},
+ number = {1},
+ pages = {271--294},
+ author = {Laurence H. Pearl and Chrisostomos Prodromou},
+ title = {Structure and Mechanism of the {Hsp90} Molecular Chaperone Machinery},
+ journal = {Annual Review of Biochemistry}
+}
+
+@article{Vanommeslaeghe2009,
+ doi = {10.1002/jcc.21367},
+ url = {https://doi.org/10.1002/jcc.21367},
+ year = {2009},
+ publisher = {Wiley},
+ pages = {NA--NA},
+ author = {K. Vanommeslaeghe and E. Hatcher and C. Acharya and S. Kundu and S. Zhong and J. Shim and E. Darian and O. Guvench and P. Lopes and I. Vorobyov and A. D. Mackerell},
+ title = {{CHARMM} general force field: A force field for drug-like molecules compatible with the {CHARMM} all-atom additive biological force fields},
+ journal = {Journal of Computational Chemistry}
+}
+
+@article{Maier2015,
+ doi = {10.1021/acs.jctc.5b00255},
+ url = {https://doi.org/10.1021/acs.jctc.5b00255},
+ year = {2015},
+ month = jul,
+ publisher = {American Chemical Society ({ACS})},
+ volume = {11},
+ number = {8},
+ pages = {3696--3713},
+ author = {James A. Maier and Carmenza Martinez and Koushik Kasavajhala and Lauren Wickstrom and Kevin E. Hauser and Carlos Simmerling},
+ title = {ff14SB: Improving the Accuracy of Protein Side Chain and Backbone Parameters from ff99SB},
+ journal = {Journal of Chemical Theory and Computation}
+}
+
+@article{reif2012,
+author = {Reif, Maria M. and Hünenberger, Philippe H. and Oostenbrink, Chris},
+title = {New Interaction Parameters for Charged Amino Acid Side Chains in the {GROMOS} Force Field},
+journal = {Journal of Chemical Theory and Computation},
+volume = {8},
+number = {10},
+pages = {3705-3723},
+year = {2012},
+doi = {10.1021/ct300156h},
+
+URL = {
+ https://doi.org/10.1021/ct300156h
+
+},
+eprint = {
+ https://doi.org/10.1021/ct300156h
+
+}
+
+}
+
+
+@article{Mobley2018,
+author = {Mobley, David L. and Bannan, Caitlin C. and Rizzi, Andrea and Bayly, Christopher I. and Chodera, John D. and Lim, Victoria T. and Lim, Nathan M. and Beauchamp, Kyle A. and Slochower, David R. and Shirts, Michael R. and Gilson, Michael K. and Eastman, Peter
+K.},
+title = {Escaping Atom Types in Force Fields Using Direct Chemical Perception},
+journal = {Journal of Chemical Theory and Computation},
+volume = {14},
+number = {11},
+pages = {6076-6092},
+year = {2018},
+doi = {10.1021/acs.jctc.8b00640}
+}
+
+@misc{Swails2016,
+ title={{ParmEd}: Cross-program parameter and topology file editor and molecular mechanical simulator engine},
+ author={Swails, J and Hernandez, CX and Mobley, DL and Nguyen, H and Wang, LP and Janowski, P},
+ url={https://parmed.github.io/ParmEd/html/index.html},
+ year={2016},
+ note = {Accessed: 23.01.20}
+}
+
+@book{berendsen01,
+title={Simulating the Physical World: Hierarchical Modeling from Quantum Mechanics to Fluid Dynamics.},
+DOI={10.1017/CBO9780511815348},
+publisher={Cambridge University Press},
+address={Cambridge, United Kingdom},
+author={Berendsen, Herman J. C.},
+year={2007}
+}
+
+@incollection{Lemkul2020,
+ doi = {10.1016/bs.pmbts.2019.12.009},
+ year = {2020},
+ publisher = {Elsevier},
+ pages = {1--71},
+ author = {Justin A. Lemkul},
+ title = {Pairwise-additive and polarizable atomistic force fields for molecular dynamics simulations of proteins},
+ address = {New York},
+ booktitle = {Computational Approaches for Understanding Dynamical Systems: Protein Folding and Assembly}
+}
+
+@article{Wang2004,
+ doi = {10.1002/jcc.20035},
+ year = {2004},
+ publisher = {Wiley},
+ volume = {25},
+ number = {9},
+ pages = {1157--1174},
+ author = {Junmei Wang and Romain M. Wolf and James W. Caldwell and Peter A. Kollman and David A. Case},
+ title = {Development and testing of a general {AMBER} force field},
+ journal = {Journal of Computational Chemistry}
+}
+
+@article{Onufriev2017,
+ doi = {10.1002/wcms.1347},
+ year = {2017},
+ month = nov,
+ publisher = {Wiley},
+ volume = {8},
+ number = {2},
+ pages = {e1347},
+ author = {Alexey V. Onufriev and Saeed Izadi},
+ title = {Water models for biomolecular simulations},
+ journal = {Wiley Interdisciplinary Reviews: Computational Molecular Science}
+}
+
+@article{Rose2018ngl,
+ author = {Rose, Alexander S and Bradley, Anthony R and Valasatava, Yana and Duarte, Jose M and Prlić, Andreas and Rose, Peter W},
+ title = "{NGL viewer: web-based molecular graphics for large complexes}",
+ journal = {Bioinformatics},
+ volume = {34},
+ number = {21},
+ pages = {3755-3758},
+ year = {2018},
+ month = {05},
+ abstract = "{The interactive visualization of very large macromolecular complexes on the web is becoming a challenging problem as experimental techniques advance at an unprecedented rate and deliver structures of increasing size. We have tackled this problem by developing highly memory-efficient and scalable extensions for the NGL WebGL-based molecular viewer and by using Macromolecular Transmission Format (MMTF), a binary and compressed MMTF. These enable NGL to download and render molecular complexes with millions of atoms interactively on desktop computers and smartphones alike, making it a tool of choice for web-based molecular visualization in research and education. The source code is freely available under the MIT license at github.com/arose/ngl and distributed on NPM (npmjs.com/package/ngl). MMTF-JavaScript encoders and decoders are available at github.com/rcsb/mmtf-javascript.}",
+ issn = {1367-4803},
+ doi = {10.1093/bioinformatics/bty419},
+}
+
+
+@article{Trott2009,
+ year = {2009},
+ publisher = {Wiley},
+ pages = {455--461},
+ author = {Oleg Trott and Arthur J. Olson},
+ title = {{AutoDock Vina}: Improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading},
+ journal = {Journal of Computational Chemistry},
+ doi={10.1002/jcc.21334},
+ volume={31},
+ number={2},
+}
+
+@article{Ruiz2014,
+ title={{rDock}: a fast, versatile and open source program for docking ligands to proteins and nucleic acids},
+ author={Ruiz-Carmona, Sergio and Alvarez-Garcia, Daniel and Foloppe, Nicolas and Garmendia-Doval, A Beatriz and Juhos, Szilveszter and Schmidtke, Peter and Barril, Xavier and Hubbard, Roderick E and Morley, S David},
+ journal={PLoS Computational Biology},
+ volume={10},
+ number={4},
+ pages={e1003571},
+ year={2014},
+ doi={10.1371/journal.pcbi.1003571},
+ publisher={Public Library of Science}
+}
+
+
+@misc{gtn_comp,
+ key = {Galaxy {Training}: {Computational} chemistry},
+ title = {Galaxy {Training}: {Computational} chemistry},
+ shorttitle = {Galaxy {Training}},
+ url = {https://training.galaxyproject.org/training-material/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.html},
+ abstract = {Modelling, simulation and analysis of biomolecular systems},
+ language = {en},
+
+}
+
+@misc{gtn_collections,
+ key = {Galaxy Training: Collections: Using dataset collection},
+ title = {Galaxy Training: Collections: Using dataset collection},
+ shorttitle = {Galaxy {Training}},
+ url = {https://galaxyproject.github.io/training-material/topics/galaxy-data-manipulation/tutorials/collections/tutorial.html},
+ language = {en},
+ urldate = {2020-04-29},
+}
+
+@misc{gtn_toworkflow,
+ key = {Workflows: Extracting Workflows from Histories},
+ title = {Workflows: Extracting Workflows from Histories},
+ shorttitle = {Galaxy {Training}},
+ url = {https://galaxyproject.github.io/training-material/topics/galaxy-ui/tutorials/history-to-workflow/tutorial.html},
+ language = {en},
+ urldate = {2020-04-29},
+}
+
+@misc{gtn_multiple,
+ key = {Histories: Understanding Galaxy history system},
+ title = {Galaxy Training: Histories: Understanding Galaxy history system},
+ shorttitle = {Galaxy {Training}},
+ url = {https://galaxyproject.github.io/training-material/topics/galaxy-ui/tutorials/history/tutorial.html},
+ language = {en},
+ urldate = {2020-04-29},
+}
+
+@misc{gtn_api,
+ key = {Scripting Galaxy using the API and BioBlend},
+ title = {Galaxy Training: Scripting Galaxy using the API and BioBlend},
+ shorttitle = {Galaxy {Training}},
+ url = {https://training.galaxyproject.org/training-material/topics/dev/tutorials/bioblend-api/slides.html},
+ language = {en},
+ urldate = {2020-04-29},
+}
+
+
+@misc{eu_6hhr,
+ key = {Protein-ligand docking (6hhr)},
+ title = {Galaxy | Europe | Accessible History | Protein-ligand docking (6hhr)},
+ url = {https://cheminformatics.usegalaxy.eu/u/sbray/h/protein-ligand-docking-6hhr},
+ language = {en},
+ urldate = {2020-04-29}
+}
+
+@misc{eu_htmd_simulation_workflow,
+ key = {Protein-ligand HTMD simulation workflow},
+ title = {Galaxy | Europe | Accessible History | Protein-ligand HTMD simulation},
+ url = {https://cheminformatics.usegalaxy.eu/u/sbray/w/protein-ligand-htmd-sim},
+ language = {en},
+ urldate = {2020-04-29}
+}
+
+@misc{eu_htmd_analysis_workflow,
+ key = {Protein-ligand HTMD analysis workflow},
+ title = {Galaxy | Europe | Accessible History | Protein-ligand HTMD analysis},
+ url = {https://cheminformatics.usegalaxy.eu/u/sbray/w/protein-ligand-htmd-analysis},
+ language = {en},
+ urldate = {2020-04-29}
+}
+
+@misc{za_htmd_simulation_workflow,
+ key = {Protein-ligand HTMD simulation workflow},
+ title = {Galaxy | South Africa | Accessible History | Protein-ligand HTMD simulation},
+ url = {https://galaxy-compchem.ilifu.ac.za/u/sbray/w/protein-ligand-htmd-sim},
+ language = {en},
+ urldate = {2020-04-29}
+}
+
+@misc{za_htmd_analysis_workflow,
+ key = {Protein-ligand HTMD analysis workflow},
+ title = {Galaxy | South Africa | Accessible History | Protein-ligand HTMD analysis},
+ url = {https://galaxy-compchem.ilifu.ac.za/u/sbray/w/protein-ligand-htmd-analysis},
+ language = {en},
+ urldate = {2020-04-29}
+}
+
+
+@article{Stebbins1997,
+ doi = {10.1016/s0092-8674(00)80203-2},
+ year = {1997},
+ month = apr,
+ publisher = {Elsevier {BV}},
+ volume = {89},
+ number = {2},
+ pages = {239--250},
+ author = {Charles E Stebbins and Alicia A Russo and Christine Schneider and Neal Rosen and F.Ulrich Hartl and Nikola P Pavletich},
+ title = {Crystal Structure of an {Hsp90}{\textendash}Geldanamycin Complex: Targeting of a Protein Chaperone by an Antitumor Agent},
+ journal = {Cell}
+}
+
+@article{Hermane2019,
+ doi = {10.1039/c9ob00892f},
+ year = {2019},
+ publisher = {Royal Society of Chemistry ({RSC})},
+ volume = {17},
+ number = {21},
+ pages = {5269--5278},
+ author = {Jekaterina Hermane and Simone Eichner and Lena Mancuso and Benjamin Schr\"{o}der and Florenz Sasse and Carsten Zeilinger and Andreas Kirschning},
+ title = {New geldanamycin derivatives with anti {Hsp} properties by mutasynthesis},
+ journal = {Organic {\&} Biomolecular Chemistry}
+}
+
+@article{Schopf2017,
+ doi = {10.1038/nrm.2017.20},
+ year = {2017},
+ month = apr,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {18},
+ number = {6},
+ pages = {345--360},
+ author = {Florian H. Schopf and Maximilian M. Biebl and Johannes Buchner},
+ title = {The {HSP}90 chaperone machinery},
+ journal = {Nature Reviews Molecular Cell Biology}
+}
+
+
+@misc{ligand_resorcinol,
+ key = {3-(2,4-{Dihydroxyphenyl})-4-(2-fluorophenyl)-{1H}-1,2,4-triazole-5-thione},
+ title = {3-(2,4-{Dihydroxyphenyl})-4-(2-fluorophenyl)-{1H}-1,2,4-triazole-5-thione},
+ url = {https://pubchem.ncbi.nlm.nih.gov/compound/135508238},
+ abstract = {3-(2,4-Dihydroxyphenyl)-4-(2-fluorophenyl)-1H-1,2,4-triazole-5-thione {\textbar} C14H10FN3O2S {\textbar} CID 135508238 - structure, chemical names, physical and chemical properties, classification, patents, literature, biological activities, safety/hazards/toxicity information, supplier lists, and more.},
+ language = {en},
+ urldate = {2020-04-29},
+ author = {PubChem},
+ note = {Library Catalog: pubchem.ncbi.nlm.nih.gov},
+}
+
+
+
+@article{kuzmanic_determination_2010,
+ title = {Determination of {Ensemble}-{Average} {Pairwise} {Root} {Mean}-{Square} {Deviation} from {Experimental} {B}-{Factors}},
+ volume = {98},
+ issn = {0006-3495},
+ doi = {10.1016/j.bpj.2009.11.011},
+ abstract = {Root mean-square deviation (RMSD) after roto-translational least-squares fitting is a measure of global structural similarity of macromolecules used commonly. On the other hand, experimental x-ray B-factors are used frequently to study local structural heterogeneity and dynamics in macromolecules by providing direct information about root mean-square fluctuations (RMSF) that can also be calculated from molecular dynamics simulations. We provide a mathematical derivation showing that, given a set of conservative assumptions, a root mean-square ensemble-average of an all-against-all distribution of pairwise RMSD for a single molecular species, {\textless}RMSD2{\textgreater}1/2, is directly related to average B-factors ({\textless}B{\textgreater}) and {\textless}RMSF2{\textgreater}1/2. We show this relationship and explore its limits of validity on a heterogeneous ensemble of structures taken from molecular dynamics simulations of villin headpiece generated using distributed-computing techniques and the Folding@Home cluster. Our results provide a basis for quantifying global structural diversity of macromolecules in crystals directly from x-ray experiments, and we show this on a large set of structures taken from the Protein Data Bank. In particular, we show that the ensemble-average pairwise backbone RMSD for a microscopic ensemble underlying a typical protein x-ray structure is ∼1.1 Å, under the assumption that the principal contribution to experimental B-factors is conformational variability.},
+ number = {5},
+ urldate = {2020-04-30},
+ journal = {Biophysical Journal},
+ author = {Kuzmanic, Antonija and Zagrovic, Bojan},
+ month = mar,
+ year = {2010},
+ pmcid = {PMC2830444},
+ pages = {861--871},
+}
+
+
+@article{berjanskii_nmr_2006,
+ title = {{NMR}: prediction of protein flexibility},
+ volume = {1},
+ copyright = {2006 Nature Publishing Group},
+ issn = {1750-2799},
+ shorttitle = {{NMR}},
+ doi = {10.1038/nprot.2006.108},
+ abstract = {We present a protocol for predicting protein flexibility from NMR chemical shifts. The protocol consists of (i) ensuring that the chemical shift assignments are correctly referenced or, if not, performing a reference correction using information derived from the chemical shift index, (ii) calculating the random coil index (RCI), and (iii) predicting the expected root mean square fluctuations (RMSFs) and order parameters (S2) of the protein from the RCI. The key advantages of this protocol over existing methods for studying protein dynamics are that (i) it does not require prior knowledge of a protein's tertiary structure, (ii) it is not sensitive to the protein's overall tumbling and (iii) it does not require additional NMR measurements beyond the standard experiments for backbone assignments. When chemical shift assignments are available, protein flexibility parameters, such as S2 and RMSF, can be calculated within 1–2 h using a spreadsheet program.},
+ language = {en},
+ number = {2},
+ urldate = {2020-04-30},
+ journal = {Nature Protocols},
+ author = {Berjanskii, Mark and Wishart, David S.},
+ month = aug,
+ year = {2006},
+ note = {Number: 2
+Publisher: Nature Publishing Group},
+ pages = {683--688},
+
+}
+
+@article{Lemkul2019,
+ doi = {10.33011/livecoms.1.1.5068},
+ year = {2019},
+ publisher = {University of Colorado at Boulder},
+ volume = {1},
+ number = {1},
+ author = {Justin Lemkul},
+ title = {From Proteins to Perturbed {H}amiltonians: A Suite of Tutorials for the {GROMACS}-2018 Molecular Simulation Package [Article v1.0]},
+ journal = {Living Journal of Computational Molecular Science}
+}
+
+@article{HARVEY20121059,
+title = "High-throughput molecular dynamics: the powerful new tool for drug discovery",
+journal = "Drug Discovery Today",
+volume = "17",
+number = "19",
+pages = "1059 - 1062",
+year = "2012",
+issn = "1359-6446",
+doi = "10.1016/j.drudis.2012.03.017",
+author = "Matthew J. Harvey and Gianni {De Fabritiis}",
+
+}
+
+@article{Guterres2020,
+ doi = {10.1021/acs.jcim.0c00057},
+ year = {2020},
+ month = mar,
+ publisher = {American Chemical Society ({ACS})},
+ volume = {60},
+ number = {4},
+ pages = {2189--2198},
+ author = {Hugo Guterres and Wonpil Im},
+ title = {Improving Protein-Ligand Docking Results with High-Throughput Molecular Dynamics Simulations},
+ journal = {Journal of Chemical Information and Modeling}
+}
+
+@article{SousadaSilva2012,
+ doi = {10.1186/1756-0500-5-367},
+ year = {2012},
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {5},
+ number = {1},
+ pages = {367},
+ author = {Alan W Sousa da Silva and Wim F Vranken},
+ title = {{ACPYPE} - {AnteChamber} {PYthon} Parser {interfacE}},
+ journal = {{BMC} Research Notes}
+}
\ No newline at end of file
diff --git a/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.md b/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.md
new file mode 100644
index 00000000000000..1ad95ca8fa7cec
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/htmd-analysis/tutorial.md
@@ -0,0 +1,630 @@
+---
+layout: tutorial_hands_on
+
+title: High Throughput Molecular Dynamics and Analysis
+level: Advanced
+zenodo_link: 'https://zenodo.org/badge/latestdoi/260474701'
+questions:
+- How are protein-ligand systems parameterized for molecular dynamics simulation?
+- What kind of analysis can be carried out on molecular trajectories?
+- How can high-throughput MD be used to study multiple ligands?
+objectives:
+- Learn about force-fields and MD parameterization
+- Learn how to conduct MD simulation and analysis for a protein-ligand system
+- Understand how different molecular interactions contribute to the binding affinity of various ligands for the Hsp90 protein.
+time_estimation: 3H
+key_points:
+- Simulating protein-ligand systems is more complex than simply simulating protein-only systems.
+- There are a range of Galaxy tools for MD simulation (using GROMACS) and analysis.
+- Galaxy makes assembling and scaling up workflows for high-throughput MD straightforward for users.
+contributors:
+- simonbray
+- tsenapathi
+- chrisbarnettster
+- bgruening
+
+---
+
+
+# Introduction
+{:.no_toc}
+
+This tutorial provides an introduction to using high-throughput molecular dynamics to study protein-ligand interaction, as applied to the N-terminus of Hsp90 (heat shock protein 90).
+
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Background
+
+## What is high-throughput molecular dynamics?
+Molecular dynamics (MD) is a method to simulate molecular motion by iterative application of Newton's laws of motion. It is often applied to large biomolecules such as proteins or nucleic acids. A common application is to assess the interaction between these macromolecules and a number of small molecules (e.g. potential drug candidates). This tutorial provides a guide to setting up and running a high-throughput workflow for screening multiple small molecules, using the open-source GROMACS tools provided through the Galaxy platform. Following simulation, the trajectory data is analyzed using a range of tools to investigate structural properties and correlations over time.
+
+
+## Why is Hsp90 interesting to study?
+The 90 kDa heat shock protein (Hsp90) is a chaperone protein responsible for catalyzing the conversion of a wide variety of proteins to a functional form; examples of the Hsp90 clientele, which totals several hundred proteins, include nuclear steroid hormone receptors and protein kinases ({% cite Pearl2006 %}). The mechanism by which Hsp90 acts varies between clients, as does the client binding site; the process is dependent on post-translational modifications of Hsp90 and the identity of co-chaperones which bind and regulate the conformational cycle ({% cite Schopf2017 %}).
+
+Due to its vital biochemical role as a chaperone protein involved in facilitating the folding of many client proteins, Hsp90 is an attractive pharmaceutical target. In particular, as protein folding is a potential bottleneck to cellular reproduction and growth, blocking Hsp90 function using inhibitors which bind tightly to the ATP binding site of the NTD could assist in treating cancer; for example, the antibiotic geldanamycin and its analogs are under investigation as possible anti-tumor agents ({% cite Stebbins1997 %}, {% cite Hermane2019 %}).
+
+In the structure which will be examined during this tutorial, the ligand of concern is a resorcinol, a common class of compounds with affinity for the Hsp90 N-terminal domain. It is registered in the PubChem database under the compound ID 135508238 ({% cite ligand_resorcinol %}). As can be seen by viewing the PDB structure, the resorcinol part of the structure is embedded in the binding site, bound by a hydrogen bond to residue aspartate-93. The ligand structure also contains a triazole and a fluorophenyl ring, which lie nearer to the surface of the protein.
+
+![Hsp90 structure, with a ligand bound]({% link topics/computational-chemistry/images/hsp90lig.png %} "Structure of Hsp90, with a ligand bound. Click to view in NGL. ({% cite ngl %})")
+
+
+## Get data
+
+As a first step, we create a new Galaxy history and then we download a crystal structure for the Hsp90 protein from the Protein Data Bank (PDB). The structure is provided under accession code `6HHR` ({% cite Schuetz2018 %}) and shows Hsp90 in complex with a ligand belonging to the resorcinol class.
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial
+> 2. Search Galaxy for the 'Get PDB' tool. Request the accession code ```6hhr```.
+> 3. Rename the dataset to 'Hsp90 structure'
+> 4. Check that the datatype is correct (PDB file).
+>
+> {% include snippets/change_datatype.md datatype="pdb" %}
+>
+{: .hands_on}
+
+
+# Simulation
+
+## Topology generation
+
+Now that we have downloaded a PDB structure of the protein we wish to study, we will start preparing it for MD simulation; this process may also be referred to as parameterization or topology generation.
+
+GROMACS distinguishes between constant and dynamic attributes of the atoms in the system. The constant attributes (e.g. atom charges, bonds connecting atoms) are listed in the topology (TOP file), while dynamic attributes (attributes that can change during a simulation, e.g. atom position, velocities and forces) are stored in structure (PDB or GRO) and trajectory (XTC and TRR) files.
+
+The PDB file we start from only explicitly states atom element (i.e. carbon, oxygen, and so on) and 3D Cartesian coordinates of each atom; additionally, it will usually not include hydrogen atoms. Therefore, before beginning simulation, we need to calculate the rest of the information contained within the topology file. Parameterization needs to be done separately for the ligand and protein. Therefore, the first step is to separate the PDB file into two sets of coordinates - one for the ligand and one for the protein.
+
+
+### Extract protein and ligand coordinates
+
+Parameterization needs to be done separately for the ligand and protein. Therefore, the first step is to separate the PDB file into two sets of coordinates - one for the ligand and one for the protein. Here, we can make use of the simple text manipulation tools integrated into Galaxy.
+
+> ### {% icon question %} Question
+>
+> 1. Why do protein and ligand need to be parameterized separately?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Protein and small molecules are constructed differently. A protein is made up of 20 different building blocks (amino acids) - therefore, to construct a protein topology, amino acid topologies simply need to be combined appropriately. By contrast, the structure of small molecules is far more flexible and needs to be calculated for each different structure.
+> >
+> {: .solution}
+>
+{: .question}
+
+> ### {% icon hands_on %} Hands-on: Separate protein and ligand coordinates
+>
+> 1. **Search in textfiles** {% icon tool %} with the following parameters:
+> - *"Select lines from"*: 'Hsp90 structure'
+> - *"that"*: `Don't Match`
+> - *"Regular Expression"*: `HETATM`
+> 2. Rename output to 'Protein (PDB)'
+> 3. **Search in textfiles** {% icon tool %} with the following parameters:
+> - *"Select lines from"*: 'Hsp90 structure'
+> - *"that"*: `Match`
+> - *"Regular Expression"*: `AG5E`
+> 4. Rename output to 'Ligand (PDB)'
+>
+{: .hands_on}
+
+Here, we simply filter the original PDB twice: once for lines which do not match `HETATM`, which returns a PDB file containing only protein, not ligand and solvent; and once for lines which match the ligand's identity code `AG5E`, which returns a PDB file containing only the ligand.
+
+### Set up protein topology
+
+Firstly, we need to calculate the topology for the protein file. We will use the **GROMACS initial setup** {% icon tool %} tool.
+
+> ### {% icon hands_on %} Hands-on: Generate protein topology
+>
+> 1. **GROMACS initial setup** {% icon tool %} with the following parameters:
+> - *"PDB input file"*: 'Protein (PDB)' file
+> - *"Force field"*: `AMBER99SB`
+> - *"Water model"*: `TIP3P`
+> - *"Generate detailed log"*: `Yes`
+>
+> > ### {% icon comment %} Comment
+> > A force field is essentially a function to calculate the potential energy of a system, based on various empirical parameters (for the atoms, bonds, charges, dihedral angles and so on). There are a number of families of forcefields; some of the most commonly used include CHARMM, AMBER, GROMOS and OpenFF (for a recent, accessible overview see {% cite Lemkul2020 %}).
+> >
+> >
+> > A wide range of models exist for modeling water. Here we are using the common TIP3P model, which is an example of a 'three-site model' - so-called because the molecule is modeled using three points, corresponding to the three atoms of water. (Four- and five-site models include additional 'dummy atoms' representing the negative charges of the lone pairs of the oxygen atom).
+> {: .comment}
+>
+{: .hands_on}
+
+The tool produces four outputs: a GRO file (containing the coordinates of the protein), a TOP file (containing other information, including on charges, masses, bonds and angles), an ITP file (which will be used to restrain the protein position in the equilibration step later on), and a log for the tool.
+
+Please note that all GROMACS tools output a log. Generally, you only need to look at this when a job fails, as it provides useful information for debugging if we encounter any problems.
+
+
+
+### Generate a topology for the ligand
+
+To generate a topology for the ligand, we will use the **acpype** {% icon tool %} tool ({% cite SousadaSilva2012 %}). This provides a convenient interface to the AmberTools suite and allows us to easily create the ligand topology in the format required by GROMACS.
+
+> ### {% icon hands_on %} Hands-on: Generate ligand topology
+>
+> 1. **Generate MD topologies for small molecules** {% icon tool %} with the following parameters:
+> - *"Input file"*: 'Ligand (PDB)'
+> - *"Charge of the molecule"*: `0`
+> - *"Multiplicity"*: `1`
+> - *"Force field to use for parameterization"*: `gaff`
+> - *"Save GRO file?"*: `Yes`
+>
+{: .hands_on}
+
+Here, we use GAFF (general AMBER force field), a generalized version of the AMBER force field which can be applied to almost any small organic molecule.
+
+We select charge and multiplicity as appropriate. The ligand studied here is neutral, so the charge is 0. The multiplicity is 1, which will be the case for every simple organic molecule we encounter; only if we deal with more exotic species such as metal complexes or carbenes will we need to consider higher values.
+
+Having generated topologies, we now need to combine them, define the box
+which contains the system, add solvent and ions, and perform an energy
+minimization step.
+
+
+
+
+## Combine topology and GRO files
+
+While we have separate topology and structure files for both protein and ligand, we need to combine them into a single set of files to continue with the simulation setup. A dedicated Galaxy tool is provided for this, using the Python library ParmEd ({% cite Swails2016 %}).
+
+> ### {% icon hands_on %} Hands-on: Combine GRO and topology files
+>
+> 1. **Merge GROMACS topologies** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Protein topology (TOP) file"*: `TOP` file created by the **GROMACS initial setup** tool
+> - {% icon param-file %} *"Ligand topology (TOP or ITP file)"*: `Topology` created by the **acpype** tool
+> - {% icon param-file %} *"Protein structure (GRO) file"*: `GRO` file created by the **GROMACS initial setup** tool
+> - {% icon param-file %} *"Ligand structure (GRO) file"*: `Structure file (GRO format)` created by the **acpype** tool
+>
+{: .hands_on}
+
+
+## Create the simulation box with **GROMACS structure configuration**
+
+The next step, once combined coordinate (GRO) and topology (TOP) files have been created, is to create a simulation box in which the system is situated.
+
+> ### {% icon hands_on %} Hands-on: Create simulation box
+>
+> 1. **GROMACS structure configuration** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input structure"*: `System GRO file` (Input dataset)
+> - *"Configure box?"*: `Yes`
+> - *"Box dimensions in nanometers"*: `1.0`
+> - *"Box type"*: `Triclinic`
+> - *"Generate detailed log"*: `Yes`
+>
+>
+> > ### {% icon comment %} Comment
+> >
+> > This tool returns a new GRO structure file, containing the same coordinates as before, but defining a simulation box such that every atom is a minimum of 1 nm from the box boundary. A distance of at least 1 nm is recommended to avoid interactions between the protein and its mirror image. On the other hand, increasing the box size too far will increase the simulation time, due to the greater number of solvent molecules which need to be treated. A variety of box shapes are available to choose: we select triclinic, as it provides the most efficient packing in space and thus fewer computational resources need to be devoted to simulation of solvent.
+> {: .comment}
+>
+{: .hands_on}
+
+
+
+## Solvation
+
+The next step is solvation of the newly created simulation box - as we are simulating under biological conditions, we use water as the solvent. Note that the system is charged (depending on the pH) - the solvation tool also adds sodium or chloride ions (replacing existing water molecules) as required to neutralize this.
+
+> ### {% icon hands_on %} Hands-on: Solvation
+>
+> 1. **GROMACS solvation and adding ions** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"GRO structure file"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - {% icon param-file %} *"System topology"*: `output`
+> - *"Generate detailed log"*: `Yes`
+>
+>
+{: .hands_on}
+
+
+## Energy minimization
+
+After the solvation step, parameterization of the system is complete and preparatory simulations can be performed. The first of these is energy minimization, which can be carried out using the **GROMACS energy minimization** {% icon tool %} tool.
+
+> ### {% icon question %} Question
+>
+> 1. What is the purpose of energy minimization?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Running an energy minimization (EM) algorithm relaxes the structure by removing any steric clashes or unusual geometry which would artificially raise the energy of the system.
+> >
+> {: .solution}
+>
+{: .question}
+
+
+> ### {% icon hands_on %} Hands-on: Energy minimization
+>
+> 1. **GROMACS energy minimization** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"GRO structure file."*: GRO output of **GROMACS solvation and adding ions** {% icon tool %}
+> - {% icon param-file %} *"Topology (TOP) file."*: TOP output of **GROMACS solvation and adding ions** {% icon tool %}
+> - *"Parameter input"*: `Use default (partially customisable) setting`
+> - *"Number of steps for the MD simulation"*: `50000`
+> - *"EM tolerance"*: `1000.0`
+> - *"Generate detailed log"*: `Yes`
+> - Rename output to `Minimized GRO file`
+>
+{: .hands_on}
+
+The EM tolerance here refers to the maximum force which will be allowed in a minimized system. The simulation will be terminated when the maximum force is less than this value, or when 50000 steps have elapsed.
+
+As an aside, we can use the `Extract energy components` tool to plot the convergence of the potential energy during the minimization.
+
+> ### {% icon hands_on %} Hands-on: Checking EM convergence
+>
+> 1. **Extract energy components with GROMACS** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"EDR file."*: EDR output of **GROMACS energy minimization** {% icon tool %}
+> - *"Terms to calculate"*: `Potential`
+> - *"Output format"*: `Galaxy tabular`
+> 2. On the output tabular file, click on the 'Visualize this data' icon. This provides a range of visualization options. Select 'Line chart (jqPlot)'.
+> 3. In the visualization window which appears, click on `Select data.` Enter the following parameters:
+> - *"Provide a label"*: `Energy potential`
+> - *"Values for x-axis"*: `Column: 1`
+> - *"Values for y-axis"*: `Column: 2`
+>
+{: .hands_on}
+
+The resulting plot should resemble the figure below. The system first drops rapidly in energy, before slowly converging on the minimized state.
+
+![Energy potential during the EM simulation]({% link topics/computational-chemistry/images/empot.png %} "Energy potential during the EM simulation. Click to view as a Galaxy visualization")
+
+## Equilibration
+
+We now carry out equilibration in two stages: NVT and NPT. This is discussed at greater length in the basic GROMACS tutorial. Equilibration requires restraining the protein structure - we use the ITP file produced by the initial setup tool for this.
+
+> ### {% icon comment %} More detail about equilibration
+>
+> At this point equilibration of the solvent around the solute (i.e. the protein) is necessary. This is performed in two stages: equilibration under an NVT (or isothermal-isochoric) ensemble, followed by an NPT (or isothermal-isobaric) ensemble. Use of the NVT ensemble entails maintaining constant number of particles, volume and temperature, while the NPT ensemble maintains constant number of particles, pressure and temperature.
+>
+> For equilibration, the protein must be held in place while the solvent is allowed to move freely around it. This is achieved using the position restraint file (ITP) we created in system setup. When we specify this restraint, protein movement is not forbidden, but is energetically penalized.
+>
+{: .comment}
+
+
+> ### {% icon hands_on %} Hands-on: NVT equilibration
+>
+> 1. **GROMACS simulation** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"GRO structure file"*: `Minimized GRO file` (from energy minimization step)
+> - {% icon param-file %} *"Topology (TOP) file"*: TOP file produced by solvation step.
+> - In *"Inputs"*:
+> - {% icon param-file %} *"Position restraint (ITP) file"*: ITP file produced by initial setup step.
+> - In *"Outputs"*:
+> - *"Trajectory output"*: `Return .xtc file (reduced precision)`
+> - *"Structure output"*: `Return .gro file`
+> - *"Produce a checkpoint (CPT) file"*: `Produce CPT output`
+> - *"Produce an energy (EDR) file"*: `Produce EDR output`
+> - In *"Settings"*:
+> - *"Parameter input"*: `Use default (partially customisable) setting`
+> - *"Bond constraints (constraints)"*: `All bonds (all-bonds).`
+> - *"Temperature /K"*: `300`
+> - *"Step length in ps"*: `0.002`
+> - *"Number of steps that elapse between saving data points (velocities, forces, energies)"*: `1000`
+> - *"Number of steps for the simulation"*: `50000`
+> - *"Generate detailed log"*: `Yes`
+>
+>
+{: .hands_on}
+
+Once the NVT equilibration is complete, it is worth using the `Extract energy components` tool again to check whether the system temperature has converged on 300 K. This can be done as described for energy minimization, this time specifying `Temperature` under `Terms to calculate` rather than `Potential`. The plot should show the temperature reaching 300 K and remaining there, albeit with some fluctuation.
+
+Having stabilized the temperature of the system with NVT equilibration,
+we also need to stabilize the pressure of the system. We therefore
+equilibrate again using the NPT (constant number of particles, pressure,
+temperature) ensemble.
+
+Note that we can continue where the last simulation left off (with new
+parameters) by using the checkpoint (CPT) file saved at the end of the
+NVT simulation.
+
+> ### {% icon hands_on %} Hands-on: NPT equilibration
+>
+> 1. **GROMACS simulation** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"GRO structure file"*: GRO output of **GROMACS simulation** {% icon tool %} (NVT equilibration)
+> - {% icon param-file %} *"Topology (TOP) file"*: TOP file produced by solvation step.
+> - In *"Inputs"*:
+> - {% icon param-file %} *"Checkpoint (CPT) file"*: Output of **GROMACS simulation** {% icon tool %} (NVT equilibration)
+> - {% icon param-file %} *"Position restraint (ITP) file"*: ITP file produced by initial setup step.
+> - In *"Outputs"*:
+> - *"Trajectory output"*: `Return .xtc file (reduced precision)`
+> - *"Structure output"*: `Return .gro file`
+> - *"Produce a checkpoint (CPT) file"*: `Produce CPT output`
+> - *"Produce an energy (EDR) file"*: `Produce EDR output`
+> - In *"Settings"*:
+> - *"Ensemble"*: `Isothermal-isobaric ensemble (NPT)`
+> - *"Parameter input"*: `Use default (partially customisable) setting`
+> - *"Bond constraints (constraints)"*: `All bonds (all-bonds).`
+> - *"Temperature /K"*: `300`
+> - *"Step length in ps"*: `0.002`
+> - *"Number of steps that elapse between saving data points (velocities, forces, energies)"*: `1000`
+> - *"Number of steps for the simulation"*: `50000`
+> - *"Generate detailed log"*: `Yes`
+>
+>
+{: .hands_on}
+
+After the NPT equilibration is complete, **Extract energy components** {% icon tool %} can be used again to view the pressure of the system. This is done as described for energy minimization, specifying `Pressure` under `Terms to calculate`. The plot should show the pressure converging on 1 bar and remaining there, although some fluctuation is expected.
+
+## Production simulation
+
+We can now remove the restraints and continue with the production simulation. The simulation will run for 1 million steps, with a step size of 1 fs, so will have a total length of 1 ns. This is rather short compared to the state-of-the-art, but sufficient for the purposes of a tutorial. For longer-scale simulations, the tool can be used multiple times (with the checkpoint file) to continue the existing simulation.
+
+> ### {% icon hands_on %} Hands-on: Production simulation
+>
+> 1. **GROMACS simulation** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"GRO structure file"*: Output of **GROMACS simulation** {% icon tool %} (NPT equilibration)
+> - {% icon param-file %} *"Topology (TOP) file"*: Output of the solvation step
+> - In *"Inputs"*:
+> - {% icon param-file %} *"Checkpoint (CPT) file"*: Output of **GROMACS simulation** {% icon tool %} (NPT equilibration)
+> - In *"Outputs"*:
+> - *"Trajectory output"*: `Return .xtc file (reduced precision)`
+> - *"Structure output"*: `Return .gro file`
+> - *"Produce a checkpoint (CPT) file"*: `Produce CPT output`
+> - In *"Settings"*:
+> - *"Ensemble"*: `Isothermal-isobaric ensemble (NPT)`
+> - *"Parameter input"*: `Use default (partially customisable) setting`
+> - *"Temperature /K"*: `300`
+> - *"Step length in ps"*: `0.001`
+> - *"Number of steps that elapse between saving data points (velocities, forces, energies)"*: `1000`
+> - *"Number of steps for the simulation"*: `1000000`
+> - *"Generate detailed log"*: `Yes`
+>
+>
+{: .hands_on}
+
+
+# Analysis
+
+After the completion of the simulation, the following questions arise: 1) is the simulation converged enough, and 2) what interesting molecular properties are observed. To answer these questions, an analysis of the GROMACS simulation outputs (structure and trajectory file) will be carried out using Galaxy tools developed for computational chemistry ({% cite senapathi_biomolecular_2019 %}) based on popular analysis software, such as MDAnalysis ({% cite michaudagrawal_mdanalysis_2011 %}), MDTraj ({% cite mcgibbon_mdtraj_2015 %}), and Bio3D ({% cite skjaerven_integrating_2014 %}). These tools output both tabular files and a variety of attractive plots.
+
+
+## Create PDB file needed by most analysis tools
+
+Before beginning a detailed analysis, the structure and trajectory files generated previously need to be converted into different formats. First, convert the structural coordinates of the system in GRO format into PDB format. This PDB file will be used by most analysis tools as a starting structure. Next, convert the trajectory from XTC to DCD format, as a number of tools (particularly those based on Bio3D) only accept trajectories in DCD format.
+
+> ### {% icon hands_on %} Hands-on: Convert coordinate format
+>
+> 1. **GROMACS structure configuration** {% icon tool %} with the following parameters:
+> - *"Output format"*: `PDB file`
+> - *"Configure box?"*: `No`
+>
+> > ### {% icon comment %} Comment
+> >
+> > This tool can also be used to carry out initial setup (as discussed in the simulation methods section) for GROMACS simulations and convert from PDB to GRO format.
+> {: .comment}
+>
+{: .hands_on}
+
+
+## Convert trajectory to DCD format
+
+Convert from XTC to DCD format. A number of the analysis tools being used have been built to analyse trajectories in CHARMM's DCD format.
+
+> ### {% icon hands_on %} Hands-on: Convert trajectory format
+>
+> 1. **MDTraj file converter** {% icon tool %} with the following parameters:
+> - *"Output format"*: `DCD file`
+>
+> > ### {% icon comment %} Comment
+> >
+> > This tool can also be used to interconvert between several trajectory formats.
+> {: .comment}
+>
+{: .hands_on}
+
+
+## RMSD analysis - protein
+
+The Root Mean Square Deviation (RMSD) and Root Mean Square Fluctuation (RMSF) are calculated to check the stability and conformation of the protein and ligand through the course of the simulation.
+RMSD is a standard measure of structural distance between coordinate
+sets that measures the average distance between a group of atoms. The
+RMSD of the Cα atoms of the protein backbone is calculated here and
+is a measure of how much the protein conformation has changed between different time points in the trajectory.
+
+
+> ### {% icon hands_on %} Hands-on: RMSD Analysis
+>
+> 1. **RMSD Analysis** {% icon tool %} with the following parameters:
+> - *"Select domains"*: `C-alpha`
+>
+> > ### {% icon comment %} Comment
+> >
+> > Note that for more complex systems, you may need to consider a more focused selection. For example, if you have a ligand that is a protein consider modifying this selection.
+> {: .comment}
+>
+{: .hands_on}
+
+![RMSD timeseries Hsp90](../../images/htmd_analysis_rmsd1_series.png "RMSD timeseries for the Hsp90 Cα atoms")
+
+![RMSD histogram Hsp90](../../images/htmd_analysis_rmsd1_histo.png "RMSD histogram for the Hsp90 Cα atoms")
+
+The RMSD time series for the protein shows a thermally stable and equilibrated structure that plateaus at 1.0Å with an average RMSD between 0.8Å and 1.0Å. There are no large conformational changes during the simulation. The RMSD histogram confirms this. Note these graphs are automatically created by Galaxy as part of the tool's outputs.
+
+
+## RMSD analysis - ligand
+
+Calculating the RMSD of the ligand is necessary to check if it is stable in the active site and to identify possible binding modes. If the ligand is not stable, there will be large fluctuations in the RMSD.
+
+For the RMSD analysis of the ligand, the `Select domains` parameter of the tool can for convenience be set to `Ligand`; however, this automatic selection sometimes fails. The alternative, which we apply here, is to specify the `Residue ID` in the textbox provided. In this example the ligand's Residue ID is `G5E`. The outputs are the requested RMSD data as a time series, together with plots of the RMSD as a time series and as a histogram.
+
+
+> ### {% icon hands_on %} Hands-on: RMSD analysis
+>
+> 1. **RMSD Analysis** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - *"Select domains"*: `Residue ID`
+> - *"Residue ID"*: `G5E`
+>
+>
+{: .hands_on}
+
+In our case the ligand is stable with a single binding mode. The RMSD fluctuates around 0.3Å, with a slight fluctuation near the end of the simulation. This is more clearly seen in the histogram. The conformation seen during simulation is very similar to that in the crystal structure and the ligand is stable in the active site.
+
+![RMSD timeseries Hsp90 ligand](../../images/htmd_analysis_rmsd2_series.png "RMSD timeseries for the Hsp90 Residue ID G5E (ligand)")
+
+![RMSD histogram Hsp90 ligand](../../images/htmd_analysis_rmsd2_histo.png "RMSD histogram for the Hsp90 Residue ID G5E (ligand)")
+
+
+## RMSF analysis
+
+The Root Mean Square Fluctuation (RMSF) is valuable to consider, as it represents the deviation at a reference position over time. The fluctuations in space of particular amino acids in the protein are considered. The Cα of the protein, designated by `C-alpha`, is a good selection to understand the change in protein structure. Depending on the system these fluctuations can be correlated to experimental techniques including Nuclear Magnetic Resonance (NMR) and Mössbauer spectroscopy ({% cite berjanskii_nmr_2006 %}, {% cite kuzmanic_determination_2010 %}). The output from the tool is the requested RMSF data and the RMSF plotted against residue position.
+
+> ### {% icon hands_on %} Hands-on: RMSF analysis
+>
+> 1. **RMSF Analysis** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - *"Select domains"*: `C-alpha`
+>
+>
+{: .hands_on}
+
+![RMSF Hsp90](../../images/htmd_analysis_rmsf.png "RMSF(Å) vs the residue position. Large fluctuations occur at various positions, which correspond to flexible loop regions on the surface of the protein.")
+
+When considering the RMSF, fluctuations greater than 1.0Å are of interest; for example see the fluctuations near residue positions 50, 110 and 160. Inspecting the structure with molecular visualization software such as VMD, these can be seen to correspond to flexible loop regions on the protein surface. In addition, very large fluctuations are seen for the C-terminus; this is common and no investigation is needed.
+
+Note that the first few residues of this protein are missing in the PDB, and therefore residue position 0 in the RMSF corresponds to position 17 in the Hsp90 FASTA primary sequence. This is a fairly common problem that can occur with molecular modeling of proteins, where there may be missing residues at the beginning or within the sequence.
+
+
+
+## PCA analysis
+
+Principal component analysis (PCA) converts a set of correlated
+observations (movement of selected atoms in protein) to a set of principal
+components (PCs) which are linearly independent (or uncorrelated). Here several related tools are used.
+The PCA tool calculates the PCA in order to determine the relationship between statistically meaningful conformations (major global motions) sampled during the trajectory. The Cα carbons of the protein backbone are again a good selection for this purpose. Outputs include the PCA raw data and figures of the relevant principal components (PCs) as well as an eigenvalue rank plot which is used to visualize the proportion of variance due to each principal component (remembering that the PCs are ranked eigenvectors based on the variance).
+Once the principal components have been determined, they are usually visualized. The PCA visualization tool will create trajectories of specific principal components which can be viewed in a molecular viewer such as VMD ({% cite hump_vmd_1996 %}) or NGL viewer ({% cite Rose2018ngl %}). We also consider the PCA cosine content, which, when close to 1, indicates that the simulation is not converged and a longer simulation is needed. For values below 0.7, no statement can be made about convergence or lack thereof.
+
+> ### {% icon hands_on %} Hands-on: PCA
+>
+> 1. **PCA** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - *"Select domains"*: `C-alpha`
+>
+>
+{: .hands_on}
+
+
+![PCA Hsp90](../../images/htmd_analysis_pca.png "PCA results which include graphs of PC2 vs PC1, PC2 vs PC3, PC3 vs PC1 colored from blue to red in order of time, and an eigenvalue rank plot. In the eigenvalue plot the cumulative variance is labeled for each data point.")
+
+The first three principal components are responsible for 32.8% of the total variance, as seen in the eigenvalue rank plot. The first principal component (PC1) accounts for 15.4% of the variance (see PC1 vs PC2 and eigenvalue rank plots). Visualization of PC1 using VMD shows a rocking motion and wagging of the C-terminus.
+
+
+> ### {% icon hands_on %} Hands-on: PCA cosine content calculation
+>
+> 1. **Cosine Content** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD/XTC trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB/GRO input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+>
+>
+{: .hands_on}
+
+
+The PCA cosine content of the dominant motion related to PC1 is 0.93, indicating that the simulation is not fully converged. This is expected due to the short simulation length. For production level simulations, it is the norm to extend simulations to hundreds of nanoseconds in length, if not microseconds. As this tutorial is designed to be carried out on public webservers, we limit simulations to 1 ns, as we cannot provide a large amount of computational resources for training purposes.
+
+> ### {% icon hands_on %} Hands-on: PCA visualisation
+>
+> 1. **PCA visualization** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - *"Select domains"*: `C-alpha`
+>
+>
+{: .hands_on}
+
+![PC1 Hsp90 gif](../../images/htmd_analysis_pc1_hsp90.gif "PC1 motion for Hsp90")
+
+## Hydrogen bond analysis
+
+Hydrogen bonding interactions contribute to binding and are worth investigating, in particular persistent hydrogen bonds. All possible hydrogen bonding interactions between the two selected regions, here the protein and the ligand, are investigated over time using the VMD hydrogen bond analysis tool included in Galaxy. Hydrogen bonds are identified and in the output the total number of hydrogen bonds and occupancy over time is returned.
+
+> ### {% icon hands_on %} Hands-on: Hydrogen bond analysis
+>
+> 1. **Hydrogen Bond Analysis using VMD** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"DCD/XTC trajectory input"*: `output` (output of **MDTraj file converter** {% icon tool %})
+> - {% icon param-file %} *"PDB/GRO input"*: `output` (output of **GROMACS structure configuration** {% icon tool %})
+> - *"Selection 1"*: `protein`
+> - *"Selection 2"*: `resname G5E`
+>
+>
+{: .hands_on}
+
+The active site of this protein is quite hydrophobic, yet multiple hydrogen bonds were identified. The hydrogen bond between aspartate-93 and the ligand (as identified in the crystal structure) was found to be persistent, meeting the hydrogen bond criteria for 89.22% of the simulation. A hydrogen bond between the ligand and the carbonyl group of glycine-97 was found to have a 15.27% occupancy. Hydrogen bonding interactions with threonine-184, asparagine-51 and lysine-58 were also observed but these are not persistent and only present for a minority of the simulation. These values can be accessed from the 'Percentage occupancy of the H-bond' output of the hydrogen bond analysis tool.
+
+
+# Optional: Automating high throughput calculations
+Up until this step, Galaxy tools have been applied sequentially to datasets. This is useful to gain an understanding of the steps involved, but becomes tedious if the workflow needs to be run on multiple protein-ligand systems. Fortunately, Galaxy allows entire workflows to be executed with a single mouse-click, enabling straightforward high-throughput analyses.
+
+We will demonstrate the high-throughput capabilities of Galaxy by running the workflow detailed so far on a further three ligands.
+
+> ### {% icon hands_on %} Hands-on: High-throughput MD
+>
+> 1. Create a new history for running the high-throughput workflow and name it `Hsp90 HTMD simulation`
+> 2. Upload the SD-file containing the new ligand structures from Zenodo and rename it `Ligands (SDF)`
+> 3. Import the simulation workflow from the European ({% cite eu_htmd_simulation_workflow %}) or the South African Galaxy server ({% cite za_htmd_simulation_workflow %}).
+> 4. Run the imported workflow with the following parameters:
+> - *"Send results to a new history"*: `Yes`
+> - *"History name"*: `Hsp90 HTMD analysis`
+> - *"GRO input"*: 'Collection of GRO files produced by simulation workflow'
+> - *"XTC input"*: 'Collection of XTC files produced by simulation workflow'
+>
+{: .hands_on}
+
+
+This process runs the entire simulation and analysis procedure described so far on the new set of ligands. It uses Galaxy's [collection feature]({% link topics/galaxy-data-manipulation/tutorials/collections/tutorial.md %}) to organize the data; each item in the history is a collection (essentially a directory containing multiple individual datasets) containing one file corresponding to each of the input ligands.
+
+Note that the SD-file needs to contain ligands with the correct 3D coordinates for MD simulation. The easiest way to obtain these is using a molecular docking tool such as AutoDock Vina ({% cite Trott2009 %}) or rDock ({% cite Ruiz2014 %}); tutorials and workflows are available for both of these from the Galaxy Training Network. As an example, the history in which the SD-file used in the HTMD workflow is generated (using AutoDock Vina) is provided ({% cite eu_6hhr %}).
+
+
+Apart from manual setups or collections, there are several other alternatives which are helpful in scaling up workflows. Galaxy supports and provides training material for converting [histories to workflows]({% link topics/galaxy-ui/tutorials/history-to-workflow/tutorial.md %}), using [multiple histories]({% link topics/galaxy-ui/tutorials/history/tutorial.md %}), and the [Galaxy Application Programming Interface (API)]({% link topics/dev/tutorials/bioblend-api/slides.html %}). For beginners and users who prefer a visual interface, automation can be done using multiple histories and collections with the standard Galaxy user interface.
+
+If you are able to write small scripts, you can automate everything you have learned here with the Galaxy API. This allows you to interact with the server to automate repetitive tasks and create more complex workflows (which may have repetition or branching). The simplest way to access the API is through the Python library BioBlend ({% cite sloggett_bioblend %}). An example Python script, which uses BioBlend to run the GROMACS simulation workflow for each of a list of ligands, is given in the hands-on box below.
+
+> ### {% icon hands_on %} Hands-on: BioBlend script
+>
+> ```
+>from bioblend import galaxy
+>
+># Server and account details
+>API_KEY = 'YOUR USEGALAXY.EU API KEY'
+>gi = galaxy.GalaxyInstance(key=API_KEY,
+> url='https://usegalaxy.eu/')
+>
+># ID for GROMACS workflow
+>workflow_id = 'adc6d049e9283789'
+>
+># Dataset IDs for ligands to simulate
+>ligands = {
+># ligand_name: dataset ID,
+>'lig1': '11ac94870d0bb33a79c5fa18b0fd3b4c',
+># ...
+>}
+>
+># Loop over ligands, invoking workflow
+>for name, _id in ligands.items():
+> inv = gi.workflows.invoke_workflow(
+> workflow_id,
+> inputs={
+> '1': {'src': 'hda', 'id': _id}
+> },
+> history_name=f'HTMD run on {name}'
+> )
+> ```
+>
+{: .hands_on}
+
+
+# Conclusion
+{:.no_toc}
+
+This tutorial provides a guide on how to study protein-ligand interaction using molecular dynamics in Galaxy. Performing such analyses in Galaxy makes it straightforward to set up, schedule and run workflows, removing much of the difficulty from MD simulation. Thus, the technical barrier to performing high-throughput studies is greatly reduced. Results are structured in the form of Galaxy histories or collections, and include ready-plotted diagrams, which ensure data can be easily understood and reproduced if necessary. Apart from streamlining the process for existing MD users, this tutorial should also prove useful as a pedagogical guide for educating students or newcomers to the field.
+
+After completing the tutorial, the user will be familiar at a basic level with a range of MD analysis techniques, and understand the steps required for a typical MD simulation. Thus, they will be equipped to apply these tools to their own problems.
diff --git a/topics/computational-chemistry/tutorials/htmd-analysis/workflows/analysis_workflow.ga b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/analysis_workflow.ga
new file mode 100644
index 00000000000000..581662119773d4
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/analysis_workflow.ga
@@ -0,0 +1,446 @@
+{
+ "uuid": "38641d95-e161-4aa7-a6a9-1b88e1562f13",
+ "tags": [
+ "computational-chemistry"
+ ],
+ "format-version": "0.1",
+ "name": "Workflow constructed from history 'Hsp90-MDAnalysis'",
+ "version": 0,
+ "steps": {
+ "0": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy30-[GROMACS_simulation_on_data_28,_data_15,_and_data_26].gro\"}",
+ "id": 0,
+ "uuid": "82ae4961-8bb0-440d-bed6-9206d8b9d860",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy30-[GROMACS_simulation_on_data_28,_data_15,_and_data_26].gro",
+ "description": ""
+ }],
+ "position": {
+ "top": 10,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "1": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy31-[GROMACS_simulation_on_data_28,_data_15,_and_data_26].xtc\"}",
+ "id": 1,
+ "uuid": "f67e98b9-2b4f-4d7d-9da7-ec3c23ffbd38",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy31-[GROMACS_simulation_on_data_28,_data_15,_and_data_26].xtc",
+ "description": ""
+ }],
+ "position": {
+ "top": 130,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "2": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "gro",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output",
+ "id": 0
+ }
+ },
+ "tool_state": "{\"box\": \"{\\\"__current_case__\\\": 1, \\\"config\\\": \\\"false\\\"}\", \"__page__\": null, \"capture_log\": \"\\\"false\\\"\", \"input_file\": \"null\", \"output_format\": \"\\\"pdb\\\"\", \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "id": 2,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "3b8a9d5ced85",
+ "name": "gmx_editconf",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "8e7740cc-9889-46df-9534-5b904688efdf",
+ "errors": null,
+ "name": "GROMACS structure configuration",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "type": "tool"
+ },
+ "3": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/md_converter/md_converter/1.9.3.2",
+ "tool_version": "1.9.3.2",
+ "outputs": [{
+ "type": "data",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output",
+ "id": 1
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"input_file\": \"null\", \"output_format\": \"\\\"dcd\\\"\", \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "id": 3,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "79e8ab8f1e81",
+ "name": "md_converter",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "c7e8f328-8ea6-451b-9c00-d305a351d4ac",
+ "errors": null,
+ "name": "MDTraj file converter",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/md_converter/md_converter/1.9.3.2",
+ "type": "tool"
+ },
+ "4": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsd_plot"
+ }, {
+ "type": "png",
+ "name": "rmsd_hist_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 2
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 3
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsd\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 4,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "77e28e1da9f4",
+ "name": "bio3d_rmsd",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "a76c9ffd-fca2-443a-b872-0fc8cf1ae22e",
+ "errors": null,
+ "name": "RMSD Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "type": "tool"
+ },
+ "5": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsf/bio3d_rmsf/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsf_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 2
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 3
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsf\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 5,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "6bcb804a54c3",
+ "name": "bio3d_rmsf",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "8ca09773-3d34-45d7-b36e-7a06d6a825fd",
+ "errors": null,
+ "name": "RMSF Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsf/bio3d_rmsf/2.3.4",
+ "type": "tool"
+ },
+ "6": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca/bio3d_pca/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "pca_plot"
+ }, {
+ "type": "png",
+ "name": "pca_cluster"
+ }, {
+ "type": "png",
+ "name": "pc1_rmsf"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 2
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 3
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"dcdin\": \"null\", \"pdbin\": \"null\", \"pca\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"method\": \"\\\"false\\\"\"}",
+ "id": 6,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "24867ab16f36",
+ "name": "bio3d_pca",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "fdcabd33-6303-459e-a14c-0732e97bda39",
+ "errors": null,
+ "name": "PCA",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 250,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca/bio3d_pca/2.3.4",
+ "type": "tool"
+ },
+ "7": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/mdanalysis_cosine_analysis/mdanalysis_cosine_analysis/0.20",
+ "tool_version": "0.20",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "txt",
+ "name": "cosout"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "trajin": {
+ "output_name": "output",
+ "id": 3
+ },
+ "strin": {
+ "output_name": "output",
+ "id": 2
+ }
+ },
+ "tool_state": "{\"index\": \"\\\"0\\\"\", \"__page__\": null, \"strin\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"trajin\": \"null\", \"components\": \"\\\"3\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 7,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "e39bc1f90d8f",
+ "name": "mdanalysis_cosine_analysis",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "32333821-6326-449b-b8fe-ad9685874a76",
+ "errors": null,
+ "name": "Cosine Content",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 370,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/mdanalysis_cosine_analysis/mdanalysis_cosine_analysis/0.20",
+ "type": "tool"
+ },
+ "8": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca_visualize/bio3d_pca_visualize/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "pdb",
+ "name": "pdbout"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 2
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 3
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"dcdin\": \"null\", \"pc_id\": \"\\\"1\\\"\", \"pdbin\": \"null\", \"pca\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"method\": \"\\\"false\\\"\"}",
+ "id": 8,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "f61a718993fd",
+ "name": "bio3d_pca_visualize",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "a08577af-1864-461c-8e18-41baaad461fd",
+ "errors": null,
+ "name": "PCA visualization",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 490,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca_visualize/bio3d_pca_visualize/2.3.4",
+ "type": "tool"
+ },
+ "9": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/vmd_hbonds/vmd_hbonds/1.9.3",
+ "tool_version": "1.9.3",
+ "outputs": [{
+ "type": "txt",
+ "name": "detail"
+ }, {
+ "type": "txt",
+ "name": "hbond"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "trajin": {
+ "output_name": "output",
+ "id": 3
+ },
+ "strin": {
+ "output_name": "output",
+ "id": 2
+ }
+ },
+ "tool_state": "{\"distance\": \"\\\"3.0\\\"\", \"angle\": \"\\\"20.0\\\"\", \"__page__\": null, \"sele2\": \"\\\"resname G5E\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"trajin\": \"null\", \"sele1\": \"\\\"protein\\\"\", \"strin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"molid\": \"\\\"0\\\"\"}",
+ "id": 9,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "8aa5e465b043",
+ "name": "vmd_hbonds",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "5f044466-9a0d-4f9c-b2e6-3e9cdc4910c2",
+ "errors": null,
+ "name": "Hydrogen Bond Analysis using VMD",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 610,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/vmd_hbonds/vmd_hbonds/1.9.3",
+ "type": "tool"
+ },
+ "10": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsd_plot"
+ }, {
+ "type": "png",
+ "name": "rmsd_hist_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 2
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 3
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsd\": \"{\\\"__current_case__\\\": 12, \\\"resid\\\": \\\"G5E\\\", \\\"sele\\\": \\\"resid\\\"}\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"33a7c4388bba11eaaa3d001b21d75532\\\"\", \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 10,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "77e28e1da9f4",
+ "name": "bio3d_rmsd",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "e0fa1995-3a30-4aea-9368-e1e45446b6ba",
+ "errors": null,
+ "name": "RMSD Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 730,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "type": "tool"
+ }
+ },
+ "annotation": "Protein-ligand analysis workflow (HTMD)",
+ "a_galaxy_workflow": "true"
+}
diff --git a/topics/computational-chemistry/tutorials/htmd-analysis/workflows/index.md b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/computational-chemistry/tutorials/htmd-analysis/workflows/main_workflow.ga b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/main_workflow.ga
new file mode 100644
index 00000000000000..3903f462a51edb
--- /dev/null
+++ b/topics/computational-chemistry/tutorials/htmd-analysis/workflows/main_workflow.ga
@@ -0,0 +1,1138 @@
+{
+ "uuid": "253ee9a8-2ae8-4a7d-a5da-90973972ab28",
+ "tags": [
+ "computational-chemistry"
+ ],
+ "format-version": "0.1",
+ "name": "Workflow Hsp90-lig_analysis TS",
+ "version": 0,
+ "steps": {
+ "0": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy13-[Topology].itp\"}",
+ "id": 0,
+ "uuid": "49009750-5d63-4986-937c-cd00683b7bd7",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy13-[Topology].itp",
+ "description": ""
+ }],
+ "position": {
+ "top": 10,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "1": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy14-[Structure_file_(GRO_format,_optional)].gro\"}",
+ "id": 1,
+ "uuid": "cee452c1-75df-4b5c-af05-5f5e2fe7f582",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy14-[Structure_file_(GRO_format,_optional)].gro",
+ "description": ""
+ }],
+ "position": {
+ "top": 130,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "2": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy6-[GROMACS_initial_setup_on_data_2].gro\"}",
+ "id": 2,
+ "uuid": "b42068d1-358f-42ef-825f-3cfa7e83c6f7",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy6-[GROMACS_initial_setup_on_data_2].gro",
+ "description": ""
+ }],
+ "position": {
+ "top": 250,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "3": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy5-[GROMACS_initial_setup_on_data_2].top\"}",
+ "id": 3,
+ "uuid": "309a68f1-3679-44da-9ed2-1cb142cee24b",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy5-[GROMACS_initial_setup_on_data_2].top",
+ "description": ""
+ }],
+ "position": {
+ "top": 370,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "4": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy5-[GROMACS_initial_setup_on_data_2].top\"}",
+ "id": 4,
+ "uuid": "0daad8a0-1001-44b7-8c6a-5166c8cd24c9",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy5-[GROMACS_initial_setup_on_data_2].top",
+ "description": ""
+ }],
+ "position": {
+ "top": 490,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "5": {
+ "tool_id": null,
+ "tool_version": null,
+ "outputs": [],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"name\": \"Galaxy37-[GROMACS_solvation_and_adding_ions_on_data_35_and_data_30].top\"}",
+ "id": 5,
+ "uuid": "03e3c449-a5b7-4d49-a6ad-f461489c263e",
+ "errors": null,
+ "name": "Input dataset",
+ "label": null,
+ "inputs": [{
+ "name": "Galaxy37-[GROMACS_solvation_and_adding_ions_on_data_35_and_data_30].top",
+ "description": ""
+ }],
+ "position": {
+ "top": 610,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": null,
+ "type": "data_input"
+ },
+ "6": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/get_pdb/get_pdb/0.1.0",
+ "tool_version": "0.1.0",
+ "outputs": [{
+ "type": "pdb",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {},
+ "tool_state": "{\"pdb_id\": \"\\\"6hhr\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 6,
+ "tool_shed_repository": {
+ "owner": "bgruening",
+ "changeset_revision": "538790c6c21b",
+ "name": "get_pdb",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "575d6ca7-b859-4078-8d07-d9749b556cd6",
+ "errors": null,
+ "name": "Get PDB file",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 730,
+ "left": 10
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/get_pdb/get_pdb/0.1.0",
+ "type": "tool"
+ },
+ "7": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_makendx/gmx_makendx/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "ndx",
+ "name": "ndx"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output",
+ "id": 1
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"sel\": \"\\\"0 & ! a H*\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 7,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "01a435922dd5",
+ "name": "gmx_makendx",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "1fae4a94-4dc4-4c3d-a625-6fe6949d3d06",
+ "errors": null,
+ "name": "Create GROMACS index files",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 370,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_makendx/gmx_makendx/2019.1.4",
+ "type": "tool"
+ },
+ "8": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "gro",
+ "name": "output"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output",
+ "id": 2
+ }
+ },
+ "tool_state": "{\"box\": \"{\\\"__current_case__\\\": 0, \\\"config\\\": \\\"true\\\", \\\"dim\\\": \\\"1.0\\\", \\\"type\\\": \\\"triclinic\\\"}\", \"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"input_file\": \"null\", \"output_format\": \"\\\"gro\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "id": 8,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "e69ce1e7fb6a",
+ "name": "gmx_editconf",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "5bab3739-9dd3-4ce5-a102-a63ab892a0ea",
+ "errors": null,
+ "name": "GROMACS structure configuration",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 250,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "type": "tool"
+ },
+ "9": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/1.1.1",
+ "tool_version": "1.1.1",
+ "outputs": [{
+ "type": "input",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "infile": {
+ "output_name": "output",
+ "id": 6
+ }
+ },
+ "tool_state": "{\"lines_before\": \"\\\"0\\\"\", \"regex_type\": \"\\\"-P\\\"\", \"__page__\": null, \"color\": \"\\\"NOCOLOR\\\"\", \"invert\": \"\\\"-v\\\"\", \"case_sensitive\": \"\\\"-i\\\"\", \"__rerun_remap_job_id__\": null, \"url_paste\": \"\\\"HETATM\\\"\", \"lines_after\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"null\"}",
+ "id": 9,
+ "tool_shed_repository": {
+ "owner": "bgruening",
+ "changeset_revision": "0a8c6b61f0f4",
+ "name": "text_processing",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "af970e71-9955-4a76-ba2f-0eb21ddaf0c1",
+ "errors": null,
+ "name": "Search in textfiles",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/1.1.1",
+ "type": "tool"
+ },
+ "10": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/1.1.1",
+ "tool_version": "1.1.1",
+ "outputs": [{
+ "type": "input",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "infile": {
+ "output_name": "output",
+ "id": 6
+ }
+ },
+ "tool_state": "{\"lines_before\": \"\\\"0\\\"\", \"regex_type\": \"\\\"-P\\\"\", \"__page__\": null, \"color\": \"\\\"NOCOLOR\\\"\", \"invert\": \"\\\"\\\"\", \"case_sensitive\": \"\\\"-i\\\"\", \"__rerun_remap_job_id__\": null, \"url_paste\": \"\\\"AG5E\\\"\", \"lines_after\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"null\"}",
+ "id": 10,
+ "tool_shed_repository": {
+ "owner": "bgruening",
+ "changeset_revision": "0a8c6b61f0f4",
+ "name": "text_processing",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "eaeb4b1d-64ea-4bdc-9a55-ed02cc9a1fca",
+ "errors": null,
+ "name": "Search in textfiles",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 230
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/1.1.1",
+ "type": "tool"
+ },
+ "11": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_restraints/gmx_restraints/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "itp",
+ "name": "output1"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "gro_input": {
+ "output_name": "output",
+ "id": 1
+ },
+ "ndx_input": {
+ "output_name": "ndx",
+ "id": 7
+ }
+ },
+ "tool_state": "{\"index\": \"\\\"0\\\"\", \"capture_log\": \"\\\"true\\\"\", \"__page__\": null, \"ndx_input\": \"null\", \"gro_input\": \"null\", \"fc\": \"\\\"1000 1000 1000\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "id": 11,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "f79f4671eaf2",
+ "name": "gmx_restraints",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "51be2f75-fe09-4307-ac46-eef51f0ce11d",
+ "errors": null,
+ "name": "Create GROMACS position restraints files",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 370,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_restraints/gmx_restraints/2019.1.4",
+ "type": "tool"
+ },
+ "12": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_solvate/gmx_solvate/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "gro",
+ "name": "output1"
+ }, {
+ "type": "top",
+ "name": "output2"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "top_input": {
+ "output_name": "output",
+ "id": 4
+ },
+ "gro_input": {
+ "output_name": "output",
+ "id": 8
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"gro_input\": \"null\", \"water_model\": \"\\\"spc216\\\"\", \"top_input\": \"null\", \"neutralise\": \"\\\"true\\\"\"}",
+ "id": 12,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "3c77d66e7fe5",
+ "name": "gmx_solvate",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "cdb5b93f-247b-4414-8b59-9f638e131291",
+ "errors": null,
+ "name": "GROMACS solvation and adding ions",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 250,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_solvate/gmx_solvate/2019.1.4",
+ "type": "tool"
+ },
+ "13": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_setup/gmx_setup/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "top",
+ "name": "output1"
+ }, {
+ "type": "gro",
+ "name": "output2"
+ }, {
+ "type": "itp",
+ "name": "output3"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdb_input": {
+ "output_name": "output",
+ "id": 9
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"ff\": \"\\\"amber99sb\\\"\", \"capture_log\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"water\": \"\\\"tip3p\\\"\", \"pdb_input\": \"null\", \"ignore_h\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 13,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "ccdf0b30a422",
+ "name": "gmx_setup",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "74d0f987-8759-4589-b8d2-f2f5c8a30bda",
+ "errors": null,
+ "name": "GROMACS initial setup",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_setup/gmx_setup/2019.1.4",
+ "type": "tool"
+ },
+ "14": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.1.0",
+ "tool_version": "2.4.2.1.0",
+ "outputs": [{
+ "type": "text",
+ "name": "outfile"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "infile": {
+ "output_name": "output",
+ "id": 10
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"dative_bonds\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"appendtotitle\": \"\\\"\\\"\", \"remove_h\": \"\\\"false\\\"\", \"oformat\": \"{\\\"__current_case__\\\": 41, \\\"gen2d\\\": \\\"false\\\", \\\"gen3d\\\": \\\"false\\\", \\\"mol2_ignore_res\\\": \\\"false\\\", \\\"oformat_opts_selector\\\": \\\"mol2\\\"}\", \"ph\": \"\\\"-1.0\\\"\", \"unique\": \"{\\\"__current_case__\\\": 0, \\\"unique_opts_selector\\\": \\\"\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"null\", \"split\": \"\\\"false\\\"\"}",
+ "id": 14,
+ "tool_shed_repository": {
+ "owner": "bgruening",
+ "changeset_revision": "a072cb207571",
+ "name": "openbabel_compound_convert",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "a9399171-56de-4301-90d7-e766fe11ba99",
+ "errors": null,
+ "name": "Compound conversion",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 450
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/openbabel_compound_convert/openbabel_compound_convert/2.4.2.1.0",
+ "type": "tool"
+ },
+ "15": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_em/gmx_em/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "gro",
+ "name": "output1"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "top_input": {
+ "output_name": "output2",
+ "id": 12
+ },
+ "gro_input": {
+ "output_name": "output1",
+ "id": 12
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"gro_input\": \"null\", \"top_input\": \"null\", \"mdp\": \"{\\\"__current_case__\\\": 1, \\\"coulombtype\\\": \\\"PME\\\", \\\"cutoffscheme\\\": \\\"Verlet\\\", \\\"emstep\\\": \\\"0.01\\\", \\\"emtol\\\": \\\"1000.0\\\", \\\"integrator\\\": \\\"steep\\\", \\\"md_steps\\\": \\\"50000\\\", \\\"mdpfile\\\": \\\"default\\\", \\\"rcoulomb\\\": \\\"1.0\\\", \\\"rlist\\\": \\\"1.0\\\", \\\"rvdw\\\": \\\"1.0\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 15,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "476cdf677b03",
+ "name": "gmx_em",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "a4367418-58d9-4b59-9f1f-4c5dd35b1a25",
+ "errors": null,
+ "name": "GROMACS energy minimization",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 670
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_em/gmx_em/2019.1.4",
+ "type": "tool"
+ },
+ "16": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_makendx/gmx_makendx/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "ndx",
+ "name": "ndx"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output1",
+ "id": 15
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"sel\": \"\\\"1 | 13\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 16,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "01a435922dd5",
+ "name": "gmx_makendx",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "33c38586-7b9b-4f69-bb48-86b98e6e61b2",
+ "errors": null,
+ "name": "Create GROMACS index files",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 890
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_makendx/gmx_makendx/2019.1.4",
+ "type": "tool"
+ },
+ "17": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "tool_version": "2019.1.4.1",
+ "outputs": [{
+ "type": "gro",
+ "name": "output1"
+ }, {
+ "type": "xtc",
+ "name": "output4"
+ }, {
+ "type": "cpt",
+ "name": "output5"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "inps|itp_in": {
+ "output_name": "output3",
+ "id": 13
+ },
+ "top_input": {
+ "output_name": "output",
+ "id": 5
+ },
+ "gro_input": {
+ "output_name": "output1",
+ "id": 15
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"gro_input\": \"null\", \"sets\": \"{\\\"ensemble\\\": \\\"nvt\\\", \\\"mdp\\\": {\\\"__current_case__\\\": 1, \\\"constraints\\\": \\\"all-bonds\\\", \\\"coulombtype\\\": \\\"PME\\\", \\\"cutoffscheme\\\": \\\"Verlet\\\", \\\"integrator\\\": \\\"md\\\", \\\"md_steps\\\": \\\"50000\\\", \\\"mdpfile\\\": \\\"default\\\", \\\"rcoulomb\\\": \\\"1.0\\\", \\\"rlist\\\": \\\"1.0\\\", \\\"rvdw\\\": \\\"1.0\\\", \\\"step_length\\\": \\\"0.002\\\", \\\"temperature\\\": \\\"300\\\", \\\"write_freq\\\": \\\"1000\\\"}}\", \"top_input\": \"null\", \"outps\": \"{\\\"cpt_out\\\": \\\"true\\\", \\\"edr_out\\\": \\\"false\\\", \\\"str\\\": \\\"gro\\\", \\\"tpr_out\\\": \\\"false\\\", \\\"traj\\\": \\\"xtc\\\", \\\"xvg_out\\\": \\\"false\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"inps\": \"{\\\"cpt_in\\\": null, \\\"itp_in\\\": null, \\\"ndx_in\\\": null}\"}",
+ "id": 17,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "b1061cc2653a",
+ "name": "gmx_sim",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "4f54eda0-96ba-4f83-863f-14d50260655d",
+ "errors": null,
+ "name": "GROMACS simulation",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 890
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "type": "tool"
+ },
+ "18": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "tool_version": "2019.1.4.1",
+ "outputs": [{
+ "type": "gro",
+ "name": "output1"
+ }, {
+ "type": "xtc",
+ "name": "output4"
+ }, {
+ "type": "cpt",
+ "name": "output5"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "inps|itp_in": {
+ "output_name": "output3",
+ "id": 13
+ },
+ "top_input": {
+ "output_name": "output",
+ "id": 5
+ },
+ "gro_input": {
+ "output_name": "output1",
+ "id": 17
+ },
+ "inps|cpt_in": {
+ "output_name": "output5",
+ "id": 17
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"gro_input\": \"null\", \"sets\": \"{\\\"ensemble\\\": \\\"npt\\\", \\\"mdp\\\": {\\\"__current_case__\\\": 1, \\\"constraints\\\": \\\"all-bonds\\\", \\\"coulombtype\\\": \\\"PME\\\", \\\"cutoffscheme\\\": \\\"Verlet\\\", \\\"integrator\\\": \\\"md\\\", \\\"md_steps\\\": \\\"50000\\\", \\\"mdpfile\\\": \\\"default\\\", \\\"rcoulomb\\\": \\\"1.0\\\", \\\"rlist\\\": \\\"1.0\\\", \\\"rvdw\\\": \\\"1.0\\\", \\\"step_length\\\": \\\"0.002\\\", \\\"temperature\\\": \\\"300\\\", \\\"write_freq\\\": \\\"1000\\\"}}\", \"top_input\": \"null\", \"outps\": \"{\\\"cpt_out\\\": \\\"true\\\", \\\"edr_out\\\": \\\"false\\\", \\\"str\\\": \\\"gro\\\", \\\"tpr_out\\\": \\\"false\\\", \\\"traj\\\": \\\"xtc\\\", \\\"xvg_out\\\": \\\"false\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"inps\": \"{\\\"cpt_in\\\": null, \\\"itp_in\\\": null, \\\"ndx_in\\\": null}\"}",
+ "id": 18,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "b1061cc2653a",
+ "name": "gmx_sim",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "99b2cc3b-1bca-420d-88c0-63ad5a0c1cac",
+ "errors": null,
+ "name": "GROMACS simulation",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 1110
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "type": "tool"
+ },
+ "19": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "tool_version": "2019.1.4.1",
+ "outputs": [{
+ "type": "gro",
+ "name": "output1"
+ }, {
+ "type": "xtc",
+ "name": "output4"
+ }, {
+ "type": "cpt",
+ "name": "output5"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "top_input": {
+ "output_name": "output2",
+ "id": 12
+ },
+ "gro_input": {
+ "output_name": "output1",
+ "id": 18
+ },
+ "inps|cpt_in": {
+ "output_name": "output5",
+ "id": 18
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"capture_log\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"gro_input\": \"null\", \"sets\": \"{\\\"ensemble\\\": \\\"npt\\\", \\\"mdp\\\": {\\\"__current_case__\\\": 1, \\\"constraints\\\": \\\"none\\\", \\\"coulombtype\\\": \\\"PME\\\", \\\"cutoffscheme\\\": \\\"Verlet\\\", \\\"integrator\\\": \\\"md\\\", \\\"md_steps\\\": \\\"1000000\\\", \\\"mdpfile\\\": \\\"default\\\", \\\"rcoulomb\\\": \\\"1.0\\\", \\\"rlist\\\": \\\"1.0\\\", \\\"rvdw\\\": \\\"1.0\\\", \\\"step_length\\\": \\\"0.001\\\", \\\"temperature\\\": \\\"300\\\", \\\"write_freq\\\": \\\"1000\\\"}}\", \"top_input\": \"null\", \"outps\": \"{\\\"cpt_out\\\": \\\"true\\\", \\\"edr_out\\\": \\\"false\\\", \\\"str\\\": \\\"gro\\\", \\\"tpr_out\\\": \\\"false\\\", \\\"traj\\\": \\\"xtc\\\", \\\"xvg_out\\\": \\\"false\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"inps\": \"{\\\"cpt_in\\\": null, \\\"itp_in\\\": null, \\\"ndx_in\\\": null}\"}",
+ "id": 19,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "b1061cc2653a",
+ "name": "gmx_sim",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "201ee70f-e5ea-4f83-813e-fd645146328d",
+ "errors": null,
+ "name": "GROMACS simulation",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 1330
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_sim/gmx_sim/2019.1.4.1",
+ "type": "tool"
+ },
+ "20": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/md_converter/md_converter/1.9.3.2",
+ "tool_version": "1.9.3.2",
+ "outputs": [{
+ "type": "data",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output4",
+ "id": 19
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"output_format\": \"\\\"dcd\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"input_file\": \"null\"}",
+ "id": 20,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "79e8ab8f1e81",
+ "name": "md_converter",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "c25828ec-69ac-4c13-8138-f7a365e1f770",
+ "errors": null,
+ "name": "MDTraj file converter",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 1550
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/md_converter/md_converter/1.9.3.2",
+ "type": "tool"
+ },
+ "21": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "tool_version": "2019.1.4",
+ "outputs": [{
+ "type": "gro",
+ "name": "output"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "input_file": {
+ "output_name": "output1",
+ "id": 19
+ }
+ },
+ "tool_state": "{\"box\": \"{\\\"__current_case__\\\": 1, \\\"config\\\": \\\"false\\\"}\", \"__page__\": null, \"capture_log\": \"\\\"false\\\"\", \"input_file\": \"null\", \"output_format\": \"\\\"pdb\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "id": 21,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "e69ce1e7fb6a",
+ "name": "gmx_editconf",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "e3e86b20-504b-48b0-a530-843d541bda82",
+ "errors": null,
+ "name": "GROMACS structure configuration",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 1550
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/gmx_editconf/gmx_editconf/2019.1.4",
+ "type": "tool"
+ },
+ "22": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsd_plot"
+ }, {
+ "type": "png",
+ "name": "rmsd_hist_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 21
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 20
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsd\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"__rerun_remap_job_id__\": null, \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 22,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "77e28e1da9f4",
+ "name": "bio3d_rmsd",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "767b019a-780e-4dd5-8ad3-2036a48da2be",
+ "errors": null,
+ "name": "RMSD Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 10,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "type": "tool"
+ },
+ "23": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsf/bio3d_rmsf/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsf_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 21
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 20
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsf\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"__rerun_remap_job_id__\": null, \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 23,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "6bcb804a54c3",
+ "name": "bio3d_rmsf",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "360aa76d-cf2a-4032-a996-fa466744fd21",
+ "errors": null,
+ "name": "RMSF Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 130,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsf/bio3d_rmsf/2.3.4",
+ "type": "tool"
+ },
+ "24": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/vmd_hbonds/vmd_hbonds/1.9.3",
+ "tool_version": "1.9.3",
+ "outputs": [{
+ "type": "txt",
+ "name": "detail"
+ }, {
+ "type": "txt",
+ "name": "hbond"
+ }, {
+ "type": "txt",
+ "name": "report"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "trajin": {
+ "output_name": "output",
+ "id": 20
+ },
+ "strin": {
+ "output_name": "output",
+ "id": 21
+ }
+ },
+ "tool_state": "{\"distance\": \"\\\"3.0\\\"\", \"angle\": \"\\\"20.0\\\"\", \"__page__\": null, \"sele2\": \"\\\"resname UNL\\\"\", \"__rerun_remap_job_id__\": null, \"trajin\": \"null\", \"sele1\": \"\\\"protein \\\"\", \"strin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"molid\": \"\\\"0\\\"\"}",
+ "id": 24,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "8aa5e465b043",
+ "name": "vmd_hbonds",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "ef2d75f6-6a59-4db2-8599-5e625598b35d",
+ "errors": null,
+ "name": "Hydrogen Bond Analysis using VMD",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 250,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/vmd_hbonds/vmd_hbonds/1.9.3",
+ "type": "tool"
+ },
+ "25": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "rmsd_plot"
+ }, {
+ "type": "png",
+ "name": "rmsd_hist_plot"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 21
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 20
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"rmsd\": \"{\\\"__current_case__\\\": 12, \\\"resid\\\": \\\"UNL\\\", \\\"sele\\\": \\\"resid\\\"}\", \"__rerun_remap_job_id__\": null, \"pdbin\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 25,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "77e28e1da9f4",
+ "name": "bio3d_rmsd",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "c6babea0-be48-4421-a6af-b2d48cf6b95f",
+ "errors": null,
+ "name": "RMSD Analysis",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 370,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_rmsd/bio3d_rmsd/2.3.4",
+ "type": "tool"
+ },
+ "26": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca/bio3d_pca/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "png",
+ "name": "pca_plot"
+ }, {
+ "type": "png",
+ "name": "pca_cluster"
+ }, {
+ "type": "png",
+ "name": "pc1_rmsf"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 21
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 20
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"method\": \"\\\"false\\\"\", \"pdbin\": \"null\", \"pca\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"dcdin\": \"null\"}",
+ "id": 26,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "24867ab16f36",
+ "name": "bio3d_pca",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "f68cdfca-76a3-4065-a9b3-f7427876cd3c",
+ "errors": null,
+ "name": "PCA",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 490,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca/bio3d_pca/2.3.4",
+ "type": "tool"
+ },
+ "27": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca_visualize/bio3d_pca_visualize/2.3.4",
+ "tool_version": "2.3.4",
+ "outputs": [{
+ "type": "pdb",
+ "name": "pdbout"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "pdbin": {
+ "output_name": "output",
+ "id": 21
+ },
+ "dcdin": {
+ "output_name": "output",
+ "id": 20
+ }
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"dcdin\": \"null\", \"pc_id\": \"\\\"1\\\"\", \"pdbin\": \"null\", \"pca\": \"{\\\"__current_case__\\\": 0, \\\"sele\\\": \\\"calpha\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"method\": \"\\\"false\\\"\"}",
+ "id": 27,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "f61a718993fd",
+ "name": "bio3d_pca_visualize",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "d493e6c4-c2c2-4299-b3e7-3b58f9bdf24e",
+ "errors": null,
+ "name": "PCA visualization",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 610,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/bio3d_pca_visualize/bio3d_pca_visualize/2.3.4",
+ "type": "tool"
+ },
+ "28": {
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/chemteam/mdanalysis_cosine_analysis/mdanalysis_cosine_analysis/0.20",
+ "tool_version": "0.20",
+ "outputs": [{
+ "type": "tabular",
+ "name": "output"
+ }, {
+ "type": "txt",
+ "name": "cosout"
+ }],
+ "workflow_outputs": [],
+ "input_connections": {
+ "trajin": {
+ "output_name": "output",
+ "id": 20
+ },
+ "strin": {
+ "output_name": "output",
+ "id": 21
+ }
+ },
+ "tool_state": "{\"index\": \"\\\"0\\\"\", \"__page__\": null, \"strin\": \"null\", \"__rerun_remap_job_id__\": null, \"trajin\": \"null\", \"components\": \"\\\"3\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "id": 28,
+ "tool_shed_repository": {
+ "owner": "chemteam",
+ "changeset_revision": "e39bc1f90d8f",
+ "name": "mdanalysis_cosine_analysis",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "uuid": "8e5d6f10-f397-4781-a418-0a9d34af8716",
+ "errors": null,
+ "name": "Cosine Content",
+ "post_job_actions": {},
+ "label": null,
+ "inputs": [],
+ "position": {
+ "top": 730,
+ "left": 1770
+ },
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/chemteam/mdanalysis_cosine_analysis/mdanalysis_cosine_analysis/0.20",
+ "type": "tool"
+ }
+ },
+ "annotation": "Protein-ligand simulation workflow (HTMD)",
+ "a_galaxy_workflow": "true"
+}
diff --git a/topics/contributing/metadata.yaml b/topics/contributing/metadata.yaml
index c5617447a61e9b..3f182b9ba18582 100644
--- a/topics/contributing/metadata.yaml
+++ b/topics/contributing/metadata.yaml
@@ -3,7 +3,7 @@ name: "contributing"
type: "instructors"
title: "Contributing to the Galaxy Training Material"
summary: "Galaxy is a great solution to train the bioinformatics concepts:
-numerous bioinformatics tools are available (almost 5,000 in the ToolShed), it
+numerous bioinformatics tools are available (more than 7,000 in the ToolShed), it
can be used by people without amy computer science skills, it trains to use
technology, outlining available resources and efforts that have made them
accessible to researchers, it is scalable.
diff --git a/topics/contributing/tutorials/create-new-tutorial-content/tutorial.md b/topics/contributing/tutorials/create-new-tutorial-content/tutorial.md
index fb275aacd5f5ce..93a4fbb01379c3 100644
--- a/topics/contributing/tutorials/create-new-tutorial-content/tutorial.md
+++ b/topics/contributing/tutorials/create-new-tutorial-content/tutorial.md
@@ -178,15 +178,16 @@ To help developing the tutorial, we recommend to create a workflow of the differ
> ### {% icon hands_on %} Hands-on: Create the structure of the tutorial from a workflow
>
> 1. Create a small workflow with one or two steps on a running Galaxy instance
-> 2. Get the workflow id
+> 2. Add the topic name as Tag and the tutorial title as Annotation/Notes to the workflow using the workflow editor.
+> 3. Get the workflow id
> 1. Go the "Share" page of the workflow
> 2. Copy the information after `id=` in the URL of the page
-> 3. Get your API key for this Galaxy instance
+> 4. Get your API key for this Galaxy instance
> 1. Click on **User** --> **Preferences**
> 2. Click on **Manage API key**
> 3. Click on **Create a new key** (if none is available)
> 4. Copy the API key
-> 4. Generate the skeleton of the tutorial locally
+> 5. Generate the skeleton of the tutorial locally
>
> ```
> $ planemo training_generate_from_wf \
@@ -198,7 +199,7 @@ To help developing the tutorial, we recommend to create a workflow of the differ
> --zenodo_link "URL to the Zenodo record (Optional)"
> ```
>
-> > ### {% icon comment %} Using a local worklfow
+> > ### {% icon comment %} Using a local workflow
> > It is also possible to download the workflow locally (with the `.ga` extension), and then run a slightly different command:
> >
> > ```
@@ -210,7 +211,7 @@ To help developing the tutorial, we recommend to create a workflow of the differ
> > ```
> {: .comment}
>
-> 5. Inspect the generated `tutorial.md`
+> 6. Inspect the generated `tutorial.md`
{: .hands_on}
The generated tutorial is structured with:
@@ -778,5 +779,6 @@ A bibliography will automatically be appended to the end of your tutorial (scrol
> If you have a DOI for a paper, you can easily obtain the bibtex citation using [doi2bib.org](https://www.doi2bib.org/).
{: .tip}
+
# Conclusion
{:.no_toc}
diff --git a/topics/contributing/tutorials/create-new-tutorial-technical/tutorial.md b/topics/contributing/tutorials/create-new-tutorial-technical/tutorial.md
index aab64b1b85dd8c..14cae4b16afc50 100644
--- a/topics/contributing/tutorials/create-new-tutorial-technical/tutorial.md
+++ b/topics/contributing/tutorials/create-new-tutorial-technical/tutorial.md
@@ -50,9 +50,10 @@ Once the tutorial is ready, we need to develop a workflow that represents the st
> ### {% icon hands_on %} Hands-on: Extract the workflow
>
-> 1. Download the workflow for the tutorial
-> 2. Save it in the `workflow` directory of the tutorial
-> 3. Check that your `workflow` directory has an `index.md` with the contents:
+> 1. Add the topic name as Tag and the tutorial title as Annotation/Notes to the workflow using the workflow editor.
+> 2. Download the workflow for the tutorial
+> 3. Save it in the `workflow` directory of the tutorial
+> 4. Check that your `workflow` directory has an `index.md` with the contents:
>
> ```yaml
> ---
@@ -60,8 +61,6 @@ Once the tutorial is ready, we need to develop a workflow that represents the st
> ---
> ```
{: .hands_on}
-> 3. Edit the workflow with a text editor to add the topic name as 'tags' and to add the tutorial title as 'annotation' to the workflow.
-
## Testing the workflow (recommended)
@@ -282,7 +281,7 @@ items:
> ```
>
> 3. Check that the `data-library.yaml` has been generated (or updated)
-> 4. Check tha the Zenodo link is in the metadata at the top of the `tutorial.md`
+> 4. Check that the Zenodo link is in the metadata at the top of the `tutorial.md`
{: .hands_on}
# Creating the `data-manager.yaml` (optional)
diff --git a/topics/contributing/tutorials/create-new-tutorial/tutorial.md b/topics/contributing/tutorials/create-new-tutorial/tutorial.md
index d769bb71df2fe7..ee7ea837ae717f 100644
--- a/topics/contributing/tutorials/create-new-tutorial/tutorial.md
+++ b/topics/contributing/tutorials/create-new-tutorial/tutorial.md
@@ -259,8 +259,9 @@ For the next times, you can make it quicker.
>
> 1. Determine the topic
> 2. Create your workflow on a running Galaxy instance
-> 3. Create a Zenodo record with the input data
-> 4. Generate the skeleton of your tutorial
+> 3. Add the topic name as Tag and the tutorial title as Annotation/Notes to the workflow using the workflow editor.
+> 4. Create a Zenodo record with the input data
+> 5. Generate the skeleton of your tutorial
> - option 1: from a workflow located on a Galaxy
> ```
> $ planemo training_init \
@@ -285,7 +286,7 @@ For the next times, you can make it quicker.
> You can use the example workflow file located in `topics/contributing/tutorials/create-new-tutorial/workflows/example-workflow.ga` if
> you do not have a workflow of your own. This is the workflow belonging to the *Galaxy 101* introduction tutorial.
>
-> 5. Fill the remaining metadata in the `tutorial.md`
-> 6. Fill the content of the `tutorial.md`
-> 7. Check it using Jekyll
+> 6. Fill the remaining metadata in the `tutorial.md`
+> 7. Fill the content of the `tutorial.md`
+> 8. Check it using Jekyll
{: .hands_on}
diff --git a/topics/dev/images/Makefile b/topics/dev/images/Makefile
new file mode 100644
index 00000000000000..7c1c37ab7318d3
--- /dev/null
+++ b/topics/dev/images/Makefile
@@ -0,0 +1,26 @@
+# Render each PlantUML source (*.plantuml.txt) in this directory to an SVG
+# named <name>.plantuml.svg. Run `make` here after editing a diagram.
+
+# Remove a half-written target (e.g. an interrupted download) on failure.
+.DELETE_ON_ERROR:
+
+PLANTUML_JAR := plantuml.jar
+# NOTE(review): plain-HTTP download of an executable jar; consider an HTTPS
+# source. Override on the command line: make PLANTUML_URL=...
+PLANTUML_URL ?= http://jaist.dl.sourceforge.net/project/plantuml/plantuml.jar
+
+INPUTS := $(wildcard *.plantuml.txt)
+OUTPUTS := $(INPUTS:.txt=.svg)
+# Files pulled in by the diagrams via `!include`; editing them restyles the output.
+INCLUDES := plantuml_options.txt plantuml_style.txt
+
+.PHONY: all
+all: $(PLANTUML_JAR) $(OUTPUTS)
+
+# One render per out-of-date diagram ($< is that diagram's source). The jar is
+# order-only: it must exist, but a fresh download does not force re-rendering.
+%.plantuml.svg: %.plantuml.txt $(INCLUDES) | $(PLANTUML_JAR)
+	java -jar $(PLANTUML_JAR) -c plantuml_options.txt -tsvg $<
+
+$(PLANTUML_JAR):
+	wget -O $@ $(PLANTUML_URL)
diff --git a/topics/dev/images/core_branches.plantuml.svg b/topics/dev/images/core_branches.plantuml.svg
new file mode 100644
index 00000000000000..f65513ded9ea26
--- /dev/null
+++ b/topics/dev/images/core_branches.plantuml.svg
@@ -0,0 +1,91 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_branches.plantuml.txt b/topics/dev/images/core_branches.plantuml.txt
new file mode 100644
index 00000000000000..ba54dd4eee94be
--- /dev/null
+++ b/topics/dev/images/core_branches.plantuml.txt
@@ -0,0 +1,19 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* Branches
+**:dev
+ Most active development happens here!
+ New features must be added here;
+**:master/
+ References latest stable branch.
+ Generally never develop or deploy
+ against this branch.;
+**:release_20.05/
+ Release branches - named by month.
+ Generally bug fixes should target
+ oldest relevant branch;
+** release_20.01/
+** release_19.09/
+@endmindmap
diff --git a/topics/dev/images/core_files_ci.plantuml.svg b/topics/dev/images/core_files_ci.plantuml.svg
new file mode 100644
index 00000000000000..530d0fbdf51f78
--- /dev/null
+++ b/topics/dev/images/core_files_ci.plantuml.svg
@@ -0,0 +1,103 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_files_ci.plantuml.txt b/topics/dev/images/core_files_ci.plantuml.txt
new file mode 100644
index 00000000000000..59a5fb7bdc2de0
--- /dev/null
+++ b/topics/dev/images/core_files_ci.plantuml.txt
@@ -0,0 +1,25 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* /
+**:.ci
+ scripts for linting Python, etc..;
+***:jenkins/
+ sub-directory per Jenkins job type, wrappers
+ around run_tests for Dockerized testing
+ can be used outside Jenkins;
+**** api/
+**** framework/
+**** selenium/
+** .circleci/
+***:config.yml
+ run unit tests, lint, tool validation, etc.. on CircleCI;
+** .github/workflows
+***:integration.yaml
+ run integration test suite with GitHub Actions;
+*** integration_selenium.yaml
+*** toolshed.yaml
+***:converter_tests.yaml
+ run tool tests for datatype converters;
+@endmindmap
diff --git a/topics/dev/images/core_files_code.plantuml.svg b/topics/dev/images/core_files_code.plantuml.svg
new file mode 100644
index 00000000000000..caed7ec43f7157
--- /dev/null
+++ b/topics/dev/images/core_files_code.plantuml.svg
@@ -0,0 +1,95 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_files_code.plantuml.txt b/topics/dev/images/core_files_code.plantuml.txt
new file mode 100644
index 00000000000000..4ad063a0125d33
--- /dev/null
+++ b/topics/dev/images/core_files_code.plantuml.txt
@@ -0,0 +1,21 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* /
+**:lib/
+ root of monolithic Python backend;
+***:galaxy/
+ most of the code that makes up the backend;
+***:galaxy_ext/
+ a few more files used standalone by jobs, etc..;
+***:tool_shed/
+ source code for the Galaxy ToolShed;
+**:packages/
+ Python backend decomposed into pieces (same files);
+**:client/
+ Galaxy frontend project;
+*** galaxy/
+****:scripts/
+ Galaxy frontend ES6 source files;
+@endmindmap
diff --git a/topics/dev/images/core_files_project_docs.plantuml.svg b/topics/dev/images/core_files_project_docs.plantuml.svg
new file mode 100644
index 00000000000000..d134743fa2b94b
--- /dev/null
+++ b/topics/dev/images/core_files_project_docs.plantuml.svg
@@ -0,0 +1,103 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_files_project_docs.plantuml.txt b/topics/dev/images/core_files_project_docs.plantuml.txt
new file mode 100644
index 00000000000000..51334d1e6e0b33
--- /dev/null
+++ b/topics/dev/images/core_files_project_docs.plantuml.txt
@@ -0,0 +1,25 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* /
+**:README.rst
+ how to install and start Galaxy;
+**:CONTRIBUTING.md
+ how to contribute to Galaxy;
+**:CODE_OF_CONDUCT.md
+ "expectations for participants within the Galaxy community";
+**:LICENSE.txt
+ AFL license;
+**:CITATION
+ description of how to cite Galaxy;
+**:CONTRIBUTORS.md
+ list of people who have contributed;
+** doc/
+*** source/
+**** project/
+*****:issues.rst
+ describes Github tags, etc;
+*****:organization.rst
+ project governance document;
+@endmindmap
diff --git a/topics/dev/images/core_files_scripts.plantuml.svg b/topics/dev/images/core_files_scripts.plantuml.svg
new file mode 100644
index 00000000000000..db78fa7d2d9bf9
--- /dev/null
+++ b/topics/dev/images/core_files_scripts.plantuml.svg
@@ -0,0 +1,101 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_files_scripts.plantuml.txt b/topics/dev/images/core_files_scripts.plantuml.txt
new file mode 100644
index 00000000000000..0c54b46b764f3c
--- /dev/null
+++ b/topics/dev/images/core_files_scripts.plantuml.txt
@@ -0,0 +1,24 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* /
+**:run.sh
+ shell script for starting Galaxy standalone;
+**:Makefile
+ common development and deployment tasks;
+**:run_tests.sh
+ script meant to be friendly wrapper for running tests;
+**:scripts/
+ directory full of more specific scripts (mostly admin stuff);
+***:galaxy-main
+ script to run Galaxy without a web server;
+***:db_shell.py
+ interactive environment for exploring Galaxy database and models;
+*** cleanup_datasets/
+****:pgcleanup.py
+ optimized postgres commands for managing Galaxy datasets, etc.;
+**:manage_db.sh
+ script to upgrade/downgrade Galaxy database;
+
+@endmindmap
diff --git a/topics/dev/images/core_files_test.plantuml.svg b/topics/dev/images/core_files_test.plantuml.svg
new file mode 100644
index 00000000000000..59eaea650a6de5
--- /dev/null
+++ b/topics/dev/images/core_files_test.plantuml.svg
@@ -0,0 +1,115 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/core_files_test.plantuml.txt b/topics/dev/images/core_files_test.plantuml.txt
new file mode 100644
index 00000000000000..8227717e40a781
--- /dev/null
+++ b/topics/dev/images/core_files_test.plantuml.txt
@@ -0,0 +1,31 @@
+@startmindmap
+!include plantuml_style.txt
+!include plantuml_options.txt
+
+* /
+**:client/galaxy/scripts/
+ JS sources include unit tests;
+***:components/
+ VueJS components define tests next to source file;
+***:mocha/
+ standalone JS mocha unit tests;
+***:qunit/
+ older JS qunit unit tests;
+**:lib/galaxy_test/
+ properly packaged Python tests;
+***:base/
+ base Python infrastructure for testing;
+***:api/
+ tests against Galaxy API;
+***:selenium/
+ end-to-end tests with automated browser;
+**:test/
+ misc Python tests;
+***:integration/
+ API tests against custom Galaxy configs;
+*** integration_selenium/
+***:unit/
+ Python unit tests;
+**:tox.ini
+ Tox entry point for linting Python, etc..;
+@endmindmap
diff --git a/topics/dev/images/hda_dataset.plantuml.svg b/topics/dev/images/hda_dataset.plantuml.svg
index b0241914189fba..c5ac0bcd61c044 100644
--- a/topics/dev/images/hda_dataset.plantuml.svg
+++ b/topics/dev/images/hda_dataset.plantuml.svg
@@ -1 +1,94 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/hda_dataset.plantuml.txt b/topics/dev/images/hda_dataset.plantuml.txt
new file mode 100644
index 00000000000000..92d56b5b1b41c2
--- /dev/null
+++ b/topics/dev/images/hda_dataset.plantuml.txt
@@ -0,0 +1,23 @@
+@startuml
+
+!include plantuml_options.txt
+
+class HistoryDatasetAssociation {
+ hid: integer
+ history_id: integer
+ dataset_id: integer
+ state: string
+ name: string
+ info: string
+}
+
+class Dataset {
+ object_store_id: string
+ external_filename: string
+ _extra_files_path: string
+ file_size: integer
+ total_size: integer
+}
+
+HistoryDatasetAssociation "*" -> "1" Dataset
+@enduml
\ No newline at end of file
diff --git a/topics/dev/images/hda_hdca.plantuml.svg b/topics/dev/images/hda_hdca.plantuml.svg
index 1deb70e2b056dd..f47ff65d0f331d 100644
--- a/topics/dev/images/hda_hdca.plantuml.svg
+++ b/topics/dev/images/hda_hdca.plantuml.svg
@@ -1 +1,114 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/hda_hdca.plantuml.txt b/topics/dev/images/hda_hdca.plantuml.txt
new file mode 100644
index 00000000000000..facb8bf401edb8
--- /dev/null
+++ b/topics/dev/images/hda_hdca.plantuml.txt
@@ -0,0 +1,29 @@
+@startuml
+
+!include plantuml_options.txt
+
+class History
+class HistoryDatasetAssociation {
+ history_content_type = 'dataset'
+ hid
+}
+class HistoryDatasetCollectionAssociation {
+ history_content_type = 'dataset_collection'
+ hid
+}
+class DatasetCollection {
+ collection_type
+}
+class DatasetCollectionElement {
+ element_index
+ element_identifier
+}
+
+History "1" -- "*" HistoryDatasetAssociation
+History "1" -- "*" HistoryDatasetCollectionAssociation
+HistoryDatasetCollectionAssociation "1" -- "1" DatasetCollection
+DatasetCollection "1" -- "0..1" DatasetCollectionElement
+DatasetCollectionElement "*" -- "1" DatasetCollection
+HistoryDatasetAssociation "1" -- "0..1" DatasetCollectionElement
+
+@enduml
\ No newline at end of file
diff --git a/topics/dev/images/objectstore.plantuml.svg b/topics/dev/images/objectstore.plantuml.svg
index 6a52eea244520b..13ae2723e56507 100644
--- a/topics/dev/images/objectstore.plantuml.svg
+++ b/topics/dev/images/objectstore.plantuml.svg
@@ -1 +1,114 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/objectstore.plantuml.txt b/topics/dev/images/objectstore.plantuml.txt
new file mode 100644
index 00000000000000..48438424e1f72d
--- /dev/null
+++ b/topics/dev/images/objectstore.plantuml.txt
@@ -0,0 +1,28 @@
+@startuml
+
+!include plantuml_options.txt
+
+abstract class ObjectStore {
+ exists(obj)
+ file_ready(obj)
+ create(obj)
+ size(obj)
+ delete(obj)
+ get_data(obj)
+ get_filename(obj)
+ update_from_file(obj)
+ get_store_usage_percent()
+}
+
+class DiskObjectStore
+abstract class NestedObjectStore
+
+ObjectStore <|-- DiskObjectStore
+ObjectStore <|-- NestedObjectStore
+ObjectStore <|-- S3ObjectStore
+DiskObjectStore <|-- IRODSObjectStore
+NestedObjectStore <|-- DistributedObjectStore
+NestedObjectStore <|-- HierarchicalObjectStore
+
+
+@enduml
diff --git a/topics/dev/images/plantuml_options.txt b/topics/dev/images/plantuml_options.txt
new file mode 100644
index 00000000000000..de0cce3af1a798
--- /dev/null
+++ b/topics/dev/images/plantuml_options.txt
@@ -0,0 +1,34 @@
+skinparam handwritten true
+' skinparam roundcorner 20
+
+skinparam class {
+ ArrowFontColor DarkOrange
+ BackgroundColor #FFEFD5
+ ArrowColor Orange
+ BorderColor DarkOrange
+}
+
+skinparam object {
+ ArrowFontColor DarkOrange
+ BackgroundColor #FFEFD5
+ ArrowColor Orange
+ BorderColor DarkOrange
+}
+
+skinparam note {
+ BackgroundColor #FFEFD5
+ BorderColor #BF5700
+}
+
+skinparam sequence {
+ ArrowColor Orange
+ ArrowFontColor DarkOrange
+ ActorBorderColor DarkOrange
+ ActorBackgroundColor #FFEFD5
+
+ ParticipantBorderColor DarkOrange
+ ParticipantBackgroundColor #FFEFD5
+
+ LifeLineBorderColor DarkOrange
+ LifeLineBackgroundColor #FFEFD5
+}
diff --git a/topics/dev/images/plantuml_style.txt b/topics/dev/images/plantuml_style.txt
new file mode 100644
index 00000000000000..18911d622b75db
--- /dev/null
+++ b/topics/dev/images/plantuml_style.txt
@@ -0,0 +1,9 @@
+
diff --git a/topics/dev/images/server_client.plantuml.svg b/topics/dev/images/server_client.plantuml.svg
index 723ce49323b368..cc1aa8fcc52832 100644
--- a/topics/dev/images/server_client.plantuml.svg
+++ b/topics/dev/images/server_client.plantuml.svg
@@ -1 +1,85 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/server_client.plantuml.txt b/topics/dev/images/server_client.plantuml.txt
new file mode 100644
index 00000000000000..eadc62249c8e1b
--- /dev/null
+++ b/topics/dev/images/server_client.plantuml.txt
@@ -0,0 +1,20 @@
+@startuml
+
+!include plantuml_options.txt
+
+note over Browser, Server: HTTP
+
+Browser -> Server: Page Request
+activate Server
+Server -->Browser: Static Content (HTML+JS+CSS)
+deactivate Server
+
+note left of Browser: MVC with \nbackbone.js
+Browser -> Server: API Request (JSON)
+activate Server
+note right of Server: Build JSON response\nin Galaxy "API" controllers
+Server --> Browser: API Response (JSON)
+deactivate Server
+
+note left of Browser: HTML rendered from\nclient-side templates\n(in Backbone views).
+@enduml
diff --git a/topics/dev/images/server_client_old.plantuml.svg b/topics/dev/images/server_client_old.plantuml.svg
index c9e8918af7c23c..2258699515fc98 100644
--- a/topics/dev/images/server_client_old.plantuml.svg
+++ b/topics/dev/images/server_client_old.plantuml.svg
@@ -1 +1,73 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/server_client_old.plantuml.txt b/topics/dev/images/server_client_old.plantuml.txt
new file mode 100644
index 00000000000000..2cc5c1ee5d043a
--- /dev/null
+++ b/topics/dev/images/server_client_old.plantuml.txt
@@ -0,0 +1,14 @@
+@startuml
+
+!include plantuml_options.txt
+
+note over Browser, Server: HTTP
+
+Browser -> Server: Page Request
+activate Server
+note right of Server: Build HTML fragments\nusing Mako Python library\nin Galaxy "web" controllers
+Server --> Browser: Static Content (HTML+JS+CSS)
+deactivate Server
+note left of Browser: Render HTML fragments \nwith JavaScript
+
+@enduml
diff --git a/topics/dev/images/server_client_vuejs.plantuml.svg b/topics/dev/images/server_client_vuejs.plantuml.svg
new file mode 100644
index 00000000000000..ee96bd155b670f
--- /dev/null
+++ b/topics/dev/images/server_client_vuejs.plantuml.svg
@@ -0,0 +1,85 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/server_client_vuejs.plantuml.txt b/topics/dev/images/server_client_vuejs.plantuml.txt
new file mode 100644
index 00000000000000..5f032ac4d00324
--- /dev/null
+++ b/topics/dev/images/server_client_vuejs.plantuml.txt
@@ -0,0 +1,20 @@
+@startuml
+
+!include plantuml_options.txt
+
+note over Browser, Server: HTTP
+
+Browser -> Server: Page Request
+activate Server
+Server -->Browser: Static Content (HTML+JS+CSS)
+deactivate Server
+
+note left of Browser: Components built with VueJS.\nAxios used to build service layer.
+Browser -> Server: API Request (JSON)
+activate Server
+note right of Server: Build JSON response\nin Galaxy "API" controllers
+Server --> Browser: API Response (JSON)
+deactivate Server
+
+note left of Browser: JSON objects stored in Vuex store\nor consumed by components.\nHTML rendered from VueJS\ncomponent templates.\n
+@enduml
diff --git a/topics/dev/images/webapp.plantuml.svg b/topics/dev/images/webapp.plantuml.svg
index 129ec97d6b5caa..1d20adc9da06fc 100644
--- a/topics/dev/images/webapp.plantuml.svg
+++ b/topics/dev/images/webapp.plantuml.svg
@@ -1 +1,98 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/topics/dev/images/webapp.plantuml.txt b/topics/dev/images/webapp.plantuml.txt
new file mode 100644
index 00000000000000..1822c93c41e700
--- /dev/null
+++ b/topics/dev/images/webapp.plantuml.txt
@@ -0,0 +1,25 @@
+@startuml
+
+!include plantuml_options.txt
+
+object webapp {
+ controllers : dict
+ api_controllers : dict
+ mapper : routes.Mapper
+ handle_request: (environ, start_response) -> ()
+ transaction_factory: (environ) -> GalaxyWebTransaction
+}
+
+object app {
+
+}
+
+object trans {
+
+}
+
+webapp -> "app" app
+app "app" <-- trans
+webapp *-- trans
+
+@enduml
diff --git a/topics/dev/images/webapp_classes.plantuml.svg b/topics/dev/images/webapp_classes.plantuml.svg
new file mode 100644
index 00000000000000..16cbc304c834d8
--- /dev/null
+++ b/topics/dev/images/webapp_classes.plantuml.svg
@@ -0,0 +1,113 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/webapp_classes.plantuml.txt b/topics/dev/images/webapp_classes.plantuml.txt
new file mode 100644
index 00000000000000..6359cc899a3c77
--- /dev/null
+++ b/topics/dev/images/webapp_classes.plantuml.txt
@@ -0,0 +1,28 @@
+@startuml
+
+!include plantuml_options.txt
+
+class galaxy.framework.base.WebApplication {
+ handle_request()
+}
+
+class galaxy.web.framework.webapp.WebApplication {
+
+}
+
+class galaxy.webapps.galaxy.GalaxyWebApplication {
+}
+
+class galaxy.framework.base.DefaultWebTransaction {
+
+}
+
+class galaxy.web.framework.webapp.GalaxyWebTransaction {
+}
+
+galaxy.framework.base.WebApplication <|-- galaxy.web.framework.webapp.WebApplication
+galaxy.web.framework.webapp.WebApplication <|-- galaxy.webapps.galaxy.GalaxyWebApplication
+galaxy.framework.base.WebApplication *- galaxy.framework.base.DefaultWebTransaction
+galaxy.framework.base.DefaultWebTransaction <|-- galaxy.web.framework.webapp.GalaxyWebTransaction
+
+@enduml
diff --git a/topics/dev/images/wsgi_app.plantuml.svg b/topics/dev/images/wsgi_app.plantuml.svg
new file mode 100644
index 00000000000000..f53834fcf80a5b
--- /dev/null
+++ b/topics/dev/images/wsgi_app.plantuml.svg
@@ -0,0 +1,141 @@
+
\ No newline at end of file
diff --git a/topics/dev/images/wsgi_app.plantuml.txt b/topics/dev/images/wsgi_app.plantuml.txt
new file mode 100644
index 00000000000000..96814abee5c996
--- /dev/null
+++ b/topics/dev/images/wsgi_app.plantuml.txt
@@ -0,0 +1,48 @@
+@startuml
+
+!include plantuml_options.txt
+participant Browser
+participant "WSGI Server"
+participant Middleware
+participant WebApplication
+participant Controller
+participant Manager
+
+note over WebApplication, Manager: Galaxy Backend
+
+Browser -> "WSGI Server": TCP/IP Request
+activate "WSGI Server"
+note left of Browser: Static content\nCSS, JS, Image
+"WSGI Server" --> Browser
+deactivate "WSGI Server"
+
+Browser -> "WSGI Server": TCP/IP Request
+activate "WSGI Server"
+"WSGI Server" -> Middleware: wsgi environ
+activate Middleware
+Middleware -> WebApplication: wsgi environ
+activate WebApplication
+WebApplication -> Controller: trans, payload
+activate Controller
+Controller -> Manager:
+activate Manager
+
+note left of Browser: API Request
+note right of Manager: Vanilla Python function calls.\nUse application components\nto perform "business logic"
+
+Manager --> Controller: Python objects
+deactivate Manager
+
+Controller --> WebApplication: JSON
+deactivate Controller
+
+WebApplication --> Middleware:
+deactivate WebApplication
+
+Middleware --> "WSGI Server": Stream of bytes
+deactivate Middleware
+
+"WSGI Server" --> Browser
+deactivate "WSGI Server"
+
+@enduml
diff --git a/topics/dev/tutorials/architecture/slides.html b/topics/dev/tutorials/architecture/slides.html
index f6dfc8fcb025e5..444556f27ef521 100644
--- a/topics/dev/tutorials/architecture/slides.html
+++ b/topics/dev/tutorials/architecture/slides.html
@@ -15,8 +15,8 @@
requirements:
key_points:
- "Galaxy runs out of the box and fetches all needed dependencies."
- - "Running Galaxy in production requires work configuration."
- - "The architecture is designed to be pluggable and extendable."
+ - "Running Galaxy in production requires configuration."
+ - "The architecture is designed to be pluggable and extensible."
- "Learn more about different Galaxy aspects in our [development section](/topics/dev/)."
contributors:
- jmchilton
@@ -46,20 +46,30 @@
---
-## Getting involved in Galaxy
-
----
+class: enlarge200
**Gitter:** [galaxyproject/Lobby ](https://gitter.im/galaxyproject/Lobby)
-**IRC:** [irc.freenode.net#galaxyproject ](https://webchat.freenode.net/?channels=galaxyproject)
-
**GitHub:** [github.com/galaxyproject ](https://github.com/galaxyproject)
**Twitter:** #usegalaxy, @galaxyproject
---
+## The **/galaxyproject** projects
+
+*The social architecture of the ecosystem.*
+
+???
+
+Too many to cover all but we'll cover some of the big ones in a few big
+categories. User-facing applications, projects for Galaxy plugin developers,
+and projects for Galaxy administrators.
+
+---
+
+class: enlarge150
+
### Contributing
All Galaxy development happens on GitHub
@@ -68,28 +78,91 @@
---
-## The **/galaxyproject** projects
+class: enlarge200
+
+**User-Facing Applications**
---
+class: enlarge150
+
[galaxyproject/**galaxy** ](https://github.com/galaxyproject/galaxy)
The main Galaxy application.
Web interface, database model, job running, etc...
-Also includes other web applications including the **ToolShed** and **Reports**
+Also includes other web applications including the **ToolShed**.
---
+class: enlarge150
+
+[galaxyproject/**cloudlaunch** ](https://github.com/galaxyproject/cloudlaunch)
+
+CloudLaunch web application to make it easy to launch images on a cloud, drives *https://launch.usegalaxy.org*
+
+---
+
+class: enlarge150
+
+[galaxyproject/**training-material** ](https://github.com/galaxyproject/training-material)
+
+![logo](https://github.com/galaxyproject/training-material/raw/master/shared/images/GTNLogo1000.png)
+
+Galaxy training material for scientists, developers, and admins. Powers *https://training.galaxyproject.org/*.
+
+---
+
+class: enlarge150
+
+[galaxyproject/**bioblend** ](https://github.com/galaxyproject/bioblend)
+
+Official Python client for the Galaxy, ToolShed, and CloudMan APIs.
+
+Best documented path to scripting the Galaxy API.
+
+---
+
+class: enlarge150
+
+- [galaxyproject/**blend4php**](https://github.com/galaxyproject/blend4php)
+- [**jmchilton/blend4j**](https://github.com/jmchilton/blend4j)
+- [**chapmanb/clj-blend**](https://github.com/chapmanb/clj-blend)
+
+Galaxy API bindings for other languages, less actively maintained.
+
+---
+
+class: enlarge150
+
[galaxyproject/**cloudman**](https://github.com/galaxyproject/cloudman)
-Galaxy CloudMan - a web application which manages a Galaxy cluster in
+Galaxy CloudMan - a web application which manages a Galaxy Kubernetes cluster in
the cloud.
-[galaxyproject/**cloudlaunch** ](https://github.com/galaxyproject/cloudlaunch)
+---
+
+class: enlarge150
+
+[**bgruening/docker-galaxy-stable** ](https://github.com/bgruening/docker-galaxy-stable)
+
+High quality Docker containers for stable Galaxy environments.
+
+Releases corresponding to each new version of Galaxy.
+
+Many flavors available.
+
+---
-CloudLaunch web application to make it easy to launch images on a cloud, drives *https://launch.usegalaxy.org *
+class: white
+![Docker](../../images/docker-chart.png)
+
+---
+
+class: enlarge200
+
+**For Plugin Developers**
---
@@ -106,7 +179,6 @@
Many older tools appearing on usegalaxy.org.
-
---
### Tools Aside - More Repositories
@@ -126,16 +198,9 @@
* [AAFC-MBB Canada repo](https://github.com/AAFC-MBB/Galaxy/tree/master/wrappers)
* [Mark Einon's repo](https://gitlab.com/einonm/galaxy-tools)
-
---
-[galaxyproject/**starforge** ](https://github.com/galaxyproject/starforge)
-
-Build Galaxy Tool dependencies for the ToolShed in Docker containers
-
-Build Galaxy framework dependencies as Python wheels
-
----
+class: enlarge150
[galaxyproject/**planemo** ](https://github.com/galaxyproject/planemo)
@@ -151,7 +216,26 @@
---
-galaxyproject/**{ansible-\*, \*-playbook}**
+class: enlarge150
+
+[galaxyproject/**starforge** ](https://github.com/galaxyproject/starforge)
+
+![StarForge logo](https://raw.githubusercontent.com/galaxyproject/starforge/master/docs/starforge_logo.png)
+
+Build Galaxy framework dependencies as Python wheels when needed.
+
+---
+
+class: enlarge200
+
+**For Deployers and Admins**
+
+---
+
+class: enlarge150
+
+galaxyproject/**{ansible-\*, \*-playbook}**
+usegalaxy-eu/**{ansible-\*, \*-playbook}**
[Ansible](https://www.ansible.com/) components to automate almost every aspect of Galaxy installation and maintenance.
@@ -161,7 +245,11 @@
---
-[galaxyproject/**pulsar**](https://github.com/galaxyproject/pulsar)
+class: enlarge150
+
+[galaxyproject/**pulsar** ](https://github.com/galaxyproject/pulsar)
+
+![Pulsar Logo](https://galaxyproject.org/images/galaxy-logos/pulsar_transparent.png)
Distributed job execution engine for Galaxy.
@@ -173,39 +261,116 @@
---
-[galaxyproject/**bioblend** ](https://github.com/galaxyproject/bioblend)
+class: enlarge150
-Official Python client for the Galaxy, ToolShed, and CloudMan APIs.
+[galaxyproject/**ephemeris** ](https://github.com/galaxyproject/ephemeris)
-Best documented path to scripting the Galaxy API.
+Library and CLI for managing Galaxy plugins - tools, index data, and workflows.
+
+Layer on top of BioBlend building useful utilities for working with the Galaxy API from an administrator perspective.
---
-- [galaxyproject/**blend4php**](https://github.com/galaxyproject/blend4php)
-- [**jmchilton/blend4j**](https://github.com/jmchilton/blend4j)
-- [**chapmanb/clj-blend**](https://github.com/chapmanb/clj-blend)
+class: enlarge150
-Galaxy API bindings for other languages.
+[**usegalaxy-eu/gxadmin **](https://github.com/usegalaxy-eu/gxadmin)
+
+Handy command-line utility for Galaxy administrators.
---
-[**bgruening/docker-galaxy-stable** ](https://github.com/bgruening/docker-galaxy-stable)
+class: enlarge150
-High quality Docker containers for stable Galaxy environments.
+### ephemeris vs gxadmin
-Releases corresponding to each new version of Galaxy.
+Ephemeris generally talks to the Galaxy API and is a pure Python project, gxadmin talks directly to the Galaxy database and relevant files.
-Many flavors available.
+---
+
+## Putting it all together
+
+![](../../images/galaxy_main_scheme.png)
+
+???
+
+TODO: cover gxformat2, helmchart, cloudlaunch
---
-class: white
-![Docker](../../images/docker-chart.png)
+class: enlarge200
+
+[galaxyproject/**galaxy** ](https://github.com/galaxyproject/galaxy)
+
+The rest of the slides will focus on the core repository.
+
+---
+
+## Project Management
+
+*The social architecture of the project.*
+
+---
+
+class: enlarge200
+
+### Contributing Quick Start
+
+Contribution guidelines: https://bit.ly/gx-CONTRIBUTING-md
+
+---
+
+class: enlarge200
+
+### Pull Requests
+
+Nearly all changes should come in through GitHub Pull Requests.
+
+The exceptions include security patches, packaging and release process artifacts, and backporting fixes to older releases.
+
+---
+
+class: enlarge150
+
+### Security (SECURITY_POLICY.md)
+
+In brief, to responsibly report security issues e-mail
+
+[`galaxy-committers@lists.galaxyproject.org`](mailto:galaxy-committers@lists.galaxyproject.org)
+
+---
+
+### Branches
+
+![Branches](../../images/core_branches.plantuml.svg)
+
+---
+
+class: enlarge150
+
+### Committers & Open Governance
+
+All repository governance is done in the open on GitHub via Pull Requests and voting. Galaxy goes beyond open source to open governance.
+
+> "The committers group is the group of trusted developers and advocates who manage the core Galaxy code base."
+
+> "Galaxy Project committers are the only individuals who may commit to the core Galaxy code base."
+
+> "Committers may participate in all formal votes, including votes to modify team membership, merge pull requests, and modify [policies]."
+
+---
+
+class: enlarge200
+
+### Code of Conduct (CODE_OF_CONDUCT.md)
+
+Describes expectations, encourages diversity, and describes how to report issues such as unacceptable behavior.
---
## Principles
+*The guiding principles of the core code architecture.*
+
---
### Aspirational Principles of Galaxy Architecture
@@ -232,9 +397,9 @@
- Consistent colors, fonts, themes, etc...
- Reusable components for presenting common widgets - from the generic (forms and grids) to the specific (tools and histories).
- Tied to specific technologies:
- - JavaScript driven
- - Backbone for MVC
- - webpack & RequireJS for modules
+ - Implemented in JavaScript (ES6)
+ - Built with [webpack](https://webpack.js.org/)
+ - [Vue.js](https://vuejs.org/) for component definitions
---
@@ -243,11 +408,11 @@
Galaxy's backend is in many ways driven by *pluggable interfaces* and
can be adapted to many different technologies.
-- SQLAlchemy allows using sqlite, postgres, or MySQL for a database.
+- SQLAlchemy allows using SQLite, PostgreSQL, or MySQL (sort of) for your database.
- Many different cluster backends or job managers are supported.
- Different frontend proxies (e.g. nginx) are supported as well as web
application containers (e.g. uWSGI).
-- Different storage strategies and technologies are supported (e.g. S3).
+- Different storage strategies and technologies are supported (e.g. S3, iRODS).
- Tool definitions, job metrics, stat middleware, tool dependency resolution, workflow modules,
datatype definitions are all plugin driven.
@@ -273,57 +438,91 @@
---
-## Web Frameworks
+### In other words...
+
+The Galaxy frontend is architected with the bench scientist in mind first and foremost,
+the Galaxy backend is architected with Galaxy administrators in mind first and foremost.
+
+???
+
+TODO: slide connecting architecture to people here
---
-![Client-Server Communications](../../images/server_client.plantuml.svg)
+## Files and Directories
-???
+*The physical architecture of the Galaxy code.*
-Workflow, Data Libraries, Visualization, History, Tool Menu,
-Many Grids, User and preference management.
+---
+
+### Project Docs
+
+![Project Files](../../images/core_files_project_docs.plantuml.svg)
---
-class: white
+### Code
-![Backbone MVC](../../images/backbone-model-view.svg)
+![Code](../../images/core_files_code.plantuml.svg)
-### Backbone MVC
+---
+
+### Scripts
+
+![Scripts](../../images/core_files_scripts.plantuml.svg)
---
-![Client-Server Communications (old)](../../images/server_client_old.plantuml.svg)
+### Test Sources
-???
+![Test Source Files](../../images/core_files_test.plantuml.svg)
-Admin things, Reports and Tool Shed Web app
+---
+
+### Continuous Integration
+
+![Continuous Integration Files](../../images/core_files_ci.plantuml.svg)
---
-![WSGI Application](../../images/wsgi_app.svg)
+## Web Frameworks
-### Galaxy WSGI
+*The architecture of an interaction.*
+
+---
+
+class: reduce70
+
+![Client-Server Communications](../../images/server_client_vuejs.plantuml.svg)
+
+Bits and pieces of older client technologies appear throughout - ranging from Python
+mako templates to generate HTML, lower-level jQuery, and lots of Backbone legacy MVC.
+
+---
+
+![Processing requests on the server](../../images/wsgi_app.plantuml.svg)
+
+Expanding the right side of that diagram. We will move through the component left to right.
---
### WSGI
- Python interface for web servers defined by PEP 333 - https://www.python.org/dev/peps/pep-0333/.
-- Galaxy moving from Paster to uWSGI to host the application.
- - https://bitbucket.org/ianb/paste
- - https://uwsgi-docs.readthedocs.io/
+- Galaxy tends to favor uWSGI, but other options such as Gunicorn and Paste can be used to host the application.
+ - https://uwsgi-docs.readthedocs.io/ (a million bells and whistles, highly performant, a bit brittle)
+ - https://gunicorn.org/ (simpler, more standard Python 3 WSGI server)
+ - https://bitbucket.org/ianb/paste (more of legacy interest, but still heavily used in testing for instance)
---
-![WSGI Request](../../images/wsgi_request.svg)
+![Processing requests on the server](../../images/wsgi_app.plantuml.svg)
---
template: left-aligned
-### Galaxy WSGI Middleware
+### WSGI Middleware
A WSGI function:
@@ -352,6 +551,18 @@
---
+![Processing requests on the server](../../images/wsgi_app.plantuml.svg)
+
+---
+
+### Classes
+
+![GalaxyWebApplication class diagram](../../images/webapp_classes.plantuml.svg)
+
+---
+
+### Instances
+
![webapp](../../images/webapp.plantuml.svg)
---
@@ -412,6 +623,18 @@
---
+class: enlarge150
+
+### Why not just Django?
+
+We do build on many of the same standards, concepts, and libraries - the entire stack isn't custom code but there is a non-trivial web framework defined in "Galaxy".
+
+The Galaxy community likes Django and uses it to build newer webapps, Galaxy simply predates it and has evolved its own framework.
+
+---
+
+class: enlarge150
+
### API Controllers
- `lib/galaxy/webapps/galaxy/controllers/api/`
@@ -422,6 +645,8 @@
---
+class: enlarge150
+
### Legacy Controllers
- `lib/galaxy/webapps/galaxy/controllers/`
@@ -435,6 +660,17 @@
---
+### Managers
+
+High-level business logic that ties all of these components together.
+
+Controllers should ideally be thin wrappers around actions defined in managers.
+
+Whenever a model operation requires more than just the database, the operation should be defined
+in a manager instead of in the model.
+
+---
+
### Galaxy Models
- Database interactions powered by SQLAlchemy - https://www.sqlalchemy.org/.
@@ -525,7 +761,7 @@
### Object Store
-.strike[```
+.strike[```python
>>> fh = open( dataset.file_path, 'w' )
>>> fh.write( ‘foo’ )
>>> fh.close()
@@ -533,7 +769,7 @@
>>> fh.read()
```]
-```
+```python
>>> update_from_file( dataset, file_name=‘foo.txt’ )
>>> get_data( dataset )
>>> get_data( dataset, start=42, count=4096 )
@@ -631,18 +867,6 @@
- `phinch`
- `bam_iobio`
-
----
-
-### Managers
-
-High-level business logic that tie all of these components together.
-
-Controllers should ideally be thin wrappers around actions defined in managers.
-
-Whenever model require more than just the database, the operation should be defined
-in a manager instead of in the model.
-
---
## Client Architecture
@@ -653,11 +877,12 @@
### Client Directories
-- Source stylesheets and JavaScript in `client/galaxy/{style|scripts}`
-- "Packed" scripts served by Galaxy stored in `static/{style|scripts}`
+- Source stylesheets and JavaScript (ES6) in `client/galaxy/{style|scripts}`
+- "Packed" bundles served by Galaxy stored in `static/dist`
+ - `run.sh` uses `git diff` to try to determine if client needs to be built before starting Galaxy
- webpack builds these "compiled" artifacts
-Upshot - modify files in `client` and rebuild with `make client` before
+Upshot - to develop against the client, modify files in `client/` and rebuild with `make client` before
deployment.
---
@@ -666,104 +891,59 @@
### Building the Client - Makefile Targets
-```
-client: grunt style ## Rebuild all client-side artifacts
+```Makefile
+client: node-deps ## Rebuild all client-side artifacts (for local dev)
+ cd client && yarn run build
-grunt: npm-deps ## Calls out to Grunt to build client
- cd client && node_modules/grunt-cli/bin/grunt
+client-production: node-deps ## Build optimized artifacts with sourcemaps.
+ cd client && yarn run build-production-maps
-style: npm-deps ## Calls the style task of Grunt
- cd client && node_modules/grunt-cli/bin/grunt style
+client-watch: node-deps ## Rebuild client on each change.
+ cd client && yarn run watch
-npm-deps: ## Install NodeJS dependencies.
- cd client && npm install
-```
-
----
-
-template: left-aligned
+client-format: node-deps ## Reformat client code
+ cd client && yarn run prettier
-### grunt
+client-eslint: node-deps # Run client linting
+ cd client && yarn run eslint
-Build tool for node/JavaScript, tasks in `client/Gruntfile.js`. Default task is
+client-test: node-deps ## Run JS unit tests via Karma
+ cd client && yarn run test
-.center[`grunt.registerTask( 'default', [ 'check-modules', 'uglify', 'webpack' ] );`]
-
-- `check-modules` Verifies node dependencies are correct and exact.
-- [`uglify`](https://github.com/mishoo/UglifyJS) Compresses JavaScript modules in `client` and move to `static` and creates source maps.
- - JavaScript loads much faster but difficult to debug by default
- - Source maps re-enable proper stack traces.
-- `webpack` Bundles modules together into a single JavaScript file - quickly loadable.
+node-deps: ## Install NodeJS and dependencies.
+```
---
-template: left-aligned
-
-### JavaScript Modules - The Problem
-
-From https://requirejs.org/docs/why.html:
-
-- Web sites are turning into Web apps
-- Code complexity grows as the site gets bigger
-- Assembly gets harder
-- Developer wants discrete JS files/modules
-- Deployment wants optimized code in just one or a few HTTP calls
+class: white
+![What is Webpack](../../images/what-is-webpack.svg)
---
-template: left-aligned
-
-### JavaScript Modules - The Solution
+class: enlarge150
-From https://requirejs.org/docs/why.html:
-
- - Some sort of #include/import/require
- - Ability to load nested dependencies
- - Ease of use for developer but then backed by an optimization tool that helps deployment
-
-RequireJS an implementation of AMD.
-
-
----
-
-### JavaScript Modules - Galaxy AMD Example
+### webpack in Galaxy
-```javascript
-/**
- This is the workflow tool form.
-*/
-define(['utils/utils', 'mvc/tool/tool-form-base'],
- function(Utils, ToolFormBase) {
+Packs and "transpiles" Galaxy ES6 code (.js), Galaxy Vue modules (.vue), libraries from npm, scss stylesheets (.scss) into browser native bundles.
- // create form view
- var View = ToolFormBase.extend({
- ...
- });
+It turns hundreds of high-level, well-organized files into optimized single files that can be quickly downloaded.
- return {
- View: View
- };
-});
-```
+Lots of active development and complexity around Viz plugins and dependencies for instance, but the webpack configuration file in `config/webpack.config.js` is fairly straightforward.
---
class: white
-![What is Webpack](../../images/what-is-webpack.svg)
+![Webpack in Action](../../images/jsload.png)
---
-### webpack in Galaxy
-
-- Turns Galaxy modules into an "app".
-- Builds two bundles currently - a common set of libraries and an analysis "app".
-- https://github.com/galaxyproject/galaxy/issues/1041
-- https://github.com/galaxyproject/galaxy/pull/1144
-
----
+### Stylesheets
-class: white
-![Webpack in Action](../../images/jsload.png)
+- Galaxy stylesheets are generally defined using the SCSS syntax
+- SCSS is a high-level superset of CSS - https://sass-lang.com/documentation/syntax
+- `sass` is leveraged by webpack to convert these styles to native CSS at client build time
+- Rebuild style with `make style`
+- Galaxy's SCSS files can be found in `client/galaxy/style/scss/`
---
@@ -808,15 +988,6 @@
---
-### Stylesheets
-
-- Galaxy uses the less CSS preprocessor - https://lesscss.org/
-- Rebuild style with `make style`
-- Less files in `client/galaxy/style/less`
-- Build happens with grunt recipe in `client/grunt-tasks/style.js`
-
----
-
## Dependencies
---
@@ -840,11 +1011,8 @@
### Dependencies - JavaScript
-These come bundled with Galaxy, so do not need to be fetched at runtime.
-
-- Dependencies are defined `galaxy/client/bower.json`.
-- Bower (https://bower.io/) is used to re-fetch these.
-- `cd client; grunt install-libs`
+- Dependencies are defined in `client/package.json`.
+- These are fetched from npm and compiled into bundles as part of `make client` and related `Makefile` targets.
---
@@ -854,7 +1022,7 @@
### Cloning Galaxy
-```
+```bash
$ git clone https://github.com/galaxyproject/galaxy.git galaxy
Cloning into 'galaxy'...
remote: Counting objects: 173809, done.
@@ -875,7 +1043,7 @@
### Copying Configs
-```
+```bash
$ sh run.sh
Initializing config/migrated_tools_conf.xml from migrated_tools_conf.xml.sample
Initializing config/shed_tool_conf.xml from shed_tool_conf.xml.sample
@@ -1425,3 +1593,44 @@
## Q & A
---
+
+---
+
+### Legacy Client Architecture
+
+???
+
+Still some backbone and templates here and there, might be better to drop though?
+
+---
+
+![Client-Server Communications](../../images/server_client.plantuml.svg)
+
+???
+
+Workflow, Data Libraries, Visualization, History, Tool Menu,
+Many Grids, User and preference management.
+
+---
+
+class: white
+
+![Backbone MVC](../../images/backbone-model-view.svg)
+
+### Backbone MVC
+
+---
+
+![Client-Server Communications (old)](../../images/server_client_old.plantuml.svg)
+
+???
+
+Admin things, Reports and Tool Shed Web app
+
+---
+
+![WSGI Application](../../images/wsgi_app.svg)
+
+### Galaxy WSGI
+
+---
diff --git a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput.png b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput.png
index 109e28a42f018f..c44e2029d91b83 100644
Binary files a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput.png and b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput.png differ
diff --git a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_100M_zo.png b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_100M_zo.png
new file mode 100644
index 00000000000000..e6a96ac0e7010d
Binary files /dev/null and b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_100M_zo.png differ
diff --git a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_200M_zo.png b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_200M_zo.png
new file mode 100644
index 00000000000000..aacb34277b82ae
Binary files /dev/null and b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_200M_zo.png differ
diff --git a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M.png b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M.png
index a626bd8aa3aff7..63c16baec4350c 100644
Binary files a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M.png and b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M.png differ
diff --git a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M_zo.png b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M_zo.png
index bbcd0484ff4d58..a3e2c4959ce582 100644
Binary files a/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M_zo.png and b/topics/epigenetics/images/atac-seq/pyGenomeTracksOutput_20M_zo.png differ
diff --git a/topics/epigenetics/tutorials/atac-seq/data-library.yaml b/topics/epigenetics/tutorials/atac-seq/data-library.yaml
new file mode 100644
index 00000000000000..ec672ad013789f
--- /dev/null
+++ b/topics/epigenetics/tutorials/atac-seq/data-library.yaml
@@ -0,0 +1,29 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Epigenetics
+ description: DNA methylation is an epigenetic mechanism used by higher eukaryotes
+ and involved in e.g. gene expression, X-Chromosome inactivating, imprinting, and
+ gene silencing of germline specific gene and repetitive elements.
+ items:
+ - name: ATAC-Seq data analysis
+ items:
+ - name: 'DOI: 10.5281/zenodo.3862793'
+ description: latest
+ items:
+ - url: https://zenodo.org/api/files/b205aa3d-919b-4fbb-a8dd-3ae01d5daa91/ENCFF933NTR.bed.gz
+ src: url
+ ext: bed
+ info: https://zenodo.org/record/3862793
+ - url: https://zenodo.org/api/files/b205aa3d-919b-4fbb-a8dd-3ae01d5daa91/SRR891268_chr22_enriched_R1.fastq.gz
+ src: url
+ ext: fastqsanger.gz
+ info: https://zenodo.org/record/3862793
+ - url: https://zenodo.org/api/files/b205aa3d-919b-4fbb-a8dd-3ae01d5daa91/SRR891268_chr22_enriched_R2.fastq.gz
+ src: url
+ ext: fastqsanger.gz
+ info: https://zenodo.org/record/3862793
diff --git a/topics/epigenetics/tutorials/atac-seq/tutorial.md b/topics/epigenetics/tutorials/atac-seq/tutorial.md
index f81c32097f80e9..9c9040712436c4 100644
--- a/topics/epigenetics/tutorials/atac-seq/tutorial.md
+++ b/topics/epigenetics/tutorials/atac-seq/tutorial.md
@@ -2,7 +2,7 @@
layout: tutorial_hands_on
title: ATAC-Seq data analysis
-zenodo_link: "https://zenodo.org/record/3270536"
+zenodo_link: https://zenodo.org/record/3862793
questions:
- Which DNA regions are accessible in the human lymphoblastoid cell line GM12878?
- How to analyse and visualise ATAC-Seq data?
@@ -35,7 +35,7 @@ In this tutorial we will use data from the study of {% cite Buenrostro2013 %}, t
### When working with real data
{:.no_toc}
-When you use your own data we suggest you to use [this workflow](https://usegalaxy.eu/u/heylf/w/atac-seq-gtm-with-control) which includes the same steps but is compatible with replicates. If you do not have any control data you can import and edit this workflow, removing all steps with the controls. Controls for the ATAC-Seq procedure are not commonly performed, as discussed [here](https://informatics.fas.harvard.edu/atac-seq-guidelines.html), but could be ATAC-Seq of purified DNA.
+When you use your own data we suggest you use [this workflow](https://usegalaxy.eu/u/ldelisle/w/atac-seq-gtm-with-control-and-macs2) which includes the same steps but is compatible with replicates. If you do not have any control data you can import and edit this workflow, removing all steps with the controls. Controls for the ATAC-Seq procedure are not commonly performed, as discussed [here](https://informatics.fas.harvard.edu/atac-seq-guidelines.html), but could be ATAC-Seq of purified DNA.
> ### Agenda
>
@@ -62,12 +62,12 @@ We first need to download the sequenced reads (FASTQs) as well as other annotati
>
> {% include snippets/create_new_history.md %}
>
-> 2. Import the files from [Zenodo](https://zenodo.org/record/3270536) and [ENCODE](https://www.encodeproject.org/) or from the shared data library
+> 2. Import the files from [Zenodo](https://doi.org/10.5281/zenodo.3862792) or from the shared data library
>
> ```
-> https://zenodo.org/record/3270536/files/SRR891268_R1.fastq.gz
-> https://zenodo.org/record/3270536/files/SRR891268_R2.fastq.gz
-> https://www.encodeproject.org/files/ENCFF933NTR/@@download/ENCFF933NTR.bed.gz
+> https://zenodo.org/record/3862793/files/ENCFF933NTR.bed.gz
+> https://zenodo.org/record/3862793/files/SRR891268_chr22_enriched_R1.fastq.gz
+> https://zenodo.org/record/3862793/files/SRR891268_chr22_enriched_R2.fastq.gz
> ```
>
> {% include snippets/import_via_link.md %}
@@ -77,7 +77,7 @@ We first need to download the sequenced reads (FASTQs) as well as other annotati
>
> {% include snippets/add_tag.md %}
>
-> 4. Check that the datatype of the 2 FASTQ files is `fastqsanger.gz` and the BED file is `bed`. If they are not then change the datatype as described below.
+> 4. Check that the datatype of the 2 FASTQ files is `fastqsanger.gz` and the peak file (ENCFF933NTR.bed.gz) is `encodepeak`. If they are not then change the datatype as described below.
>
> {% include snippets/change_datatype.md datatype="datatypes" %}
>
@@ -87,8 +87,8 @@ We first need to download the sequenced reads (FASTQs) as well as other annotati
> If you are not familiar with FASTQ format, see the [Quality Control tutorial]({% link topics/sequence-analysis/tutorials/quality-control/tutorial.md %})
{: .comment}
>
-> ### {% icon comment %} BED format
-> If you are not familiar with BED format, see the [BED Format](https://genome.ucsc.edu/FAQ/FAQformat.html)
+> ### {% icon comment %} BED / ENCODE narrowPeak format
+> If you are not familiar with BED format or ENCODE narrowPeak format, see the [BED Format](https://genome.ucsc.edu/FAQ/FAQformat.html)
{: .comment}
We will visualise regions later in the analysis and obtain the gene information now. We will get information for chromosome 22 genes (names of transcripts and genomic positions) using the UCSC tool.
@@ -109,17 +109,51 @@ We will visualise regions later in the analysis and obtain the gene information
> 3. Click **Send query to Galaxy**
>
> This table contains all the information but is not in a BED format. To transform it into BED format we will cut out the required columns and rearrange:
+>
> 4. **Cut** columns from a table {% icon tool %} with the following parameters:
> - {% icon param-text %} *"Cut columns"*: `c3,c5,c6,c13,c12,c4`
> - {% icon param-text %} *"Delimited by"*: `Tab`
> - {% icon param-file %} *"From"*: `UCSC Main on Human: wgEncodeGencodeBasicV31 (chr22:1-50,818,468)`
-> 5. Rename the dataset as `chr22 genes`
+>
+> 5. Check the contents of your file, is this as you expect it to be?
+>
+> > ### {% icon question %} Question: Expected output
+> >
+> > Our goal here was to convert the data to BED format.
+> >
+> > 1. Which columns do you expect in your file? (Tip: read about [BED format](https://genome.ucsc.edu/FAQ/FAQformat.html#format1))
+> > 2. Does your file look like a valid BED format?
+> >
+> > > ### {% icon solution %} Solution
+> > >
+> > > 1. We expect at least 3 columns, `chromosome - start - end`, and possibly more optional columns
+> > > 2. Your file should look something like this:
+> > > ```
+> > > Chrom Start End Name Score Strand ThickStart ThickEnd ItemRGB BlockCount BlockSizes BlockStarts
+> > > chr22 10736170 10736283 RF00004 0 -
+> > > chr22 11066417 11068174 CU104787.1 0 +
+> > > chr22 11249808 11249959 RF00002 0 -
+> > > [..]
+> > > ```
+> > >
+> > > - **Troubleshooting:** Is your second column the `Strand` column?
+> > > - Make sure you used the correct **Cut** {% icon tool %} (the one that matches the tool name mentioned in the previous step *exactly*)
+> > > - There is another tool with `(cut)` behind the title, we do NOT want to use this tool in this step.
+> > >
+> > > - **Tip:** Always check your output files to make sure they match your expectations!
+> > >
+> > {: .solution}
+> >
+> {: .question}
+>
+>
+> 6. **Rename** {% icon galaxy-pencil %} the dataset as `chr22 genes`
>
> {% include snippets/rename_dataset.md %}
>
-> 6. Change its datatype to BED
+> 7. **Change** {% icon galaxy-pencil %} its datatype to BED
>
-> {% include snippets/change_datatype.md datatype="datatypes" %}
+> {% include snippets/change_datatype.md datatype="bed" %}
>
{: .hands_on}
@@ -160,7 +194,10 @@ The first step is to check the quality of the reads and the presence of the Next
> > >
> > > 3. **Overrepresented sequences**
> > >
-> > > Nextera adapter sequences are observable in the **Adapter Content** section.
+> > > One sequence is overrepresented:
+> > > you have 306 reads which are exactly the sequence of the Nextera adapter.
+> > > They correspond to adapters amplified head-to-head.
+> > > 306 is really low (only 0.1% of reads).
> > >
> > {: .solution}
> >
@@ -240,7 +277,7 @@ The forward and reverse adapters are slightly different. We will also trim low q
{: .hands_on}
> ### {% icon comment %} FastQC Results
-> If we run FastQC again we should see under **Adapter Content** that the Nextera adapters are no longer present.
+> If we run FastQC again we should see under **Overrepresented sequences** that there are no more overrepresented sequences and under **Adapter Content** that the Nextera adapters are no longer present.
> ![FastQC screenshot on the adapter content section after cutadapt](../../images/atac-seq/Screenshot_fastqcAftercutadapt.png "FastQC screenshot on the adapter content section after cutadapt")
{: .comment}
@@ -360,7 +397,7 @@ High numbers of mitochondrial reads can be a problem in ATAC-Seq. Some ATAC-Seq
>
> ![Samtools idxstats result](../../images/atac-seq/Screenshot_samtoolsIdxStatsChrM.png "Samtools idxstats result")
>
-> There are 221 000 reads which map to chrM and 170 000 which map to chr22.
+> There are 220 000 reads which map to chrM and 165 000 which map to chr22.
{: .tip}
## Filter Duplicate Reads
@@ -566,7 +603,7 @@ We call peaks with MACS2. Usually people expand 200bp around cut sites (+/-100bp
## Prepare the Datasets
Thanks to **Genrich** we now have a coverage file which represents the coverage of the read start sites extended 50 bp to each side.
-The output of **Genrich** is a BedGraph-ish pileup (6 columns text format with a comment line and a header). We will first need to convert it to a bedgraph format (4 columns text format with no header) to be able to visualise it.
+The output of **Genrich** is a BedGraph-ish pileup (6 columns text format with a comment line and a header). We will first need to convert it to a bedgraph format (4 columns text format with no header) to be able to visualise it.
### Convert BedGraph-ish pileup of **Genrich** to bedgraph
@@ -611,16 +648,6 @@ In order to visualise a specific region (e.g. the gene *RAC2*), we can either us
>
{: .hands_on}
-### Convert the Genrich peaks to BED
-At the moment, **pyGenomeTracks** does not deal with the datatype encodepeak which is a special bed.
-
-> ### {% icon hands_on %} Hands-on: Change the datatype
-> 1. Change the datatype of the output of **Genrich** from encodepeak to bed.
->
-> {% include snippets/change_datatype.md datatype="bed" %}
->
-{: .hands_on }
-
## Create heatmap of coverage at TSS with deepTools
You might also be interested in specific regions. For this, you can compute a heatmap. We will use the **deepTools plotHeatmap**. As an example, we will here make a heatmap centered on the transcription start sites (TSS).
@@ -677,9 +704,9 @@ We will now generate a heatmap. Each line will be a transcript. The coverage wil
> > >
> > > MACS2 coverage is very simple, each 5' is extended 100bp (+/-50bp).
> > > Genrich coverage is evaluated in a more subtle way: if the fragment length is above 100 (the expension size), the coverage will be each 5' extended 100bp (+/-50bp), but if it is less, the coverage will be between each 5' extended 50bp (-50bp - fragment size - + 50bp):
-> > > ![macs2 vs genrich](../../images/atac-seq/Screenshot_macs2vsGenrich.png "macs2 vs genrich coverage")
+> > > ![MACS2 vs Genrich](../../images/atac-seq/Screenshot_macs2vsGenrich.png "MACS2 vs Genrich coverage")
> > > In this example, we see on the left a pair with a long fragment size: both algorithm behave the same.
-> > > On the left a pair with a short fragment size: Genrich reports only one interval joining both extremities wheareas macs2 will still report 2 intervals even if they overlap.
+> > > On the right a pair with a short fragment size: Genrich reports only one interval joining both extremities whereas MACS2 will still report 2 intervals even if they overlap.
> > {: .tip}
> {: .solution}
>
@@ -697,49 +724,48 @@ We will now generate a heatmap. Each line will be a transcript. The coverage wil
> - *"Plot title"*: `Coverage from Genrich (extended +/-50bp)`
> - {% icon param-file %} *"Track file bigwig format"*: Select the output of **Wig/BedGraph-to-bigWig** {% icon tool %} called `Genrich bigwig`.
> - *"Color of track"*: Select the color of your choice
+> - *"Minimum value"*: 0
> - *"height"*: `5`
> - *"Show visualization of data range"*: `Yes`
-> - *"Include spacer at the end of the track"*: `0.5`
> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
-> - *"Choose style of the track"*: `Gene track / Bed track`
+> - *"Choose style of the track"*: `NarrowPeak track`
> - *"Plot title"*: `Peaks from Genrich (extended +/-50bp)`
-> - {% icon param-file %} *"Track file bed format"*: Select the output of **Genrich** {% icon tool %} (the one you converted from encodepeak to bed).
+> - {% icon param-file %} *"Track file bed format"*: Select the output of **Genrich** {% icon tool %}.
> - *"Color of track"*: Select the color of your choice
-> - *"height"*: `3`
-> - *"Plot labels"*: `No`
-> - *"Include spacer at the end of the track"*: `0.5`
+> - *"display to use"*: `box: Draw a box`
+> - *"Plot labels (name, p-val, q-val)"*: `No`
> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
> - *"Choose style of the track"*: `Bigwig track `
-> - *"Plot title"*: `Coverage from macs2 (extended +/-50bp)`
+> - *"Plot title"*: `Coverage from MACS2 (extended +/-50bp)`
> - {% icon param-file %} *"Track file bigwig format"*: Select the output of **Wig/BedGraph-to-bigWig** {% icon tool %} called `MACS2 bigwig`.
> - *"Color of track"*: Select the color of your choice
+> - *"Minimum value"*: 0
> - *"height"*: `5`
> - *"Show visualization of data range"*: `Yes`
-> - *"Include spacer at the end of the track"*: `0.5`
> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
-> - *"Choose style of the track"*: `Gene track / Bed track`
-> - *"Plot title"*: `Peaks from macs2 (extended +/-50bp)`
+> - *"Choose style of the track"*: `NarrowPeak track`
+> - *"Plot title"*: `Peaks from MACS2 (extended +/-50bp)`
> - {% icon param-file %} *"Track file bed format"*: Select the output of **MACS2** {% icon tool %} (narrow Peaks).
> - *"Color of track"*: Select the color of your choice
-> - *"height"*: `3`
-> - *"Plot labels"*: `No`
-> - *"Include spacer at the end of the track"*: `0.5`
+> - *"display to use"*: `box: Draw a box`
+> - *"Plot labels (name, p-val, q-val)"*: `No`
> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
> - *"Choose style of the track"*: `Gene track / Bed track`
> - *"Plot title"*: `Genes`
> - {% icon param-file %} *"Track file bed format"*: `chr22 genes`
> - *"Color of track"*: Select the color of your choice
> - *"height"*: `5`
-> - *"Include spacer at the end of the track"*: `0.5`
+> - *"Put all labels inside the plotted region"*: `Yes`
+> - *"Allow to put labels in the right margin"*: `Yes`
> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
-> - *"Choose style of the track"*: `Gene track / Bed track`
+> - *"Choose style of the track"*: `NarrowPeak track`
> - *"Plot title"*: `CTCF peaks`
> - {% icon param-file %} *"Track file bed format"*: Select the dataset `bedtools SortBED of ENCFF933NTR.bed.gz`
> - *"Color of track"*: Select the color of your choice
-> - *"Plot labels"*: `No`
-> - *"Include spacer at the end of the track"*: `0.5`
-> - *"Configure x-axis"*: `Yes`
-> - *"Where to place the x-axis"*: `Bottom`
+> - *"display to use"*: `box: Draw a box`
+> - *"Plot labels (name, p-val, q-val)"*: `No`
+> - {% icon param-repeat %} *"Insert Include tracks in your plot"*
+> - *"Choose style of the track"*: `X-axis`
>
> 2. Click on the {% icon galaxy-eye %} (eye) icon of the output.
>
@@ -756,7 +782,7 @@ Unfortunately, Genrich does not work very well with our small training dataset (
> ### {% icon question %} Questions
-> In the ATAC-Seq sample in this selected region we see four peaks detected by Genrich.
+> In the ATAC-Seq sample in this selected region we see four peaks detected by Genrich and MACS2.
>
> 1. How many TSS are accessible in the sample in the displayed region?
> 2. How many CTCF binding loci are accessible?
@@ -774,9 +800,14 @@ Unfortunately, Genrich does not work very well with our small training dataset (
>
{: .question}
-We can see that in this region both peak calling perform the same. However, when zooming out, we see that macs2 is more sensitive:
+We can see that in this region both peak callers perform the same. However, when zooming out, we see that MACS2 is more sensitive:
![pyGenomeTracks output for 20 million of pairs on the whole genome zoom out](../../images/atac-seq/pyGenomeTracksOutput_20M_zo.png "pyGenomeTracks output for 20 million of pairs on the whole genome zoom out").
+When the number of reads increases, the number of peaks with MACS2 increases but the number of peaks with Genrich decreases:
+![pyGenomeTracks output for 100 million pairs on the whole genome zoom out](../../images/atac-seq/pyGenomeTracksOutput_100M_zo.png "pyGenomeTracks output for 100 million pairs on the whole genome zoom out").
+![pyGenomeTracks output for 200 million pairs on the whole genome zoom out](../../images/atac-seq/pyGenomeTracksOutput_200M_zo.png "pyGenomeTracks output for 200 million pairs on the whole genome zoom out").
+
+
As CTCF binds so ubiquitously and by itself can displace the nucleosome creating accessible regions, a region containing a peak with no corresponding CTCF peak or TSS could be a putative enhancer. In the pyGenomeTracks plot we see a region like this located in the intron of a gene and another one between genes. However, it is impossible to guess from the position which would be the gene controlled by this region. And of course, more analyses are needed to assess if it is a real enhancer, for example, histone ChIP-seq, 3D structure, transgenic assay, etc.
diff --git a/topics/epigenetics/tutorials/atac-seq/workflows/main_workflow.ga b/topics/epigenetics/tutorials/atac-seq/workflows/main_workflow.ga
index 86307fdb4ea339..665cf7efce487b 100644
--- a/topics/epigenetics/tutorials/atac-seq/workflows/main_workflow.ga
+++ b/topics/epigenetics/tutorials/atac-seq/workflows/main_workflow.ga
@@ -1,1218 +1,1462 @@
{
- "a_galaxy_workflow": "true",
- "annotation": "ATAC-Seq data analysis",
- "format-version": "0.1",
- "name": "ATAC Seq GTM",
- "steps": {
- "0": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 0,
- "input_connections": {},
- "inputs": [],
- "label": "R1 read in fastqsanger(.gz) format",
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 200,
- "top": 200
- },
- "tool_id": null,
- "tool_state": "{}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "f53186d1-9b90-4c78-b89b-7203db2aae69",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "87a85c2b-56ae-4c70-9131-8dbb11e8a80d"
- }
- ]
- },
- "1": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 1,
- "input_connections": {},
- "inputs": [],
- "label": "R2 read in fastqsanger(.gz) format",
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 200,
- "top": 310
- },
- "tool_id": null,
- "tool_state": "{}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "816716b8-7db3-4d05-8ef2-dfa0f8c8c264",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "c9f818da-3739-40de-bc6e-2ca06a27f544"
- }
- ]
- },
- "10": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
- "errors": null,
- "id": 10,
- "input_connections": {
- "inputFile": {
- "id": 9,
- "output_name": "out_file1"
- }
- },
- "inputs": [],
- "label": null,
- "name": "MarkDuplicates",
- "outputs": [
- {
- "name": "metrics_file",
- "type": "txt"
- },
- {
- "name": "outFile",
- "type": "bam"
- }
- ],
- "position": {
- "left": 1444,
- "top": 200
- },
- "post_job_actions": {
- "RenameDatasetActionoutFile": {
- "action_arguments": {
- "newname": "#{inputFile} without duplicates"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "outFile"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
- "tool_shed_repository": {
- "changeset_revision": "7d34178f2812",
- "name": "picard",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"duplicate_scoring_strategy\": \"\\\"SUM_OF_BASE_QUALITIES\\\"\", \"remove_duplicates\": \"\\\"true\\\"\", \"read_name_regex\": \"\\\"\\\"\", \"barcode_tag\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"optical_duplicate_pixel_distance\": \"\\\"100\\\"\", \"comments\": \"[]\", \"assume_sorted\": \"\\\"true\\\"\", \"validation_stringency\": \"\\\"LENIENT\\\"\", \"inputFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
- "tool_version": "2.18.2.2",
- "type": "tool",
- "uuid": "83e539cb-b708-4c85-8298-3f5bdb4ef0ff",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "outFile",
- "uuid": "504761e2-71eb-4054-a0bb-ce4fe2776687"
- },
- {
- "label": null,
- "output_name": "metrics_file",
- "uuid": "f1f4bba5-9792-496a-9c40-8a3515c552b8"
- }
- ]
- },
- "11": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_bamtobed/2.29.0",
- "errors": null,
- "id": 11,
- "input_connections": {
- "input": {
- "id": 10,
- "output_name": "outFile"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bedtools BAM to BED",
- "outputs": [
- {
- "name": "output",
- "type": "bed"
- }
- ],
- "position": {
- "left": 1772,
- "top": 200
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_bamtobed/2.29.0",
- "tool_shed_repository": {
- "changeset_revision": "b28e0cfa7ba1",
- "name": "bedtools",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"option\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"tag\": \"\\\"\\\"\", \"split\": \"\\\"false\\\"\", \"ed_score\": \"\\\"false\\\"\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
- "tool_version": "2.29.0",
- "type": "tool",
- "uuid": "ad8b182f-608d-4223-91d0-06fc872a28ba",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "e0e48546-7343-4bf7-b7ce-a954a866bc8a"
- }
- ]
- },
- "12": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_CollectInsertSizeMetrics/2.18.2.1",
- "errors": null,
- "id": 12,
- "input_connections": {
- "inputFile": {
- "id": 10,
- "output_name": "outFile"
- }
- },
- "inputs": [],
- "label": null,
- "name": "CollectInsertSizeMetrics",
- "outputs": [
- {
- "name": "outFile",
- "type": "tabular"
- },
- {
- "name": "histFile",
- "type": "pdf"
- }
- ],
- "position": {
- "left": 1772,
- "top": 342
- },
- "post_job_actions": {
- "HideDatasetActionoutFile": {
- "action_arguments": {},
- "action_type": "HideDatasetAction",
- "output_name": "outFile"
- },
- "RenameDatasetActionhistFile": {
- "action_arguments": {
- "newname": "fragment size plot of #{inputFile}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "histFile"
- },
- "RenameDatasetActionoutFile": {
- "action_arguments": {
- "newname": "fragment size data of #{inputFile}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "outFile"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_CollectInsertSizeMetrics/2.18.2.1",
- "tool_shed_repository": {
- "changeset_revision": "f6ced08779c4",
- "name": "picard",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"deviations\": \"\\\"10.0\\\"\", \"__rerun_remap_job_id__\": null, \"hist_width\": \"\\\"\\\"\", \"assume_sorted\": \"\\\"true\\\"\", \"metric_accumulation_level\": \"[\\\"ALL_READS\\\"]\", \"validation_stringency\": \"\\\"LENIENT\\\"\", \"reference_source\": \"{\\\"__current_case__\\\": 0, \\\"ref_file\\\": \\\"hg38\\\", \\\"reference_source_selector\\\": \\\"cached\\\"}\", \"min_pct\": \"\\\"0.05\\\"\", \"inputFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
- "tool_version": "2.18.2.1",
- "type": "tool",
- "uuid": "29f106aa-9c3c-4148-ac6f-5c29537ebefc",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "histFile",
- "uuid": "35615e33-2c89-4a2d-8841-ad2b6e71bdb6"
- }
- ]
- },
- "13": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0.2",
- "errors": null,
- "id": 13,
- "input_connections": {
- "input1": {
- "id": 10,
- "output_name": "outFile"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Samtools sort",
- "outputs": [
- {
- "name": "output1",
- "type": "bam"
- }
- ],
- "position": {
- "left": 1772,
- "top": 514
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0.2",
- "tool_shed_repository": {
- "changeset_revision": "f56bdb93ae58",
- "name": "samtools_sort",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"prim_key_cond\": \"{\\\"__current_case__\\\": 1, \\\"prim_key_select\\\": \\\"-n\\\"}\"}",
- "tool_version": "2.0.2",
- "type": "tool",
- "uuid": "2ad41cb6-0775-4136-b7c7-04c046b64f92",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output1",
- "uuid": "d3d95f19-b68a-4fa3-945d-097554ab79b7"
- }
- ]
- },
- "14": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/macs2/macs2_callpeak/2.1.1.20160309.6",
- "errors": null,
- "id": 14,
- "input_connections": {
- "treatment|input_treatment_file": {
- "id": 11,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "MACS2 callpeak",
- "outputs": [
- {
- "name": "output_tabular",
- "type": "tabular"
- },
- {
- "name": "output_narrowpeaks",
- "type": "bed"
- },
- {
- "name": "output_summits",
- "type": "bed"
- },
- {
- "name": "output_treat_pileup",
- "type": "bedgraph"
- },
- {
- "name": "output_control_lambda",
- "type": "bedgraph"
- }
- ],
- "position": {
- "left": 2100,
- "top": 200
- },
- "post_job_actions": {
- "HideDatasetActionoutput_control_lambda": {
- "action_arguments": {},
- "action_type": "HideDatasetAction",
- "output_name": "output_control_lambda"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/macs2/macs2_callpeak/2.1.1.20160309.6",
- "tool_shed_repository": {
- "changeset_revision": "424aefbd7777",
- "name": "macs2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"control\": \"{\\\"__current_case__\\\": 1, \\\"c_select\\\": \\\"No\\\"}\", \"__page__\": null, \"effective_genome_size_options\": \"{\\\"__current_case__\\\": 0, \\\"effective_genome_size_options_selector\\\": \\\"2700000000\\\"}\", \"format\": \"\\\"BED\\\"\", \"outputs\": \"[\\\"peaks_tabular\\\", \\\"summits\\\", \\\"bdg\\\"]\", \"__rerun_remap_job_id__\": null, \"cutoff_options\": \"{\\\"__current_case__\\\": 1, \\\"cutoff_options_selector\\\": \\\"qvalue\\\", \\\"qvalue\\\": \\\"0.05\\\"}\", \"advanced_options\": \"{\\\"broad_options\\\": {\\\"__current_case__\\\": 1, \\\"broad_options_selector\\\": \\\"nobroad\\\", \\\"call_summits\\\": \\\"false\\\"}, \\\"keep_dup_options\\\": {\\\"__current_case__\\\": 1, \\\"keep_dup_options_selector\\\": \\\"1\\\"}, \\\"llocal\\\": \\\"\\\", \\\"nolambda\\\": \\\"false\\\", \\\"ratio\\\": \\\"\\\", \\\"slocal\\\": \\\"\\\", \\\"spmr\\\": \\\"false\\\", \\\"to_large\\\": \\\"false\\\"}\", \"treatment\": \"{\\\"__current_case__\\\": 0, \\\"input_treatment_file\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"t_multi_select\\\": \\\"No\\\"}\", \"nomodel_type\": \"{\\\"__current_case__\\\": 1, \\\"extsize\\\": \\\"100\\\", \\\"nomodel_type_selector\\\": \\\"nomodel\\\", \\\"shift\\\": \\\"-50\\\"}\"}",
- "tool_version": "2.1.1.20160309.6",
- "type": "tool",
- "uuid": "c99eaf11-db02-43ad-954f-df1014ad16d7",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output_treat_pileup",
- "uuid": "a0338e16-0cae-4426-9fb9-4f77832e2a6f"
- },
- {
- "label": null,
- "output_name": "output_tabular",
- "uuid": "d20a6ac0-de44-4474-b6f4-f53c615afb68"
- },
- {
- "label": null,
- "output_name": "output_summits",
- "uuid": "643150d6-1bfc-4e60-ae09-df831ada70e6"
- },
- {
- "label": null,
- "output_name": "output_narrowpeaks",
- "uuid": "cdd6e843-22c0-40ff-be16-ce8490317625"
- }
- ]
- },
- "15": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/genrich/genrich/0.5+galaxy1",
- "errors": null,
- "id": 15,
- "input_connections": {
- "treatment|input_treatment_file": {
- "id": 13,
- "output_name": "output1"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Genrich",
- "outputs": [
- {
- "name": "outfile",
- "type": "encodepeak"
- },
- {
- "name": "out_bedgraph2",
- "type": "bedgraph"
- },
- {
- "name": "out_dups",
- "type": "txt"
- }
- ],
- "position": {
- "left": 2100,
- "top": 526
- },
- "post_job_actions": {
- "ChangeDatatypeActionoutfile": {
- "action_arguments": {
- "newtype": "bed"
- },
- "action_type": "ChangeDatatypeAction",
- "output_name": "outfile"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/genrich/genrich/0.5+galaxy1",
- "tool_shed_repository": {
- "changeset_revision": "db50f51a2952",
- "name": "genrich",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"control\": \"{\\\"__current_case__\\\": 1, \\\"c_select\\\": \\\"No\\\"}\", \"__page__\": null, \"output_options\": \"{\\\"bed\\\": \\\"false\\\", \\\"bedgraph1\\\": \\\"false\\\", \\\"bedgraph2\\\": \\\"true\\\"}\", \"__rerun_remap_job_id__\": null, \"other_options\": \"{\\\"skip_peak_calling\\\": \\\"false\\\"}\", \"filter_options\": \"{\\\"alignment_lengths\\\": \\\"\\\", \\\"alignment_lengths2\\\": \\\"false\\\", \\\"alignment_score\\\": \\\"0.0\\\", \\\"duplicates\\\": \\\"true\\\", \\\"exclude_chr\\\": \\\"\\\", \\\"min_mapq\\\": \\\"0\\\", \\\"unpaired\\\": \\\"false\\\"}\", \"peakcalling_options\": \"{\\\"max_dist\\\": \\\"100\\\", \\\"max_p\\\": \\\"\\\", \\\"max_q\\\": \\\"0.05\\\", \\\"min_auc\\\": \\\"20.0\\\", \\\"min_peak_length\\\": \\\"0\\\"}\", \"atac_options\": \"{\\\"atac\\\": \\\"true\\\", \\\"expand_sites\\\": \\\"100\\\"}\", \"treatment\": \"{\\\"__current_case__\\\": 0, \\\"input_treatment_file\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"t_multi_select\\\": \\\"No\\\"}\", \"cond_exclude\": \"{\\\"__current_case__\\\": 0, \\\"exclude_select\\\": \\\"No\\\"}\"}",
- "tool_version": "0.5+galaxy1",
- "type": "tool",
- "uuid": "589a0371-38f1-4c8e-bb0b-90a43d8a272b",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "outfile",
- "uuid": "2c6d1c46-fe27-4969-9204-81197de54de8"
- },
- {
- "label": null,
- "output_name": "out_bedgraph2",
- "uuid": "a482ef07-2e93-46c2-adee-bba3d461d85c"
- },
- {
- "label": null,
- "output_name": "out_dups",
- "uuid": "09e2d5d2-707a-4ec4-80b8-8b63cb336f6f"
- }
- ]
- },
- "16": {
- "annotation": "",
- "content_id": "wig_to_bigWig",
- "errors": null,
- "id": 16,
- "input_connections": {
- "input1": {
- "id": 14,
- "output_name": "output_treat_pileup"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Wig/BedGraph-to-bigWig",
- "outputs": [
- {
- "name": "out_file1",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 2428,
- "top": 200
- },
- "post_job_actions": {
- "RenameDatasetActionout_file1": {
- "action_arguments": {
- "newname": "macs2 bigwig"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "out_file1"
- }
- },
- "tool_id": "wig_to_bigWig",
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"settings\": \"{\\\"__current_case__\\\": 0, \\\"settingsType\\\": \\\"preset\\\"}\"}",
- "tool_version": "1.1.1",
- "type": "tool",
- "uuid": "abc9c8f4-e0ef-49a2-b538-3b1bb832ad82",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "out_file1",
- "uuid": "81376180-e079-44c3-bd65-12f9b699133c"
- }
- ]
- },
- "17": {
- "annotation": "",
- "content_id": "Remove beginning1",
- "errors": null,
- "id": 17,
- "input_connections": {
- "input": {
- "id": 15,
- "output_name": "out_bedgraph2"
- }
- },
- "inputs": [
- {
- "description": "runtime parameter for tool Remove beginning",
- "name": "input"
- }
- ],
- "label": null,
- "name": "Remove beginning",
- "outputs": [
- {
- "name": "out_file1",
- "type": "input"
- }
- ],
- "position": {
- "left": 2441,
- "top": 639
- },
- "post_job_actions": {},
- "tool_id": "Remove beginning1",
- "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"num_lines\": \"\\\"2\\\"\", \"__page__\": null}",
- "tool_version": "1.0.0",
- "type": "tool",
- "uuid": "6f028851-99a2-4296-8cd1-ccae2872db3c",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "out_file1",
- "uuid": "a1d7e8c9-5809-438e-b3f5-fe9951f89243"
- }
- ]
- },
- "18": {
- "annotation": "",
- "content_id": "Cut1",
- "errors": null,
- "id": 18,
- "input_connections": {
- "input": {
- "id": 17,
- "output_name": "out_file1"
- }
- },
- "inputs": [
- {
- "description": "runtime parameter for tool Cut",
- "name": "input"
- }
- ],
- "label": null,
- "name": "Cut",
- "outputs": [
- {
- "name": "out_file1",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 2741,
- "top": 624
- },
- "post_job_actions": {
- "ChangeDatatypeActionout_file1": {
- "action_arguments": {
- "newtype": "bedgraph"
- },
- "action_type": "ChangeDatatypeAction",
- "output_name": "out_file1"
- }
- },
- "tool_id": "Cut1",
- "tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"delimiter\": \"\\\"T\\\"\", \"__rerun_remap_job_id__\": null, \"__page__\": null}",
- "tool_version": "1.0.2",
- "type": "tool",
- "uuid": "d52cda14-0fe3-46cb-9ba4-1bffbb8133b3",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "out_file1",
- "uuid": "a8f30404-03da-450a-b471-28817e68f1e9"
- }
- ]
- },
- "19": {
- "annotation": "",
- "content_id": "wig_to_bigWig",
- "errors": null,
- "id": 19,
- "input_connections": {
- "input1": {
- "id": 18,
- "output_name": "out_file1"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Wig/BedGraph-to-bigWig",
- "outputs": [
- {
- "name": "out_file1",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 2995,
- "top": 612
- },
- "post_job_actions": {
- "RenameDatasetActionout_file1": {
- "action_arguments": {
- "newname": "Genrich bigwig"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "out_file1"
- }
- },
- "tool_id": "wig_to_bigWig",
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"settings\": \"{\\\"__current_case__\\\": 0, \\\"settingsType\\\": \\\"preset\\\"}\"}",
- "tool_version": "1.1.1",
- "type": "tool",
- "uuid": "4b5a7ea3-7eaf-4f0c-9fba-0ba8475c09bd",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "out_file1",
- "uuid": "c3f9f58b-2051-4d03-a726-ce9a9bb77385"
- }
- ]
- },
- "2": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 2,
- "input_connections": {},
- "inputs": [],
- "label": "bed file with genes",
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 200,
- "top": 420
- },
- "tool_id": null,
- "tool_state": "{}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "e98dd9bc-ab4d-49a1-a176-11b5c78f59f7",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "5e8336c8-1d02-4884-acb2-957fd52118b8"
- }
- ]
- },
- "20": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/3.0.2.0",
- "errors": null,
- "id": 20,
- "input_connections": {
- "multibigwig_conditional|bigwigfiles": [
- {
- "id": 19,
- "output_name": "out_file1"
- },
- {
- "id": 16,
- "output_name": "out_file1"
- }
- ],
- "regionsFiles_0|regionsFile": {
- "id": 2,
- "output_name": "output"
- }
- },
- "inputs": [
- {
- "description": "runtime parameter for tool computeMatrix",
- "name": "advancedOpt"
- }
- ],
- "label": null,
- "name": "computeMatrix",
- "outputs": [
- {
- "name": "outFileName",
- "type": "deeptools_compute_matrix_archive"
- }
- ],
- "position": {
- "left": 3338,
- "top": 200
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "fb9cf9c97ec4",
- "name": "deeptools_compute_matrix",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"multibigwig_conditional\": \"{\\\"__current_case__\\\": 0, \\\"bigwigfiles\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"orderMatters\\\": \\\"No\\\"}\", \"mode\": \"{\\\"__current_case__\\\": 1, \\\"afterRegionStartLength\\\": \\\"1000\\\", \\\"beforeRegionStartLength\\\": \\\"1000\\\", \\\"mode_select\\\": \\\"reference-point\\\", \\\"nanAfterEnd\\\": \\\"false\\\", \\\"referencePoint\\\": \\\"TSS\\\"}\", \"output\": \"{\\\"__current_case__\\\": 0, \\\"showOutputSettings\\\": \\\"no\\\"}\", \"advancedOpt\": \"{\\\"__current_case__\\\": 1, \\\"averageTypeBins\\\": \\\"mean\\\", \\\"binSize\\\": \\\"50\\\", \\\"blackListFileName\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"exonID\\\": \\\"exon\\\", \\\"maxThreshold\\\": \\\"\\\", \\\"metagene\\\": \\\"false\\\", \\\"minThreshold\\\": \\\"\\\", \\\"missingDataAsZero\\\": \\\"true\\\", \\\"samplesLabel\\\": \\\"\\\", \\\"scale\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"skipZeros\\\": \\\"false\\\", \\\"sortRegions\\\": \\\"keep\\\", \\\"sortUsing\\\": \\\"mean\\\", \\\"transcriptID\\\": \\\"transcript\\\", \\\"transcript_id_designator\\\": \\\"transcript_id\\\"}\", \"regionsFiles\": \"[{\\\"__index__\\\": 0, \\\"regionsFile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}]\"}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "eef43402-ee63-4496-afb3-33cf9ec08cc0",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "outFileName",
- "uuid": "6cfa392a-bb20-453b-9606-6f35c38998b7"
- }
- ]
- },
- "21": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/pygenometracks/pygenomeTracks/3.1.2",
- "errors": null,
- "id": 21,
- "input_connections": {
- "tracks_0|track_file_style_conditional|track_input_bigwig": {
- "id": 19,
- "output_name": "out_file1"
- },
- "tracks_1|track_file_style_conditional|track_input_bed": {
- "id": 15,
- "output_name": "outfile"
- },
- "tracks_2|track_file_style_conditional|track_input_bigwig": {
- "id": 16,
- "output_name": "out_file1"
- },
- "tracks_3|track_file_style_conditional|track_input_bed": {
- "id": 14,
- "output_name": "output_narrowpeaks"
- },
- "tracks_4|track_file_style_conditional|track_input_bed": {
- "id": 6,
- "output_name": "output"
- },
- "tracks_5|track_file_style_conditional|track_input_bed": {
- "id": 7,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "pyGenomeTracks",
- "outputs": [
- {
- "name": "outFileName",
- "type": "png"
- }
- ],
- "position": {
- "left": 3338,
- "top": 464
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/pygenometracks/pygenomeTracks/3.1.2",
- "tool_shed_repository": {
- "changeset_revision": "feab9265552d",
- "name": "pygenometracks",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"region\": \"\\\"chr22:21,620,000-21,660,000\\\"\", \"image_file_format\": \"\\\"png\\\"\", \"tracks\": \"[{\\\"__index__\\\": 0, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 5, \\\"alpha\\\": \\\"1.0\\\", \\\"color\\\": \\\"#c0504d\\\", \\\"color_negative\\\": {\\\"__current_case__\\\": 0, \\\"color_negative_select\\\": \\\"no\\\"}, \\\"data_range\\\": \\\"true\\\", \\\"height_bigwig\\\": \\\"5.0\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"max_value\\\": \\\"\\\", \\\"min_value\\\": \\\"\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"reading_bw_conditional\\\": {\\\"nans_to_zeros\\\": \\\"false\\\", \\\"number_of_bins\\\": \\\"\\\", \\\"summary\\\": \\\"mean\\\"}, \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\"Coverage from Genrich (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"bigwig_track_option\\\", \\\"track_input_bigwig\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"type_conditional\\\": {\\\"__current_case__\\\": 2, \\\"type_selector\\\": \\\"fill_option\\\"}}}, {\\\"__index__\\\": 1, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 3, \\\"border_color\\\": \\\"#000000\\\", \\\"color_bed\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#c0504d\\\", \\\"color_bed_select\\\": \\\"manually\\\"}, \\\"display\\\": \\\"stacked\\\", \\\"flybase\\\": \\\"true\\\", \\\"fontsize\\\": \\\"\\\", \\\"gene_rows\\\": \\\"\\\", \\\"global_max_row\\\": \\\"false\\\", \\\"height_bed\\\": \\\"3.0\\\", \\\"labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\"Peaks from Genrich (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"gene_track_option\\\", \\\"track_input_bed\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}}, {\\\"__index__\\\": 2, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 5, \\\"alpha\\\": \\\"1.0\\\", \\\"color\\\": \\\"#00b050\\\", \\\"color_negative\\\": {\\\"__current_case__\\\": 0, 
\\\"color_negative_select\\\": \\\"no\\\"}, \\\"data_range\\\": \\\"true\\\", \\\"height_bigwig\\\": \\\"5.0\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"max_value\\\": \\\"\\\", \\\"min_value\\\": \\\"\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"reading_bw_conditional\\\": {\\\"nans_to_zeros\\\": \\\"false\\\", \\\"number_of_bins\\\": \\\"\\\", \\\"summary\\\": \\\"mean\\\"}, \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\"Coverage from macs2 (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"bigwig_track_option\\\", \\\"track_input_bigwig\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"type_conditional\\\": {\\\"__current_case__\\\": 2, \\\"type_selector\\\": \\\"fill_option\\\"}}}, {\\\"__index__\\\": 3, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 3, \\\"border_color\\\": \\\"#000000\\\", \\\"color_bed\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#00b050\\\", \\\"color_bed_select\\\": \\\"manually\\\"}, \\\"display\\\": \\\"stacked\\\", \\\"flybase\\\": \\\"true\\\", \\\"fontsize\\\": \\\"\\\", \\\"gene_rows\\\": \\\"\\\", \\\"global_max_row\\\": \\\"false\\\", \\\"height_bed\\\": \\\"1.5\\\", \\\"labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\" \\\\tPeaks from macs2 (extended +/-50bp) \\\", \\\"track_file_style_selector\\\": \\\"gene_track_option\\\", \\\"track_input_bed\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}}, {\\\"__index__\\\": 4, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 3, \\\"border_color\\\": \\\"#000000\\\", \\\"color_bed\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#000000\\\", \\\"color_bed_select\\\": \\\"manually\\\"}, \\\"display\\\": \\\"stacked\\\", \\\"flybase\\\": \\\"true\\\", \\\"fontsize\\\": \\\"\\\", \\\"gene_rows\\\": \\\"\\\", \\\"global_max_row\\\": \\\"false\\\", \\\"height_bed\\\": \\\"5.0\\\", \\\"labels\\\": \\\"true\\\", \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\"Genes\\\", 
\\\"track_file_style_selector\\\": \\\"gene_track_option\\\", \\\"track_input_bed\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}}, {\\\"__index__\\\": 5, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 3, \\\"border_color\\\": \\\"#000000\\\", \\\"color_bed\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#00b0f0\\\", \\\"color_bed_select\\\": \\\"manually\\\"}, \\\"display\\\": \\\"stacked\\\", \\\"flybase\\\": \\\"true\\\", \\\"fontsize\\\": \\\"\\\", \\\"gene_rows\\\": \\\"\\\", \\\"global_max_row\\\": \\\"false\\\", \\\"height_bed\\\": \\\"1.5\\\", \\\"labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"0.5\\\", \\\"title\\\": \\\"CTCF peaks\\\", \\\"track_file_style_selector\\\": \\\"gene_track_option\\\", \\\"track_input_bed\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}}]\", \"__rerun_remap_job_id__\": null, \"x_axis\": \"{\\\"__current_case__\\\": 0, \\\"fontsize\\\": \\\"\\\", \\\"where\\\": \\\"bottom\\\", \\\"x_axis_select\\\": \\\"yes\\\"}\"}",
- "tool_version": "3.1.2",
- "type": "tool",
- "uuid": "ae418b94-d3c4-4bde-88ba-0684cfa6c2c3",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "outFileName",
- "uuid": "bb9a1996-23ce-4b37-af06-efe2954c8689"
- }
- ]
- },
- "22": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_plot_heatmap/deeptools_plot_heatmap/3.0.2.0",
- "errors": null,
- "id": 22,
- "input_connections": {
- "matrixFile": {
- "id": 20,
- "output_name": "outFileName"
- }
- },
- "inputs": [],
- "label": null,
- "name": "plotHeatmap",
- "outputs": [
- {
- "name": "outFileName",
- "type": "png"
- }
- ],
- "position": {
- "left": 3666,
- "top": 200
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_plot_heatmap/deeptools_plot_heatmap/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "010e58e9d822",
- "name": "deeptools_plot_heatmap",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"output\": \"{\\\"__current_case__\\\": 0, \\\"showOutputSettings\\\": \\\"no\\\"}\", \"advancedOpt\": \"{\\\"__current_case__\\\": 0, \\\"showAdvancedOpt\\\": \\\"no\\\"}\", \"__rerun_remap_job_id__\": null, \"matrixFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "656bebc6-0a5f-4735-9424-eda6f89dea22",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "outFileName",
- "uuid": "64ed7ed5-1b16-4be6-b941-0b723e327733"
- }
- ]
- },
- "3": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 3,
- "input_connections": {},
- "inputs": [],
- "label": "ctcf peaks",
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 200,
- "top": 530
- },
- "tool_id": null,
- "tool_state": "{}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "afd163ef-195d-44d6-8a80-f4be837d5d43",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "80dc3fea-2221-44c9-86b1-f900465caf27"
- }
- ]
- },
- "4": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
- "errors": null,
- "id": 4,
- "input_connections": {
- "input_file": {
- "id": 0,
- "output_name": "output"
- }
- },
- "inputs": [
- {
- "description": "runtime parameter for tool FastQC",
- "name": "limits"
- },
- {
- "description": "runtime parameter for tool FastQC",
- "name": "contaminants"
- },
- {
- "description": "runtime parameter for tool FastQC",
- "name": "adapters"
- }
- ],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 460,
- "top": 494
- },
- "post_job_actions": {
- "RenameDatasetActionhtml_file": {
- "action_arguments": {
- "newname": "FastQC of #{input_file}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "html_file"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
- "tool_shed_repository": {
- "changeset_revision": "e7b2202befea",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
- "tool_version": "0.72+galaxy1",
- "type": "tool",
- "uuid": "189a22b5-434f-460c-bf5c-321f6daf8d3e",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "html_file",
- "uuid": "84546090-b1a0-43b3-82e4-d4a27aacc0d0"
- },
- {
- "label": null,
- "output_name": "text_file",
- "uuid": "cf433722-1d9d-4e31-8ecf-43d38bf787ce"
- }
- ]
- },
- "5": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
- "errors": null,
- "id": 5,
- "input_connections": {
- "library|input_1": {
- "id": 0,
- "output_name": "output"
- },
- "library|input_2": {
- "id": 1,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": "remove nextera adapters with cutadapt",
- "name": "Cutadapt",
- "outputs": [
- {
- "name": "out1",
- "type": "fastqsanger"
- },
- {
- "name": "out2",
- "type": "fastqsanger"
- },
- {
- "name": "report",
- "type": "txt"
- }
- ],
- "position": {
- "left": 460,
- "top": 200
- },
- "post_job_actions": {
- "RenameDatasetActionout1": {
- "action_arguments": {
- "newname": "cutadapt of #{input_1}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "out1"
- },
- "RenameDatasetActionout2": {
- "action_arguments": {
- "newname": "cutadapt of #{input_2}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "out2"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
- "tool_shed_repository": {
- "changeset_revision": "49370cb85f0f",
- "name": "cutadapt",
- "owner": "lparsons",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"output_options\": \"{\\\"info_file\\\": \\\"false\\\", \\\"multiple_output\\\": \\\"false\\\", \\\"report\\\": \\\"true\\\", \\\"rest_file\\\": \\\"false\\\", \\\"too_long_file\\\": \\\"false\\\", \\\"too_short_file\\\": \\\"false\\\", \\\"untrimmed_file\\\": \\\"false\\\", \\\"wildcard_file\\\": \\\"false\\\"}\", \"read_mod_options\": \"{\\\"length\\\": \\\"0\\\", \\\"length_tag\\\": \\\"\\\", \\\"nextseq_trim\\\": \\\"0\\\", \\\"prefix\\\": \\\"\\\", \\\"quality_cutoff\\\": \\\"20\\\", \\\"strip_suffix\\\": \\\"\\\", \\\"suffix\\\": \\\"\\\", \\\"trim_n\\\": \\\"false\\\"}\", \"adapter_options\": \"{\\\"count\\\": \\\"1\\\", \\\"error_rate\\\": \\\"0.1\\\", \\\"mask_adapter\\\": \\\"false\\\", \\\"match_read_wildcards\\\": \\\" \\\", \\\"no_indels\\\": \\\"false\\\", \\\"no_trim\\\": \\\"false\\\", \\\"overlap\\\": \\\"3\\\"}\", \"library\": \"{\\\"__current_case__\\\": 1, \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"r1\\\": {\\\"adapters\\\": [{\\\"__index__\\\": 0, \\\"adapter_source\\\": {\\\"__current_case__\\\": 0, \\\"adapter\\\": \\\"CTGTCTCTTATACACATCTCCGAGCCCACGAGAC\\\", \\\"adapter_name\\\": \\\"Nextera R1\\\", \\\"adapter_source_list\\\": \\\"user\\\"}}], \\\"anywhere_adapters\\\": [], \\\"cut\\\": \\\"0\\\", \\\"front_adapters\\\": []}, \\\"r2\\\": {\\\"adapters2\\\": [{\\\"__index__\\\": 0, \\\"adapter_source2\\\": {\\\"__current_case__\\\": 0, \\\"adapter2\\\": \\\"CTGTCTCTTATACACATCTGACGCTGCCGACGA\\\", \\\"adapter_name2\\\": \\\"Nextera R2\\\", \\\"adapter_source_list2\\\": \\\"user\\\"}}], \\\"anywhere_adapters2\\\": [], \\\"cut2\\\": \\\"0\\\", \\\"front_adapters2\\\": []}, \\\"type\\\": \\\"paired\\\"}\", \"filter_options\": \"{\\\"discard\\\": \\\"false\\\", \\\"discard_untrimmed\\\": \\\"false\\\", \\\"max\\\": \\\"0\\\", \\\"max_n\\\": \\\"\\\", \\\"min\\\": \\\"20\\\", \\\"pair_filter\\\": \\\"any\\\"}\", 
\"__rerun_remap_job_id__\": null}",
- "tool_version": "1.16.5",
- "type": "tool",
- "uuid": "cf97879c-01f2-4d57-be28-ca10998e10b6",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "report",
- "uuid": "1cab95b3-54c0-412c-ab28-1278ef04a26f"
- },
- {
- "label": null,
- "output_name": "out1",
- "uuid": "dd4a2ba8-faa2-446c-b9ac-91289ebd73b5"
- },
- {
- "label": null,
- "output_name": "out2",
- "uuid": "c94315ab-825b-4cae-a9ae-839faf90c09f"
- }
- ]
- },
- "6": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.27.1",
- "errors": null,
- "id": 6,
- "input_connections": {
- "input": {
- "id": 2,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bedtools SortBED",
- "outputs": [
- {
- "name": "output",
- "type": "input"
- }
- ],
- "position": {
- "left": 460,
- "top": 798
- },
- "post_job_actions": {
- "HideDatasetActionoutput": {
- "action_arguments": {},
- "action_type": "HideDatasetAction",
- "output_name": "output"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.27.1",
- "tool_shed_repository": {
- "changeset_revision": "87ee588b3d45",
- "name": "bedtools",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"option\": \"\\\"\\\"\", \"__page__\": null}",
- "tool_version": "2.27.1",
- "type": "tool",
- "uuid": "0188766d-6fb1-49cc-a627-8dba718663d3",
- "workflow_outputs": []
- },
- "7": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.27.1",
- "errors": null,
- "id": 7,
- "input_connections": {
- "input": {
- "id": 3,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bedtools SortBED",
- "outputs": [
- {
- "name": "output",
- "type": "input"
- }
- ],
- "position": {
- "left": 460,
- "top": 940
- },
- "post_job_actions": {
- "HideDatasetActionoutput": {
- "action_arguments": {},
- "action_type": "HideDatasetAction",
- "output_name": "output"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.27.1",
- "tool_shed_repository": {
- "changeset_revision": "87ee588b3d45",
- "name": "bedtools",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"option\": \"\\\"\\\"\", \"__page__\": null}",
- "tool_version": "2.27.1",
- "type": "tool",
- "uuid": "190fe21a-4738-4be7-aa01-a467355be7c3",
- "workflow_outputs": []
- },
- "8": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3",
- "errors": null,
- "id": 8,
- "input_connections": {
- "library|input_1": {
- "id": 5,
- "output_name": "out1"
- },
- "library|input_2": {
- "id": 5,
- "output_name": "out2"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Bowtie2",
- "outputs": [
- {
- "name": "output",
- "type": "bam"
- },
- {
- "name": "mapping_stats",
- "type": "txt"
- }
- ],
- "position": {
- "left": 788,
- "top": 200
- },
- "post_job_actions": {
- "RenameDatasetActionmapping_stats": {
- "action_arguments": {
- "newname": "mapping stats of #{input_1}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "mapping_stats"
- },
- "RenameDatasetActionoutput": {
- "action_arguments": {
- "newname": "mapping of #{input_1}"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "output"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3",
- "tool_shed_repository": {
- "changeset_revision": "017aba02828d",
- "name": "bowtie2",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"sam_options\": \"{\\\"__current_case__\\\": 1, \\\"sam_options_selector\\\": \\\"no\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"library\": \"{\\\"__current_case__\\\": 1, \\\"aligned_file\\\": \\\"false\\\", \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"paired_options\\\": {\\\"I\\\": \\\"0\\\", \\\"X\\\": \\\"1000\\\", \\\"__current_case__\\\": 0, \\\"dovetail\\\": \\\"true\\\", \\\"fr_rf_ff\\\": \\\"--fr\\\", \\\"no_contain\\\": \\\"false\\\", \\\"no_discordant\\\": \\\"false\\\", \\\"no_mixed\\\": \\\"false\\\", \\\"no_overlap\\\": \\\"false\\\", \\\"paired_options_selector\\\": \\\"yes\\\"}, \\\"type\\\": \\\"paired\\\", \\\"unaligned_file\\\": \\\"false\\\"}\", \"reference_genome\": \"{\\\"__current_case__\\\": 0, \\\"index\\\": \\\"hg38canon\\\", \\\"source\\\": \\\"indexed\\\"}\", \"rg\": \"{\\\"__current_case__\\\": 3, \\\"rg_selector\\\": \\\"do_not_set\\\"}\", \"save_mapping_stats\": \"\\\"true\\\"\", \"analysis_type\": \"{\\\"__current_case__\\\": 0, \\\"analysis_type_selector\\\": \\\"simple\\\", \\\"presets\\\": \\\"--very-sensitive\\\"}\"}",
- "tool_version": "2.3.4.3",
- "type": "tool",
- "uuid": "d12fa188-7671-4ff7-b13f-9ea2322c9a6a",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "output",
- "uuid": "1af4d81d-377e-4880-bbac-604b5accce80"
- },
- {
- "label": null,
- "output_name": "mapping_stats",
- "uuid": "3311e54f-3721-44b7-be56-adb2610c8c04"
- }
- ]
- },
- "9": {
- "annotation": "We keep properly paired reads, not on chrM, with MAPQ>=30.",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
- "errors": null,
- "id": 9,
- "input_connections": {
- "input_bam": {
- "id": 8,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": "Filter BAM",
- "name": "Filter",
- "outputs": [
- {
- "name": "out_file2",
- "type": "txt"
- },
- {
- "name": "out_file1",
- "type": "bam"
- }
- ],
- "position": {
- "left": 1116,
- "top": 200
- },
- "post_job_actions": {
- "RenameDatasetActionout_file1": {
- "action_arguments": {
- "newname": "#{input_bam} filtered"
- },
- "action_type": "RenameDatasetAction",
- "output_name": "out_file1"
- }
- },
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
- "tool_shed_repository": {
- "changeset_revision": "4fe8a75334c8",
- "name": "bamtools_filter",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"input_bam\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"rule_configuration\": \"{\\\"__current_case__\\\": 0, \\\"rules_selector\\\": \\\"false\\\"}\", \"conditions\": \"[{\\\"__index__\\\": 0, \\\"filters\\\": [{\\\"__index__\\\": 0, \\\"bam_property\\\": {\\\"__current_case__\\\": 14, \\\"bam_property_selector\\\": \\\"mapQuality\\\", \\\"bam_property_value\\\": \\\">=30\\\"}}, {\\\"__index__\\\": 1, \\\"bam_property\\\": {\\\"__current_case__\\\": 11, \\\"bam_property_selector\\\": \\\"isProperPair\\\", \\\"bam_property_value\\\": \\\"true\\\"}}, {\\\"__index__\\\": 2, \\\"bam_property\\\": {\\\"__current_case__\\\": 20, \\\"bam_property_selector\\\": \\\"reference\\\", \\\"bam_property_value\\\": \\\"!chrM\\\"}}]}]\", \"__page__\": null}",
- "tool_version": "2.4.1",
- "type": "tool",
- "uuid": "9dbe136b-92c2-44b7-ba6d-41b0d4e65f53",
- "workflow_outputs": [
- {
- "label": null,
- "output_name": "out_file2",
- "uuid": "24bcb016-ddbc-4397-b65b-e55f6a6dcf2e"
- },
- {
- "label": null,
- "output_name": "out_file1",
- "uuid": "a7fd4b61-3850-4742-9979-d4022c4c3bf3"
- }
- ]
- }
- },
- "tags": [
- "epigenetics"
- ],
- "uuid": "49c9f3a0-f84e-4e2f-ae9d-2cf0eac9f247",
- "version": 4
+ "a_galaxy_workflow": "true",
+ "annotation": "atac-seq\n",
+ "format-version": "0.1",
+ "name": "ATAC-seq GTM",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [],
+ "label": "ATAC R1 read in fastq(.gz) format",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 139.921875,
+ "top": 326.671875
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "28c9e734-a2c3-4fd8-bb0e-0f7cb07b28c0",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "88014313-4f9b-478e-ab57-1cab1bade209"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [],
+ "label": "ATAC R2 read in fastq(.gz) format",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 145.328125,
+ "top": 540.890625
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "5c751222-c315-4334-806d-450ab3aabf76",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "11cc13d6-c4f7-4735-a2fd-527fec11738b"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [],
+ "label": "bed file with genes",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 2308.5,
+ "top": 917.96875
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "6ee9a913-0bfb-4c69-b90a-279feb412a71",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "eb1c4a04-7932-4935-ba46-c8c5b04c6c7e"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 3,
+ "input_connections": {},
+ "inputs": [],
+ "label": "ctcf peaks",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 2316.59375,
+ "top": 1077.296875
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "b356e32f-d1ea-4e32-b607-6d178f184990",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "b62972fb-70ff-4f11-ae3e-cc74788ab08e"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input_file": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 485.09375,
+ "top": 192.5625
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ },
+ "RenameDatasetActionhtml_file": {
+ "action_arguments": {
+ "newname": "FastQC of #{input_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "html_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "20823f0a-f9c8-4c0f-87c3-437a6a563788",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "fc754ece-cd2e-4928-b13b-b4817f37fedb"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "library|input_1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "library|input_2": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": "ATAC remove nextera adapters with cutadapt",
+ "name": "Cutadapt",
+ "outputs": [
+ {
+ "name": "out1",
+ "type": "fastqsanger"
+ },
+ {
+ "name": "out2",
+ "type": "fastqsanger"
+ },
+ {
+ "name": "report",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 453.578125,
+ "top": 463.484375
+ },
+ "post_job_actions": {
+ "RenameDatasetActionout1": {
+ "action_arguments": {
+ "newname": "cutadapt of #{input_1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out1"
+ },
+ "RenameDatasetActionout2": {
+ "action_arguments": {
+ "newname": "cutadapt of #{input_2}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out2"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
+ "tool_shed_repository": {
+ "changeset_revision": "49370cb85f0f",
+ "name": "cutadapt",
+ "owner": "lparsons",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output_options\": \"{\\\"info_file\\\": \\\"false\\\", \\\"multiple_output\\\": \\\"false\\\", \\\"report\\\": \\\"true\\\", \\\"rest_file\\\": \\\"false\\\", \\\"too_long_file\\\": \\\"false\\\", \\\"too_short_file\\\": \\\"false\\\", \\\"untrimmed_file\\\": \\\"false\\\", \\\"wildcard_file\\\": \\\"false\\\"}\", \"read_mod_options\": \"{\\\"length\\\": \\\"0\\\", \\\"length_tag\\\": \\\"\\\", \\\"nextseq_trim\\\": \\\"0\\\", \\\"prefix\\\": \\\"\\\", \\\"quality_cutoff\\\": \\\"20\\\", \\\"strip_suffix\\\": \\\"\\\", \\\"suffix\\\": \\\"\\\", \\\"trim_n\\\": \\\"false\\\"}\", \"adapter_options\": \"{\\\"count\\\": \\\"1\\\", \\\"error_rate\\\": \\\"0.1\\\", \\\"mask_adapter\\\": \\\"false\\\", \\\"match_read_wildcards\\\": \\\" \\\", \\\"no_indels\\\": \\\"false\\\", \\\"no_trim\\\": \\\"false\\\", \\\"overlap\\\": \\\"3\\\"}\", \"library\": \"{\\\"__current_case__\\\": 1, \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"r1\\\": {\\\"adapters\\\": [{\\\"__index__\\\": 0, \\\"adapter_source\\\": {\\\"__current_case__\\\": 0, \\\"adapter\\\": \\\"CTGTCTCTTATACACATCTCCGAGCCCACGAGAC\\\", \\\"adapter_name\\\": \\\"Nextera R1\\\", \\\"adapter_source_list\\\": \\\"user\\\"}}], \\\"anywhere_adapters\\\": [], \\\"cut\\\": \\\"0\\\", \\\"front_adapters\\\": []}, \\\"r2\\\": {\\\"adapters2\\\": [{\\\"__index__\\\": 0, \\\"adapter_source2\\\": {\\\"__current_case__\\\": 0, \\\"adapter2\\\": \\\"CTGTCTCTTATACACATCTGACGCTGCCGACGA\\\", \\\"adapter_name2\\\": \\\"Nextera R2\\\", \\\"adapter_source_list2\\\": \\\"user\\\"}}], \\\"anywhere_adapters2\\\": [], \\\"cut2\\\": \\\"0\\\", \\\"front_adapters2\\\": []}, \\\"type\\\": \\\"paired\\\"}\", \"filter_options\": \"{\\\"discard\\\": \\\"false\\\", \\\"discard_untrimmed\\\": \\\"false\\\", \\\"max\\\": \\\"0\\\", \\\"max_n\\\": \\\"\\\", \\\"min\\\": \\\"20\\\", \\\"pair_filter\\\": \\\"any\\\"}\", 
\"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.16.5",
+ "type": "tool",
+ "uuid": "96c9a087-9f0d-4659-9dfe-610d8e8fb91c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "report",
+ "uuid": "cb7ea4a9-5603-4dbf-ab24-bac9f866a109"
+ },
+ {
+ "label": null,
+ "output_name": "out1",
+ "uuid": "bb1fcc39-11a2-4725-8e91-9088df231a18"
+ },
+ {
+ "label": null,
+ "output_name": "out2",
+ "uuid": "a8812fd8-19fd-4c84-9fad-16003513d534"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "input_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 474.296875,
+ "top": 763.640625
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ },
+ "RenameDatasetActionhtml_file": {
+ "action_arguments": {
+ "newname": "FastQC of #{input_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "html_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "25e770ec-9dc3-49d0-a488-d1b630057530",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "e765b995-6fdd-4780-808a-5bcf8844072b"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.29.0",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "bedtools SortBED",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2668.5,
+ "top": 922.046875
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ },
+ "RenameDatasetActionoutput": {
+ "action_arguments": {
+ "newname": "#{input} sorted"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.29.0",
+ "tool_shed_repository": {
+ "changeset_revision": "b28e0cfa7ba1",
+ "name": "bedtools",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"option\": \"\\\"\\\"\", \"__page__\": null}",
+ "tool_version": "2.29.0",
+ "type": "tool",
+ "uuid": "423d7015-b262-4da6-a797-25aa74af0745",
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.29.0",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "bedtools SortBED",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2679.296875,
+ "top": 1083.140625
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output"
+ },
+ "RenameDatasetActionoutput": {
+ "action_arguments": {
+ "newname": "#{input} sorted"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_sortbed/2.29.0",
+ "tool_shed_repository": {
+ "changeset_revision": "b28e0cfa7ba1",
+ "name": "bedtools",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"option\": \"\\\"\\\"\", \"__page__\": null}",
+ "tool_version": "2.29.0",
+ "type": "tool",
+ "uuid": "fd827417-988d-44a6-a6bf-fca6538309df",
+ "workflow_outputs": []
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input_file": {
+ "id": 5,
+ "output_name": "out1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 775.796875,
+ "top": 184.921875
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ },
+ "RenameDatasetActionhtml_file": {
+ "action_arguments": {
+ "newname": "FastQC of #{input_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "html_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "351245fd-2f98-4540-bcf5-1ffc8d859594",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "02a67ec9-6b3a-4413-b1e1-fd56cead1ca7"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "library|input_1": {
+ "id": 5,
+ "output_name": "out1"
+ },
+ "library|input_2": {
+ "id": 5,
+ "output_name": "out2"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bowtie2",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "bam"
+ },
+ {
+ "name": "mapping_stats",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 776.671875,
+ "top": 473.390625
+ },
+ "post_job_actions": {
+ "RenameDatasetActionmapping_stats": {
+ "action_arguments": {
+ "newname": "mapping stats of #{input_1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "mapping_stats"
+ },
+ "RenameDatasetActionoutput": {
+ "action_arguments": {
+ "newname": "mapping of #{input_1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.3",
+ "tool_shed_repository": {
+ "changeset_revision": "017aba02828d",
+ "name": "bowtie2",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"sam_options\": \"{\\\"__current_case__\\\": 1, \\\"sam_options_selector\\\": \\\"no\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"library\": \"{\\\"__current_case__\\\": 1, \\\"aligned_file\\\": \\\"false\\\", \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"input_2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"paired_options\\\": {\\\"I\\\": \\\"0\\\", \\\"X\\\": \\\"1000\\\", \\\"__current_case__\\\": 0, \\\"dovetail\\\": \\\"true\\\", \\\"fr_rf_ff\\\": \\\"--fr\\\", \\\"no_contain\\\": \\\"false\\\", \\\"no_discordant\\\": \\\"false\\\", \\\"no_mixed\\\": \\\"false\\\", \\\"no_overlap\\\": \\\"false\\\", \\\"paired_options_selector\\\": \\\"yes\\\"}, \\\"type\\\": \\\"paired\\\", \\\"unaligned_file\\\": \\\"false\\\"}\", \"reference_genome\": \"{\\\"__current_case__\\\": 0, \\\"index\\\": \\\"hg38canon\\\", \\\"source\\\": \\\"indexed\\\"}\", \"rg\": \"{\\\"__current_case__\\\": 3, \\\"rg_selector\\\": \\\"do_not_set\\\"}\", \"save_mapping_stats\": \"\\\"true\\\"\", \"analysis_type\": \"{\\\"__current_case__\\\": 0, \\\"analysis_type_selector\\\": \\\"simple\\\", \\\"presets\\\": \\\"--very-sensitive\\\"}\"}",
+ "tool_version": "2.3.4.3",
+ "type": "tool",
+ "uuid": "e0677324-b7fc-44f7-9bb3-6fa8a7671f64",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "ee885fdb-df50-4d94-99fa-36ef6619797b"
+ },
+ {
+ "label": null,
+ "output_name": "mapping_stats",
+ "uuid": "c4f2bf70-6e4d-443f-9d3f-51d1ff501b91"
+ }
+ ]
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "input_file": {
+ "id": 5,
+ "output_name": "out2"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 774.890625,
+ "top": 776.234375
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ },
+ "RenameDatasetActionhtml_file": {
+ "action_arguments": {
+ "newname": "FastQC of #{input_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "html_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "265bed3e-c169-40ba-96ae-44d9b12b9635",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "67d416ce-5570-4de1-a6cd-8a97f96567de"
+ }
+ ]
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input_bam": {
+ "id": 10,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": "Filter BAM ATAC",
+ "name": "Filter",
+ "outputs": [
+ {
+ "name": "out_file2",
+ "type": "txt"
+ },
+ {
+ "name": "out_file1",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1079.09375,
+ "top": 469.796875
+ },
+ "post_job_actions": {
+ "RenameDatasetActionout_file1": {
+ "action_arguments": {
+ "newname": "#{input_bam} filtered"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
+ "tool_shed_repository": {
+ "changeset_revision": "cb20f99fd45b",
+ "name": "bamtools_filter",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_bam\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"rule_configuration\": \"{\\\"__current_case__\\\": 0, \\\"rules_selector\\\": \\\"false\\\"}\", \"conditions\": \"[{\\\"__index__\\\": 0, \\\"filters\\\": [{\\\"__index__\\\": 0, \\\"bam_property\\\": {\\\"__current_case__\\\": 14, \\\"bam_property_selector\\\": \\\"mapQuality\\\", \\\"bam_property_value\\\": \\\">=30\\\"}}, {\\\"__index__\\\": 1, \\\"bam_property\\\": {\\\"__current_case__\\\": 11, \\\"bam_property_selector\\\": \\\"isProperPair\\\", \\\"bam_property_value\\\": \\\"true\\\"}}, {\\\"__index__\\\": 2, \\\"bam_property\\\": {\\\"__current_case__\\\": 20, \\\"bam_property_selector\\\": \\\"reference\\\", \\\"bam_property_value\\\": \\\"!chrM\\\"}}]}]\", \"__page__\": null}",
+ "tool_version": "2.4.1",
+ "type": "tool",
+ "uuid": "f23d0019-e51f-4a2a-ae4c-35cb39fe8462",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file2",
+ "uuid": "93cb7ab3-245c-4e7f-b42a-af31caa8b296"
+ },
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "6cd3df3a-5ccd-4df6-858f-eef97e9285d2"
+ }
+ ]
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "inputFile": {
+ "id": 12,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MarkDuplicates",
+ "outputs": [
+ {
+ "name": "metrics_file",
+ "type": "txt"
+ },
+ {
+ "name": "outFile",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1349.09375,
+ "top": 468.890625
+ },
+ "post_job_actions": {
+ "RenameDatasetActionoutFile": {
+ "action_arguments": {
+ "newname": "#{inputFile} without duplicates"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "outFile"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
+ "tool_shed_repository": {
+ "changeset_revision": "a1f0b3f4b781",
+ "name": "picard",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"duplicate_scoring_strategy\": \"\\\"SUM_OF_BASE_QUALITIES\\\"\", \"remove_duplicates\": \"\\\"true\\\"\", \"read_name_regex\": \"\\\"\\\"\", \"barcode_tag\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"optical_duplicate_pixel_distance\": \"\\\"100\\\"\", \"comments\": \"[]\", \"assume_sorted\": \"\\\"true\\\"\", \"validation_stringency\": \"\\\"LENIENT\\\"\", \"inputFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "2.18.2.2",
+ "type": "tool",
+ "uuid": "7ce1de9d-b93d-459b-bad5-00c38831d69a",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFile",
+ "uuid": "c47a191d-88b4-4c7d-a776-828848a4116b"
+ },
+ {
+ "label": null,
+ "output_name": "metrics_file",
+ "uuid": "5738732e-55f0-4053-a327-ee4fb9473c9e"
+ }
+ ]
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_CollectInsertSizeMetrics/2.18.2.1",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "inputFile": {
+ "id": 13,
+ "output_name": "outFile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "CollectInsertSizeMetrics",
+ "outputs": [
+ {
+ "name": "outFile",
+ "type": "tabular"
+ },
+ {
+ "name": "histFile",
+ "type": "pdf"
+ }
+ ],
+ "position": {
+ "left": 1673.546875,
+ "top": 281.671875
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutFile": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outFile"
+ },
+ "RenameDatasetActionhistFile": {
+ "action_arguments": {
+ "newname": "fragment size plot of #{inputFile}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "histFile"
+ },
+ "RenameDatasetActionoutFile": {
+ "action_arguments": {
+ "newname": "fragment size data of #{inputFile}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "outFile"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_CollectInsertSizeMetrics/2.18.2.1",
+ "tool_shed_repository": {
+ "changeset_revision": "a1f0b3f4b781",
+ "name": "picard",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"deviations\": \"\\\"10.0\\\"\", \"__rerun_remap_job_id__\": null, \"hist_width\": \"\\\"\\\"\", \"assume_sorted\": \"\\\"true\\\"\", \"metric_accumulation_level\": \"[\\\"ALL_READS\\\"]\", \"validation_stringency\": \"\\\"LENIENT\\\"\", \"reference_source\": \"{\\\"__current_case__\\\": 0, \\\"ref_file\\\": \\\"hg38\\\", \\\"reference_source_selector\\\": \\\"cached\\\"}\", \"min_pct\": \"\\\"0.05\\\"\", \"inputFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "2.18.2.1",
+ "type": "tool",
+ "uuid": "1780755c-4f3b-4f11-ba52-57c4457348ba",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "histFile",
+ "uuid": "d783d55d-bcae-4908-8fd2-f5dbc8e41fd5"
+ }
+ ]
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0.3",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input1": {
+ "id": 13,
+ "output_name": "outFile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Samtools sort",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1676.5,
+ "top": 553
+ },
+ "post_job_actions": {
+ "RenameDatasetActionoutput1": {
+ "action_arguments": {
+ "newname": "qname sorted of #{input1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output1"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0.3",
+ "tool_shed_repository": {
+ "changeset_revision": "e613c1ad4c4c",
+ "name": "samtools_sort",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"prim_key_cond\": \"{\\\"__current_case__\\\": 1, \\\"prim_key_select\\\": \\\"-n\\\"}\"}",
+ "tool_version": "2.0.3",
+ "type": "tool",
+ "uuid": "01337dc5-3fbe-4258-954f-257cbea8da28",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output1",
+ "uuid": "91aafa0a-95aa-4e24-8a59-4ea1bf33f450"
+ }
+ ]
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_bamtobed/2.29.0",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input": {
+ "id": 13,
+ "output_name": "outFile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "bedtools BAM to BED",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "bed"
+ }
+ ],
+ "position": {
+ "left": 2009.25,
+ "top": 1105.1875
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_bamtobed/2.29.0",
+ "tool_shed_repository": {
+ "changeset_revision": "b28e0cfa7ba1",
+ "name": "bedtools",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"option\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"tag\": \"\\\"\\\"\", \"split\": \"\\\"false\\\"\", \"ed_score\": \"\\\"false\\\"\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "2.29.0",
+ "type": "tool",
+ "uuid": "97a4e7f6-14e3-4ce4-b8bf-2f01f407a00e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "0c47d2f1-01e9-47fa-a20f-371399894a2f"
+ }
+ ]
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/genrich/genrich/0.5+galaxy2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "treatment|input_treatment_file": {
+ "id": 15,
+ "output_name": "output1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Genrich",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "encodepeak"
+ },
+ {
+ "name": "out_bedgraph2",
+ "type": "bedgraph"
+ },
+ {
+ "name": "out_dups",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 1969.1875,
+ "top": 598.046875
+ },
+ "post_job_actions": {
+ "RenameDatasetActionout_bedgraph2": {
+ "action_arguments": {
+ "newname": "Genrich pile-up of #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out_bedgraph2"
+ },
+ "RenameDatasetActionout_dups": {
+ "action_arguments": {
+ "newname": "Genrich PCR duplicates of #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out_dups"
+ },
+ "RenameDatasetActionoutfile": {
+ "action_arguments": {
+ "newname": "Genrich peaks of #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "outfile"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/genrich/genrich/0.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "8353f3cc03db",
+ "name": "genrich",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"control\": \"{\\\"__current_case__\\\": 1, \\\"c_select\\\": \\\"No\\\"}\", \"__page__\": null, \"output_options\": \"{\\\"bed\\\": \\\"false\\\", \\\"bedgraph1\\\": \\\"false\\\", \\\"bedgraph2\\\": \\\"true\\\"}\", \"__rerun_remap_job_id__\": null, \"other_options\": \"{\\\"skip_peak_calling\\\": \\\"false\\\"}\", \"filter_options\": \"{\\\"alignment_lengths\\\": \\\"\\\", \\\"alignment_lengths2\\\": \\\"false\\\", \\\"alignment_score\\\": \\\"0.0\\\", \\\"duplicates\\\": \\\"true\\\", \\\"exclude_chr\\\": \\\"\\\", \\\"min_mapq\\\": \\\"0\\\", \\\"unpaired\\\": \\\"false\\\"}\", \"peakcalling_options\": \"{\\\"max_dist\\\": \\\"100\\\", \\\"max_p\\\": \\\"\\\", \\\"max_q\\\": \\\"0.05\\\", \\\"min_auc\\\": \\\"20.0\\\", \\\"min_peak_length\\\": \\\"0\\\"}\", \"atac_options\": \"{\\\"atac\\\": \\\"true\\\", \\\"expand_sites\\\": \\\"100\\\"}\", \"treatment\": \"{\\\"__current_case__\\\": 0, \\\"input_treatment_file\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"t_multi_select\\\": \\\"No\\\"}\", \"cond_exclude\": \"{\\\"__current_case__\\\": 0, \\\"exclude_select\\\": \\\"No\\\"}\"}",
+ "tool_version": "0.5+galaxy2",
+ "type": "tool",
+ "uuid": "5964c14b-708d-43c1-8ae4-b471286d19e8",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "1d5d7c2b-1315-46a4-924c-a57cc65d7104"
+ },
+ {
+ "label": null,
+ "output_name": "out_bedgraph2",
+ "uuid": "78e29a63-7978-42d2-9a80-d5be6eed09ca"
+ },
+ {
+ "label": null,
+ "output_name": "out_dups",
+ "uuid": "aa03693e-51b5-4cd2-bdb8-fec47a64f769"
+ }
+ ]
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/macs2/macs2_callpeak/2.1.1.20160309.6",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "treatment|input_treatment_file": {
+ "id": 16,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MACS2 callpeak",
+ "outputs": [
+ {
+ "name": "output_tabular",
+ "type": "tabular"
+ },
+ {
+ "name": "output_narrowpeaks",
+ "type": "bed"
+ },
+ {
+ "name": "output_summits",
+ "type": "bed"
+ },
+ {
+ "name": "output_treat_pileup",
+ "type": "bedgraph"
+ },
+ {
+ "name": "output_control_lambda",
+ "type": "bedgraph"
+ }
+ ],
+ "position": {
+ "left": 2541.890625,
+ "top": 1186.484375
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput_control_lambda": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_control_lambda"
+ },
+ "HideDatasetActionoutput_tabular": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_tabular"
+ },
+ "RenameDatasetActionoutput_narrowpeaks": {
+ "action_arguments": {
+ "newname": "macs2 peaks (narrowPeak) from #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output_narrowpeaks"
+ },
+ "RenameDatasetActionoutput_summits": {
+ "action_arguments": {
+ "newname": "macs2 summits from #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output_summits"
+ },
+ "RenameDatasetActionoutput_tabular": {
+ "action_arguments": {
+ "newname": "macs2 peaks (tabular) from #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output_tabular"
+ },
+ "RenameDatasetActionoutput_treat_pileup": {
+ "action_arguments": {
+ "newname": "macs2 coverage from #{input_treatment_file}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "output_treat_pileup"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/macs2/macs2_callpeak/2.1.1.20160309.6",
+ "tool_shed_repository": {
+ "changeset_revision": "424aefbd7777",
+ "name": "macs2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"control\": \"{\\\"__current_case__\\\": 1, \\\"c_select\\\": \\\"No\\\"}\", \"__page__\": null, \"effective_genome_size_options\": \"{\\\"__current_case__\\\": 0, \\\"effective_genome_size_options_selector\\\": \\\"2700000000\\\"}\", \"format\": \"\\\"BED\\\"\", \"outputs\": \"[\\\"peaks_tabular\\\", \\\"summits\\\", \\\"bdg\\\"]\", \"__rerun_remap_job_id__\": null, \"cutoff_options\": \"{\\\"__current_case__\\\": 1, \\\"cutoff_options_selector\\\": \\\"qvalue\\\", \\\"qvalue\\\": \\\"0.05\\\"}\", \"advanced_options\": \"{\\\"broad_options\\\": {\\\"__current_case__\\\": 1, \\\"broad_options_selector\\\": \\\"nobroad\\\", \\\"call_summits\\\": \\\"false\\\"}, \\\"keep_dup_options\\\": {\\\"__current_case__\\\": 2, \\\"keep_dup_options_selector\\\": \\\"all\\\"}, \\\"llocal\\\": \\\"\\\", \\\"nolambda\\\": \\\"false\\\", \\\"ratio\\\": \\\"\\\", \\\"slocal\\\": \\\"\\\", \\\"spmr\\\": \\\"false\\\", \\\"to_large\\\": \\\"false\\\"}\", \"treatment\": \"{\\\"__current_case__\\\": 0, \\\"input_treatment_file\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"t_multi_select\\\": \\\"No\\\"}\", \"nomodel_type\": \"{\\\"__current_case__\\\": 1, \\\"extsize\\\": \\\"100\\\", \\\"nomodel_type_selector\\\": \\\"nomodel\\\", \\\"shift\\\": \\\"-50\\\"}\"}",
+ "tool_version": "2.1.1.20160309.6",
+ "type": "tool",
+ "uuid": "1c8458d8-4f7e-452a-9458-7edad0c39e9d",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_treat_pileup",
+ "uuid": "288d4a5f-bac6-41c9-b48a-b12db1ac1fc3"
+ },
+ {
+ "label": null,
+ "output_name": "output_summits",
+ "uuid": "e952ca62-97a1-4c78-8f6f-a827685d4444"
+ },
+ {
+ "label": null,
+ "output_name": "output_narrowpeaks",
+ "uuid": "5b31c4c7-5789-4d5c-9d82-cc3fd40473d7"
+ }
+ ]
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/1.1.1",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "infile": {
+ "id": 17,
+ "output_name": "out_bedgraph2"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Text reformatting",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2308.046875,
+ "top": 437.8125
+ },
+ "post_job_actions": {
+ "RenameDatasetActionoutfile": {
+ "action_arguments": {
+ "newname": "#{infile} as bedgraph"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "outfile"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/1.1.1",
+ "tool_shed_repository": {
+ "changeset_revision": "9ff72e942410",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"code\": \"\\\"NR>=3 {print $1,$2,$3,$4}\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "1.1.1",
+ "type": "tool",
+ "uuid": "8de7173f-fcbf-4662-a783-77310beefa99",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "4db0ced8-d082-4f4d-8ee6-1e98229a7b39"
+ }
+ ]
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "wig_to_bigWig",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "input1": {
+ "id": 18,
+ "output_name": "output_treat_pileup"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Wig/BedGraph-to-bigWig",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 2964.59375,
+ "top": 1194.296875
+ },
+ "post_job_actions": {
+ "RenameDatasetActionout_file1": {
+ "action_arguments": {
+ "newname": "bigwig of #{input1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "wig_to_bigWig",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"settings\": \"{\\\"__current_case__\\\": 0, \\\"settingsType\\\": \\\"preset\\\"}\"}",
+ "tool_version": "1.1.1",
+ "type": "tool",
+ "uuid": "5cce7368-6275-4a9f-9046-3da58745f579",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "1d51e34a-25dd-4a91-9db5-0a391602b3a0"
+ }
+ ]
+ },
+ "21": {
+ "annotation": "",
+ "content_id": "wig_to_bigWig",
+ "errors": null,
+ "id": 21,
+ "input_connections": {
+ "input1": {
+ "id": 19,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Wig/BedGraph-to-bigWig",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 2545.1875,
+ "top": 428.359375
+ },
+ "post_job_actions": {
+ "RenameDatasetActionout_file1": {
+ "action_arguments": {
+ "newname": "bigwig of #{input1}"
+ },
+ "action_type": "RenameDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "wig_to_bigWig",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"settings\": \"{\\\"__current_case__\\\": 0, \\\"settingsType\\\": \\\"preset\\\"}\"}",
+ "tool_version": "1.1.1",
+ "type": "tool",
+ "uuid": "a64af60d-194d-4bd5-baa4-67927d289ec0",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "9da301cd-68cd-4a56-835a-1abef1b416ce"
+ }
+ ]
+ },
+ "22": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/3.0.2.0",
+ "errors": null,
+ "id": 22,
+ "input_connections": {
+ "multibigwig_conditional|bigwigfiles": [
+ {
+ "id": 21,
+ "output_name": "out_file1"
+ },
+ {
+ "id": 20,
+ "output_name": "out_file1"
+ }
+ ],
+ "regionsFiles_0|regionsFile": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool computeMatrix",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "computeMatrix",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "deeptools_compute_matrix_archive"
+ }
+ ],
+ "position": {
+ "left": 2933.09375,
+ "top": 377.0625
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/3.0.2.0",
+ "tool_shed_repository": {
+ "changeset_revision": "fb9cf9c97ec4",
+ "name": "deeptools_compute_matrix",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"multibigwig_conditional\": \"{\\\"__current_case__\\\": 0, \\\"bigwigfiles\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"orderMatters\\\": \\\"No\\\"}\", \"mode\": \"{\\\"__current_case__\\\": 1, \\\"afterRegionStartLength\\\": \\\"1000\\\", \\\"beforeRegionStartLength\\\": \\\"1000\\\", \\\"mode_select\\\": \\\"reference-point\\\", \\\"nanAfterEnd\\\": \\\"false\\\", \\\"referencePoint\\\": \\\"TSS\\\"}\", \"output\": \"{\\\"__current_case__\\\": 0, \\\"showOutputSettings\\\": \\\"no\\\"}\", \"advancedOpt\": \"{\\\"__current_case__\\\": 1, \\\"averageTypeBins\\\": \\\"mean\\\", \\\"binSize\\\": \\\"50\\\", \\\"blackListFileName\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"exonID\\\": \\\"exon\\\", \\\"maxThreshold\\\": \\\"\\\", \\\"metagene\\\": \\\"false\\\", \\\"minThreshold\\\": \\\"\\\", \\\"missingDataAsZero\\\": \\\"true\\\", \\\"samplesLabel\\\": \\\"\\\", \\\"scale\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"skipZeros\\\": \\\"false\\\", \\\"sortRegions\\\": \\\"keep\\\", \\\"sortUsing\\\": \\\"mean\\\", \\\"transcriptID\\\": \\\"transcript\\\", \\\"transcript_id_designator\\\": \\\"transcript_id\\\"}\", \"regionsFiles\": \"[{\\\"__index__\\\": 0, \\\"regionsFile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}]\"}",
+ "tool_version": "3.0.2.0",
+ "type": "tool",
+ "uuid": "5a222119-7228-42a5-a3af-70137e8787f1",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "cb3f559c-c83b-4eb8-a77c-d875b5bcdf16"
+ }
+ ]
+ },
+ "23": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/pygenometracks/pygenomeTracks/3.3",
+ "errors": null,
+ "id": 23,
+ "input_connections": {
+ "tracks_0|track_file_style_conditional|track_input_bigwig": {
+ "id": 21,
+ "output_name": "out_file1"
+ },
+ "tracks_1|track_file_style_conditional|track_input_narrow_peak": {
+ "id": 17,
+ "output_name": "outfile"
+ },
+ "tracks_2|track_file_style_conditional|track_input_bigwig": {
+ "id": 20,
+ "output_name": "out_file1"
+ },
+ "tracks_3|track_file_style_conditional|track_input_narrow_peak": {
+ "id": 18,
+ "output_name": "output_narrowpeaks"
+ },
+ "tracks_4|track_file_style_conditional|track_input_bed": {
+ "id": 7,
+ "output_name": "output"
+ },
+ "tracks_5|track_file_style_conditional|track_input_narrow_peak": {
+ "id": 8,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "pyGenomeTracks",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3181.046875,
+ "top": 744.296875
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/pygenometracks/pygenomeTracks/3.3",
+ "tool_shed_repository": {
+ "changeset_revision": "326a3db8d9d1",
+ "name": "pygenometracks",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"global_args\": \"{\\\"dpi\\\": \\\"72\\\", \\\"fontsize\\\": \\\"12\\\", \\\"title\\\": \\\"\\\", \\\"trackLabelFraction\\\": \\\"0.05\\\", \\\"trackLabelHAlign\\\": \\\"left\\\"}\", \"image_file_format\": \"\\\"png\\\"\", \"tracks\": \"[{\\\"__index__\\\": 0, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 6, \\\"alpha\\\": \\\"1.0\\\", \\\"color\\\": \\\"#c0504d\\\", \\\"color_negative\\\": {\\\"__current_case__\\\": 0, \\\"color_negative_select\\\": \\\"false\\\"}, \\\"height_bigwig\\\": \\\"5.0\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"max_value\\\": \\\"\\\", \\\"min_value\\\": \\\"0.0\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"reading_bw_conditional\\\": {\\\"nans_to_zeros\\\": \\\"false\\\", \\\"number_of_bins\\\": \\\"\\\", \\\"summary\\\": \\\"mean\\\"}, \\\"show_data\\\": \\\"true\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"Coverage from Genrich (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"bigwig_track_option\\\", \\\"track_input_bigwig\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"type_conditional\\\": {\\\"__current_case__\\\": 2, \\\"type_selector\\\": \\\"fill_option\\\"}}}, {\\\"__index__\\\": 1, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 4, \\\"color\\\": \\\"#c0504d\\\", \\\"display\\\": {\\\"__current_case__\\\": 1, \\\"display_selector\\\": \\\"box\\\", \\\"use_summit\\\": \\\"true\\\"}, \\\"fontsize\\\": \\\"\\\", \\\"height_narrow_peak\\\": \\\"1.5\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"line_width\\\": \\\"1.0\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"show_labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"Peaks from Genrich (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"narrow_peak_track_option\\\", \\\"track_input_narrow_peak\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}}, {\\\"__index__\\\": 2, 
\\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 6, \\\"alpha\\\": \\\"1.0\\\", \\\"color\\\": \\\"#00b050\\\", \\\"color_negative\\\": {\\\"__current_case__\\\": 0, \\\"color_negative_select\\\": \\\"false\\\"}, \\\"height_bigwig\\\": \\\"5.0\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"max_value\\\": \\\"\\\", \\\"min_value\\\": \\\"0.0\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"reading_bw_conditional\\\": {\\\"nans_to_zeros\\\": \\\"false\\\", \\\"number_of_bins\\\": \\\"\\\", \\\"summary\\\": \\\"mean\\\"}, \\\"show_data\\\": \\\"true\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"Coverage from MACS2 (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"bigwig_track_option\\\", \\\"track_input_bigwig\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"type_conditional\\\": {\\\"__current_case__\\\": 2, \\\"type_selector\\\": \\\"fill_option\\\"}}}, {\\\"__index__\\\": 3, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 4, \\\"color\\\": \\\"#00b050\\\", \\\"display\\\": {\\\"__current_case__\\\": 1, \\\"display_selector\\\": \\\"box\\\", \\\"use_summit\\\": \\\"true\\\"}, \\\"fontsize\\\": \\\"\\\", \\\"height_narrow_peak\\\": \\\"1.5\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"line_width\\\": \\\"1.0\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"show_labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"Peaks from MACS2 (extended +/-50bp)\\\", \\\"track_file_style_selector\\\": \\\"narrow_peak_track_option\\\", \\\"track_input_narrow_peak\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}}, {\\\"__index__\\\": 4, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 2, \\\"bed_style\\\": {\\\"__current_case__\\\": 0, \\\"arrowhead_included\\\": \\\"false\\\", \\\"bed_advanced\\\": {\\\"color_utr\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#808080\\\", \\\"color_utr_select\\\": \\\"manually\\\"}, \\\"gene_rows\\\": \\\"\\\", \\\"global_max_row\\\": 
\\\"false\\\", \\\"gtf\\\": {\\\"merge_transcripts\\\": \\\"false\\\", \\\"prefered_name\\\": \\\"transcript_name\\\"}, \\\"height_utr\\\": \\\"1.0\\\", \\\"line_width\\\": \\\"0.5\\\", \\\"max_labels\\\": \\\"60\\\"}, \\\"bed_style_select\\\": \\\"flybase\\\"}, \\\"border_color_bed\\\": {\\\"__current_case__\\\": 0, \\\"border_color_bed_select\\\": \\\"manually\\\", \\\"color\\\": \\\"#000000\\\"}, \\\"color_bed\\\": {\\\"__current_case__\\\": 0, \\\"color\\\": \\\"#000000\\\", \\\"color_bed_select\\\": \\\"manually\\\"}, \\\"display\\\": \\\"stacked\\\", \\\"fontsize\\\": \\\"\\\", \\\"height_bed\\\": \\\"5.0\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"labels\\\": {\\\"__current_case__\\\": 0, \\\"all_labels_inside\\\": \\\"true\\\", \\\"labels_in_margin\\\": \\\"true\\\", \\\"plot_labels\\\": \\\"true\\\"}, \\\"overlay_select\\\": \\\"no\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"Genes\\\", \\\"track_file_style_selector\\\": \\\"gene_track_option\\\", \\\"track_input_bed\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}}, {\\\"__index__\\\": 5, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 4, \\\"color\\\": \\\"#00b0f0\\\", \\\"display\\\": {\\\"__current_case__\\\": 1, \\\"display_selector\\\": \\\"box\\\", \\\"use_summit\\\": \\\"true\\\"}, \\\"fontsize\\\": \\\"\\\", \\\"height_narrow_peak\\\": \\\"1.5\\\", \\\"invert_orientation\\\": \\\"false\\\", \\\"line_width\\\": \\\"1.0\\\", \\\"overlay_select\\\": \\\"no\\\", \\\"show_labels\\\": \\\"false\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"CTCF peaks\\\", \\\"track_file_style_selector\\\": \\\"narrow_peak_track_option\\\", \\\"track_input_narrow_peak\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}}, {\\\"__index__\\\": 6, \\\"track_file_style_conditional\\\": {\\\"__current_case__\\\": 11, \\\"fontsize\\\": \\\"\\\", \\\"spacer_height\\\": \\\"\\\", \\\"title\\\": \\\"\\\", \\\"track_file_style_selector\\\": \\\"xaxis_option\\\", \\\"xaxis_where\\\": 
\\\"bottom\\\"}}]\", \"region\": \"\\\"chr22:37,193,000-37,252,000\\\"\"}",
+ "tool_version": "3.3",
+ "type": "tool",
+ "uuid": "92c04758-fac3-4719-90ef-78b98f19ffae",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "2d37bc32-b0db-46f4-a7f7-3bd2a63edaca"
+ }
+ ]
+ },
+ "24": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_plot_heatmap/deeptools_plot_heatmap/3.0.2.0",
+ "errors": null,
+ "id": 24,
+ "input_connections": {
+ "matrixFile": {
+ "id": 22,
+ "output_name": "outFileName"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "plotHeatmap",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3276,
+ "top": 402.265625
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_plot_heatmap/deeptools_plot_heatmap/3.0.2.0",
+ "tool_shed_repository": {
+ "changeset_revision": "010e58e9d822",
+ "name": "deeptools_plot_heatmap",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output\": \"{\\\"__current_case__\\\": 0, \\\"showOutputSettings\\\": \\\"no\\\"}\", \"advancedOpt\": \"{\\\"__current_case__\\\": 0, \\\"showAdvancedOpt\\\": \\\"no\\\"}\", \"__rerun_remap_job_id__\": null, \"matrixFile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "3.0.2.0",
+ "type": "tool",
+ "uuid": "073b0ded-41a7-4648-9b79-db19d2b951a2",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "f43f7a7c-b71d-4d76-a656-4992c1f4e219"
+ }
+ ]
+ }
+ },
+ "tags": [
+ "epigenetics"
+ ],
+ "uuid": "8ecad8c4-8f22-4c18-b914-5833c9664218",
+ "version": 12
}
\ No newline at end of file
diff --git a/topics/instructors/README.md b/topics/instructors/README.md
index cff579ca36291e..bebf24c7008fd2 100644
--- a/topics/instructors/README.md
+++ b/topics/instructors/README.md
@@ -2,10 +2,10 @@ Teaching and Hosting Galaxy trainings
=====================================
Galaxy is a great solution to train the bioinformatics concepts:
-numerous bioinformatics tools are available (almost 5,000 in the ToolShed), it
+numerous bioinformatics tools are available (more than 7,000 in the ToolShed), it
can be used by people without any computer science skills, it trains to use
technology, outlining available resources and efforts that have made them
accessible to researchers, it is scalable.
The Galaxy community via the Galaxy Training Network supports instructors and
-workshop organizers using with a series of recommandations and regular meetings.
\ No newline at end of file
+workshop organizers with a series of recommendations and regular meetings.
diff --git a/topics/introduction/slides/introduction.html b/topics/introduction/slides/introduction.html
index 2f3d8ec0d99026..b2c7740898a3bc 100644
--- a/topics/introduction/slides/introduction.html
+++ b/topics/introduction/slides/introduction.html
@@ -69,7 +69,7 @@
## Galaxy growth
- More than 7,000 ready to use tools for users
-- More than 7,500 [citations](https://www.zotero.org/groups/1732893/galaxy)
+- More than 9,500 [citations](https://www.zotero.org/groups/1732893/galaxy)
- More than 350 [public Galaxy resources](https://galaxyproject.org/use/)
- - 120+ public servers, many more non-public
- - Both general-purpose and domain-specific
diff --git a/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md b/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md
index d0c4289317a94e..57eb5434944af5 100644
--- a/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md
+++ b/topics/introduction/tutorials/galaxy-intro-101-everyone/tutorial.md
@@ -454,6 +454,8 @@ check whether we can spot any immediate patterns.
> - *"Plotting multiple groups"*: `Plot multiple groups of data on one plot`
> - *"column differentiating the different groups"*: `5`
> - *"Color schemes to differentiate your groups"*: `Set 2 - predefined color pallete`
+> - In *"Output Options"*:
+> - *"Additional output format"*: `PDF`
>
> 2. **View** {% icon galaxy-eye%} the resulting plot:
>
@@ -738,4 +740,3 @@ To share a history, click on the {% icon galaxy-gear %} icon in the history pane
{:.no_toc}
{% icon trophy %} Well done! You have just performed your first analysis in Galaxy. Additionally you can share your results and methods with others.
-
diff --git a/topics/introduction/tutorials/galaxy-intro-101-everyone/workflows/main_workflow.ga b/topics/introduction/tutorials/galaxy-intro-101-everyone/workflows/main_workflow.ga
index 152ae7d2300bce..8b969e017681de 100644
--- a/topics/introduction/tutorials/galaxy-intro-101-everyone/workflows/main_workflow.ga
+++ b/topics/introduction/tutorials/galaxy-intro-101-everyone/workflows/main_workflow.ga
@@ -1,351 +1,435 @@
{
- "uuid": "a9657758-d95c-4000-a257-c5fbafcb8cec",
- "tags": [
- "introduction"
- ],
- "format-version": "0.1",
- "name": "GTN Training: Galaxy 101 For Everyone",
- "version": 1,
- "steps": {
- "0": {
- "tool_id": null,
- "tool_version": null,
- "outputs": [],
- "workflow_outputs": [],
- "input_connections": {},
- "tool_state": "{\"name\": \"iris\"}",
- "id": 0,
- "uuid": "6a8d8ca9-8bb9-414b-af8f-50d07721b363",
- "errors": null,
- "name": "Input dataset",
- "label": "iris",
- "inputs": [
- {
- "name": "iris",
- "description": ""
+ "a_galaxy_workflow": "true",
+ "annotation": "introduction",
+ "format-version": "0.1",
+ "name": "GTN Training: Galaxy 101 For Everyone",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "iris"
+ }
+ ],
+ "label": "iris",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 405
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "6a8d8ca9-8bb9-414b-af8f-50d07721b363",
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "",
+ "content_id": "csv_to_tabular",
+ "errors": null,
+ "id": 1,
+ "input_connections": {
+ "csv": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Convert CSV to tabular",
+ "outputs": [
+ {
+ "name": "tabular",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 439,
+ "top": 376
+ },
+ "post_job_actions": {
+ "HideDatasetActiontabular": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "tabular"
+ }
+ },
+ "tool_id": "csv_to_tabular",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"csv\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\"}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "f4c788f6-afb6-4350-9531-b6830601dbae",
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.1.0",
+ "errors": null,
+ "id": 2,
+ "input_connections": {
+ "in_file": {
+ "id": 1,
+ "output_name": "tabular"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Datamash",
+ "outputs": [
+ {
+ "name": "out_file",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 547,
+ "top": 200
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.1.0",
+ "tool_shed_repository": {
+ "changeset_revision": "562f3c677828",
+ "name": "datamash_ops",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"operations\": [{\"__index__\": 0, \"op_name\": \"mean\", \"op_column\": \"1\"}, {\"__index__\": 1, \"op_name\": \"sstdev\", \"op_column\": \"1\"}, {\"__index__\": 2, \"op_name\": \"mean\", \"op_column\": \"2\"}, {\"__index__\": 3, \"op_name\": \"sstdev\", \"op_column\": \"2\"}, {\"__index__\": 4, \"op_name\": \"mean\", \"op_column\": \"3\"}, {\"__index__\": 5, \"op_name\": \"sstdev\", \"op_column\": \"3\"}, {\"__index__\": 6, \"op_name\": \"mean\", \"op_column\": \"4\"}, {\"__index__\": 7, \"op_name\": \"sstdev\", \"op_column\": \"4\"}], \"__page__\": null, \"ignore_case\": \"true\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"__rerun_remap_job_id__\": null, \"header_in\": \"true\", \"in_file\": {\"__class__\": \"ConnectedValue\"}, \"need_sort\": \"true\", \"print_full_line\": \"false\", \"header_out\": \"false\", \"grouping\": \"5\"}",
+ "tool_version": "1.1.0",
+ "type": "tool",
+ "uuid": "e1a58a79-5102-450e-bfe0-200f0e64139d",
+ "workflow_outputs": []
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "Remove beginning1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input": {
+ "id": 1,
+ "output_name": "tabular"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Remove beginning",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 740,
+ "top": 388
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "Remove beginning1",
+ "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"num_lines\": \"1\", \"__page__\": null}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "5a721290-088b-48c0-adf0-7645c84b6dfb",
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input": {
+ "id": 3,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 967,
+ "top": 544
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "Cut1",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"delimiter\": \"T\", \"columnList\": \"c5\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\"}",
+ "tool_version": "1.0.2",
+ "type": "tool",
+ "uuid": "1a42cbbc-aa27-464c-be01-4a8cf10e7726",
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "Grouping1",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "input1": {
+ "id": 3,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Group",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 967,
+ "top": 664
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "Grouping1",
+ "tool_state": "{\"operations\": [], \"__page__\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"ignorelines\": null, \"groupcol\": \"5\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"false\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\"}",
+ "tool_version": "2.1.4",
+ "type": "tool",
+ "uuid": "dcd3ddc6-d871-45dd-94ed-3eae56dd56db",
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "Grouping1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "input1": {
+ "id": 3,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Group",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 967,
+ "top": 784
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file1"
+ }
+ },
+ "tool_id": "Grouping1",
+ "tool_state": "{\"operations\": [{\"opcol\": \"1\", \"__index__\": 0, \"optype\": \"length\", \"opround\": \"no\", \"opdefault\": \"\"}], \"__page__\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"ignorelines\": null, \"groupcol\": \"5\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"false\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\"}",
+ "tool_version": "2.1.4",
+ "type": "tool",
+ "uuid": "285a3852-71cb-4137-be1d-b0ec0a59d9e9",
+ "workflow_outputs": []
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input1": {
+ "id": 3,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Scatterplot w ggplot2",
+ "name": "input1"
+ }
+ ],
+ "label": null,
+ "name": "Scatterplot w ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ },
+ {
+ "name": "output2",
+ "type": "pdf"
+ }
+ ],
+ "position": {
+ "left": 967,
+ "top": 904
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output1"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "9cec81e1b90e",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"axis_title_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"plot_title_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"transform\": \"none\", \"scaling\": {\"plot_scaling\": \"Automatic\", \"__current_case__\": 0}, \"theme\": \"bw\", \"points\": {\"pointcolor\": \"black\", \"alpha\": \"1.0\", \"pointoptions\": \"defined\", \"__current_case__\": 1, \"size\": \"2.0\"}, \"factor\": {\"factorcol\": \"5\", \"colororder\": \"1\", \"colors\": \"Set2\", \"__current_case__\": 1, \"factoring\": \"Single\"}, \"axis_text_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"type\": \"points\", \"legend\": \"yes\", \"gridlinecust\": \"default\"}, \"xlab\": \"Sepal length\", \"input1\": {\"__class__\": \"RuntimeValue\"}, \"title\": \"Sepal length as a function of sepal width\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"1\", \"yplot\": \"2\", \"ylab\": \"Sepal width\", \"out\": {\"additional_output_format\": \"pdf\", \"width_output_dim\": \"7.0\", \"dpi_output_dim\": \"300.0\", \"height_output_dim\": \"7.0\", \"unit_output_dim\": \"in\"}}",
+ "tool_version": "2.2.1+galaxy1",
+ "type": "tool",
+ "uuid": "fa412209-5e3a-448e-8b0b-fa88870e19c1",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output2",
+ "uuid": "adcb2e3f-ee9a-41c5-9b5a-ceb2afcb5e7f"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input1": {
+ "id": 3,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Scatterplot w ggplot2",
+ "name": "input1"
+ }
+ ],
+ "label": null,
+ "name": "Scatterplot w ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ },
+ {
+ "name": "output2",
+ "type": "pdf"
+ }
+ ],
+ "position": {
+ "left": 967,
+ "top": 1024
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput1": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output1"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "9cec81e1b90e",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"axis_title_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"plot_title_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"transform\": \"none\", \"scaling\": {\"plot_scaling\": \"Automatic\", \"__current_case__\": 0}, \"theme\": \"bw\", \"points\": {\"pointcolor\": \"black\", \"alpha\": \"1.0\", \"pointoptions\": \"defined\", \"__current_case__\": 1, \"size\": \"2.0\"}, \"factor\": {\"factorcol\": \"5\", \"colororder\": \"1\", \"colors\": \"Set2\", \"__current_case__\": 1, \"factoring\": \"Single\"}, \"axis_text_customization\": {\"__current_case__\": 0, \"axis_customization\": \"default\"}, \"type\": \"points\", \"legend\": \"yes\", \"gridlinecust\": \"default\"}, \"xlab\": \"Petal length\", \"input1\": {\"__class__\": \"RuntimeValue\"}, \"title\": \"Petal length as a function of petal width\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"3\", \"yplot\": \"4\", \"ylab\": \"Petal width\", \"out\": {\"additional_output_format\": \"pdf\", \"width_output_dim\": \"7.0\", \"dpi_output_dim\": \"300.0\", \"height_output_dim\": \"7.0\", \"unit_output_dim\": \"in\"}}",
+ "tool_version": "2.2.1+galaxy1",
+ "type": "tool",
+ "uuid": "8e7b3f8d-2b8b-46cb-93d4-0d74bf54441e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output2",
+ "uuid": "a39df0d2-253a-4db2-b86d-9cbf0e5c1822"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/1.1.0",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "infile": {
+ "id": 4,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Unique",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1187,
+ "top": 424
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/1.1.0",
+ "tool_shed_repository": {
+ "changeset_revision": "0a8c6b61f0f4",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"ignore_case\": \"false\", \"adv_opts\": {\"adv_opts_selector\": \"basic\", \"__current_case__\": 0}, \"__rerun_remap_job_id__\": null, \"is_numeric\": \"false\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"infile\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.1.0",
+ "type": "tool",
+ "uuid": "3e2d84e9-2fff-4321-9db6-41af99367162",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outfile",
+ "uuid": "ec39c0ad-eb26-47d6-b9a7-c2e5ffa043c6"
+ }
+ ]
}
- ],
- "position": {
- "top": 405,
- "left": 200
- },
- "annotation": "",
- "content_id": null,
- "type": "data_input"
},
- "1": {
- "tool_id": "csv_to_tabular",
- "tool_version": "1.0.0",
- "outputs": [
- {
- "type": "tabular",
- "name": "tabular"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "csv": {
- "output_name": "output",
- "id": 0
- }
- },
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"csv\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
- "id": 1,
- "uuid": "f4c788f6-afb6-4350-9531-b6830601dbae",
- "errors": null,
- "name": "Convert CSV to tabular",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 376,
- "left": 439
- },
- "annotation": "",
- "content_id": "csv_to_tabular",
- "type": "tool"
- },
- "2": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.1.0",
- "tool_version": "1.1.0",
- "outputs": [
- {
- "type": "tabular",
- "name": "out_file"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "in_file": {
- "output_name": "tabular",
- "id": 1
- }
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"op_column\\\": \\\"1\\\", \\\"op_name\\\": \\\"mean\\\"}, {\\\"__index__\\\": 1, \\\"op_column\\\": \\\"1\\\", \\\"op_name\\\": \\\"sstdev\\\"}, {\\\"__index__\\\": 2, \\\"op_column\\\": \\\"2\\\", \\\"op_name\\\": \\\"mean\\\"}, {\\\"__index__\\\": 3, \\\"op_column\\\": \\\"2\\\", \\\"op_name\\\": \\\"sstdev\\\"}, {\\\"__index__\\\": 4, \\\"op_column\\\": \\\"3\\\", \\\"op_name\\\": \\\"mean\\\"}, {\\\"__index__\\\": 5, \\\"op_column\\\": \\\"3\\\", \\\"op_name\\\": \\\"sstdev\\\"}, {\\\"__index__\\\": 6, \\\"op_column\\\": \\\"4\\\", \\\"op_name\\\": \\\"mean\\\"}, {\\\"__index__\\\": 7, \\\"op_column\\\": \\\"4\\\", \\\"op_name\\\": \\\"sstdev\\\"}]\", \"__page__\": null, \"ignore_case\": \"\\\"true\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"header_in\": \"\\\"true\\\"\", \"in_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"need_sort\": \"\\\"true\\\"\", \"print_full_line\": \"\\\"false\\\"\", \"header_out\": \"\\\"false\\\"\", \"grouping\": \"\\\"5\\\"\"}",
- "id": 2,
- "tool_shed_repository": {
- "owner": "iuc",
- "changeset_revision": "562f3c677828",
- "name": "datamash_ops",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "e1a58a79-5102-450e-bfe0-200f0e64139d",
- "errors": null,
- "name": "Datamash",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 200,
- "left": 547
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.1.0",
- "type": "tool"
- },
- "3": {
- "tool_id": "Remove beginning1",
- "tool_version": "1.0.0",
- "outputs": [
- {
- "type": "input",
- "name": "out_file1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input": {
- "output_name": "tabular",
- "id": 1
- }
- },
- "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"num_lines\": \"\\\"1\\\"\", \"__page__\": null}",
- "id": 3,
- "uuid": "5a721290-088b-48c0-adf0-7645c84b6dfb",
- "errors": null,
- "name": "Remove beginning",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 388,
- "left": 740
- },
- "annotation": "",
- "content_id": "Remove beginning1",
- "type": "tool"
- },
- "4": {
- "tool_id": "Cut1",
- "tool_version": "1.0.2",
- "outputs": [
- {
- "type": "tabular",
- "name": "out_file1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input": {
- "output_name": "out_file1",
- "id": 3
- }
- },
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"columnList\": \"\\\"c5\\\"\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
- "id": 4,
- "uuid": "1a42cbbc-aa27-464c-be01-4a8cf10e7726",
- "errors": null,
- "name": "Cut",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 544,
- "left": 967
- },
- "annotation": "",
- "content_id": "Cut1",
- "type": "tool"
- },
- "5": {
- "tool_id": "Grouping1",
- "tool_version": "2.1.3",
- "outputs": [
- {
- "type": "tabular",
- "name": "out_file1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input1": {
- "output_name": "out_file1",
- "id": 3
- }
- },
- "tool_state": "{\"operations\": \"[]\", \"__page__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"ignorelines\": \"null\", \"groupcol\": \"\\\"5\\\"\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
- "id": 5,
- "uuid": "dcd3ddc6-d871-45dd-94ed-3eae56dd56db",
- "errors": null,
- "name": "Group",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 664,
- "left": 967
- },
- "annotation": "",
- "content_id": "Grouping1",
- "type": "tool"
- },
- "6": {
- "tool_id": "Grouping1",
- "tool_version": "2.1.3",
- "outputs": [
- {
- "type": "tabular",
- "name": "out_file1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input1": {
- "output_name": "out_file1",
- "id": 3
- }
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"opcol\\\": \\\"1\\\", \\\"opdefault\\\": \\\"\\\", \\\"opround\\\": \\\"no\\\", \\\"optype\\\": \\\"length\\\"}]\", \"__page__\": null, \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"ignorelines\": \"null\", \"groupcol\": \"\\\"5\\\"\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
- "id": 6,
- "uuid": "285a3852-71cb-4137-be1d-b0ec0a59d9e9",
- "errors": null,
- "name": "Group",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 784,
- "left": 967
- },
- "annotation": "",
- "content_id": "Grouping1",
- "type": "tool"
- },
- "7": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
- "tool_version": "2.2.1+galaxy1",
- "outputs": [
- {
- "type": "png",
- "name": "output1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input1": {
- "output_name": "out_file1",
- "id": 3
- }
- },
- "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"5\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"Sepal length\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Sepal length as a function of sepal width\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Sepal width\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
- "id": 7,
- "tool_shed_repository": {
- "owner": "iuc",
- "changeset_revision": "9cec81e1b90e",
- "name": "ggplot2_point",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "fa412209-5e3a-448e-8b0b-fa88870e19c1",
- "errors": null,
- "name": "Scatterplot w ggplot2",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 904,
- "left": 967
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
- "type": "tool"
- },
- "8": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
- "tool_version": "2.2.1+galaxy1",
- "outputs": [
- {
- "type": "png",
- "name": "output1"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "input1": {
- "output_name": "out_file1",
- "id": 3
- }
- },
- "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"5\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"Petal length\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Petal length as a function of petal width\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"3\\\"\", \"yplot\": \"\\\"4\\\"\", \"ylab\": \"\\\"Petal width\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
- "id": 8,
- "tool_shed_repository": {
- "owner": "iuc",
- "changeset_revision": "9cec81e1b90e",
- "name": "ggplot2_point",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "8e7b3f8d-2b8b-46cb-93d4-0d74bf54441e",
- "errors": null,
- "name": "Scatterplot w ggplot2",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 1024,
- "left": 967
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy1",
- "type": "tool"
- },
- "9": {
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/1.1.0",
- "tool_version": "1.1.0",
- "outputs": [
- {
- "type": "input",
- "name": "outfile"
- }
- ],
- "workflow_outputs": [],
- "input_connections": {
- "infile": {
- "output_name": "out_file1",
- "id": 4
- }
- },
- "tool_state": "{\"__page__\": null, \"ignore_case\": \"\\\"false\\\"\", \"adv_opts\": \"{\\\"__current_case__\\\": 0, \\\"adv_opts_selector\\\": \\\"basic\\\"}\", \"__rerun_remap_job_id__\": null, \"is_numeric\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
- "id": 9,
- "tool_shed_repository": {
- "owner": "bgruening",
- "changeset_revision": "0a8c6b61f0f4",
- "name": "text_processing",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "uuid": "3e2d84e9-2fff-4321-9db6-41af99367162",
- "errors": null,
- "name": "Unique",
- "post_job_actions": {},
- "label": null,
- "inputs": [],
- "position": {
- "top": 424,
- "left": 1187
- },
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/1.1.0",
- "type": "tool"
- }
- },
- "annotation": "Galaxy 101 for everyone",
- "a_galaxy_workflow": "true"
-}
\ No newline at end of file
+ "tags": [
+ "introduction"
+ ],
+ "uuid": "4f508ea6-854b-4813-8043-848a259e32c2",
+ "version": 1
+}
diff --git a/topics/introduction/tutorials/galaxy-intro-101/tutorial.md b/topics/introduction/tutorials/galaxy-intro-101/tutorial.md
index 162f43e4706c6c..01ee35a3e6c849 100644
--- a/topics/introduction/tutorials/galaxy-intro-101/tutorial.md
+++ b/topics/introduction/tutorials/galaxy-intro-101/tutorial.md
@@ -231,7 +231,7 @@ Let's take a look at this dataset. The first six columns correspond to the exons
> For the first 3 exons in your file, what is the number of SNPs that fall into that exon?
>
> > ### {% icon solution %} Solution
-> > At the time of writing, for hg38/GENCODE v29, joined with "Common SNPs(151)", using ctrl-f to look for how many times each is used:
+> > At the time of writing, for hg38/GENCODE v29, joined with "Common SNPs(151)", using ctrl-f (cmd-f on macOS) to look for how many times each is used:
> >
> > Gene | Occurences
> > ---- | ----------
@@ -356,7 +356,11 @@ A good way to learn about these exons is to look at their genomic surrounding. T
>
> {% include snippets/change_dbkey.md dbkey="hg38" %}
>
-> 2. To **visualize the data in UCSC genome browser**, click on `display at UCSC main` option visible when you expand the history item.
+> 2. Second, check that the **format** of your latest history dataset is `bed`. If not, click on the {% icon galaxy-pencil %} pencil icon and modify the **Datatype** field to `bed`.
+>
+> {% include snippets/change_datatype.md datatype="bed" %}
+>
+> 3. To **visualize the data in UCSC genome browser**, click on `display at UCSC main` option visible when you expand the history item.
>
> ![`display at UCSC main` link](../../images/101_displayucsc.png)
>
diff --git a/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md b/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md
index cee91818fd0d83..ea467f1ba0a5d5 100644
--- a/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md
+++ b/topics/introduction/tutorials/galaxy-intro-ngs-data-managment/tutorial.md
@@ -105,8 +105,7 @@ It is common to prepare pair-end and mate-pair sequencing libraries. This is hig
| |
|----|
| ![Paired-end and mate-pair reads](../../images/pe_mp.png) |
-|**Paired-end and mate-pair reads**. In paired end sequencing (left) the actual ends of rather short DNA molecules (less than 1kb) are determined, while for mate pair sequencing (right) the ends of long molecules are joined and prepared in special sequencing libraries. In these mate pair protocols, the ends of long, size-selected molecules are connected with an internal adapter sequence (i.e. linker, yellow) in a circularization reaction. The circular molecule is then processed using restriction enzymes or fragmentation. Fragments are enriched for the linker and outer library adapters are added around the two combined molecule ends. The internal adapter can then be used as a second priming site for an additional sequencing reaction in the same orientation or sequencing can be performed from the second adapter, from the reverse strand. (From "Understanding and improving high-throughput
-sequencing data production and analysis", Ph.D. dissertation by [Martin Kircher](https://www.qucosa.de/fileadmin/data/qucosa/documents/7110/pflichtexemplar_final.pdf))|
+|**Paired-end and mate-pair reads**. In paired end sequencing (left) the actual ends of rather short DNA molecules (less than 1kb) are determined, while for mate pair sequencing (right) the ends of long molecules are joined and prepared in special sequencing libraries. In these mate pair protocols, the ends of long, size-selected molecules are connected with an internal adapter sequence (i.e. linker, yellow) in a circularization reaction. The circular molecule is then processed using restriction enzymes or fragmentation. Fragments are enriched for the linker and outer library adapters are added around the two combined molecule ends. The internal adapter can then be used as a second priming site for an additional sequencing reaction in the same orientation or sequencing can be performed from the second adapter, from the reverse strand. (From "Understanding and improving high-throughput sequencing data production and analysis", Ph.D. dissertation by [Martin Kircher](https://www.qucosa.de/fileadmin/data/qucosa/documents/7110/pflichtexemplar_final.pdf))|
Thus in both cases (paired-end and mate-pair) a single physical piece of DNA (or RNA in the case of RNA-seq) is sequenced from two ends and so generates two reads. These can be represented as separate files (two FASTQ files with first and second reads) or a single file were reads for each end are interleaved. Here are examples:
diff --git a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md
index aa827d477eb415..70c49f4668c2b7 100644
--- a/topics/introduction/tutorials/galaxy-intro-short/tutorial.md
+++ b/topics/introduction/tutorials/galaxy-intro-short/tutorial.md
@@ -56,7 +56,7 @@ contributors:
> > - The main Galaxy server is [usegalaxy.org](https://usegalaxy.org/)
> > - The European Galaxy server is [usegalaxy.eu](https://usegalaxy.eu/)
> >
-> > You can also find more possible Galaxy servers at the top of this tutorial in **Galaxy instances**
+> > You can also find more possible Galaxy servers at the top of this tutorial in **Available on these Galaxies**
> {: .comment}
{: .hands_on}
@@ -192,10 +192,10 @@ Let's run a tool to filter out lower-quality reads from our FASTQ file.
> ### {% icon hands_on %} Hands-on: Run another tool
-> 1. Type **Filter by quality**
+> 1. Type **Filter by quality** in the tools panel search box (top)
> 2. Click on the tool **Filter by quality** {% icon tool %}
> 3. Set the following parameters:
-> - {% icon param-file %} *"Library to filter"*: the input FASTQ file
+> - {% icon param-file %} *"Input FASTQ file"*: the input FASTQ file
> - *"Quality cut-off value"*: 35
> - *"Percent of bases in sequence that must have quality equal to / higher than cut-off value"*: 80
> 4. Click **Execute**
diff --git a/topics/metabolomics/images/BC_theo.png b/topics/metabolomics/images/lcms_BC_theo.png
similarity index 100%
rename from topics/metabolomics/images/BC_theo.png
rename to topics/metabolomics/images/lcms_BC_theo.png
diff --git a/topics/metabolomics/images/BC_theo2.png b/topics/metabolomics/images/lcms_BC_theo2.png
similarity index 100%
rename from topics/metabolomics/images/BC_theo2.png
rename to topics/metabolomics/images/lcms_BC_theo2.png
diff --git a/topics/metabolomics/images/BPC_9samp.png b/topics/metabolomics/images/lcms_BPC_9samp.png
similarity index 100%
rename from topics/metabolomics/images/BPC_9samp.png
rename to topics/metabolomics/images/lcms_BPC_9samp.png
diff --git a/topics/metabolomics/images/QM_9samp_raw.png b/topics/metabolomics/images/lcms_QM_9samp_raw.png
similarity index 100%
rename from topics/metabolomics/images/QM_9samp_raw.png
rename to topics/metabolomics/images/lcms_QM_9samp_raw.png
diff --git a/topics/metabolomics/images/group_9samp.png b/topics/metabolomics/images/lcms_group_9samp.png
similarity index 100%
rename from topics/metabolomics/images/group_9samp.png
rename to topics/metabolomics/images/lcms_group_9samp.png
diff --git a/topics/metabolomics/tutorials/lcms/tutorial.md b/topics/metabolomics/tutorials/lcms/tutorial.md
index bafde43bc3c786..1594b4bc5f8a4b 100644
--- a/topics/metabolomics/tutorials/lcms/tutorial.md
+++ b/topics/metabolomics/tutorials/lcms/tutorial.md
@@ -43,7 +43,7 @@ for metabolomic analysis, and shows how to conduct metabolomic data analysis fro
To illustrate this approach, we will use data from {% cite Thvenot2015 %}. The objectives of this paper was to analyze
the influence of age, body mass index, and gender on the urine metabolome. To do so, the authors collected samples
from 183 employees from the French Alternative Energies and Atomic Energy Commission (CEA) and performed LC-HRMS LTQ-Orbitrap
-(negative ionization mode).
+(negative ionization mode) analyses.
Since the original dataset takes a few hours to be processed, we chose to take a limited subset of individuals for this tutorial.
This will allow you to perform an example of metabolomic workflow, from pre-processing to annotation, in a limited time, even though
@@ -177,16 +177,16 @@ What we referenced here as a *sampleMetadata* file corresponds to a table contai
A sample metadata file contains various information for each of your raw files:
- **Classes** which will be used during the preprocessing steps
-- **Number of batches** which will be useful for a batch correction step, along with sample types (pool/sample) and injection order
-- Different **experimental conditions** which can be used for the statistics
+- **Analytical batches** which will be useful for a batch correction step, along with **sample types** (pool/sample) and **injection order**
+- Different **experimental conditions** which can be used for statistics
- Any information about samples that you want to keep, in a *column* format
The content of your sample metadata file has to be filled by you, since it is not contained in your raw data.
Note that you can either:
- Upload an existing metadata file
- Use a template to create one (because it can be painful to get the sample list without misspelling or omission)
- 1. Generate a template with the tool **xcms get a sampleMetadata file** {% icon tool %}
- 2. Fill it using your favorite table editor (Excel, LibreOffice)
+ 1. Generate a template with the **xcms get a sampleMetadata file** {% icon tool %} tool
+ 2. Fill it using your favorite table editor (Excel, LibreOffice etc.)
3. Upload it within Galaxy
> ### {% icon tip %} Optional: Generate the right template with **xcms get a sampleMetadata file** {% icon tool %}
@@ -219,7 +219,7 @@ use a spreadsheet software such as Microsoft Excel or LibreOffice.
> ### {% icon comment %} Important: Save your table in the correct format
>
-> The file has to be a `.tsv` (tab-separated values). Neither `.xlsx` nor `.odt` are supported.
+> The file has to be a `.txt` or a `.tsv` (tab-separated values). Neither `.xlsx` nor `.odt` are supported.
> If you use a spreadsheet software, be sure to change the default format to **Text (Tab delimited)** or equivalent.
{: .warning}
@@ -227,7 +227,7 @@ Once your sampleMetadata table is ready, you can proceed to the upload. In this
> ### {% icon tip %} Optional: Filling the *sampleMetadata* using the template obtained from Galaxy
>
-> For this tutorial, we already provide the *sampleMetadata* file, so you only have upload it to Galaxy. Below we
+> For this tutorial, we already provide the *sampleMetadata* file, so you only have to upload it to Galaxy. Below we
explain how we filled this file from the template we generated in Galaxy.
>
> First, we used **xcms get a sampleMetadata file** {% icon tool %} as mentioned in the previous tip box.
@@ -288,7 +288,7 @@ be uploaded into Galaxy.
> ### {% icon hands_on %} Hands-on: Upload the sampleMetada
>
-> 1. Import the `sampleMetadata_completed.tsv` file from Zenodo or from a shared data library
+> 1. Import the `sampleMetadata_completed.tsv` file from Zenodo or from a shared data library (ask your instructor)
> ```
> https://zenodo.org/record/3244991/files/sampleMetadata_completed.tsv
> ```
@@ -304,42 +304,47 @@ be uploaded into Galaxy.
> > ### {% icon comment %} Comment
> >
> > Here we provided the sampleMetadata file so we know that the upload led to a 'tabular' file. But from experience we also know that
-it can happen that, when uploading a sampleMetadata table, user obtained other inappropriate types of data. This is generally due to the file
+it can happen that, when uploading a sampleMetadata table, a user obtains other inappropriate types of data. This is generally due to the file
not following all the requirements about the format (*e.g.* wrong separator, or lines with different numbers of columns).
> > Thus, we highly recommend that you always take a second to check the data type after the upload. This way you can handle the problem
-right away if you appear to get one of these obvious issues.
+right away if you happen to get one of these obvious issues.
> {: .comment}
>
+> 3. Rename your sampleMetadata file with a shorter name 'sampleMetadata_completed.tsv'
>
-> > ### {% icon question %} Question
-> >
-> > 1. How many columns should I have in my sampleMetadata file?
-> > 2. What kind of class can I have?
+> {% include snippets/rename_dataset.md %}
+>
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> 1. How many columns should I have in my sampleMetadata file?
+> 2. What kind of class can I have?
+>
+> > ### {% icon solution %} Solution
> >
-> > > ### {% icon solution %} Solution
-> > >
-> > > 1. At least 2, with the identifiers and the class column. But as many as you need to describe the potential variability of your samples
-> (*e.g.* the person in charge of the sample preparation, the temperature...). The statistical analysis will expose the relevant parameters.
-> > > 2. Sample, QC, blank... The class (the 2nd column) is useful for the preprocessing step with XCMS to detect the metabolite across the samples.
-> So it can be important to separate very different types of samples, as biological ones and blank ones for example. If you don't have any specific class
-> that you want to consider in XCMS preprocessing, just fill everywhere with `sample` or a dot `.` for example.
-> > >
-> > {: .solution}
+> > 1. At least 2, with the identifiers and the class column. But as many as you need to describe the potential variability of your samples
+(*e.g.* the person in charge of the sample preparation, the temperature...). The statistical analysis will expose the relevant parameters.
+> > 2. Sample, QC, blank... The class (the 2nd column) is useful for the preprocessing step with XCMS to detect the metabolite across the samples.
+So it can be important to separate very different types of samples, as biological ones and blank ones for example. If you don't have any specific class
+that you want to consider in XCMS preprocessing, just fill everywhere with `sample` or a dot `.` for example.
> >
-> {: .question}
-{: .hands_on}
+> {: .solution}
+>
+{: .question}
+
## Getting an overview of your samples' chromatograms
You may be interested in getting an overview of what your samples' chromatograms look like, for example to see if some of
-your samples have distinct overall characteristics, for example unexpected chromatographic peaks, or huge overall intensity.
+your samples have distinct overall characteristics, *e.g.* unexpected chromatographic peaks or huge overall intensity.
You can use the *sampleMedata* file we previously uploaded to add some group colours to your samples when visualising your chromatograms.
The tool automatically takes the second column as colour groups when a file is provided.
Note that you can also check the chromatograms at any moment during the workflow, in particular at the following steps:
- - After **MSnbase readMSData** {% icon tool %}: to help you to define retention time ranges that you may want to discard from the very beginning (*"Specta Filters"* in **findChromPeaks** {% icon tool %})
- - After **adjustRtime** {% icon tool %}: to check the result of the correction (rerun *adjustRtime* with other settings)
+ - After **MSnbase readMSData** {% icon tool %} to help you to define retention time ranges that you may want to discard from the very beginning (*"Spectra Filters"* in **findChromPeaks** {% icon tool %})
+ - After **adjustRtime** {% icon tool %} to check the result of the correction (and potentially rerun *adjustRtime* with other settings)
> ### {% icon hands_on %} Hands-on: xcms plot chromatogram
>
@@ -351,15 +356,16 @@ Note that you can also check the chromatograms at any moment during the workflow
>
> > ### {% icon comment %} Comment
> >
-> > If you use this tool at a later step of XCMS workflow and provided in the Merger step a sampleMetadata with a second column containing groups
-(see further in this tutorial), you will get colouring according to these groups even without providing a sampleMetadata file as a 'plot chromatogram' parameter.
+> > If you use this tool at a later step of the XCMS workflow while having provided, in the Merger step (see further in this tutorial), a sampleMetadata with a second column containing groups,
+you will get colouring according to these groups even without providing a sampleMetadata file as a 'plot chromatogram' parameter.
> {: .comment}
>
{: .hands_on}
-This tool generates Base Peak Intensity Chromatograms (BPIs) and Total Ion Chromatograms (TICs). If you provided groups like we do here, you obtain two plots: one with colours based on provided groups, one with one colour per sample.
+This tool generates Base Peak Intensity Chromatograms (BPIs) and Total Ion Chromatograms (TICs). If you provide groups as we do here, you obtain two plots:
+one with colours based on provided groups, one with one colour per sample.
-![Base Peak Intensity Chromatograms](../../images/BPC_9samp.png)
+![Base Peak Intensity Chromatograms](../../images/lcms_BPC_9samp.png)
## First XCMS step: *peak picking*
@@ -370,7 +376,7 @@ independently. The idea here is, for each peak, to proceed to chromatographic pe
The XCMS solution provides two different algorithms to perform chromatographic peak detection: *matchedFilter* and
*centWave*. The matchedFilter strategy is the first one provided by the XCMS R package. It is compatible with any
LC-MS device, but was developed at a time when high resolution mass spectrometry was not common standard yet. On the
-other side, the **centWave** algorithm {% cite Tautenhahn2008 %} was specifically developed for high resolution mass spectrometry, dedicated to
+other side, the **centWave** algorithm ({% cite Tautenhahn2008 %}) was specifically developed for high resolution mass spectrometry, dedicated to
data in centroid mode. In this tutorial, you will practice using the centWave algorithm.
@@ -413,17 +419,17 @@ ranges, or *Noise filter* (as in this hands-on) not to use low intensity measure
>
{: .hands_on}
-At this step, you obtained a dataset collection containing one `RData` file per sample, with independent lists of ions. Although this
-is already a nice result, what you may want now is to get all this files together to identify which are the shared ions between samples.
+At this step, you obtain a dataset collection containing one `RData` file per sample, with independent lists of ions. Although this
+is already a nice result, what you may want now is to get all these files together to identify which ions are shared between samples.
To do so, XCMS provides a function that is called *groupChromPeaks* (or group). But before proceeding to this grouping step, first you
need to group your individual RData files into a single one.
-## Merge the different samples in one dataset
+## Gathering the different samples in one RData file
A dedicated tool exists to merge the different `RData` files into a single one: **xcms findChromPeaks Merger** {% icon tool %}. Although you can simply take as
input your dataset collection alone, the tool also provides de possibility to take into account a sampleMetadata file. Indeed,
-depending of your analytical sequence, you may want to treat part of your samples a different way when proceeding to the grouping step **xcms groupChromPeaks (group)** {% icon tool %}.
+depending on your analytical sequence, you may want to treat part of your samples in a different way when proceeding to the grouping step using **xcms groupChromPeaks (group)** {% icon tool %}.
This can be the case for example if you have in your analytical sequence some blank samples (your injection solvent) that you want to
extract along with your biological samples to be able to use them as a reference for noise estimation and noise filtering. The fact that
@@ -454,7 +460,7 @@ To obtain such a table, we need to determine, among the individual ion lists, wh
'grouping'.
The group function aligns ions extracted with close retention time and close m/z values in the different samples. In order to define this
-similarity, we have to define on one hand a m/z windows and on the other hand a retention time window. A binning is then performed in the
+similarity, we have to define on one hand a m/z window and on the other hand a retention time window. A binning is then performed in the
mass domain. The size of the bins is called width of overlapping m/z slices. You have to set it according to your mass spectrometer resolution.
Then, a kernel density estimator algorithm is used to detect region of retention time with high density of ions. This algorithm uses a Gaussian
@@ -475,16 +481,16 @@ than a given number of samples. Either a percentage of the total number of sampl
{: .hands_on}
-This grouping step is very important because it defines the final data matrix which will be used especially for the statistical analyses.
+This grouping step is very important because it defines the data matrix which will be used especially for the statistical analyses.
User has to check the effect of parameter values on the result.
In order to check the result of the grouping function, a pdf file is created. It provides one plot per m/z slice found in the data. Each picture
represents the peak density across samples, plotting the corresponding Gaussian model which width is defined by the bandwidth parameter. Each red
-dot corresponds to a sample. The plot allows to assess the quality of alignment. The vertical grey line width is associated with the bandwidth parameter.
+dot corresponds to a sample. The plot allows you to assess the quality of the alignment. The grey areas' width is associated with the bandwidth parameter.
-Hear is an example of two m/z slides obtained from the hands-on:
+Here is an example of two m/z slices obtained from the hands-on:
-![plotChromPeakDensity.pdf](../../images/group_9samp.png)
+![plotChromPeakDensity.pdf](../../images/lcms_group_9samp.png)
> ### {% icon question %} Questions
@@ -496,7 +502,7 @@ Hear is an example of two m/z slides obtained from the hands-on:
> >
> > 1. There are 3 peak groups in this m/z slice. The two peaks that are not assigned to peak groups are alone in their retention time area. Thus,
the number of samples under the corresponding density peaks does not reach the minimum fraction of samples set by the user (0.5) to consider a peak group.
-> > 2. If the bandwidth value had been set to a smaller value, the density peak width would have been smaller. With a small enough bandwidth value,
+> > 2. If the bandwidth value had been set to a smaller value, the density peak width would have been smaller. With a small-enough bandwidth value,
there could have been two density peaks instead of one under the current first density peak. Thus, the sample in line 5 would have been out of the
previous peak group, thus not assigned to any peak group due to the 0.5 minimum fraction limit.
> >
@@ -504,6 +510,10 @@ previous peak group, thus not assigned to any peak group due to the 0.5 minimum
>
{: .question}
+When looking at the plots from plotChromPeakDensity.pdf, we can notice that in some cases there seems to be a small drift of retention time for some samples.
+This phenomenon is well known with LC-MS techniques. To be able to attribute correct groups for peaks, it may be needed to perform some retention time
+correction across samples. Thus, the idea is (when needed) to apply a retention time correction strategy on the output of your grouping step, then to perform
+a second grouping step on the corrected data.
## Optional XCMS step: *retention time correction*
@@ -536,13 +546,14 @@ The algorithm uses statistical smoothing methods. You can choose between linear
> >
> > If you have a very large number of samples (*e.g.* a thousand), it might be impossible to find peaks that are present in 100% of your samples.
If that is the case and you still set a very high value for the minimum required fraction of samples, the tool can not complete successfully the retention
-time correction.
+time correction. Special attention should also be given to this parameter when you expect a large number of peaks not to be present in part of your samples
+(*e.g.* when dealing with some blank samples).
> {: .comment}
>
{: .hands_on}
-This tool generates a plot output that you can use to visualise how retention time was apply across the samples and along the chromatogram.
+This tool generates a plot output that you can use to visualise how retention time was applied across the samples and along the chromatogram.
It also allows you to check whether the well behaved peaks were distributed homogeneously along the chromatogram.
> ### {% icon tip %} Tip: Check the impact of RT correction using 'xcms plot chromatogram'
@@ -567,7 +578,8 @@ correction by comparing the chromatogram you obtained previously to a new one ge
The retention time correction step is not mandatory. However, when it is used retention time are modified.
-Consequently, applying this step on your data requires to complete it with an additional 'grouping' step: **xcms groupChromPeaks (group)**
+Consequently, applying this step on your data requires you to complete it with an additional 'grouping' step using the
+**xcms groupChromPeaks (group)** {% icon tool %} tool again.
Parameters for this second group step are expected to be similar to the first group step. Nonetheless,
since retention times are supposed to be less variable inside a same peak group now, in some cases it can be relevant to
@@ -606,7 +618,7 @@ sample. What do you notice when looking at the intensity of the first ion regard
> > ### {% icon solution %} Solution
> >
> > 1. The final grouping step led to 5815 ions.
-> > 2. The first ion (M58T69) has an 'NA' value for the first sample (QC1_014). This is also the case for several other ions
+> > 2. The first ion (M58T69) has a 'NA' value for the first sample (QC1_014). This is also the case for several other ions
and samples.
> >
> {: .solution}
@@ -627,7 +639,8 @@ of NAs in your data. Indeed, this will allow you to check whether your results a
may want to go back to some of your parameter choices in previous XCMS steps.
> To perform your NA diagnosis, you can use the variableMetadata file and dataMatrix file that you obtained with the last grouping step
with the 'Get the Peak List' option to `Yes`. The variableMetadata file contains information about your ions: you will find information
-about the number of peaks detected for each ion. The dataMatrix files contains the intensities for each ion and each sample.
+about the number of peaks detected for each ion. The dataMatrix file contains the intensities for each ion and each sample; you can get
+an overview of it using the **Intensity Check** {% icon tool %} module.
{: .comment}
@@ -660,8 +673,8 @@ your data:
- A *dataMatrix* file (with the intensities)
- A *variableMetadata* file (with information about ions such as retention times, m/z)
-Nonetheless, before proceeding with the next step in the workflow (processing and filtering of your data), you can add an optional step
-with **CAMERA.annotate** {% icon tool %}. This tool uses the CAMERA R package to perform a first annotation of your data based on XCMS outputs.
+Nonetheless, before proceeding with the next step in the workflow (processing and filtering of your data), you can add an optional step with the
+**CAMERA.annotate** {% icon tool %} module. This tool uses the CAMERA R package to perform a first annotation of your data based on XCMS outputs.
## Annotation with CAMERA [Optional]
@@ -669,11 +682,11 @@ with **CAMERA.annotate** {% icon tool %}. This tool uses the CAMERA R package to
This last step provides annotation of isotopes, adducts and neutral losses. It gives also some basic univariate statistics in case you
considered several groups for your XCMS extraction.
-There is a huge number of parameters that will not be detailed in this short tutorial. However most of the default values are suitable
-to run this function for a first attempt. Nevertheless, a few parameters have to be set at each run:
+There is a huge number of parameters that will not be detailed in this short tutorial. However, most of the default values can be kept here
+for a first attempt to run this function. Nevertheless, a few parameters have to be set at each run:
- The polarity has to be set since it affects annotation.
- For statistical analysis, you have to define if you have two or more conditions to compare. These conditions had to be defined in the
- sample metadata uploaded with your sample files.
+ sample metadata uploaded previously and taken into account in the XCMS workflow.
- You can define how many significant ions will be used for extracted ions chromatogram (EIC) plot. These plots will be included in a pdf file.
Apart from the PDF file, the main three outcomes from **CAMERA.annotate** {% icon tool %} are three columns added in the variableMetadata file:
@@ -708,10 +721,10 @@ corresponding information in your variableMetadata file for later use.
# Stopover: debriefing and preparation for next steps
-At the end of the Preprocessing, you should have three tabulation-separated tables:
- - A **sampleMetadata**: given and completed by the user
- - A **dataMatrix**: from XCMS.fillChromPeaks
- - A **variableMetadata** from either XCMS.fillChromPeaks or CAMERA.annotate
+At the end of the Preprocessing step, you should have three tabulation-separated tables:
+ - A **sampleMetadata** file given and completed by the user
+ - A **dataMatrix** file from XCMS.fillChromPeaks
+ - A **variableMetadata** file from either XCMS.fillChromPeaks or CAMERA.annotate
Concerning the **sampleMetadata** file, for the next steps of the workflow, there are four columns that are mandatory to go through all the analysis:
- `injectionOrder`: a numerical column of injection order
@@ -752,7 +765,7 @@ long and thus may reduce the names' readability. Hence, we highly recommend you
# Data processing: quality checks, normalisation, data filtering
-In the previous step of LC-MS workflow, you saw how to extract features from your acquisition files. This data is
+In the previous step of the LC-MS workflow, you saw how to extract features from your acquisition files. This data is
shaped in a format allowing the use of various standard statistical methods. However, being able to perform a
statistical analysis does not mean necessarily being able to highlight relevant information. Indeed, data are often affected
by various sources of unwanted variability. It can limit the effectiveness of statistical methods, leading sometimes to
@@ -771,7 +784,7 @@ In this tutorial, we chose to limit the data processing to 3 steps:
## Step 1: global variability in the data
-Commonly, LC-MS analysis generates a significant number of variables (hundreds to thousands). Getting a complete view of
+Commonly, LC-MS analyses generate a significant number of variables (hundreds to thousands). Getting a complete view of
such dataset may not be an easy task, but getting a glimpse of it is possible using some common unsupervised multivariate
analysis. One of the most commonly used method is the **Principal Components Analysis** (PCA). You can get a basic PCA along with
over useful information using the **Quality Metrics** {% icon tool %} tool.
@@ -787,14 +800,14 @@ over useful information using the **Quality Metrics** {% icon tool %} tool.
For a first overview of your data, you can focus on the graphical output of this tool: **Quality_Metrics_figure.pdf**.
It provides a variety of useful information:
- - Summary of the intensities in the dataMatrix file
- - View of these intensities with a color scale
- - 2-components PCA score plot to check for clusters or outliers
- - Sum of intensities per sample according to injection order to check the presence of signal drift or batch effect
- - Z-scores for intensity distribution and proportion of missing values
- - Ions' standard deviation (sd) and mean values
+ - Summary of the intensities in the dataMatrix file (information.txt file and plot top center paragraph)
+ - View of these intensities with a color scale (plot bottom right panel)
+ - 2-components PCA score plot to check for clusters or outliers (plot top left panel)
+ - Sum of intensities per sample according to injection order to check the presence of signal drift or batch effect (plot top center panel)
+ - Z-scores for intensity distribution and proportion of missing values (plot bottom left panel)
+ - Ions' standard deviation (sd) and mean values (plot top right panel)
-![Quality_Metrics_figure.pdf](../../images/QM_9samp_raw.png)
+![Quality_Metrics_figure.pdf](../../images/lcms_QM_9samp_raw.png)
> ### {% icon question %} Question time: cross-referencing information
>
@@ -814,7 +827,7 @@ there are no biological differences. Thus, comparing samples becomes difficult a
Consequently, with only a quarter of the ions being stable regarding pool intensities, performing statistical analyses on
this full dataset would probably lead to unreliable results.
> > 2. We can see in the figure that the global intensity of samples seems to decrease with the injection order. In particular,
-the fact that the pooled samples' intensities decreases leads us to suspect a signal drift due to the clogging effect of successive
+the fact that the pooled samples' intensities decrease leads us to suspect a signal drift due to the clogging effect of successive
injection of samples.
> > This signal drift could be the reason why so many ions in the dataset led to high CV values for pools, since it prevents
at least part of the ions to be stable regarding pools' intensities.
@@ -828,16 +841,16 @@ the dataset may be due to the signal drift.
>
{: .question}
-## Step 2: handling the signal drift observed although the analytical sequence
+## Step 2: handling the signal drift observed all through the analytical sequence
It is known that when injecting successively a large number of samples, the system tends to get dirty, and this may cause a measure drift.
To prevent inability to catch signal anymore, in case of large injection series, the sequence is generally divided into several batches
and the source is cleaned between batches. Unfortunately, these signal drift and batch design can add significant variability in data,
making sample comparison complicated. In case data is impacted by these effects, it is highly recommended to normalise the data to get
-rid of these unwanted effects.
+rid of these unwanted variabilities.
-In our case study, we saw that the data seemed to be affected by signal drift. Thus, we will use **Batch_correction** {% icon tool %} to
-get rid of it.
+In our case study, we saw that the data seemed to be affected by signal drift. Thus, we will use the **Batch_correction** {% icon tool %} tool
+to get rid of it.
> ### {% icon hands_on %} Hands-on: Data normalisation
>
@@ -849,7 +862,7 @@ get rid of it.
> - *"Factor of interest "*: `gender`
>
> > ### {% icon comment %} Comment
-> > The choice of the type of regression model to use depends on several parameters.
+> > The choice of type of regression model to use depends on several parameters.
> > In this case-study, since we only have 3 pools, there are only two possible choices: *linear* or *all loess sample*
> > When possible, we recommend to use pools to correct the signal drift, that is why we chose to run the tool with *linear*.
> {: .comment}
@@ -860,12 +873,12 @@ get rid of it.
For each ion independently, the normalisation process works as described in the following picture:
-![How this works](../../images/BC_theo.png)
+![How this works](../../images/lcms_BC_theo.png)
The methodology is meant to correct for signal drift. In the tool, it is combined with a correction for batch effect. Thus, if your
-sequence is divided into several batches, the idea is to obtain something like the following:
+sequence is divided into several batches, the idea is to obtain something similar to the following picture:
-![Before/after picture](../../images/BC_theo2.png)
+![Before/after picture](../../images/lcms_BC_theo2.png)
In the case of *linear* regression model, the tool performs some tests before applying the normalisation for quality purposes.
For some ions, if the normalisation process would have led to inconsistent results, the concerned ions are not corrected for signal drift.
@@ -902,7 +915,7 @@ by biological variability. Thus, we can filter the ions that do not respect this
{: .hands_on}
The tool provides a variableMetadata tabular output, containing all the computed CV values. You can then use these values to filter
-your data using **Generic_Filter** {% icon tool %}.
+your data using the **Generic_Filter** {% icon tool %} tool.
> ### {% icon hands_on %} Hands-on: Data filtering
@@ -939,15 +952,21 @@ statistical analysis, and you do not need the pools anymore since they do not pa
> ### {% icon question %} Questions
>
-> 1. What does the *1.0* threshold mean in the hands-on exercise you just executed?
-> 2. How many variables are left in your dataset? How many samples?
+> 1. What does the *0.3* threshold mean in the hands-on exercise you just executed?
+> 2. What does the *1.0* threshold mean in the hands-on exercise you just executed?
+> 3. How many variables are left in your dataset? How many samples?
>
> > ### {% icon solution %} Solution
> >
-> > 1. The *1.0* value corresponds to the maximum value kept in the dataset ('Interval of values to remove: *upper*') regarding the
+> > 1. The *0.3* value corresponds to the maximum value kept in the dataset ('Interval of values to remove: *upper*') regarding the
+*poolCV* column in your *Variable metadata* file. As mentioned previously in 'Step 1: global variability in the data' section,
+pool CV values are commonly considered as reflecting unstable ions when above 0.3.
+Although the signal drift correction decreased the proportion of ions with pool CV > 30% from 74% to 53%, we still need to get
+rid of these remaining unstable ions for which sample comparisons would be difficult and at high risk of being unreliable.
+> > 2. The *1.0* value corresponds to the maximum value kept in the dataset ('Interval of values to remove: *upper*') regarding the
*poolCV_over_sampleCV* column in your *Variable metadata* file. This means that any ion with a pool CV / sample CV ratio above 1
(*i.e.* a pool CV greater than the sample CV) is discarded from the dataset.
-> > 2. Filtering led to 2706 ions and 6 samples.
+> > 3. Filtering led to 2706 ions and 6 samples.
> >
> {: .solution}
>
@@ -955,7 +974,6 @@ statistical analysis, and you do not need the pools anymore since they do not pa
-
# Statistical analysis to find variables of interest
The question of data filtering and correction must be addressed in all projects, even thought in some cases it may lead to
@@ -969,15 +987,15 @@ that the choice of your statistical analysis strategy depends on both your data
dataset size) and your study design. You should think carefully about what is appropriate for your own project.
In this tutorial, we will take the example of univariate analysis, using the `bmi` column of the **sampleMetadata file** as
-our variable of interest (body mass index). Since this variable is quantitative, we will chose in this example to measure
-the link between the BMI and the measured ions using **statistical correlation calculation**. For more examples of
+the study's biological factor investigated (body mass index). Since this variable is quantitative, we will choose in this example to measure
+the link between the BMI and the measured ions using a **statistical correlation calculation**. For more examples of
statistical analysis performed on LC-MS data, you can take a few minutes to watch the [usemetabo.org](https://usemetabo.org) open course video
[here](https://usemetabo.org/courses/w4mlc-ms-statistical-analysis).
## Computation of statistical indices
-First step is to compute the correlation coefficients used to estimate the link between the variable of interest `bmi`
-and the ions that we have in our dataset. For this calculation we can use **Univariate** {% icon tool %}.
+The first step is to compute the correlation coefficients used to estimate the link between the biological variable `bmi`
+and the ions that we have in our dataset. For this calculation we can use the **Univariate** {% icon tool %} tool.
> ### {% icon hands_on %} Hands-on: Statistical analysis
>
@@ -995,8 +1013,8 @@ and the ions that we have in our dataset. For this calculation we can use **Univ
based on a relevant statistical strategy (which would more likely be to *use* multiple testing correction). It is based on
the fact that with only 6 biological samples in a dataset of 2706 ions it is almost impossible to settle for correlation
coefficients significantly different from zero. Consequently, to illustrate better the filtering step that will follow,
-we chose not to apply the multiple testing correction, allowing us to obtain 'significant' results regarding statistical
-indices.
+we chose not to apply the multiple testing correction, allowing us to obtain something that looks like 'significant' results
+regarding statistical indices.
> {: .comment}
>
{: .hands_on}
@@ -1026,7 +1044,7 @@ generally associated with thresholds allowing us to determine which ions should
In our example of correlation analysis, two indices can be used to filter the data.
- **P-values:** it indicates whether it is likely for a given correlation coefficient not to be actually different from zero;
considering a threshold of 0.05 generally corresponds to a misleading risk of 5%.
- - **Correlation coefficient:** it indicates if the correlation between a given ion and the variable of interest is strong or not;
+ - **Correlation coefficient:** it indicates if the correlation between a given ion and the biological factor is strong or not;
it goes from -1 to 1, with 0 meaning no correlation; in our example we consider as a sufficiently strong link a coefficient with
absolute value above 0.9.
@@ -1063,7 +1081,7 @@ In this tutorial, the statistical filtering led to 25 remaining ions, linked to
# Annotation
-Now that you have a short list of interesting ions, you may be interested in knowing from which molecules these ions come from.
+Now that you have a short list of interesting ions, you may be interested in knowing which molecules these ions come from.
Identification is generally a difficult and time-consuming step. To help you in that process or to get a potential first glance
of the nature of your selected subset, annotation can be a first valuable step.
@@ -1072,8 +1090,9 @@ it can help you save a lot of time giving you hints about what to search for. Th
reference mass bank face to face. This will give you potential origins of your ions.
To be able to perform annotation, you will 'only' need to gather the mass list of your subset of ions, a reference bank, and a tool
-to proceed to the matching. The use of 'only' is tricky here, since the subset of ions may be the only thing that is turnkey at this
-step of the workflow (if you consider the previous steps are all cleared now).
+to proceed to the matching. The use of 'only' is tricky here, since even the subset of ions to use may not be turnkey at this step
+of the workflow (even if you consider the previous steps as cleared now, selected ions may not be the ones most appropriate to
+get a foothold in annotation).
For example, what may be the reference bank that you need for the annotation step? This is a crucial question. It exists a variety
of online resources with well-known reference banks, but which one to choose? Some banks may have overlapping content, but also
@@ -1083,7 +1102,8 @@ an adequate reference bank online. However, it is also possible that none of the
case, you may need to construct your own database, to be able to search for relevant matches for your ions of interest.
In this tutorial, we chose the 'easy' case of human urinary samples. Thus, one possibility we have is to use the online reference
-bank HMDB (The Human Metabolome Database). Let's try requesting directly into this widely used bank using **HMDB MS search** {% icon tool %}
+bank HMDB (The Human Metabolome Database). Let's try requesting directly into this widely used bank using the **HMDB MS search**
+{% icon tool %} tool.
> ### {% icon hands_on %} Hands-on: Annotating the data using the HMDB
@@ -1100,7 +1120,7 @@ bank HMDB (The Human Metabolome Database). Let's try requesting directly into th
{: .hands_on}
Here, we tried to provide a Mass-to-charge ratio (*i.e.* a mass delta) based on what we globally know about the technique used to
-analyze the samples. Even if this parameter may seems simple, it is important to settle with a relevant value. If you provide a
+analyze the samples. Even if this parameter may seem simple, it is important to settle it with a relevant value. If you provide a
value that is too low, you may not be able to have matches for your ions even though the original molecule is present in the database.
On the opposite, if the value provided is too high, you may end with a huge number of matches, which could be time-consuming to
review to identify relevant proposed annotation.
diff --git a/topics/metagenomics/images/Krona.png b/topics/metagenomics/images/Krona.png
deleted file mode 100644
index 4777ff35acaa73..00000000000000
Binary files a/topics/metagenomics/images/Krona.png and /dev/null differ
diff --git a/topics/proteomics/images/maxquant_lfq_contaminants.png b/topics/proteomics/images/maxquant_lfq_contaminants.png
new file mode 100644
index 00000000000000..1873a884ad5040
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_contaminants.png differ
diff --git a/topics/proteomics/images/maxquant_lfq_lcmsms.png b/topics/proteomics/images/maxquant_lfq_lcmsms.png
new file mode 100644
index 00000000000000..f1f9ca41d3eda4
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_lcmsms.png differ
diff --git a/topics/proteomics/images/maxquant_lfq_missedcleavages.png b/topics/proteomics/images/maxquant_lfq_missedcleavages.png
new file mode 100644
index 00000000000000..99b3b952fa2876
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_missedcleavages.png differ
diff --git a/topics/proteomics/images/maxquant_lfq_proteinid.png b/topics/proteomics/images/maxquant_lfq_proteinid.png
new file mode 100644
index 00000000000000..d5a55607801182
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_proteinid.png differ
diff --git a/topics/proteomics/images/maxquant_lfq_quant_methods.png b/topics/proteomics/images/maxquant_lfq_quant_methods.png
new file mode 100644
index 00000000000000..317630424fe7f0
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_quant_methods.png differ
diff --git a/topics/proteomics/images/maxquant_lfq_serum_composition.png b/topics/proteomics/images/maxquant_lfq_serum_composition.png
new file mode 100644
index 00000000000000..4e30d554476caf
Binary files /dev/null and b/topics/proteomics/images/maxquant_lfq_serum_composition.png differ
diff --git a/topics/proteomics/images/maxquant_param_missedcl.png b/topics/proteomics/images/maxquant_param_missedcl.png
new file mode 100644
index 00000000000000..8bd85e3c39abf9
Binary files /dev/null and b/topics/proteomics/images/maxquant_param_missedcl.png differ
diff --git a/topics/proteomics/images/maxquant_ptxqc_overview.png b/topics/proteomics/images/maxquant_ptxqc_overview.png
new file mode 100644
index 00000000000000..4e55bed9ea5d61
Binary files /dev/null and b/topics/proteomics/images/maxquant_ptxqc_overview.png differ
diff --git a/topics/proteomics/tutorials/labelfree-vs-labelled/tutorial.md b/topics/proteomics/tutorials/labelfree-vs-labelled/tutorial.md
index dc5eec53ed4ff2..8af183495c6090 100644
--- a/topics/proteomics/tutorials/labelfree-vs-labelled/tutorial.md
+++ b/topics/proteomics/tutorials/labelfree-vs-labelled/tutorial.md
@@ -3,6 +3,7 @@ layout: tutorial_hands_on
title: "Label-free versus Labelled - How to Choose Your Quantitation Method"
zenodo_link: ""
+level: Introductory
questions:
- "What are benefits and drawbacks of different quantitation methods?"
- "How to choose which quantitation method bests suits my need?"
diff --git a/topics/proteomics/tutorials/maxquant-label-free/data-library.yaml b/topics/proteomics/tutorials/maxquant-label-free/data-library.yaml
new file mode 100644
index 00000000000000..03ff8d47aa16d9
--- /dev/null
+++ b/topics/proteomics/tutorials/maxquant-label-free/data-library.yaml
@@ -0,0 +1,43 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Proteomics
+ description: Training material for proteomics workflows in Galaxy
+ items:
+ - name: Label-free data analysis using MaxQuant
+ items:
+ - name: '10.5281/zenodo.3774452'
+ description: latest
+ items:
+ - url: https://zenodo.org/record/3774452/files/Sample1.raw
+ src: url
+ ext: thermo.raw
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/Sample2.raw
+ src: url
+ ext: thermo.raw
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/Protein_database.fasta
+ src: url
+ ext: fasta
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/PTXQC_report.pdf
+ src: url
+ ext: pdf
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/MaxQuant_Protein_Groups.tabular
+ src: url
+ ext: tabular
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/MaxQuant_Peptides.tabular
+ src: url
+ ext: tabular
+ info: https://zenodo.org/record/3774452
+ - url: https://zenodo.org/record/3774452/files/MaxQuant_mqpar.xml
+ src: url
+ ext: xml
+ info: https://zenodo.org/record/3774452
diff --git a/topics/proteomics/tutorials/maxquant-label-free/tutorial.bib b/topics/proteomics/tutorials/maxquant-label-free/tutorial.bib
new file mode 100644
index 00000000000000..250783f5fa4590
--- /dev/null
+++ b/topics/proteomics/tutorials/maxquant-label-free/tutorial.bib
@@ -0,0 +1,41 @@
+@article{Cox2008,
+ doi = {10.1038/nbt.1511},
+ url = {https://doi.org/10.1038/nbt.1511},
+ year = {2008},
+ month = nov,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {26},
+ number = {12},
+ pages = {1367--1372},
+ author = {J\"{u}rgen Cox and Matthias Mann},
+ title = {{MaxQuant} enables high peptide identification rates, individualized p.p.b.-range mass accuracies and proteome-wide protein quantification},
+ journal = {Nature Biotechnology}
+}
+
+@article{Geyer2017,
+ doi = {10.15252/msb.20156297},
+ url = {https://doi.org/10.15252/msb.20156297},
+ year = {2017},
+ month = sep,
+ publisher = {{EMBO}},
+ volume = {13},
+ number = {9},
+ pages = {942},
+ author = {Philipp E Geyer and Lesca M Holdt and Daniel Teupser and Matthias Mann},
+ title = {Revisiting biomarker discovery by plasma~proteomics},
+ journal = {Molecular Systems Biology}
+}
+
+@article{Bielow2015,
+ doi = {10.1021/acs.jproteome.5b00780},
+ url = {https://doi.org/10.1021/acs.jproteome.5b00780},
+ year = {2015},
+ month = dec,
+ publisher = {American Chemical Society ({ACS})},
+ volume = {15},
+ number = {3},
+ pages = {777--787},
+ author = {Chris Bielow and Guido Mastrobuoni and Stefan Kempa},
+ title = {Proteomics Quality Control: Quality Control Software for {MaxQuant} Results},
+ journal = {Journal of Proteome Research}
+}
diff --git a/topics/proteomics/tutorials/maxquant-label-free/tutorial.md b/topics/proteomics/tutorials/maxquant-label-free/tutorial.md
new file mode 100644
index 00000000000000..63809472adff83
--- /dev/null
+++ b/topics/proteomics/tutorials/maxquant-label-free/tutorial.md
@@ -0,0 +1,252 @@
+---
+layout: tutorial_hands_on
+
+title: Label-free data analysis using MaxQuant
+zenodo_link: 'https://zenodo.org/record/3774452'
+level: Introductory
+questions:
+- How to perform label-free analysis in MaxQuant?
+- Which are the most abundant proteins in serum?
+- How successful was the depletion of those in our experiment?
+objectives:
+- Analysis of human serum proteome samples with label-free quantification in MaxQuant
+time_estimation: 1H
+key_points:
+- MaxQuant offers a single tool solution for protein identification and quantification.
+- Label-free quantitation reveals the most abundant proteins in serum samples.
+contributors:
+- foellmelanie
+- matthias313
+
+---
+
+
+# Introduction
+{:.no_toc}
+
+The proteome refers to the entirety of proteins in a biological system (e.g. cell, tissue, organism). Proteomics is the large-scale experimental analysis of proteins and proteomes, most often performed by mass spectrometry that enables great sensitivity and throughput. Especially for complex protein mixtures, bottom-up mass spectrometry is the standard approach. In bottom-up proteomics, proteins are digested with a specific protease into peptides and the measured peptides are *in silico* reassembled into the corresponding proteins. Inside the mass spectrometer, not only the peptides are measured (MS1 level), but the peptides are also fragmented into smaller peptides which are measured again (MS2 level). This is referred to as tandem-mass spectrometry (MS/MS). Identification of peptides is performed by peptide spectrum matching of the theoretical spectra generated from the input protein database (fasta file) with the measured spectra. Peptide quantification is most often performed by measuring the area under the curve of the MS1 level peptide peaks, but special techniques such as TMT allow peptides to be quantified at the MS2 level. Nowadays, bottom-up tandem-mass spectrometry approaches allow for the identification and quantification of several thousand proteins.
+
+![MQ_lcmsms](../../images/maxquant_lfq_lcmsms.png "Proteomics using liquid chromatography tandem-mass spectrometry (LC-MS/MS). Adapted from wikipedia.")
+
+
+A plethora of software solutions were developed for the analysis of proteomics data. MaxQuant is one of the most popular proteomics software packages because it is an easy-to-use and free software that offers functionalities for nearly all kinds of proteomics data analysis challenges {% cite Cox2008 %}. Mass spectrometry raw data is normally obtained in a vendor specific, proprietary file format. MaxQuant can directly take those raw files as input. For peptide identification MaxQuant uses a search engine called "Andromeda". MaxQuant offers highly accurate functionalities for many different proteomics quantification strategies, e.g. label-free, SILAC, TMT.
+
+Blood is a commonly used biofluid for diagnostic procedures. The cell-free liquid blood portion is called plasma and, after coagulation, serum. Plasma and serum proteomics are frequently performed to find new biomarkers e.g. for diagnostic purposes and personalized medicine ({% cite Geyer2017 %}). Serum and plasma proteomics are particularly challenging due to protein concentration differences spanning ten orders of magnitude. Therefore, most sample preparation protocols include a depletion step in which the most abundant proteins are (partially) depleted from the sample e.g. via columns with immobilized antibodies.
+
+This stand-alone tutorial introduces the data analysis from raw data files to protein identification and quantification of two label-free human serum samples with the MaxQuant software. One sample is a pure serum sample, while the other sample has been depleted for several abundant blood proteins. One of the questions in this tutorial is to find out which sample was depleted and which not.
+
+For more advanced proteomics workflows, please consult the OpenMS [identification]({{site.baseurl}}/topics/proteomics/tutorials/protein-id-oms/tutorial.html), [quantification]({{site.baseurl}}/topics/proteomics/tutorials/protein-quant-sil/tutorial.html) as well as [SearchGUI/PeptideShaker]({{site.baseurl}}/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.html) tutorials.
+
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Get data
+
+The serum proteomic samples and the fasta file for this training were deposited at [Zenodo](https://zenodo.org/record/3774452). It is of course possible to use other fasta files that contain human proteome sequences, but to ensure that the results are compatible we recommend to use the provided fasta file. MaxQuant not only adds known contaminants to the fasta file, but also generates the "decoy" hits for false discovery rate estimation itself, therefore the fasta file is not allowed to have decoy entries. To learn more about fasta files, have a look at [Protein FASTA Database Handling]({{site.baseurl}}/topics/proteomics/tutorials/database-handling/tutorial.html).
+
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial and give it a meaningful name
+>
+> {% include snippets/create_new_history.md %}
+>
+> 2. Import the fasta and raw files from [Zenodo](https://zenodo.org/record/3774452)
+>
+> ```
+> https://zenodo.org/record/3774452/files/Protein_database.fasta
+> https://zenodo.org/record/3774452/files/Sample1.raw
+> https://zenodo.org/record/3774452/files/Sample2.raw
+> ```
+> {% include snippets/import_via_link.md %}
+>
+> 3. Once the files are green, rename the raw datasets into 'sample1' and 'sample2' and the fasta file into 'protein database'
+> {% include snippets/rename_dataset.md %}
+> 4. Set the data type to thermo.raw for 'sample1' and 'sample2'
+>
+> {% include snippets/change_datatype.md datatype="thermo.raw" %}
+>
+{: .hands_on}
+
+# MaxQuant Analysis
+
+The MaxQuant Galaxy implementation contains the most important MaxQuant parameters. As an alternative, **MaxQuant (using mqpar.xml)** {% icon tool %} can be used with a preconfigured mqpar.xml file. We will explain the parameters after starting the MaxQuant run which takes some time to finish.
+
+
+> ### {% icon hands_on %} Hands-on: MaxQuant analysis
+>
+> 1. **MaxQuant** {% icon tool %} with the following parameters:
+> - In *"Input Options"*:
+> - {% icon param-file %} *"FASTA files"*: `protein database`
+> - In *"Search Options"*:
+> - *"minimum unique peptides"*: `1`
+> - In *"Parameter Group"*:
+> - {% icon param-files %} *"Infiles"*: `sample1`, `sample2`
+> - *"missed cleavages"*: `1`
+> - *"variable modifications"*: ` `
+> - *"Quantitation Methods"*: `label free quantification`
+> - *"Generate PTXQC (proteomics quality control pipeline) report?"*: `Yes`
+> - In *"Output Options"*:
+> - *"Select the desired outputs."*: `Protein Groups` `Peptides` `mqpar.xml`
+>
+> > ### {% icon comment %} Comment: Protein Groups
+> > Proteins that share all their peptides with other proteins cannot be unambiguously identified. Therefore, MaxQuant groups such proteins into one protein group and only one common quantification will be calculated. The different protein properties are separated by semicolon.
+> {: .comment}
+{: .hands_on}
+
+## More details on MaxQuant Parameters
+
+The *"minimum peptide length"* defines the minimum number of amino acids a peptide should have to be included for protein identification and quantification. Below 7 amino acids a peptide cannot be unique and is therefore not informative, thus typical values are in the range 7-9.
+
+Several, even longer peptides are not unique, meaning that they are shared by several proteins e.g. when they are part of a common protein domain. During protein inference the peptides are statistically assembled into the corresponding proteins and the decision should be mainly based on the unique peptides. Therefore, we set *"min unique peptides"* to `1` - only proteins that have at least one unique peptide are reported in the output table.
+
+In most bottom-up proteomics experiments Trypsin is used as a protease because it has many advantages such as its accurate cleavage specificity. Trypsin cleaves peptides C-terminal of Arginine (R) and Lysine (K), except when those are followed by Proline (P). Therefore, in MaxQuant the default *"enzyme"* is set to `Trypsin/P`. This trypsin specific cleavage rule is used by MaxQuant to perform an *in silico* digestion of the protein database that was provided in the fasta file.
+
+Protease digestion is not always complete, therefore we set the *"number of missed cleavages"* to `1`, meaning that the *in silico* digestion also includes peptides that have an additional Arginine or Lysine in their sequence.
+
+![MQ_param_missed](../../images/maxquant_param_missedcl.png "Trypsin specificity and missed cleavages. With one missed cleavage all three peptides will be part of the *in silico* peptide database")
+
+From the *in silico* generated peptide database the masses of the peptides are calculated and matched to the measured masses in order to identify them. A peptide's mass will change due to peptide modifications such as chemical labelling, for example applied in different quantitation strategies, or biological post translational modifications. Therefore, it is important to also include possible peptide modifications in the *in silico* generated peptide mass list. *"Fixed modifications"* are modifications that occur on every occurrence of the specific amino acid. Those are often artificially introduced modifications such as Carbamidomethylation of cysteines (C) to prevent re-formation of the disulfide bridges. This is a common procedure in proteomics sample preparation and therefore also the default option in MaxQuant: `Carbamidomethyl (C)`. *"Variable modifications"* are modifications that do not occur on every instance of an amino acid; for example, Oxidation of Methionine might occur on only some Methionines, and only a few peptide N-termini are acetylated. Variable modifications increase the *in silico* peptide database because each peptide's mass is calculated once with and once without the additional modification. To keep computation times as short as possible we did not use any variable modification in this training, although the MaxQuant defaults, Oxidation of Methionine and Acetylation of N-termini, would have been completely valid to use.
+
+MaxQuant supports different *"Quantitation Methods"*. The three main categories are `label-free quantification`, `label-based quantification` and `reporter ion MS2` quantification. In this tutorial we have chosen `label-free` because we did not apply any specific labeling/quantitation strategy to the samples.
+
+![MQ_quant_methods](../../images/maxquant_lfq_quant_methods.png "Overview of MaxQuant quantification methods")
+
+The PTXQC software ({% cite Bielow2015 %}) was built to enable direct proteomics quality control from MaxQuant result files. This quality control can be directly used in the Galaxy MaxQuant wrapper by setting *"Generate PTXQC"* to `yes`. This will generate a pdf file with multiple quality control plots. Be aware that the cutoffs set in PTXQC might not be applicable to your experiment and mass spectrometer type and therefore "under performing" and "fail" do not necessarily mean that the quality is poor.
+
+![PTXQC_overview](../../images/maxquant_ptxqc_overview.png "Overview of PTXQC quality measures for sample1 and sample2")
+
+
+MaxQuant automatically generates several output files. In the *"Output Options"* all or some of the output files can be selected. The protein information can be obtained by selecting `Protein Groups`, while the peptide information is obtained by choosing `Peptides`. The applied MaxQuant parameters are stored in the `mqpar.xml` file. This file can be re-used as an input file in the **MaxQuant (using mqpar.xml)** {% icon tool %}.
+
+
+> ### {% icon details %} More MaxQuant parameters
+> - The *"parse rules"* in the input section are applied to the fasta sequence headers. The default automatically extracts the Uniprot ID from fasta files that were downloaded from uniprot. Regular expressions can be adjusted to keep other information from the fasta file.
+>
+> - For pre-fractionated data an *"experimental design template"* has to be used. This has to be a tab-separated text file with a column for the fractions (e.g. 1-10) and a column for the experiment (sample1, sample2, sample3) and a column for post translational modifications (PTM). Examples are given in the help section of the MaxQuant tool.
+>
+> - *"Match between run"* allows to transfer identifications (peptide sequences) from one run to another. If the MS1 (full length peptide) signal is present in both runs, but was only selected for fragmentation in one of them, MaxQuant can transfer the resulting peptide sequence to the run where the MS1 peptide was not fragmented. The information on whether a peptide was identified via fragmentation (MS/MS) or match between run (matching) can be found in the evidence output.
+>
+> - MaxQuant allows to process different raw files with different parameters. In this tutorial we have loaded both files into the same *"parameter group"* in order to process them with the same parameters. To apply different parameters, new parameter groups can be added by clicking on the {% icon param-repeat %} *"insert parameter group"* button. In each *"parameter group"* one or several raw files can be specified and for them only the parameters specified within this parameter group section are applied.
+{: .details}
+
+> ### {% icon tip %} Tip: Continue with results from Zenodo
+>
+> In case the MaxQuant run is not yet finished, the results can be downloaded from Zenodo to be able to continue the tutorial
+> 1. Import the files from [Zenodo](https://zenodo.org/record/3774452)
+>
+> ```
+> https://zenodo.org/record/3774452/files/PTXQC_report.pdf
+> https://zenodo.org/record/3774452/files/MaxQuant_Protein_Groups.tabular
+> https://zenodo.org/record/3774452/files/MaxQuant_Peptides.tabular
+> https://zenodo.org/record/3774452/files/MaxQuant_mqpar.xml
+> ```
+{: .tip}
+
+> ### {% icon question %} Questions
+>
+> 1. How many proteins were found in total?
+> 2. How many peptides were found in total?
+> 3. How many proteins were identified and quantified? (Tip: There is a tool called "filter data on any column", select the LFQ column for both files and remove rows containing "0")
+>
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. 271 protein (groups) were found in total.
+> > 2. 2387 peptides were found in total.
+> > 3. Sample1: 237, Sample2: 123 (**filter data on any column** {% icon tool %} on the `protein groups` file *"with following condition"* `c32!=0` or `c33!=0` and *"Number of header lines"* `1`)
+> >
+> {: .solution}
+>
+{: .question}
+
+
+# Quality control results
+
+To get a first overview of the MaxQuant results, the PTXQC report is helpful. Click on the {% icon galaxy-eye %} eye of the PTXQC pdf file to open it in Galaxy. Screening through the different plots might already give you a hint which of the samples was pure and which was depleted of abundant proteins. Both samples failed in some categories (see Figure 4 above), especially due to low peptide and protein numbers, which is expected in serum samples and therefore not a quality problem.
+
+> ### {% icon question %} Questions
+>
+> 1. How good was the tryptic digestion (percentage of zero missed cleavages)?
+> 2. Which sample yielded more protein identifications?
+> 3. In which sample were more contaminants quantified?
+> 4. Do you already have a guess on which sample was depleted?
+>
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. The digestion was not ideal but good enough to work with. The proportion of zero missed cleavages was 75% for sample1 and around 85% for sample2. ![PTXQC_missed](../../images/maxquant_lfq_missedcleavages.png)
+> > 2. Sample 1 yielded more protein identifications ![PTXQC_ids](../../images/maxquant_lfq_proteinid.png)
+> > 3. Sample 2 has more contaminants, especially serum albumin is highly abundant. ![PTXQC_contaminants](../../images/maxquant_lfq_contaminants.png)
+> > 4. Sample1, more information can be found in the next section.
+> >
+> {: .solution}
+
+
+# Serum composition
+
+To explore the proteomic composition of the two serum samples some postprocessing steps are necessary. The protein groups file has many different columns, therefore the first step is to extract only columns that are of interest for this task. These are the columns with the fasta headers (this includes the protein name as it was written in the fasta file) and the two columns with LFQ intensities for both files. To find the most abundant proteins the LFQ intensities can be sorted. On this sorted dataset we will explore the composition of the serum proteins within both samples using an interactive pie chart.
+
+> ### {% icon hands_on %} Hands-on: Exploring serum composition
+>
+> 1. **Cut columns from a table** {% icon tool %} with the following parameters:
+> - *"Cut columns"*: `c8,c32,c33`
+> - {% icon param-file %} *"From"*: `proteinGroups` (output of **MaxQuant** {% icon tool %})
+>
+> 2. **Sort data in ascending or descending order** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Sort Query"*: `cut_file` (output of **Cut** {% icon tool %})
+> - *"Number of header lines"*: `1`
+> - In *"Column selections"*:
+> - *"on column"*: `c2`
+> - *"in"*: `Descending order`
+> - *"Flavor"*: `General numeric sort ( scientific notation -g)`
+>
+> 3. **Sort data in ascending or descending order** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Sort Query"*: `cut_file` (output of **Cut** {% icon tool %})
+> - *"Number of header lines"*: `1`
+> - In *"Column selections"*:
+> - *"on column"*: `c3`
+> - *"in"*: `Descending order`
+> - *"Flavor"*: `General numeric sort ( scientific notation -g)`
+> 4. Click {% icon galaxy-barchart %} “Visualize this data” on the last **Sort** {% icon tool %} result.
+> - Select `Pie chart (NVD3)`
+> - *"Provide a title"*: `Serum compositions`
+> - Click `Select data` {% icon galaxy-chart-select-data %}
+> - *"Provide a label"*: `sample1`
+> - *"Labels"*: `Column: 1`
+> - *"Values"*: `Column: 2`
+> - Click `Insert data series`
+> - *"Provide a label"*: `sample2`
+> - *"Labels"*: `Column: 1`
+> - *"Values"*: `Column: 3`
+> - Save {% icon galaxy-save %} (file is saved under "User" --> "Visualizations")
+{: .hands_on}
+
+
+![serum_composition](../../images/maxquant_lfq_serum_composition.png "Quantitative serum composition. In Galaxy one can hover over the graph to see the protein names")
+
+
+> ### {% icon question %} Questions
+>
+> 1. What are the top 5 most abundant proteins in both files? Do they reflect typical serum proteins?
+> 2. Which sample was depleted for the top serum proteins?
+> 3. How much did the serum albumin abundance percentage decrease? Was the depletion overall successful?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Sample1: Complement C4-A, Ceruloplasmin, Hemopexin, Serum albumin, Complement factor B. Sample2: Serum albumin, Immunoglobulin heavy constant gamma 1, Serotransferrin, Immunoglobulin kappa constant, Haptoglobin. All of those proteins are typical (highly abundant) serum proteins [plasma proteins found by MS](https://www.proteinatlas.org/humanproteome/blood/proteins+detected+in+ms).
+> > 2. Sample1 was depleted, sample2 was pure serum.
+> > 3. In the depleted sample1, there is a depletion in some of the most abundant proteins, especially Albumin, whose proportion of the total sample intensities decreased by 58 percent. Compared to the pure serum the depleted sample showed a doubling of identified and quantified proteins, rendering it quite successful. However, there is still room for improvement as some of the most abundant proteins which should have been depleted did not change their abundance compared to the overall protein abundance.
+> >
+> {: .solution}
+>
+{: .question}
+
+
diff --git a/topics/proteomics/tutorials/maxquant-label-free/workflows/index.md b/topics/proteomics/tutorials/maxquant-label-free/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/proteomics/tutorials/maxquant-label-free/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/proteomics/tutorials/maxquant-label-free/workflows/maxquant_label_free.ga b/topics/proteomics/tutorials/maxquant-label-free/workflows/maxquant_label_free.ga
new file mode 100644
index 00000000000000..3e3dcac8af3f4c
--- /dev/null
+++ b/topics/proteomics/tutorials/maxquant-label-free/workflows/maxquant_label_free.ga
@@ -0,0 +1,265 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Label-free quantification in MaxQuant",
+ "format-version": "0.1",
+ "name": "Maxquant_lfq_serum",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "sample1"
+ }
+ ],
+ "label": "sample1",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 189
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"sample1\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "b9140d52-823c-44a1-a317-e9efed37d9e5",
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "sample2"
+ }
+ ],
+ "label": "sample2",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 201,
+ "top": 327
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"sample2\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "aa7fe2a8-8def-440f-bab3-2d30503286a9",
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [],
+ "label": "Fasta file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 206.61666870117188,
+ "top": 450.11669921875
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "7aa7bc43-1201-463b-b9c6-b782550dadf7",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "c2b015dc-bbf5-4460-8af5-3f45b5262b6c"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/maxquant/maxquant/1.6.10.43",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input_opts|fasta_files": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "paramGroups_0|files": [
+ {
+ "id": 1,
+ "output_name": "output"
+ },
+ {
+ "id": 0,
+ "output_name": "output"
+ }
+ ]
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool MaxQuant",
+ "name": "search_opts"
+ }
+ ],
+ "label": null,
+ "name": "MaxQuant",
+ "outputs": [
+ {
+ "name": "proteinGroups",
+ "type": "tabular"
+ },
+ {
+ "name": "mqpar",
+ "type": "xml"
+ },
+ {
+ "name": "peptides",
+ "type": "tabular"
+ },
+ {
+ "name": "ptxqc_report",
+ "type": "pdf"
+ }
+ ],
+ "position": {
+ "left": 490,
+ "top": 195
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/maxquant/maxquant/1.6.10.43",
+ "tool_shed_repository": {
+ "changeset_revision": "7f432d87c82c",
+ "name": "maxquant",
+ "owner": "galaxyp",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output_opts\": \"{\\\"dry_run\\\": \\\"false\\\", \\\"output\\\": [\\\"proteinGroups\\\", \\\"mqpar\\\", \\\"peptides\\\"]}\", \"__rerun_remap_job_id__\": null, \"paramGroups\": \"[{\\\"__index__\\\": 0, \\\"enzymes\\\": [\\\"Trypsin/P\\\"], \\\"files\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"fixedModifications\\\": [\\\"Carbamidomethyl (C)\\\"], \\\"maxMissedCleavages\\\": \\\"1\\\", \\\"quant_method\\\": {\\\"__current_case__\\\": 2, \\\"lfqAvEdgesPerNode\\\": \\\"6\\\", \\\"lfqMinEdgesPerNode\\\": \\\"3\\\", \\\"lfqMinRatioCount\\\": \\\"2\\\", \\\"lfqSkipNorm\\\": \\\"true\\\", \\\"select_quant_method\\\": \\\"lfq\\\"}, \\\"variableModifications\\\": null}]\", \"qc\": \"{\\\"__current_case__\\\": 0, \\\"do_it\\\": \\\"true\\\", \\\"evidence\\\": \\\"true\\\", \\\"msms\\\": \\\"true\\\", \\\"msmsscans\\\": \\\"true\\\", \\\"parameters\\\": \\\"true\\\", \\\"proteingroups\\\": \\\"true\\\", \\\"summary\\\": \\\"true\\\"}\", \"input_opts\": \"{\\\"description_parse_rule\\\": \\\"^>.*\\\\\\\\|.*\\\\\\\\|[^ ]+ (.*) OS.*$\\\", \\\"fasta_files\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"ftype\\\": \\\".thermo.raw\\\", \\\"identifier_parse_rule\\\": \\\"^>.*\\\\\\\\|(.*)\\\\\\\\|.*$\\\"}\", \"lfq_opts\": \"{\\\"advancedSiteIntensities\\\": \\\"true\\\", \\\"do_ibaq\\\": {\\\"__current_case__\\\": 1, \\\"ibaq\\\": \\\"False\\\"}, \\\"lfqRequireMsms\\\": \\\"true\\\", \\\"lfqStabilizeLargeRatios\\\": \\\"true\\\", \\\"separateLfq\\\": \\\"false\\\"}\", \"search_opts\": \"{\\\"calc_peak_properties\\\": \\\"false\\\", \\\"match_between_runs\\\": \\\"false\\\", \\\"max_peptide_mass\\\": \\\"4600\\\", \\\"min_peptide_len\\\": \\\"7\\\", \\\"min_unique_pep\\\": \\\"1\\\", \\\"template\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "5fb07b2a-da7c-4ac8-85ec-fd8ed99e8707",
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input": {
+ "id": 3,
+ "output_name": "proteinGroups"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 799,
+ "top": 184
+ },
+ "post_job_actions": {},
+ "tool_id": "Cut1",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"columnList\": \"\\\"c8,c32,c33\\\"\", \"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "851fb2bf-434f-4ade-b0d3-69ff35567daa",
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/1.1.1",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "infile": {
+ "id": 4,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Sort",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1054,
+ "top": 194
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/1.1.1",
+ "tool_shed_repository": {
+ "changeset_revision": "74a8bef53a00",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"sortkeys\": \"[{\\\"__index__\\\": 0, \\\"column\\\": \\\"2\\\", \\\"order\\\": \\\"r\\\", \\\"style\\\": \\\"g\\\"}]\", \"__page__\": null, \"ignore_case\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"1\\\"\", \"unique\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "3dd97e27-9437-4390-a9e3-c8083dcdac56",
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/1.1.1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "infile": {
+ "id": 4,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Sort",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 1053,
+ "top": 335
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/1.1.1",
+ "tool_shed_repository": {
+ "changeset_revision": "74a8bef53a00",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"sortkeys\": \"[{\\\"__index__\\\": 0, \\\"column\\\": \\\"3\\\", \\\"order\\\": \\\"r\\\", \\\"style\\\": \\\"g\\\"}]\", \"__page__\": null, \"ignore_case\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"1\\\"\", \"unique\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": null,
+ "type": "tool",
+ "uuid": "aa036605-c97c-47cd-be4b-39cc18b9887e",
+ "workflow_outputs": []
+ }
+ },
+ "tags": [
+ "proteomics"
+ ],
+ "uuid": "99b62afb-14b2-48ad-a756-3bb342d12169",
+ "version": 3
+}
diff --git a/topics/proteomics/tutorials/ntails/tutorial.md b/topics/proteomics/tutorials/ntails/tutorial.md
index 88f5989e481862..71670bd3ac80bc 100644
--- a/topics/proteomics/tutorials/ntails/tutorial.md
+++ b/topics/proteomics/tutorials/ntails/tutorial.md
@@ -4,6 +4,7 @@ layout: tutorial_hands_on
title: "Detection and quantitation of N-termini (degradomics) via N-TAILS"
edam_ontology: "topic_0121"
zenodo_link: ""
+level: Intermediate
questions:
- "How can protein N-termini be enriched for LC-MS/MS?"
- "How to analyze the LC-MS/MS data?"
diff --git a/topics/proteomics/tutorials/protein-id-oms/tutorial.md b/topics/proteomics/tutorials/protein-id-oms/tutorial.md
index a5674b1acd1275..eaacfdede95108 100644
--- a/topics/proteomics/tutorials/protein-id-oms/tutorial.md
+++ b/topics/proteomics/tutorials/protein-id-oms/tutorial.md
@@ -3,6 +3,7 @@ layout: tutorial_hands_on
title: "Peptide and Protein ID using OpenMS tools"
zenodo_link: "https://zenodo.org/record/546301"
+level: Intermediate
questions:
- "How to convert LC-MS/MS raw files?"
- "How to identify peptides?"
diff --git a/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.md b/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.md
index dfb6145b9e8ef6..cebff44b1b4a71 100644
--- a/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.md
+++ b/topics/proteomics/tutorials/protein-id-sg-ps/tutorial.md
@@ -3,6 +3,7 @@ layout: tutorial_hands_on
title: "Peptide and Protein ID using SearchGUI and PeptideShaker"
zenodo_link: "https://zenodo.org/record/546301"
+level: Introductory
questions:
- "How to convert LC-MS/MS raw files?"
- "How to identify peptides?"
diff --git a/topics/proteomics/tutorials/proteogenomics-dbsearch/tutorial.md b/topics/proteomics/tutorials/proteogenomics-dbsearch/tutorial.md
index c68f6472373ac0..a7633695d45e20 100644
--- a/topics/proteomics/tutorials/proteogenomics-dbsearch/tutorial.md
+++ b/topics/proteomics/tutorials/proteogenomics-dbsearch/tutorial.md
@@ -293,7 +293,7 @@ Now that we have the list of known peptides, the query tabular tool is used to m
>
> 1. **Query Tabular** {% icon tool %} with the following parameters:
>
-> - {% icon param-repeat %} **Insert Database Table** (a):
+> - {% icon param-repeat %} **Insert Database Table** (a): `output from group`
> - Section **Table Options**:
> - *"Tabular Dataset for Table"*: Uniprot
> - *"Use first line as column names"* : `No`
@@ -301,7 +301,8 @@ Now that we have the list of known peptides, the query tabular tool is used to m
> - {% icon param-repeat %} **Insert Table Index**:
> - *"Table Index"*: `No`
> - *"Index on Columns"*: `Prot`
-> - {% icon param-repeat %} **Insert Database Table** (b):
+>
+> - {% icon param-repeat %} **Insert Database Table** (b): `PSM report`
> - Section **Filter Dataset Input**:
> - {% icon param-repeat %} **Insert Filter Tabular Input Lines**
> - *"Filter by"*: `skip leading lines`
@@ -316,7 +317,7 @@ Now that we have the list of known peptides, the query tabular tool is used to m
> - *"Table Index"*: `No`
> - *"Index on Columns"*: `id`
>
-> - {% icon param-repeat %} **Insert Database Table** (c):
+> - {% icon param-repeat %} **Insert Database Table** (c): `PSM report`
>
> - Section **Filter Dataset Input**
> - {% icon param-repeat %} **Insert Filter Tabular Input Lines**
diff --git a/topics/proteomics/tutorials/secretome-prediction/tutorial.md b/topics/proteomics/tutorials/secretome-prediction/tutorial.md
index 153653e8b37eb7..12e5154cf30b8c 100644
--- a/topics/proteomics/tutorials/secretome-prediction/tutorial.md
+++ b/topics/proteomics/tutorials/secretome-prediction/tutorial.md
@@ -3,6 +3,7 @@ layout: tutorial_hands_on
title: "Secretome Prediction"
zenodo_link: "https://zenodo.org/record/519260"
+level: Intermediate
questions:
- "How to predict cellular protein localization based upon GO-terms?"
- "How to combine multiple localization predictions?"
diff --git a/topics/sequence-analysis/tutorials/mapping/bam_explanation.md b/topics/sequence-analysis/tutorials/mapping/bam_explanation.md
index baabf8c93d9415..0298c15a016cd8 100644
--- a/topics/sequence-analysis/tutorials/mapping/bam_explanation.md
+++ b/topics/sequence-analysis/tutorials/mapping/bam_explanation.md
@@ -1,4 +1,4 @@
-A BAM ([Binary Alignment Map](https://en.wikipedia.org/wiki/SAM_(file_format))) file is a compressed, binary file storing the sequences mapped to a reference sequence.
+A BAM ([Binary Alignment Map](https://en.wikipedia.org/wiki/SAM_(file_format))) file is a compressed binary file storing the read sequences, whether they have been aligned to a reference sequence (e.g. a chromosome), and if so, the position on the reference sequence at which they have been aligned.
> ### {% icon hands_on %} Hands-on: Inspect a BAM/SAM file
>
diff --git a/topics/sequence-analysis/tutorials/quality-control/tutorial.md b/topics/sequence-analysis/tutorials/quality-control/tutorial.md
index e81d5913679edf..e6ba088bb2ad99 100644
--- a/topics/sequence-analysis/tutorials/quality-control/tutorial.md
+++ b/topics/sequence-analysis/tutorials/quality-control/tutorial.md
@@ -608,7 +608,7 @@ Paired-end sequencing generates 2 FASTQ files:
- One file with the sequences corresponding to **forward** orientation of all the fragments
- One file with the sequences corresponding to **reverse** orientation of all the fragments
-Usually we recognize these two files which belong to one sample by the name which has the same identifier for the reads but a different extension, e.g. `sampleA_R1.fastq` for the forward reads and `sampleA_R2.fastq` for the reverse reads. It can also be `_r` or `_1` for the forward reads and `_f` or `_2` for the reverse reads.
+Usually we recognize these two files which belong to one sample by the name which has the same identifier for the reads but a different extension, e.g. `sampleA_R1.fastq` for the forward reads and `sampleA_R2.fastq` for the reverse reads. It can also be `_f` or `_1` for the forward reads and `_r` or `_2` for the reverse reads.
The data we analyzed in the previous step was not single-end data but the forward reads of paired-end data. We will now do the quality control on the reverse reads.
@@ -686,7 +686,7 @@ After trimming, reverse reads will be shorter because of their quality and then
> >
> > > ### {% icon solution %} Solution
> > > 1. 44,164 bp (`Quality-trimmed:`) for the forward reads and 138,638 bp for the reverse reads.
-> > > 2. 1,376 sequences have been removed because at least one read was shorter than the length cutoff (334 when only the forward reads were analyzed).
+> > > 2. 1,376 sequences have been removed because at least one read was shorter than the length cutoff (322 when only the forward reads were analyzed).
> > {: .solution }
> {: .question}
>
diff --git a/topics/statistics/images/confusion_matrix_dl.png b/topics/statistics/images/confusion_matrix_dl.png
new file mode 100644
index 00000000000000..63e4645e063307
Binary files /dev/null and b/topics/statistics/images/confusion_matrix_dl.png differ
diff --git a/topics/statistics/images/dl_bioinformatics.png b/topics/statistics/images/dl_bioinformatics.png
new file mode 100644
index 00000000000000..da672a8a34b6e6
Binary files /dev/null and b/topics/statistics/images/dl_bioinformatics.png differ
diff --git a/topics/statistics/images/eq1.png b/topics/statistics/images/eq1.png
new file mode 100644
index 00000000000000..776a4db5c59d37
Binary files /dev/null and b/topics/statistics/images/eq1.png differ
diff --git a/topics/statistics/images/eq2.png b/topics/statistics/images/eq2.png
new file mode 100644
index 00000000000000..1caf72c103c1c7
Binary files /dev/null and b/topics/statistics/images/eq2.png differ
diff --git a/topics/statistics/images/eq3.png b/topics/statistics/images/eq3.png
new file mode 100644
index 00000000000000..d99acb79ae9990
Binary files /dev/null and b/topics/statistics/images/eq3.png differ
diff --git a/topics/statistics/images/eq4.png b/topics/statistics/images/eq4.png
new file mode 100644
index 00000000000000..6a9ca58e731d7e
Binary files /dev/null and b/topics/statistics/images/eq4.png differ
diff --git a/topics/statistics/images/eq5.png b/topics/statistics/images/eq5.png
new file mode 100644
index 00000000000000..6c5a19bcf1908c
Binary files /dev/null and b/topics/statistics/images/eq5.png differ
diff --git a/topics/statistics/images/mse.png b/topics/statistics/images/mse.png
new file mode 100644
index 00000000000000..a83ae4b00355bb
Binary files /dev/null and b/topics/statistics/images/mse.png differ
diff --git a/topics/statistics/images/neural_network.svg b/topics/statistics/images/neural_network.svg
new file mode 100644
index 00000000000000..59819ff0dc61d7
--- /dev/null
+++ b/topics/statistics/images/neural_network.svg
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/topics/statistics/images/neuron.svg b/topics/statistics/images/neuron.svg
new file mode 100644
index 00000000000000..4808f7e71740d7
--- /dev/null
+++ b/topics/statistics/images/neuron.svg
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/topics/statistics/images/partial_derivative.png b/topics/statistics/images/partial_derivative.png
new file mode 100644
index 00000000000000..c62174de1e70c7
Binary files /dev/null and b/topics/statistics/images/partial_derivative.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/data-library.yaml b/topics/statistics/tutorials/classification_machinelearning/data-library.yaml
new file mode 100644
index 00000000000000..b707a389d8b320
--- /dev/null
+++ b/topics/statistics/tutorials/classification_machinelearning/data-library.yaml
@@ -0,0 +1,28 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Statistics and machine learning
+ description: Classification algorithms in machine learning using Galaxy
+ tools
+ items:
+ - name: Classification in Machine Learning
+ items:
+ - name: 'DOI:10.5281/zenodo.3738729'
+ description: latest
+ items:
+ - url: https://zenodo.org/record/3738729/files/train_rows.csv
+ src: url
+ ext: csv
+ info: https://doi.org/10.5281/zenodo.3738729
+ - url: https://zenodo.org/record/3738729/files/test_rows_labels.csv
+ src: url
+ ext: csv
+ info: https://doi.org/10.5281/zenodo.3738729
+ - url: https://zenodo.org/record/3738729/files/test_rows.csv
+ src: url
+ ext: csv
+ info: https://doi.org/10.5281/zenodo.3738729
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/classification.png b/topics/statistics/tutorials/classification_machinelearning/images/classification.png
new file mode 100644
index 00000000000000..da099cc50d8848
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/classification.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_NN.png b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_NN.png
new file mode 100644
index 00000000000000..116e7c3f257a40
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_NN.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_bagging.png b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_bagging.png
new file mode 100644
index 00000000000000..301c578a63f327
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_bagging.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_linear.png b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_linear.png
new file mode 100644
index 00000000000000..5c500cf35d1fa3
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/confusion_matrix_linear.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_NN.png b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_NN.png
new file mode 100644
index 00000000000000..31c9a80786f491
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_NN.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_bagging.png b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_bagging.png
new file mode 100644
index 00000000000000..496dc9b79aa8c9
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_bagging.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_linear.png b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_linear.png
new file mode 100644
index 00000000000000..c0f810b7553788
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/precision_recall_linear.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/roc_NN.png b/topics/statistics/tutorials/classification_machinelearning/images/roc_NN.png
new file mode 100644
index 00000000000000..4ac2011eb48f3e
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/roc_NN.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/roc_bagging.png b/topics/statistics/tutorials/classification_machinelearning/images/roc_bagging.png
new file mode 100644
index 00000000000000..f6f5014b478870
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/roc_bagging.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/roc_linear.png b/topics/statistics/tutorials/classification_machinelearning/images/roc_linear.png
new file mode 100644
index 00000000000000..94b969ee4d174d
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/roc_linear.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/roc_rf.png b/topics/statistics/tutorials/classification_machinelearning/images/roc_rf.png
new file mode 100644
index 00000000000000..d6dd26f89a47fe
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/roc_rf.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/images/roc_svm.png b/topics/statistics/tutorials/classification_machinelearning/images/roc_svm.png
new file mode 100644
index 00000000000000..9f4a3248e68246
Binary files /dev/null and b/topics/statistics/tutorials/classification_machinelearning/images/roc_svm.png differ
diff --git a/topics/statistics/tutorials/classification_machinelearning/tutorial.md b/topics/statistics/tutorials/classification_machinelearning/tutorial.md
new file mode 100755
index 00000000000000..054595f79a3e75
--- /dev/null
+++ b/topics/statistics/tutorials/classification_machinelearning/tutorial.md
@@ -0,0 +1,541 @@
+---
+layout: tutorial_hands_on
+
+title: 'Classification in Machine Learning'
+zenodo_link: https://zenodo.org/record/3738729#.XoZyHXUzaV4
+questions:
+- What is classification and how can we use classification techniques?
+objectives:
+- Learn classification background
+- Learn what a quantitative structure-activity relationship (QSAR) model is and how it can be constructed in Galaxy
+- Learn to apply logistic regression, k-nearest neighbors, support vector machines, random forests and bagging algorithms
+- Learn how visualizations can be used to analyze the classification results
+key_points:
+- Classification is a supervised approach in machine learning.
+- For classification tasks, data is divided into training and test sets.
+- Using classification, the samples are learned using the training set and predicted using the test set.
+- For each classification algorithm, its parameters should be optimised based on the dataset.
+- Machine learning algorithms can be applied to chemical datasets to predict important properties.
+time_estimation: 2H
+contributors:
+- khanteymoori
+- anuprulez
+- simonbray
+---
+
+# Introduction
+{:.no_toc}
+
+In this tutorial you will learn how to apply Galaxy tools to solve [classification](https://en.wikipedia.org/wiki/Statistical_classification) problems. First, we will introduce classification briefly, and then examine logistic regression, which is an example of a linear classifier. Next, we will discuss the nearest neighbor classifier, which is a simple but nonlinear classifier. Then advanced classifiers, such as support vector machines, random forest and ensemble classifiers will be introduced and applied. Furthermore, we will show how to visualize the results in each step.
+
+Finally, we will discuss how to train the classifiers by finding the values of their parameters that minimize a cost function. We will work through a real problem in the field of cheminformatics to learn how the classifiers and learning algorithms work.
+
+Classification is a [supervised learning](https://en.wikipedia.org/wiki/Supervised_learning) method in machine learning and the algorithm which is used for this learning task is called a classifier. In this tutorial we will build a classifier which can predict whether a chemical substance is biodegradable or not. Substances which degrade quickly are preferable to those which degrade slowly, as they do not accumulate and pose a risk to the environment. Therefore, it is useful to be able to predict easily in advance whether a substance is biodegradable prior to production and usage in consumer products.
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+
+# Classification
+
+Classification is the process of assigning every object from a collection to exactly one class from a known set of classes by learning a "decision boundary" in a dataset. This dataset is called a training dataset and contains multiple samples, together with a desired class for each sample. The training dataset contains "features" as columns and a mapping between these features and the class label is learned for each sample.
+
+The performance of the mapping is evaluated using a test dataset, which is separate from the training dataset. The test dataset contains only the feature columns, but not the class column. The class column is predicted using the mapping learned on the training dataset. An example of a classification task is assigning a patient (the object) to a group of healthy or ill (the classes) people on the basis of his or her medical record. In this tutorial, we will use a classifier to train a model using a training dataset, predict the targets for the test dataset and visualize the results using plots.
+
+![classification](images/classification.png "Classification of samples belonging to different classes.")
+
+In figure [1](#figure-1), the line is a boundary which separates a class from another class (for example from tumor to no tumor). The task of a classifier is to learn this boundary, which can be used to classify or categorize an unseen/new sample. The line is the decision boundary; there are different ways to learn it, which correspond to different classification algorithms. If the dataset is linearly separable, linear classifiers can produce good classification results. However, when the dataset is complex and requires non-linear decision boundaries, more powerful classifiers like `support vector machine` or `ensemble` based classifiers may prove to be beneficial.
+
+The data classification process includes two steps:
+1. Building the classifier or model: This step is the learning step, in which the classification algorithms build the classifier. The classifier is built from the training set made up of database samples and their associated class labels. Each sample that constitutes the training set belongs to a class.
+
+2. Applying the classifier to a classification task: In this step, the classifier is used for classification. Here the test data is used to estimate the accuracy of classification rules. The classification rules can be applied to the new data samples if the accuracy is considered acceptable.
+
+
+# Quantitative Structure - Activity Relationship biodegradation
+
+The classification problem we will study in this tutorial is related to biodegradation. Chemical substances which decay slowly will accumulate over time, which poses a threat to the environment. Therefore, it is useful to be able to predict in advance whether a substance will break down quickly or not.
+
+Quantitative structure-activity relationship (QSAR) and quantitative structure-property relationship (QSPR) models attempt to predict the activity or property of chemicals based on their chemical structure. To achieve this, a database of compounds is collected for which the property of interest is known. For each compound, molecular descriptors are collected which describe the structure (for example: molecular weight, number of nitrogen atoms, number of carbon-carbon double bonds). Using these descriptors, a model is constructed which is capable of predicting the property of interest for a new, unknown molecule. In this tutorial we will use a database assembled from experimental data of the Japanese Ministry of International Trade and Industry to create a classification model for biodegradation. We then will be able to use this model to classify new molecules into one of two classes: biodegradable or non-biodegradable.
+
+As a benchmark, we will use the [dataset](https://pubs.acs.org/doi/10.1021/ci4000213) assembled by Mansouri et al. using data from the National Institute of Technology and Evaluation of Japan. This database contains 1055 molecules, together with precalculated molecular descriptors.
+
+In this tutorial, we will apply a couple of [scikit-learn](https://scikit-learn.org/stable/) machine learning tools to the dataset provided by Mansouri et al. to predict whether a molecule is biodegradable or not.
+In the following part, we will perform classification on the biodegradability dataset using a linear classifier and then will create some plots to analyze the results.
+
+## Get train and test datasets
+
+We have two datasets available; the training dataset contains 837 molecules, while the test dataset contains 218 molecules.
+
+Let's begin by uploading the necessary datasets.
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial
+> 2. Import the files from [Zenodo](https://zenodo.org/record/3738729#.Xs1EeHUzY5k)
+>
+> ```
+> https://zenodo.org/record/3738729/files/train_rows.csv
+> https://zenodo.org/record/3738729/files/test_rows_labels.csv
+> https://zenodo.org/record/3738729/files/test_rows.csv
+> ```
+>
+> {% include snippets/import_via_link.md %}
+>
+> 3. Rename the datasets as `train_rows`, `test_rows_labels` and `test_rows` respectively.
+>
+> {% include snippets/rename_dataset.md %}
+>
+{: .hands_on}
+
+The `train_rows` dataset contains a column `Class` which is the class label or target. We will evaluate our model on `test_rows` and compare the predicted class with the true class value in `test_rows_labels`.
+{: .comment}
+
+> ### {% icon details %} Preparing the data for classification
+>
+> Preparing the data involves the following major tasks:
+> 1. Data Cleaning: involves removing noise and treatment of missing values. The noise is removed by applying noise filtering techniques and the problem of missing values is solved by replacing a missing value with different techniques, for example substitution, mean imputation and regression imputation.
+> 2. Relevance Analysis: the database may also have attributes which are irrelevant for classification. Correlation analysis is used to know whether any two given attributes are related - e.g. one of the features and the target variable.
+> 3. Normalization: the data is transformed using normalization. Normalization involves scaling all values for a given attribute in order to make them fall within a small specified range. Normalization is used when neural networks or methods involving measurements are used in the learning step.
+>
+{: .details}
+
+# Learn the logistic regression classifier
+
+As the first step, to learn the mapping between several features and the classes, we will apply a linear classifier. It learns features from the training dataset and maps all the rows to their respective class. The process of mapping gives a trained model. [Logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) is named for the function used at the core of the method, the logistic function, and it is an instance of supervised classification in which we know the correct class label for each sample and the algorithm produces an estimate of the true class. We want to learn parameters (weight and bias for the line) that make the estimated class for each training observation as close as possible to the true class label. This requires two components: the first is a metric for how close the estimated class label is to the true label. Rather than measure similarity, we usually talk about the opposite of this, the distance between the classifier output and the desired output, and we call this distance the loss function or the cost function.
+
+The second thing we need is an optimization algorithm for iteratively updating the weights so as to minimize this loss function. The standard algorithm for this is gradient descent. So, the dataset is divided into two parts - training and test sets. The training set is used to train a classifier and the test set is used to evaluate the performance of the trained model.
+
+> ### {% icon hands_on %} Hands-on: Train logistic regression classifier
+>
+> 1. **Generalized linear models** {% icon tool %} with the following parameters to train the classifier:
+> - *"Select a Classification Task"*: `Train a model`
+> - *"Select a linear method"*: `Logistic Regression model`
+> - *"Select input type"*: `tabular data`
+> - {% icon param-file %} *"Training samples dataset"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Class`
+> - {% icon param-file %} *"Dataset containing class labels"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `Select columns by column header name(s)`
+> - {% icon param-text %} *"Select target column(s)"*: `Class`
+> 2. Rename the generated file to `LogisticRegression_model`
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> What is learned by the logistic regression model?
+>
+> > ### {% icon solution %} Solution
+> >
+> > In the logistic regression model, the coefficients of the logistic regression algorithm have been estimated from our training data. This is done using [maximum-likelihood estimation](https://en.wikipedia.org/wiki/Maximum_likelihood_estimation).
+> >
+> {: .solution}
+>
+{: .question}
+
+## Predict class using test dataset
+
+After learning on the training dataset, we should evaluate the performance on the test dataset to know whether the learning algorithm learned a good classifier from the training dataset or not. This classifier is used to predict a new sample and a similar accuracy is expected.
+
+Now, we will predict class in the test dataset using this classifier in order to see if it has learned important features which can generalize on a new dataset. The test dataset (`test_rows`) contains the same number of features but does not contain the `Class` column. This is predicted using the trained classifier.
+
+
+> ### {% icon hands_on %} Hands-on: Predict class using the logistic regression classifier
+>
+> 1. **Generalized linear models** {% icon tool %} with the following parameters to predict targets of test dataset using the trained model:
+> - *"Select a Classification Task"*: `Load a model and predict`
+> - {% icon param-file %} *"Models"*: `LogisticRegression_model`
+> - {% icon param-file %} *"Data (tabular)"*: `test_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Select the type of prediction"*: `Predict class labels`
+> 2. Rename the generated file to `LogisticRegression_result`
+{: .hands_on}
+
+## Visualize the logistic regression classification results
+
+We will evaluate the classification by comparing the predicted with the expected classes. In the previous step, we classified the test dataset (`LogisticRegression_result`). We have one more dataset (`test_rows_labels`) which contains the true class label of the test set. Using the true and predicted class labels in the test set, we will verify the performance by analyzing the plots. As you can see, `LogisticRegression_result` has no header, so first we should remove the header from `test_rows_labels` to compare.
+
+> ### {% icon hands_on %} Hands-on: Remove the header
+>
+> 1. **Remove beginning** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Remove first"*: `1`
+> - {% icon param-file %} *"from"*: `test_rows_labels.csv`
+> 2. Rename the generated file to `test_rows_labels_noheader.csv`
+{: .hands_on}
+
+
+Now we visualize and analyze the classification using the "Plot confusion matrix, precision, recall and ROC and AUC curves" tool.
+
+> ### {% icon hands_on %} Hands-on: Check and visualize the classification
+> 1. **Plot confusion matrix, precision, recall and ROC and AUC curves** {% icon tool %} with the following parameters to visualize the classification:
+> - {% icon param-file %} *"Select input data file"*: `test_rows_labels_noheader.csv`
+> - {% icon param-file %} *"Select predicted data file"*: `LogisticRegression_result`
+> - {% icon param-file %} *"Select trained model"*: `LogisticRegression_model`
+{: .hands_on}
+
+The visualization tool creates the following plots:
+
+1. [Confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix): The confusion matrix summarizes the classification performance of a classifier with respect to the test data. It is a two-dimensional matrix; the horizontal axis (x-axis) shows the predicted labels and the vertical axis (y-axis) shows the true labels. Each rectangular box shows a count of samples falling into the four output combinations (true class, predicted class) - (1, 0), (1, 1), (0, 1) and (0, 0). In Figure 2, the confusion matrix of the predictions is shown as a heatmap. For a good prediction, the diagonal running from top-left to bottom-right should contain a smaller number of samples, because it shows the counts of incorrectly predicted samples. Hovering over each box in Galaxy shows the true and predicted class labels and the count of samples.
+
+ ![confusion_matrix](images/confusion_matrix_linear.png "Confusion matrix for the logistic regression classifier. ")
+
+2. [Precision, recall and F1 score](https://en.wikipedia.org/wiki/Precision_and_recall): Precision, recall and F1 score. These scores determine the robustness of classification. It is important to analyze the plot for any classification task to verify the accuracy across different classes which provides more information about the balanced or imbalanced accuracy across multiple classes present in the dataset.
+
+ ![prf1_scores](images/precision_recall_linear.png "Precision, recall and F1 score for the logistic regression classifier.")
+
+3. [Receiver operator characteristics (ROC) and area under ROC (AUC)](https://towardsdatascience.com/understanding-auc-roc-curve-68b2303cc9c5): Receiver operator characteristics (ROC) and area under ROC (AUC). The ROC curve is shown in blue. For a good prediction, it should be more towards the top-left of this plot, which results in a high AUC value. For a bad prediction, it is close to the orange line (y = x), resulting in a low AUC value (closer to 0.5). An AUC value of exactly 0.5 means the prediction is doing no better than a random number generator at predicting the classes.
+
+ ![roc_scores](images/roc_linear.png "Receiver operator characteristics (ROC) and area under ROC (AUC) for the logistic regression classifier.")
+
+These plots are important to visualize the quality of the classifier and the true and predicted classes.
+
+
+> ### {% icon question %} Question
+>
+> Inspect the plots. What can you say about the classification?
+>
+> > ### {% icon solution %} Solution
+> >
+> > Figures 2, 3 and 4 show that the classification is acceptable, but as you will see in the next steps, the results can be improved.
+> >
+> {: .solution}
+{: .question}
+
+# K-Nearest Neighbor (KNN)
+
+As the second step, we will use the k-nearest neighbor classifier. In the [k-nearest neighbor](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) classifier, a sample is classified by a majority vote of its neighbors. The sample is assigned to the class which is most common among its k nearest neighbors. k is a positive integer and typically it is small. For example, if k = 1, then the sample is simply assigned to the class of that single nearest neighbor. Surprisingly, when the number of data points is large, this classifier is not that bad. Choosing the best value of k is very important. If k is too small, the classifier will be sensitive to noise points and if k is too large, the neighborhood may include points from other classes and cause errors. To select the k that is right for your data, we recommend running the KNN algorithm several times with different values of k and choosing the k that reduces the number of errors the most.
+
+> ### {% icon question %} Question
+>
+> What are the advantages and disadvantages of this model?
+>
+> > ### {% icon solution %} Solution
+> > Advantages:
+> > - It is a very simple algorithm to understand and interpret.
+> >
+> > - It is very useful for nonlinear data because there is no assumption of linearity in this algorithm.
+> >
+> > - It is a versatile algorithm, as we can use it for classification as well as regression.
+> >
+> > - It has relatively high accuracy, but there are much better supervised learning models than KNN.
+> >
+> > - It works very well in low dimensions for complex decision surfaces.
+> >
+> > Disadvantages:
+> >
+> > - Classification is slow, because it stores all the training data.
+> >
+> > - High memory storage required as compared to other supervised learning algorithms.
+> >
+> > - Prediction is slow in case of big training samples.
+> >
+> > - It is very sensitive to the scale of data as well as irrelevant features.
+> >
+> > - It suffers a lot from the curse of dimensionality.
+> >
+> {: .solution}
+{: .question}
+
+> ### {% icon hands_on %} Hands-on: Train k-nearest neighbor classifier
+>
+> 1. **Nearest Neighbors Classification** {% icon tool %} with the following parameters to train the classifier:
+> - *"Select a Classification Task"*: `Train a model`
+> - *"Classifier type"*: `Nearest Neighbors`
+> - *"Select input type"*: `tabular data`
+> - {% icon param-file %} *"Training samples dataset"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Class`
+> - {% icon param-file %} *"Dataset containing class labels"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `Select columns by column header name(s)`
+> - {% icon param-text %} *"Select target column(s)"*: `Class`
+> - {% icon param-select %} *"Neighbor selection method"*: `k-nearest neighbors`
+> 2. Rename the generated file to `NearestNeighbors_model`
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> What is the value of k (number of neighbors) for the model?
+>
+> > ### {% icon solution %} Solution
+> > As you can see in the Advanced Options, the default value for the number of neighbors is 5, and we used the default value. You can set this parameter based on your problem and data.
+> >
+> {: .solution}
+{: .question}
+
+Now, we should evaluate the performance on the test dataset to find out whether the KNN classifier has learned a good model from the training dataset or not.
+
+> ### {% icon hands_on %} Hands-on: Predict class using the k-nearest neighbor classifier
+>
+> 1. **Nearest Neighbors Classification** {% icon tool %} with the following parameters to predict targets of test dataset using the trained model:
+> - *"Select a Classification Task"*: `Load a model and predict`
+> - {% icon param-file %} *"Models"*: `NearestNeighbors_model`
+> - {% icon param-file %} *"Data (tabular)"*: `test_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Select the type of prediction"*: `Predict class labels`
+> 2. Rename the generated file to `NearestNeighbors_result`
+{: .hands_on}
+
+
+Now we visualize and analyze the classification. As you can see, `NearestNeighbors_result` has a header, so use `test_rows_labels` to compare.
+
+> ### {% icon hands_on %} Hands-on: Check and visualize the classification
+> 1. **Plot confusion matrix, precision, recall and ROC and AUC curves** {% icon tool %} with the following parameters to visualize the classification:
+> - {% icon param-file %} *"Select input data file"*: `test_rows_labels.csv`
+> - {% icon param-file %} *"Select predicted data file"*: `NearestNeighbors_result`
+> - {% icon param-file %} *"Select trained model"*: `NearestNeighbors_model`
+{: .hands_on}
+
+The visualization tool creates diagrams for the Confusion matrix, Precision, recall and F1 score, Receiver operator characteristics (ROC) and area under ROC (AUC) as follows:
+
+![confusion_matrix](images/confusion_matrix_NN.png "Confusion matrix for the k-nearest neighbor classifier.")
+
+![prf1_scores](images/precision_recall_NN.png "Precision, recall and F1 score for the k-nearest neighbor classifier.")
+
+![roc_scores](images/roc_NN.png "Receiver operator characteristics (ROC) and area under ROC (AUC) for the k-nearest neighbor classifier.")
+
+
+# Support Vector Machines (SVM)
+
+[Support Vector Machines](https://en.wikipedia.org/wiki/Support-vector_machine) (SVMs) have been extensively researched in the machine learning community for the last decade and actively applied to applications in various domains such as bioinformatics. SVM is a generalization of a classifier called maximal margin classifier and is introduced as a binary classifier intended to separate two classes when obtaining the optimal hyperplane and decision boundary. SVMs are based on the assumption that the input data can be linearly separable in a geometric space. The maximal margin classifier is simple, but it cannot be applied to the majority of datasets, since the classes must be separated by a linear boundary and this is often not the case when working with real world data. That is why the support vector classifier was introduced as an extension of the maximal margin classifier, which can be applied in a broader range of cases.
+
+To solve this problem, SVM uses kernel functions to map the input to a high-dimensional feature space, where a linear decision boundary, i.e. a hyperplane, is constructed in such a manner that the boundary maximises the margin between two classes. The kernel approach is simply an efficient computational approach for accommodating a non-linear boundary between classes.
+
+Without going into technical details, a kernel is a function that quantifies the similarity of two observations. Two special properties of SVMs are that they (1) achieve high generalization by maximizing the margin and (2) support efficient learning of nonlinear functions by the
+kernel trick. In the next step, we will build an SVM classifier with our data.
+
+> ### {% icon hands_on %} Hands-on: Train a SVM classifier
+>
+> 1. **Support vector machines (SVMs)** {% icon tool %} with the following parameters to train the classifier:
+> - *"Select a Classification Task"*: `Train a model`
+> - *"Select a linear method"*: `Linear Support Vector Classification`
+> - *"Select input type"*: `tabular data`
+> - {% icon param-file %} *"Training samples dataset"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Class`
+> - {% icon param-file %} *"Dataset containing class labels"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `Select columns by column header name(s)`
+> - {% icon param-text %} *"Select target column(s)"*: `Class`
+> 2. Rename the generated file to `SVM_model`
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> What is learned by the support vector machines?
+>
+> > ### {% icon solution %} Solution
+> >
+> > The coefficients of the line with the maximal margin in the kernel space are learned in the training phase.
+> >
+> {: .solution}
+>
+{: .question}
+
+
+Now we will evaluate the performance of the SVM classifier:
+
+> ### {% icon hands_on %} Hands-on: Predict class using the SVM classifier
+>
+> 1. **Support vector machines (SVMs)** {% icon tool %} with the following parameters to predict targets of test dataset using the trained model:
+> - *"Select a Classification Task"*: `Load a model and predict`
+> - {% icon param-file %} *"Models"*: `SVM_model`
+> - {% icon param-file %} *"Data (tabular)"*: `test_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Select the type of prediction"*: `Predict class labels`
+> 2. Rename the generated file to `SVM_result`
+{: .hands_on}
+
+
+Now let's visualize the results:
+
+> ### {% icon hands_on %} Hands-on: Check and visualize the classification
+> 1. **Plot confusion matrix, precision, recall and ROC and AUC curves** {% icon tool %} with the following parameters to visualize the classification:
+> - {% icon param-file %} *"Select input data file"*: `test_rows_labels.csv`
+> - {% icon param-file %} *"Select predicted data file"*: `SVM_result`
+> - {% icon param-file %} *"Select trained model"*: `SVM_model`
+{: .hands_on}
+
+The visualization tool creates the following ROC plot:
+
+![roc_scores](images/roc_svm.png "Receiver operator characteristics (ROC) and area under ROC (AUC) for the SVM classifier.")
+
+
+# Random Forest
+
+[Random forest](https://en.wikipedia.org/wiki/Random_forest) is an ensemble of decision trees, and is usually trained with the “bagging” method. The [Ensemble](https://scikit-learn.org/stable/modules/ensemble.html#ensemble) method uses multiple learning models internally for better predictions and the general idea of the bagging method is that a combination of learning models improves the overall result. It uses multiple decision tree classifiers internally and predicts by combining the predictions of the individual decision trees. It has good predictive power and is robust to outliers. It creates an ensemble of weak learners (decision trees) and iteratively minimizes error.
+
+One big advantage of random forest is that it can be used for both classification and regression problems. The main idea behind the random forest is adding additional randomness to the model, while growing the trees and instead of searching for the most important feature while splitting a node, it searches for the best feature among a random subset of features. This results in a better model because of wide diversity. Generally, the more trees in the forest, the more robust the model. Therefore, when using the random forest classifier, a larger number of trees in the forest gives higher accuracy results. Similarly there are two stages in the random forest algorithm; one is random forest creation, the other is to make a prediction from the random forest classifier created in the first stage.
+
+> ### {% icon hands_on %} Hands-on: Train random forest
+>
+> 1. **Ensemble methods** {% icon tool %} with the following parameters to train the classifier:
+> - *"Select a Classification Task"*: `Train a model`
+> - *"Select an ensemble method"*: `Random forest classifier`
+> - *"Select input type"*: `tabular data`
+> - {% icon param-file %} *"Training samples dataset"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Class`
+> - {% icon param-file %} *"Dataset containing class labels"*: `train_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `Select columns by column header name(s)`
+> - {% icon param-text %} *"Select target column(s)"*: `Class`
+> 2. Rename the generated file to `RandomForest_model`
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> What are the advantages of random forest classifier compared with KNN and SVM?
+>
+> > ### {% icon solution %} Solution
+> > 1. The random forest algorithm is less prone to overfitting, because it combines the predictions of many decision trees.
+> > 2. The same random forest algorithm can be used for both classification and regression tasks.
+> > 3. The random forest algorithm can be used for feature selection, which means identifying the most important features out of the available features from the training dataset.
+> {: .solution}
+>
+{: .question}
+
+
+After learning on the training dataset, we should evaluate the performance on the test dataset.
+
+> ### {% icon hands_on %} Hands-on: Predict targets using the random forest
+>
+> 1. **Ensemble methods** {% icon tool %} with the following parameters to predict targets of test dataset using the trained model:
+> - *"Select a Classification Task"*: `Load a model and predict`
+> - {% icon param-file %} *"Models"*: `RandomForest_model`
+> - {% icon param-file %} *"Data (tabular)"*: `test_rows.csv`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Select the type of prediction"*: `Predict class labels`
+> 2. Rename the generated file to `RandomForest_result`
+{: .hands_on}
+
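+Visualizing the results with the **Plot confusion matrix, precision, recall and ROC and AUC curves** tool, as in the previous sections, creates the following ROC plot: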
+
+![roc_scores](images/roc_rf.png "Receiver operator characteristics (ROC) and area under ROC (AUC) for the random forest classifier.")
+
+
+> ### {% icon question %} Question
+>
+> Inspect the plots. What can you say about the classification?
+>
+> > ### {% icon solution %} Solution
+> >
+> > Figures show that we achieved an AUC score of `1.0` for the test set using random forest. It means the prediction is very good, in fact it has no error at all. Unfortunately, this is not usually the case when dealing with chemical data.
+> {: .solution}
+{: .question}
+
+
+# Create data processing pipeline
+
+At the last step, we will create a bagging classifier by using the **Pipeline builder** tool. Bagging or Bootstrap Aggregating is a widely used ensemble learning algorithm in machine learning. The bagging algorithm creates multiple models from randomly taken subsets of the training dataset and then aggregates learners to build overall stronger classifiers that combine the predictions to produce a final prediction. The **Pipeline builder** tool builds the classifier and returns a zipped file.
+
+> ### {% icon hands_on %} Hands-on: Create pipeline
+>
+> 1. **Pipeline builder** {% icon tool %} with the following parameters:
+> - In *"Final Estimator"*:
+> - *"Choose the module that contains target estimator"*: `sklearn.ensemble`
+> - *"Choose estimator class"*: `BaggingClassifier`
+> - In *"Output parameters for searchCV?"*: `Yes`
+>
+> We choose `Final Estimator` as we have only the estimator and no preprocessor and need the parameters of only the estimator.
+>
+{: .hands_on}
+
+
+## Extract hyperparameters
+
+We use the **Estimator attributes** tool to get a list of different hyperparameters of the estimator. This tool creates a tabular file with a list of all the different hyperparameters of the preprocessors and estimators. This tabular file will be used in the **Hyperparameter search** tool to populate the list of hyperparameters with their respective (default) values.
+
+> ### {% icon hands_on %} Hands-on: Estimator attributes
+>
+> 1. **Estimator attributes** {% icon tool %} with the following parameters:
+> - {% icon param-files %} *"Choose the dataset containing estimator/pipeline object"*: `final estimator builder` file (output of **Pipeline builder** {% icon tool %})
+> - *"Select an attribute retrieval type"*: `Estimator - get_params()`
+>
+{: .hands_on}
+
+## Search for the best values of hyperparameters
+
+After extracting the parameter names from the **Pipeline builder** file, we will use the **Hyperparameter search** tool to find the best values for each hyperparameter. These values will lead us to create the best model based on the search space chosen for each hyperparameter. We use only one parameter `n_estimators` of `BaggingClassifier` for this task. This parameter specifies the number of bagging stages the learning process has to go through. The default value of `n_estimators` for this classifier is `10`. However, we are not sure if this gives the best accuracy. Therefore, it is important to set this parameter to different values to find the optimal one. We choose some values which are less than `10` and a few which are more than `10`. The hyperparameter search will look for the optimal number of estimators and give the best-trained model as one of the outputs. This model is used in the next step to classify the test dataset.
+
+> ### {% icon hands_on %} Hands-on: Hyperparameter search
+>
+> 1. **Hyperparameter search** {% icon tool %} with the following parameters:
+> - *"Select a model selection search scheme"*: `GridSearchCV - Exhaustive search over specified parameter values for an estimator `
+> - {% icon param-files %} *"Choose the dataset containing pipeline/estimator object"*: `zipped` file (output of **Pipeline builder** {% icon tool %})
+> - {% icon param-check %} *"Is the estimator a deep learning model?"*: `NO`
+> - In *"Search parameters Builder"*:
+> - {% icon param-files %} *"Choose the dataset containing parameter names"*: `tabular` file (output of **Estimator attributes** {% icon tool %})
+> - In *"Parameter settings for search"*:
+> - {% icon param-repeat %} *"1: Parameter settings for search"*
+> - *"Choose a parameter name (with current value)"*: `n_estimators: 10`
+> - *"Search list"*: `[5,10,20,50]`
+> - *"Select input type"*: `tabular data`
+> - {% icon param-files %} *"Training samples dataset"*: `train_rows` tabular file
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `All columns BUT by column header name(s)`
+> - *"Type header name(s)"*: `Class`
+> - {% icon param-files %} *"Dataset containing class labels or target values"*: `train_rows` tabular file
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `Select columns by column header name(s)`
+> - *"Type header name(s)"*: `Class`
+> - *"Whether to hold a portion of samples for test exclusively?"*: `Nope`
+> - *"Save best estimator?"*: `Fitted best estimator or Detailed cv_results_from nested CV`
+>
+{: .hands_on}
+
+> ### {% icon question %} Question
+>
+> What is the optimal number of estimators for the given dataset?
+>
+> Hint: Please look at the `mean_test_score` column in the tabular result from the **Hyperparameter search** tool.
+>
+> > ### {% icon solution %} Solution
+> >
+> > 20 - even though the default value of the number of estimators for Bagging Classifier is `10`, `20` gives the best accuracy. That's why it is important to perform hyperparameter search to tune these parameters for any dataset.
+> >
+> {: .solution}
+>
+{: .question}
+
+Using the **Hyperparameter search** tool, we found the best model, based on the training data. Now, we will predict the class labels of the test dataset using this model.
+
+> ### {% icon hands_on %} Hands-on: Predict class labels
+>
+> 1. **Ensemble methods for classification and regression** {% icon tool %} with the following parameters:
+> - *"Select a Classification Task"*: `Load a model and predict`
+> - {% icon param-files %} *"Models"*: `zipped` file (output of **Hyperparameter search** {% icon tool %})
+> - {% icon param-files %} *"Data (tabular)"*: `test_rows` tabular file
+> - *"Does the dataset contain header"*: `Yes`
+>
+{: .hands_on}
+
+
+Now we will verify the performance by creating and inspecting the plots:
+
+
+![confusion_matrix](images/confusion_matrix_bagging.png "Confusion matrix for the bagging classifier.")
+
+![prf1_scores](images/precision_recall_bagging.png "Precision, recall and F1 score for the bagging classifier.")
+
+![roc_scores](images/roc_bagging.png "Receiver operator characteristics (ROC) and area under ROC (AUC) for the bagging classifier.")
+
+
+Figure 13 shows that we again achieved an AUC value of `1.00`, which shows that our model is highly effective at predicting whether or not a molecule is biodegradable.
+
+
+# Conclusion
+By following these steps, we learned how to build classifiers and visualize the classification results using Galaxy's machine learning and plotting tools. The features of the training dataset are mapped to the classes. This mapping is used to make predictions on an unseen (test) dataset. The quality of classifiers is visualized using a plotting tool.
+
+There are multiple other classification algorithms that can be tried out on this dataset and on other datasets as well; a few are simpler to use (with fewer parameters) and some are more powerful. Different datasets can also be analyzed using these classifiers. The classifiers have many parameters which can be altered while performing the analyses to see if they affect the classification accuracy. It may be beneficial to perform a hyperparameter search to tune these parameters for different datasets. In addition, we learned the relevance of machine learning algorithms for QSAR analyses and constructed a model which successfully predicted an important chemical property - the biodegradability of a substance.
diff --git a/topics/statistics/tutorials/classification_machinelearning/workflows/index.md b/topics/statistics/tutorials/classification_machinelearning/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/statistics/tutorials/classification_machinelearning/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/statistics/tutorials/classification_machinelearning/workflows/ml_classification.ga b/topics/statistics/tutorials/classification_machinelearning/workflows/ml_classification.ga
new file mode 100644
index 00000000000000..2c72e3346557a8
--- /dev/null
+++ b/topics/statistics/tutorials/classification_machinelearning/workflows/ml_classification.ga
@@ -0,0 +1,903 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Classification in Machine Learning",
+ "tags":[
+ "statistics",
+ "classification",
+ "ml",
+ "cheminformatics"
+ ],
+ "format-version": "0.1",
+ "name": "ml_classification",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "train_rows.csv"
+ }
+ ],
+ "label": "train_rows.csv",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 200
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"train_rows.csv\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "2b808085-c925-4327-8310-e45702344b59",
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "test_rows_labels.csv"
+ }
+ ],
+ "label": "test_rows_labels.csv",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 320
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"test_rows_labels.csv\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "b04a2c8d-d019-49cd-8ef3-f2565cc9a2d5",
+ "workflow_outputs": []
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_generalized_linear/sklearn_generalized_linear/1.0.8.2",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "selected_tasks|infile_data": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "selected_tasks|infile_model": {
+ "id": 4,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Generalized linear models",
+ "outputs": [
+ {
+ "name": "outfile_predict",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_generalized_linear/sklearn_generalized_linear/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "3e4a7684d402",
+ "name": "sklearn_generalized_linear",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"true\\\", \\\"infile_data\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile_model\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"prediction_options\\\": {\\\"__current_case__\\\": 0, \\\"prediction_option\\\": \\\"predict\\\"}, \\\"selected_task\\\": \\\"load\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "4835c73b-4115-4b33-b9c0-423c59d7c84a",
+ "workflow_outputs": []
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_nn_classifier/sklearn_nn_classifier/1.0.8.2",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "selected_tasks|infile_data": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "selected_tasks|infile_model": {
+ "id": 5,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Nearest Neighbors Classification",
+ "outputs": [
+ {
+ "name": "outfile_predict",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_nn_classifier/sklearn_nn_classifier/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "39ae3c043096",
+ "name": "sklearn_nn_classifier",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"true\\\", \\\"infile_data\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile_model\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"prediction_options\\\": {\\\"__current_case__\\\": 0, \\\"prediction_option\\\": \\\"predict\\\"}, \\\"selected_task\\\": \\\"load\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "1ed20e3a-e850-4e29-abf9-767ee22077df",
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_svm_classifier/sklearn_svm_classifier/1.0.8.2",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "selected_tasks|infile_data": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "selected_tasks|infile_model": {
+ "id": 6,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Support vector machines (SVMs)",
+ "outputs": [
+ {
+ "name": "outfile_predict",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_svm_classifier/sklearn_svm_classifier/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "d2afc87db26b",
+ "name": "sklearn_svm_classifier",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"true\\\", \\\"infile_data\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile_model\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"prediction_options\\\": {\\\"__current_case__\\\": 0, \\\"prediction_option\\\": \\\"predict\\\"}, \\\"selected_task\\\": \\\"load\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "ca471e7c-00f3-4571-84a8-966163604716",
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "selected_tasks|infile_data": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "selected_tasks|infile_model": {
+ "id": 7,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Ensemble methods",
+ "outputs": [
+ {
+ "name": "outfile_predict",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "3ab7af14f1b5",
+ "name": "sklearn_ensemble",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"true\\\", \\\"infile_data\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile_model\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"prediction_options\\\": {\\\"__current_case__\\\": 0, \\\"prediction_option\\\": \\\"predict\\\"}, \\\"selected_task\\\": \\\"load\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "c74df17c-0e1d-4c49-9114-01ba21dd59d9",
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_searchcv/sklearn_searchcv/1.0.8.2",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "input_options|infile1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "input_options|infile2": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "search_schemes|infile_estimator": {
+ "id": 3,
+ "output_name": "outfile"
+ },
+ "search_schemes|search_params_builder|infile_params": {
+ "id": 9,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Hyperparameter Search",
+ "outputs": [
+ {
+ "name": "outfile_result",
+ "type": "tabular"
+ },
+ {
+ "name": "outfile_object",
+ "type": "zip"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 680
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_searchcv/sklearn_searchcv/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "1ae5dfd5ac17",
+ "name": "sklearn_searchcv",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"search_schemes\": \"{\\\"__current_case__\\\": 0, \\\"infile_estimator\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"is_deep_learning\\\": \\\"false\\\", \\\"options\\\": {\\\"cv_selector\\\": {\\\"__current_case__\\\": 0, \\\"n_splits\\\": \\\"3\\\", \\\"selected_cv\\\": \\\"default\\\"}, \\\"error_score\\\": \\\"true\\\", \\\"iid\\\": \\\"true\\\", \\\"return_train_score\\\": \\\"false\\\", \\\"scoring\\\": {\\\"__current_case__\\\": 0, \\\"primary_scoring\\\": \\\"default\\\"}, \\\"verbose\\\": \\\"0\\\"}, \\\"search_params_builder\\\": {\\\"infile_params\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"param_set\\\": [{\\\"__index__\\\": 0, \\\"sp_list\\\": \\\"[5,10,20,50]\\\", \\\"sp_name\\\": \\\"n_estimators\\\"}]}, \\\"selected_search_scheme\\\": \\\"GridSearchCV\\\"}\", \"__rerun_remap_job_id__\": null, \"save\": \"\\\"save_estimator\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input_options\": \"{\\\"__current_case__\\\": 0, \\\"column_selector_options_1\\\": {\\\"__current_case__\\\": 3, \\\"col1\\\": \\\"Class\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"column_selector_options_2\\\": {\\\"__current_case__\\\": 2, \\\"col2\\\": \\\"Class\\\", \\\"selected_column_selector_option2\\\": \\\"by_header_name\\\"}, \\\"header1\\\": \\\"true\\\", \\\"header2\\\": \\\"true\\\", \\\"infile1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input\\\": \\\"tabular\\\"}\", \"outer_split\": \"{\\\"__current_case__\\\": 0, \\\"split_mode\\\": \\\"no\\\"}\"}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "ea214a5b-d3f0-42ba-bc38-409c188126a7",
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "infile_input": {
+ "id": 8,
+ "output_name": "out_file1"
+ },
+ "infile_output": {
+ "id": 10,
+ "output_name": "outfile_predict"
+ },
+ "infile_trained_model": {
+ "id": 4,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Plot confusion matrix, precision, recall and ROC and AUC curves",
+ "outputs": [
+ {
+ "name": "output_confusion",
+ "type": "html"
+ },
+ {
+ "name": "output_prf",
+ "type": "html"
+ },
+ {
+ "name": "output_roc",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "tool_shed_repository": {
+ "changeset_revision": "62e3a4e8c54c",
+ "name": "plotly_ml_performance_plots",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"infile_trained_model\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"infile_output\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"infile_input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "0.2",
+ "type": "tool",
+ "uuid": "8ae35e8c-cbdb-4549-974b-ef506157119c",
+ "workflow_outputs": []
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "infile_input": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "infile_output": {
+ "id": 11,
+ "output_name": "outfile_predict"
+ },
+ "infile_trained_model": {
+ "id": 5,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Plot confusion matrix, precision, recall and ROC and AUC curves",
+ "outputs": [
+ {
+ "name": "output_confusion",
+ "type": "html"
+ },
+ {
+ "name": "output_prf",
+ "type": "html"
+ },
+ {
+ "name": "output_roc",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "tool_shed_repository": {
+ "changeset_revision": "62e3a4e8c54c",
+ "name": "plotly_ml_performance_plots",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"infile_trained_model\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"infile_output\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"infile_input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "0.2",
+ "type": "tool",
+ "uuid": "ffcdace4-4bbe-4612-a8fd-30dea3b2d242",
+ "workflow_outputs": []
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "infile_input": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "infile_output": {
+ "id": 12,
+ "output_name": "outfile_predict"
+ },
+ "infile_trained_model": {
+ "id": 6,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Plot confusion matrix, precision, recall and ROC and AUC curves",
+ "outputs": [
+ {
+ "name": "output_confusion",
+ "type": "html"
+ },
+ {
+ "name": "output_prf",
+ "type": "html"
+ },
+ {
+ "name": "output_roc",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "tool_shed_repository": {
+ "changeset_revision": "62e3a4e8c54c",
+ "name": "plotly_ml_performance_plots",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"infile_trained_model\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"infile_output\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"infile_input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "0.2",
+ "type": "tool",
+ "uuid": "13a307c9-c589-49c6-98c0-5d4c4d6c18d2",
+ "workflow_outputs": []
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "infile_input": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "infile_output": {
+ "id": 13,
+ "output_name": "outfile_predict"
+ },
+ "infile_trained_model": {
+ "id": 7,
+ "output_name": "outfile_fit"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Plot confusion matrix, precision, recall and ROC and AUC curves",
+ "outputs": [
+ {
+ "name": "output_confusion",
+ "type": "html"
+ },
+ {
+ "name": "output_prf",
+ "type": "html"
+ },
+ {
+ "name": "output_roc",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "tool_shed_repository": {
+ "changeset_revision": "62e3a4e8c54c",
+ "name": "plotly_ml_performance_plots",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"infile_trained_model\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"infile_output\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"infile_input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "0.2",
+ "type": "tool",
+ "uuid": "d6df48d4-3a4c-41a4-b53b-c04533b16742",
+ "workflow_outputs": []
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "selected_tasks|infile_data": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "selected_tasks|infile_model": {
+ "id": 14,
+ "output_name": "outfile_object"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Ensemble methods",
+ "outputs": [
+ {
+ "name": "outfile_predict",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 680
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "3ab7af14f1b5",
+ "name": "sklearn_ensemble",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"true\\\", \\\"infile_data\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile_model\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"prediction_options\\\": {\\\"__current_case__\\\": 0, \\\"prediction_option\\\": \\\"predict\\\"}, \\\"selected_task\\\": \\\"load\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "aa1bed69-0ee6-4ade-b9b2-6d2ce89bb36e",
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "test_rows.csv"
+ }
+ ],
+ "label": "test_rows.csv",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 440
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"test_rows.csv\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "0c0a1583-9e8c-4382-af2b-feae2d1fc845",
+ "workflow_outputs": []
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "infile_input": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "infile_output": {
+ "id": 19,
+ "output_name": "outfile_predict"
+ },
+ "infile_trained_model": {
+ "id": 14,
+ "output_name": "outfile_object"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Plot confusion matrix, precision, recall and ROC and AUC curves",
+ "outputs": [
+ {
+ "name": "output_confusion",
+ "type": "html"
+ },
+ {
+ "name": "output_prf",
+ "type": "html"
+ },
+ {
+ "name": "output_roc",
+ "type": "html"
+ }
+ ],
+ "position": {
+ "left": 1080,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/plotly_ml_performance_plots/plotly_ml_performance_plots/0.2",
+ "tool_shed_repository": {
+ "changeset_revision": "62e3a4e8c54c",
+ "name": "plotly_ml_performance_plots",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"infile_trained_model\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"infile_output\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"infile_input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "0.2",
+ "type": "tool",
+ "uuid": "635ba9ca-1f96-44ff-970e-d2877b12292c",
+ "workflow_outputs": []
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_build_pipeline/sklearn_build_pipeline/1.0.8.2",
+ "errors": null,
+ "id": 3,
+ "input_connections": {},
+ "inputs": [],
+ "label": null,
+ "name": "Pipeline Builder",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "zip"
+ },
+ {
+ "name": "outfile_params",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 200,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_build_pipeline/sklearn_build_pipeline/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "97dce66fe158",
+ "name": "sklearn_build_pipeline",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"pipeline_component\": \"[{\\\"__index__\\\": 0, \\\"component_selector\\\": {\\\"__current_case__\\\": 0, \\\"component_type\\\": \\\"None\\\"}}]\", \"__rerun_remap_job_id__\": null, \"get_params\": \"\\\"true\\\"\", \"final_estimator\": \"{\\\"estimator_selector\\\": {\\\"__current_case__\\\": 2, \\\"selected_estimator\\\": \\\"BaggingClassifier\\\", \\\"selected_module\\\": \\\"ensemble\\\", \\\"text_params\\\": \\\"\\\"}}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "c83ed09b-1377-4f27-a39e-21bc55c45054",
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_generalized_linear/sklearn_generalized_linear/1.0.8.2",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "selected_tasks|selected_algorithms|input_options|infile1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "selected_tasks|selected_algorithms|input_options|infile2": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Generalized linear models",
+ "outputs": [
+ {
+ "name": "outfile_fit",
+ "type": "zip"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_generalized_linear/sklearn_generalized_linear/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "3e4a7684d402",
+ "name": "sklearn_generalized_linear",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 1, \\\"selected_algorithms\\\": {\\\"__current_case__\\\": 5, \\\"input_options\\\": {\\\"__current_case__\\\": 0, \\\"column_selector_options_1\\\": {\\\"__current_case__\\\": 3, \\\"col1\\\": \\\"Class\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"column_selector_options_2\\\": {\\\"__current_case__\\\": 2, \\\"col2\\\": \\\"Class\\\", \\\"selected_column_selector_option2\\\": \\\"by_header_name\\\"}, \\\"header1\\\": \\\"true\\\", \\\"header2\\\": \\\"true\\\", \\\"infile1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input\\\": \\\"tabular\\\"}, \\\"options\\\": {\\\"C\\\": \\\"1.0\\\", \\\"dual\\\": \\\"false\\\", \\\"fit_intercept\\\": \\\"true\\\", \\\"intercept_scaling\\\": \\\"1.0\\\", \\\"max_iter\\\": \\\"100\\\", \\\"multi_class\\\": \\\"ovr\\\", \\\"penalty\\\": \\\"l2\\\", \\\"random_state\\\": \\\"\\\", \\\"solver\\\": \\\"liblinear\\\", \\\"tol\\\": \\\"0.0001\\\", \\\"warm_start\\\": \\\"false\\\"}, \\\"selected_algorithm\\\": \\\"LogisticRegression\\\"}, \\\"selected_task\\\": \\\"train\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "e8b9c2b7-9512-42e2-b0dc-2e04ed0d4d4f",
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_nn_classifier/sklearn_nn_classifier/1.0.8.2",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "selected_tasks|selected_algorithms|input_options|infile1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "selected_tasks|selected_algorithms|input_options|infile2": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Nearest Neighbors Classification",
+ "outputs": [
+ {
+ "name": "outfile_fit",
+ "type": "zip"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_nn_classifier/sklearn_nn_classifier/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "39ae3c043096",
+ "name": "sklearn_nn_classifier",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 1, \\\"selected_algorithms\\\": {\\\"__current_case__\\\": 0, \\\"input_options\\\": {\\\"__current_case__\\\": 0, \\\"column_selector_options_1\\\": {\\\"__current_case__\\\": 3, \\\"col1\\\": \\\"Class\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"column_selector_options_2\\\": {\\\"__current_case__\\\": 2, \\\"col2\\\": \\\"Class\\\", \\\"selected_column_selector_option2\\\": \\\"by_header_name\\\"}, \\\"header1\\\": \\\"true\\\", \\\"header2\\\": \\\"true\\\", \\\"infile1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input\\\": \\\"tabular\\\"}, \\\"sampling_methods\\\": {\\\"__current_case__\\\": 0, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"leaf_size\\\": \\\"30\\\", \\\"n_neighbors\\\": \\\"5\\\", \\\"weights\\\": \\\"uniform\\\"}, \\\"sampling_method\\\": \\\"KNeighborsClassifier\\\"}, \\\"selected_algorithm\\\": \\\"nneighbors\\\"}, \\\"selected_task\\\": \\\"train\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "1c441ec4-0f66-436b-a762-e92c646f3185",
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_svm_classifier/sklearn_svm_classifier/1.0.8.2",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "selected_tasks|selected_algorithms|input_options|infile1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "selected_tasks|selected_algorithms|input_options|infile2": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Support vector machines (SVMs)",
+ "outputs": [
+ {
+ "name": "outfile_fit",
+ "type": "zip"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_svm_classifier/sklearn_svm_classifier/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "d2afc87db26b",
+ "name": "sklearn_svm_classifier",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 1, \\\"selected_algorithms\\\": {\\\"__current_case__\\\": 2, \\\"input_options\\\": {\\\"__current_case__\\\": 0, \\\"column_selector_options_1\\\": {\\\"__current_case__\\\": 3, \\\"col1\\\": \\\"Class\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"column_selector_options_2\\\": {\\\"__current_case__\\\": 2, \\\"col2\\\": \\\"Class\\\", \\\"selected_column_selector_option2\\\": \\\"by_header_name\\\"}, \\\"header1\\\": \\\"true\\\", \\\"header2\\\": \\\"true\\\", \\\"infile1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input\\\": \\\"tabular\\\"}, \\\"options\\\": {\\\"C\\\": \\\"1.0\\\", \\\"dual\\\": \\\"true\\\", \\\"fit_intercept\\\": \\\"true\\\", \\\"intercept_scaling\\\": \\\"1.0\\\", \\\"loss\\\": \\\"squared_hinge\\\", \\\"max_iter\\\": \\\"1000\\\", \\\"multi_class\\\": \\\"ovr\\\", \\\"penalty\\\": \\\"l2\\\", \\\"random_state\\\": \\\"\\\", \\\"tol\\\": \\\"0.001\\\"}, \\\"selected_algorithm\\\": \\\"LinearSVC\\\"}, \\\"selected_task\\\": \\\"train\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "c09be878-c47d-4af8-90a2-8f9f171f89c4",
+ "workflow_outputs": []
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "selected_tasks|selected_algorithms|input_options|infile1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "selected_tasks|selected_algorithms|input_options|infile2": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Ensemble methods",
+ "outputs": [
+ {
+ "name": "outfile_fit",
+ "type": "zip"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 680
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_ensemble/sklearn_ensemble/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "3ab7af14f1b5",
+ "name": "sklearn_ensemble",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"selected_tasks\": \"{\\\"__current_case__\\\": 1, \\\"selected_algorithms\\\": {\\\"__current_case__\\\": 0, \\\"input_options\\\": {\\\"__current_case__\\\": 0, \\\"column_selector_options_1\\\": {\\\"__current_case__\\\": 3, \\\"col1\\\": \\\"Class\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"column_selector_options_2\\\": {\\\"__current_case__\\\": 2, \\\"col2\\\": \\\"Class\\\", \\\"selected_column_selector_option2\\\": \\\"by_header_name\\\"}, \\\"header1\\\": \\\"true\\\", \\\"header2\\\": \\\"true\\\", \\\"infile1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"infile2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input\\\": \\\"tabular\\\"}, \\\"options\\\": {\\\"bootstrap\\\": \\\"true\\\", \\\"criterion\\\": \\\"gini\\\", \\\"max_depth\\\": \\\"\\\", \\\"max_leaf_nodes\\\": \\\"\\\", \\\"min_samples_leaf\\\": \\\"1\\\", \\\"min_samples_split\\\": \\\"2\\\", \\\"min_weight_fraction_leaf\\\": \\\"0.0\\\", \\\"n_estimators\\\": \\\"100\\\", \\\"oob_score\\\": \\\"false\\\", \\\"random_state\\\": \\\"\\\", \\\"select_max_features\\\": {\\\"__current_case__\\\": 0, \\\"max_features\\\": \\\"auto\\\"}, \\\"warm_start\\\": \\\"false\\\"}, \\\"selected_algorithm\\\": \\\"RandomForestClassifier\\\"}, \\\"selected_task\\\": \\\"train\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "9aad05c6-ee98-48fa-b6f7-9afb41bb59b2",
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "Remove beginning1",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Remove beginning",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "Remove beginning1",
+ "tool_state": "{\"input\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"num_lines\": \"\\\"1\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "a4e05203-d91f-416b-92fb-c93c63c10c7d",
+ "workflow_outputs": []
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_estimator_attributes/sklearn_estimator_attributes/1.0.8.2",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "infile_object": {
+ "id": 3,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Estimator attributes",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 800
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_estimator_attributes/sklearn_estimator_attributes/1.0.8.2",
+ "tool_shed_repository": {
+ "changeset_revision": "bd27a211182a",
+ "name": "sklearn_estimator_attributes",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"attribute_type\": \"\\\"get_params\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile_object\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null}",
+ "tool_version": "1.0.8.2",
+ "type": "tool",
+ "uuid": "dfb666ff-c252-4eb1-a6d0-2fbbed72b3e0",
+ "workflow_outputs": []
+ }
+ },
+ "uuid": "8bd5d426-c45e-401a-8274-613f3708a1d3",
+ "version": 1
+}
diff --git a/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC-test.yml b/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC-test.yml
new file mode 100644
index 00000000000000..8fabf7846f4ebc
--- /dev/null
+++ b/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC-test.yml
@@ -0,0 +1,28 @@
+---
+- doc: Test Linear SVC classifier workflow
+ job:
+ breast-w_train:
+ class: File
+ location: https://zenodo.org/record/3248907/files/breast-w_train.tsv
+ filetype: tabular
+ breast-w_test:
+ class: File
+ location: https://zenodo.org/record/3248907/files/breast-w_test.tsv
+ filetype: tabular
+ breast-w_targets:
+ class: File
+ location: https://zenodo.org/record/3248907/files/breast-w_targets.tsv
+ filetype: tabular
+ outputs:
+ output_roc:
+ asserts:
+ has_text:
+ text: ''
+ output_confusion:
+ asserts:
+ has_text:
+ text: ''
+ output_prf:
+ asserts:
+ has_text:
+ text: ''
diff --git a/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC.ga b/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC.ga
index 3e91c0fdcf7e52..c5ac4fd2a172f5 100644
--- a/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC.ga
+++ b/topics/statistics/tutorials/classification_regression/workflows/classification_LSVC.ga
@@ -215,17 +215,17 @@
{
"output_name": "output_roc",
"uuid": "0fac82f3-c1b2-482f-a31f-30b623a0f223",
- "label": null
+ "label": "output_roc"
},
{
"output_name": "output_confusion",
"uuid": "431f3596-7b80-44f0-83ee-dda8c58e1e8c",
- "label": null
+ "label": "output_confusion"
},
{
"output_name": "output_prf",
"uuid": "ab62de11-fa69-4d59-9ebe-d5d0562f3c18",
- "label": null
+ "label": "output_prf"
}
],
"input_connections": {
diff --git a/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting-test.yml b/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting-test.yml
new file mode 100644
index 00000000000000..da2374725774a8
--- /dev/null
+++ b/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting-test.yml
@@ -0,0 +1,28 @@
+---
+- doc: Test Gradient Boosting regressor workflow
+ job:
+ body_fat_train:
+ class: File
+ location: https://zenodo.org/record/3248907/files/body_fat_train.tsv
+ filetype: tabular
+ body_fat_test:
+ class: File
+ location: https://zenodo.org/record/3248907/files/body_fat_test.tsv
+ filetype: tabular
+ body_fat_test_labels:
+ class: File
+ location: https://zenodo.org/record/3248907/files/body_fat_test_labels.tsv
+ filetype: tabular
+ outputs:
+ output_scatter_plot:
+ asserts:
+ has_text:
+ text: ''
+ output_actual_vs_pred:
+ asserts:
+ has_text:
+ text: ''
+ output_residual_plot:
+ asserts:
+ has_text:
+ text: ''
diff --git a/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting.ga b/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting.ga
index 3d6d20412e7659..342eeb6b079250 100644
--- a/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting.ga
+++ b/topics/statistics/tutorials/classification_regression/workflows/regression_GradientBoosting.ga
@@ -215,17 +215,17 @@
{
"output_name": "output_scatter_plot",
"uuid": "cf1a5f94-f79d-4d13-b142-c3e20e25b658",
- "label": null
+ "label": "output_scatter_plot"
},
{
"output_name": "output_actual_vs_pred",
"uuid": "e86ba3fd-3f85-4ba9-ad66-4c1e5caa875a",
- "label": null
+ "label": "output_actual_vs_pred"
},
{
"output_name": "output_residual_plot",
"uuid": "2cbc04d3-d2d9-4623-b63d-51f1b024d611",
- "label": null
+ "label": "output_residual_plot"
}
],
"input_connections": {
diff --git a/topics/statistics/tutorials/clustering_machinelearning/data-library.yaml b/topics/statistics/tutorials/clustering_machinelearning/data-library.yaml
new file mode 100644
index 00000000000000..81fb7aea9492bb
--- /dev/null
+++ b/topics/statistics/tutorials/clustering_machinelearning/data-library.yaml
@@ -0,0 +1,27 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Statistics and machine learning
+ description: Using clustering algorithms as unsupervised machine learning methods with Galaxy tools
+ items:
+ - name: Clustering in Machine Learning
+ items:
+ - name: 'DOI: 10.5281/zenodo.3813447'
+ description: latest
+ items:
+ - url: https://zenodo.org/record/3813447/files/iris.csv
+ src: url
+ ext: csv
+ info: https://zenodo.org/record/3813447#.XrVJJHUzaV4
+ - url: https://zenodo.org/record/3813447/files/moon.csv
+ src: url
+ ext: csv
+ info: https://zenodo.org/record/3813447#.XrVJJHUzaV4
+ - url: https://zenodo.org/record/3813447/files/circles.csv
+ src: url
+ ext: csv
+ info: https://zenodo.org/record/3813447#.XrVJJHUzaV4
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_1.png b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_1.png
new file mode 100644
index 00000000000000..f47fc6fb2ba5a1
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_1.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_2.png b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_2.png
new file mode 100644
index 00000000000000..9ed9c6d597f1ea
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_clustering_2.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_iris.png b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_iris.png
new file mode 100644
index 00000000000000..c0e628edc6eaf9
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/Hierarchical_iris.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/circles_clustering.png b/topics/statistics/tutorials/clustering_machinelearning/images/circles_clustering.png
new file mode 100644
index 00000000000000..ce37687b796346
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/circles_clustering.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/circles_moon_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/circles_moon_scatter.png
new file mode 100644
index 00000000000000..00881faf220977
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/circles_moon_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/circles_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/circles_scatter.png
new file mode 100644
index 00000000000000..fdf4fe9f556d28
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/circles_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/data_after_clustering.png b/topics/statistics/tutorials/clustering_machinelearning/images/data_after_clustering.png
new file mode 100644
index 00000000000000..db3ad843fc60e6
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/data_after_clustering.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/data_before_clustering.png b/topics/statistics/tutorials/clustering_machinelearning/images/data_before_clustering.png
new file mode 100644
index 00000000000000..825456ec32c4a7
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/data_before_clustering.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_circles.png b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_circles.png
new file mode 100644
index 00000000000000..87150941450c10
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_circles.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_moon.png b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_moon.png
new file mode 100644
index 00000000000000..73f2aa0a6f42ec
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_moon.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_scatter.png
new file mode 100644
index 00000000000000..d1ae7ed3e47da0
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/dbscan_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_circles.png b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_circles.png
new file mode 100644
index 00000000000000..69e27ec42cd8f0
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_circles.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_moon.png b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_moon.png
new file mode 100644
index 00000000000000..518175a2a55132
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_moon.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_scatter.png
new file mode 100644
index 00000000000000..bf285e94170835
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/hierarchical_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/iris_flowers.png b/topics/statistics/tutorials/clustering_machinelearning/images/iris_flowers.png
new file mode 100644
index 00000000000000..db18d025167247
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/iris_flowers.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/k_means_circles.png b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_circles.png
new file mode 100644
index 00000000000000..e7a198695c6c8d
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_circles.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/k_means_moon.png b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_moon.png
new file mode 100644
index 00000000000000..bc103ffba76a66
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_moon.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/k_means_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_scatter.png
new file mode 100644
index 00000000000000..3293eeabd7a099
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/k_means_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/moon_clustering.png b/topics/statistics/tutorials/clustering_machinelearning/images/moon_clustering.png
new file mode 100644
index 00000000000000..62bf77ba695a8e
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/moon_clustering.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/moon_scatter.png b/topics/statistics/tutorials/clustering_machinelearning/images/moon_scatter.png
new file mode 100644
index 00000000000000..69ea5585597714
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/moon_scatter.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/number_of_clusters.png b/topics/statistics/tutorials/clustering_machinelearning/images/number_of_clusters.png
new file mode 100644
index 00000000000000..6c1ac389a11c98
Binary files /dev/null and b/topics/statistics/tutorials/clustering_machinelearning/images/number_of_clusters.png differ
diff --git a/topics/statistics/tutorials/clustering_machinelearning/images/raceid_distance.svg b/topics/statistics/tutorials/clustering_machinelearning/images/raceid_distance.svg
new file mode 100644
index 00000000000000..b7a91922561aba
--- /dev/null
+++ b/topics/statistics/tutorials/clustering_machinelearning/images/raceid_distance.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/topics/statistics/tutorials/clustering_machinelearning/tutorial.md b/topics/statistics/tutorials/clustering_machinelearning/tutorial.md
new file mode 100644
index 00000000000000..a36ecae980f5ed
--- /dev/null
+++ b/topics/statistics/tutorials/clustering_machinelearning/tutorial.md
@@ -0,0 +1,484 @@
+---
+layout: tutorial_hands_on
+
+title: 'Clustering in Machine Learning'
+zenodo_link: https://zenodo.org/record/3813447
+questions:
+- How to use clustering algorithms to categorize data in different clusters
+objectives:
+- Learn clustering background
+- Learn hierarchical clustering algorithm
+- Learn k-means clustering algorithm
+- Learn DBSCAN clustering algorithm
+- Apply clustering algorithms to different data sets
+- Learn how to visualize the results
+key_points:
+- Using clustering methods, the categories and clusters in the data are learned using the hierarchical, k-means and DBSCAN algorithms
+- For each clustering algorithm, number of clusters and the algorithm parameters should be optimised based on the dataset
+time_estimation: 2H
+contributors:
+- khanteymoori
+- anuprulez
+---
+
+# Introduction
+{:.no_toc}
+
+The goal of unsupervised learning is to discover hidden structure or patterns in unlabeled training data. In this tutorial we will discuss an unsupervised learning task called clustering.
+Clustering is the grouping of specific objects based on their characteristics and their similarities. These groups are called clusters. A cluster consists of data objects with high intra-cluster similarity and low inter-cluster similarity. This means that members of the same cluster are more similar to each other by a given metric than they are to the members of the other clusters. The goal of clustering is to subdivide a set of items in such a way that similar items fall into the same cluster, whereas dissimilar items fall in different clusters. This brings up two questions: first, how do we decide what is similar; and second, how do we use this to cluster the items?
+Clustering is central to much of bioinformatics research. In particular, clustering helps in analyzing unstructured and high-dimensional data in the form of sequences, expressions, texts and images. For example, clustering of gene expression data helps in understanding gene functions, cellular processes, subtypes of cells and gene regulation.
+
+For example, in biology, clustering is often one of the first steps in gene expression analysis and is used to find groups of genes with similar expression patterns.
+
+We represent an observation as an n-dimensional vector. For example, assume that your training data consists of the samples plotted in the following Figure 1:
+
+> ![data](images/data_before_clustering.png "Sample data before clustering")
+
+Clustering reveals the following three groups, indicated by different colors:
+
+> ![data](images/data_after_clustering.png "Sample data after clustering")
+
+
+Broadly speaking, clustering can be divided into two subgroups :
+
+- Hard Clustering: Each data point either belongs to a cluster completely or not.
+
+- Soft Clustering: Instead of putting each data point into a separate cluster, a probability or likelihood of that data point to be in those clusters is assigned.
+
+The goal of clustering is to determine the internal grouping in a set of unlabeled data. But how to decide what constitutes a good clustering? It can be shown that there is no absolute best criterion which would be independent of the final aim of the clustering. Consequently, the users should apply an appropriate criterion based on the problem.
+
+
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+
+# Types of clustering algorithms
+
+Since clustering is a subjective task, there are many algorithms for data clustering. Every method follows a different set of rules for defining similarity. Many clustering algorithms are known, but only a few of them are widely used, and we can categorize them as follows:
+
+ - Connectivity models: As the name suggests, these models are based on the notion that the data points closer in data space exhibit more similarity to each other than the data points lying farther away. These models can follow two approaches. In the first approach, they start with classifying all data points into separate clusters and then aggregating them as the distance decreases. In the second approach, all data points are classified as a single cluster and then partitioned as the distance increases. Also, the choice of distance function is subjective. These models are very easy to interpret but lack scalability for handling big datasets. Examples of these models are the hierarchical clustering algorithm and its variants.
+
+ - Centroid models: These are iterative clustering algorithms in which the notion of similarity is derived by the closeness of a data point to the centroid of the clusters. K-means clustering algorithm is a popular algorithm that falls into this category. In these models, the number of clusters required at the end have to be mentioned beforehand, which makes it important to have prior knowledge of the dataset. These models run iteratively to find the local optima.
+
+ - Density Models: These models search the data space for areas of varied density of data points. They isolate regions of different density and assign the data points within these regions to the same cluster. A popular example of a density model is DBSCAN.
+
+ - Distribution models: These clustering algorithms are based on the notion of how probable it is that all data points in the cluster belong to the same distribution, for example a normal (Gaussian) distribution. These models often suffer from overfitting. A popular example of these models is the expectation-maximization algorithm, which uses multivariate normal distributions.
+
+
+Now, in this tutorial, we will be taking you through three of the most popular clustering algorithms in detail, hierarchical clustering, k-means and DBSCAN and a comparison between these methods.
+
+In the following, we will discuss these clustering algorithms and their parameters, and how to apply them to categorize the iris data and some other data sets.
+
+
+# Clustering Distance Measures
+
+Since clustering is the grouping of similar objects, some sort of measure that can determine whether two objects are similar or dissimilar is required. There are two main types of measures used to estimate this relation: distance measures and similarity measures. The notions of distance and similarity are related, since the smaller the distance between two objects, the more similar they are to each other. All measures refer to the feature values in some way, but they consider different properties of the feature vector. There is no optimal similarity measure, since the usage depends on the task.
+
+Many clustering algorithms use distance measures to determine the similarity or dissimilarity between any pair of objects. A valid distance measure should be symmetric and obtain its minimum value (usually zero) in the case of identical vectors.
+The clustering requires some methods for computing the distance or the (dis)similarity between each pair of observations. The result of this computation is known as a dissimilarity or distance matrix.
+
+The choice of distance measures is a critical step in clustering. It defines how the similarity of two elements (x, y) is calculated and it will influence the shape of the
+clusters. The classical distance measures are Euclidean and Manhattan distances and for most common clustering tools, the default distance measure is Euclidean. If Euclidean distance is chosen, then observations with high values of features will be clustered together. The same holds true for observations with low values of features. In Figure 3, we are trying to group the cells using Euclidean distance and this distance matrix.
+
+![Distances](images/raceid_distance.svg "Euclidean distance between three points (R, P, V) across three features (G1, G2, G3)")
+
+
+ > ### {% icon question %} Questions
+ >
+ > 1. Why are there zeroes along the diagonal of the above example distance matrix?
+ > 1. Is there any symmetry in this matrix?
+ >
+ > > ### {% icon solution %} Solution
+ > >
+ > > 1. The distance between a point to itself is zero.
+ > > 1. The distance between point *a* to point *b* is the same as the distance between point *b* to point *a* using the Euclidean distance metric.
+ > >
+ > {: .solution }
+ >
+ {: .question }
+
+Other dissimilarity measures exist such as correlation-based distances, which are widely used for gene expression data analyses. Correlation-based distance considers two objects to be similar if their features are highly correlated, even though the observed values may be far apart in terms of Euclidean distance. The distance between two objects is 0 when they are perfectly correlated. Pearson’s correlation is quite sensitive to outliers. This does not matter
+when clustering samples, because the correlation is over thousands of genes. When clustering genes, it is important to be aware of the possible impact of outliers. This
+can be mitigated by using Spearman’s correlation instead of Pearson’s correlation.
+
+# Hierarchical Clustering
+
+Before seeing hierarchical clustering in action, let us first understand the theory behind it. Hierarchical clustering, as the name suggests, is an algorithm that builds a hierarchy of clusters. This algorithm starts with all the data points assigned to a cluster of their own. Then the two nearest clusters are merged into the same cluster. In the end, this algorithm terminates when there is only a single cluster left.
+
+Following are the steps that are performed during hierarchical clustering:
+
+1. In the beginning, every data point in the dataset is treated as a cluster which means that we have N clusters at the beginning of the algorithm.
+
+2. The distance between all the points is calculated and the two points closest to each other are joined together to form a cluster.
+
+3. Next, the point which is closest to the cluster formed in step 2, will be joined to the cluster.
+
+4. Steps 2 and 3 are repeated until one big cluster is formed.
+
+5. Finally, the big cluster is divided into K small clusters with the help of dendrograms.
+
+Let’s now see how dendrograms help in hierarchical clustering.
+
+> ![data](images/Hierarchical_clustering_1.png "Hierarchical Clustering")
+
+At the bottom, we start with data points, each assigned to separate clusters. Two closest clusters are then merged till we have just one cluster at the top. The height in the dendrogram at which two clusters are merged represents the distance between two clusters in the data space.
+
+The decision of the number of clusters that can best depict different groups can be chosen by observing the dendrogram. The best choice of the number of clusters is the number of vertical lines in the dendrogram cut by a horizontal line that can traverse the maximum distance vertically without intersecting a cluster.
+
+In this example, the best choice for the number of clusters is 4, as the red horizontal line in the dendrogram below covers the maximum vertical distance AB.
+> ![data](images/Hierarchical_clustering_2.png "Hierarchical Clustering")
+
+
+This algorithm has been implemented above using a bottom-up approach. It is also possible to follow a top-down approach, starting with all data points assigned to the same cluster and recursively performing splits until each data point is assigned to a separate cluster.
+The decision of merging two clusters is taken on the basis of closeness of these clusters.
+
+
+Now we will apply hierarchical clustering to Iris data set, and learn to measure its performance. At the first step, we should upload the Iris data set.
+
+> ### {% icon comment %} Background
+> The Iris flower data set or Fisher’s Iris data set is a multivariate dataset introduced by the British statistician and biologist Ronald Fisher in his 1936 paper ({% cite Fisher1936 %}).
+> Each row of the table represents an iris flower, including its species and dimensions of its botanical parts, sepal and petal, in centimeters.
+> For more history of this dataset read here [Wikipedia](https://en.wikipedia.org/wiki/Iris_flower_data_set).
+{: .comment}
+
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. **Import** {% icon galaxy-upload %} the file `iris.csv` from [Zenodo](https://zenodo.org/record/3813447/files/iris.csv) or from the data library
+>
+> ```
+> https://zenodo.org/record/3813447/files/iris.csv
+> ```
+>
+> {% include snippets/import_via_link.md %}
+> {% include snippets/import_from_data_library.md %}
+>
+>
+> 2. **Rename** {% icon galaxy-pencil %} the dataset to `iris`
+>
+> {% include snippets/rename_dataset.md %}
+>
+> 3. Check the **datatype**
+> - Click on the history item to expand it to get more information.
+> - The datatype of the iris dataset should be `csv`.
+> - **Change** {% icon galaxy-pencil %} the datatype *if* it is different than `csv`.
+> - Option 1: Datatypes can be **autodetected**
+> - Option 2: Datatypes can be **manually set**
+>
+> {% include snippets/detect_datatype.md datatype="datatypes" %}
+> {% include snippets/change_datatype.md datatype="csv" %}
+>
+{: .hands_on}
+
+Our objective is to categorize the similar flowers in different groups (Figure 6). We know that we have **3** species of iris flowers (setosa, versicolor, virginica) with
+**50** samples for each. These species look very much alike as shown on the figure below.
+
+![3 species of Iris flowers](images/iris_flowers.png "3 species of Iris flowers")
+
+In our dataset, we have the following features measured for each sample: Petal length, Petal width, Sepal length, Sepal width
+
+Figure 7 shows the dendrogram of these data.
+
+> ![data](images/Hierarchical_iris.png "Iris Data Hierarchical Clustering")
+
+
+For the clustering purpose, the tools we will use require tab-separated files, so in the first step we will convert the file from comma-separated to tab-separated format. Galaxy has built-in format converters we can use for this.
+
+
+> ### {% icon hands_on %} Hands-on: Converting dataset format
+>
+> 1. **Convert** {% icon galaxy-pencil %} the CSV file (comma-separated values) to tabular format (tsv; tab-separated values)
+>
+> {% include snippets/convert_datatype.md conversion="Convert CSV to Tabular" %}
+>
+> 2. **Rename** {% icon galaxy-pencil %} the resulting dataset to `iris tabular`
+>
+>
+{: .hands_on}
+
+As the first step, we apply the hierarchical clustering algorithm to the `iris tabular` dataset:
+
+> ### {% icon hands_on %} Hands-on: Hierarchical clustering
+>
+> 1. **Numeric Clustering** {% icon tool %} with the following clustering parameters:
+> - *"Select the format of input data"*: `Tabular Format (tabular,txt)`
+> - {% icon param-file %} *"Data file with numeric values"*: `iris tabular`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Species`
+> - {% icon param-select %} *"Clustering Algorithm"*: `Hierarchical Agglomerative Clustering`
+> - In *"Advanced option"*
+> - {% icon param-text %} *"Number of clusters"*: `2`
+> - {% icon param-select %} *"Affinity"*: `Euclidean`
+> - {% icon param-select %} *"Linkage"*: `ward`
+> 2. Rename the generated file to `Hierarchical clustering`
+{: .hands_on}
+
+If you view the result table, you can see the last column is the label for each cluster and as you see, all the setosa samples are grouped in one cluster and two other species (versicolor and virginica) are grouped in the second cluster. From Figure 3 it is obvious that versicolor and virginica are more similar to each other.
+
+## Visualize the clustering results
+
+The resulting candidate clustering can be visualized using the Scatterplot w ggplot2 tool. Each sample is color-coded according to the cluster it has been assigned to.
+Let's visualize the clustering results to see how groups have been built.
+
+> ### {% icon hands_on %} Hands-on: Visualize hierarchical clustering result
+>
+> 1. **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: **Hierarchical clustering**
+> - *"Column to plot on x-axis"*: `1`
+> - *"Column to plot on y-axis"*: `2`
+> - *"Plot title"*: `Hierarchical Clustering in Iris data`
+> - *"Label for x axis"*: `Sepal length`
+> - *"Label for y axis"*: `Sepal width`
+> - In *"Advanced Options"*:
+> - *"Data point options"*: `User defined point options`
+> - *"relative size of points"*: `2.0`
+> - *"Plotting multiple groups"*: `Plot multiple groups of data on one plot`
+> - *"column differentiating the different groups"*: `6`
+> - *"Color schemes to differentiate your groups"*: `Set 2 - predefined color pallete`
+>
+> 2. **View** {% icon galaxy-eye%} the resulting plot:
+>
+> 3. Rename to `Hierarchical scatter plot`
+{: .hands_on}
+
+> ![data](images/hierarchical_scatter.png "Hierarchical Clustering Scatter Plot")
+
+
+# K-means Algorithm
+
+K-means clustering is the most commonly used unsupervised machine learning algorithm for partitioning a given data set into a set of k clusters, where k represents the number of groups pre-specified by the user.
+In k-means clustering, each cluster is represented by its center or centroid which corresponds to the mean of points assigned to the cluster. The basic idea behind k-means clustering consists of defining clusters so that the total intra-cluster variation is minimized.
+K-means is popular because of its speed and scalability. There are several k-means algorithms available. The standard algorithm defines the total within-cluster variation as the sum of squared Euclidean distances between items and the corresponding centroid. K is a hyperparameter of the algorithm, and the k-means algorithm can be summarized as follows:
+
+1. Specify the number of clusters (K) to be created (by the analyst)
+
+2. Select randomly k objects from the data set as the initial cluster centers or means
+
+3. Assign each observation to their closest centroid, based on the Euclidean distance between the object and the centroid
+
+4. For each of the k clusters update cluster centroid by calculating the new mean values of all the data points in the cluster.
+
+5. Iteratively minimize the total within-cluster sum of squares. That is, iterate steps 3 and 4 until the cluster assignments stop changing or the maximum number of
+iterations is reached.
+
+The parameters that minimize the cost function are learned through an iterative process of assigning observations to clusters and then moving the clusters. The basic restriction for k-means algorithm is that your data should be continuous in nature. It won’t work if data is categorical in nature.
+
+> ### {% icon hands_on %} Hands-on: K-means clustering
+>
+> 1. **Numeric Clustering** {% icon tool %} with the following clustering parameters:
+> - *"Select the format of input data"*: `Tabular Format (tabular,txt)`
+> - {% icon param-file %} *"Data file with numeric values"*: `iris tabular`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Species`
+> - {% icon param-select %} *"Clustering Algorithm"*: `KMeans`
+> - In *"Advanced option"*
+> - {% icon param-text %} *"Number of clusters"*: `2`
+> 2. Rename the generated file to `k-means clustering`
+{: .hands_on}
+
+
+> ### {% icon hands_on %} Hands-on: Visualize k-means clustering result
+>
+> 1. **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: **k-means clustering**
+> - *"Column to plot on x-axis"*: `1`
+> - *"Column to plot on y-axis"*: `2`
+> - *"Plot title"*: `K-means Clustering in Iris data`
+> - *"Label for x axis"*: `Sepal length`
+> - *"Label for y axis"*: `Sepal width`
+> - In *"Advanced Options"*:
+> - *"Data point options"*: `User defined point options`
+> - *"relative size of points"*: `2.0`
+> - *"Plotting multiple groups"*: `Plot multiple groups of data on one plot`
+> - *"column differentiating the different groups"*: `6`
+> - *"Color schemes to differentiate your groups"*: `Set 2 - predefined color pallete`
+>
+> 2. **View** {% icon galaxy-eye%} the resulting plot:
+>
+> 3. Rename to `k-means scatter plot`
+{: .hands_on}
+
+> ![data](images/k_means_scatter.png "K-means Clustering Scatter Plot")
+
+
+> ### {% icon question %} Question
+>
+> How to choose the right number of expected clusters (k)?
+>
+>
+> > ### {% icon solution %} Solution
+> >
+> > A major difficulty with k-means is the choice of the number of clusters. Different methods have been proposed to solve this problem.
+> > Here, we provide a simple solution. The idea is to compute k-means clustering using different values of k. Next, the within-cluster sum of squares is plotted against the number of clusters. The location of a bend (knee) in the plot is generally considered as an indicator of the appropriate number of clusters.
+> > ![data](images/number_of_clusters.png "Optimal number of clusters")
+> > The plot above represents the variance within the clusters. It decreases as k increases, but a bend (or “elbow”) can be seen at k = 4. This bend indicates that
+> > additional clusters beyond the fourth have little value.
+> {: .solution}
+{: .question}
+
+
+
+> ### {% icon question %} Question
+>
+> What are the differences between k-means and hierarchical clustering?
+>
+> > ### {% icon solution %} Solution
+> >
+> > Hierarchical clustering can’t handle big data well but k-means clustering can. This is because the time complexity of k-means is linear, i.e. O(n), while that of hierarchical clustering is quadratic, i.e. O(n²).
+> >
+> > In k-means clustering, since we start with a random choice of clusters, the results produced by running the algorithm multiple times might differ, whereas the results of hierarchical clustering are reproducible.
+> >
+> > K-means is found to work well when the shape of the clusters is hyper spherical (like circle in 2D, sphere in 3D).
+> >
+> > K-means clustering requires prior knowledge of K i.e. no. of clusters you want to divide your data into. But, you can stop at whatever number of clusters you find appropriate in hierarchical clustering by interpreting the dendrogram
+> >
+> {: .solution}
+{: .question}
+
+
+# Clustering with the DBSCAN algorithm
+
+DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a popular clustering algorithm that views clusters as areas of high density separated by areas of low density. Due to this rather generic view, clusters found by DBSCAN can be any shape, as opposed to k-means which assumes that clusters are convex shaped. The central component of DBSCAN is the concept of core samples, which are samples that are in areas of high density. A cluster is therefore a set of core samples, each close to each other (measured by some distance measure), and a set of non-core samples that are close to a core sample (but are not themselves core samples). There are two important parameters in the DBSCAN algorithm, the maximum neighborhood distance (eps) and the minimal core point density (min_samples), which formally define what we mean when we say dense: min_samples is the number of samples in a neighborhood for a point to be considered as a core point, and eps is the maximum distance between two samples for one to be considered as in the neighborhood of the other. Higher min_samples or lower eps indicate a higher density necessary to form a cluster.
+DBSCAN does not require one to specify the number of clusters in the data a priori, as opposed to k-means.
+
+
+> ### {% icon hands_on %} Hands-on: DBSCAN clustering
+>
+> 1. **Numeric Clustering** {% icon tool %} with the following clustering parameters:
+> - *"Select the format of input data"*: `Tabular Format (tabular,txt)`
+> - {% icon param-file %} *"Data file with numeric values"*: `iris tabular`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All columns EXCLUDING some by column header name(s)`
+> - {% icon param-text %} *"Type header name(s)"*: `Species`
+> - {% icon param-select %} *"Clustering Algorithm"*: `DBSCAN`
+> 2. Rename the generated file to `DBSCAN clustering`
+{: .hands_on}
+
+
+> ### {% icon hands_on %} Hands-on: Visualize DBSCAN clustering result
+>
+> 1. **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: **DBSCAN clustering**
+> - *"Column to plot on x-axis"*: `1`
+> - *"Column to plot on y-axis"*: `2`
+> - *"Plot title"*: `DBSCAN Clustering in Iris data`
+> - *"Label for x axis"*: `Sepal length`
+> - *"Label for y axis"*: `Sepal width`
+> - In *"Advanced Options"*:
+> - *"Data point options"*: `User defined point options`
+> - *"relative size of points"*: `2.0`
+> - *"Plotting multiple groups"*: `Plot multiple groups of data on one plot`
+> - *"column differentiating the different groups"*: `6`
+> - *"Color schemes to differentiate your groups"*: `Set 2 - predefined color pallete`
+>
+> 2. **View** {% icon galaxy-eye%} the resulting plot:
+>
+> 3. Rename to `DBSCAN scatter plot`
+{: .hands_on}
+
+> ![data](images/dbscan_scatter.png "DBSCAN Clustering Scatter Plot")
+
+
+You will also notice that the blue point in the plot is not contained within any cluster. DBSCAN does not necessarily categorize every data point, and is therefore very good at handling outliers in the dataset.
+
+> ### {% icon question %} Question
+>
+> How can we evaluate the clustering results?
+>
+>
+> > ### {% icon solution %} Solution
+> >
+> > Clustering is an unsupervised learning task; there are no labels or ground truth to compare with the clusters. However, we can still evaluate the performance of
+> > the algorithm using intrinsic measures.
+> > There is a performance measure for clustering evaluation which is called the silhouette coefficient. The silhouette coefficient is a measure of the compactness and separation of the clusters.
+> > It increases as the quality of the clusters increases; it is large for compact clusters that are far from each other and small for large, overlapping clusters. The silhouette coefficient is calculated per instance; for a set of instances, it is calculated as the mean of the individual sample scores.
+> {: .solution}
+{: .question}
+
+# Applying the clustering algorithms on the other datasets
+
+You can do the same steps on the other data sets, moon and circles. First, import the data files, [moon.csv](https://zenodo.org/record/3813447/files/moon.csv) and [circles.csv](https://zenodo.org/record/3813447/files/circles.csv) from Zenodo or data library and rename them to `moon` and `circles` respectively.
+
+> ### {% icon hands_on %} Hands-on: Visualize scatter plot of data
+>
+> 1. **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: input datasets selected with **Multiple datasets**
+>
+> {% include snippets/select_multiple_datasets.md %}
+> - *"Column to plot on x-axis"*: `1`
+> - *"Column to plot on y-axis"*: `2`
+> - *"Plot title"*: `Scatter Plot`
+> - *"Label for x axis"*: `X`
+> - *"Label for y axis"*: `Y`
+> 2. **View** {% icon galaxy-eye%} the resulting plots
+{: .hands_on}
+
+> ![data](images/circles_moon_scatter.png "Scatter Plot of Circles and Moon Data")
+
+Now you can cluster these data using the introduced algorithms.
+
+> ### {% icon hands_on %} Hands-on: Hierarchical clustering of circles data
+>
+> 1. **Numeric Clustering** {% icon tool %} with the following clustering parameters:
+> - *"Select the format of input data"*: `Tabular Format (tabular,txt)`
+> - {% icon param-file %} *"Data file with numeric values"*: `circles tabular`
+> - {% icon param-check %} *"Does the dataset contain header"*: `Yes`
+> - {% icon param-select %} *"Choose how to select data by column"*: `All`
+> - {% icon param-select %} *"Clustering Algorithm"*: `Hierarchical Agglomerative Clustering`
+> - In *"Advanced option"*
+> - {% icon param-text %} *"Number of clusters"*: `2`
+> - {% icon param-select %} *"Affinity"*: `Euclidean`
+> - {% icon param-select %} *"Linkage"*: `ward`
+> 2. Rename the generated file to `circles hierarchical clustering`
+{: .hands_on}
+
+
+Then, you can visualize the clustering result using the following steps:
+
+> ### {% icon hands_on %} Hands-on: Visualize hierarchical clustering result on circles data
+>
+> 1. **Scatterplot w ggplot2** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input tabular dataset"*: **circles hierarchical clustering**
+> - *"Column to plot on x-axis"*: `1`
+> - *"Column to plot on y-axis"*: `2`
+> - *"Plot title"*: `Hierarchical Clustering in Circles data`
+> - *"Label for x axis"*: `X`
+> - *"Label for y axis"*: `Y`
+> - In *"Advanced Options"*:
+> - *"Data point options"*: `User defined point options`
+> - *"relative size of points"*: `2.0`
+> - *"Plotting multiple groups"*: `Plot multiple groups of data on one plot`
+> - *"column differentiating the different groups"*: `3`
+> - *"Color schemes to differentiate your groups"*: `Set 2 - predefined color pallete`
+>
+> 2. **View** {% icon galaxy-eye%} the resulting plot:
+{: .hands_on}
+
+In the next steps, you can apply these three algorithms (hierarchical, k-means and DBSCAN) in similar steps to the moon and circles datasets. For the k-means algorithm, use k=2. For the DBSCAN algorithm, the parameters differ from the defaults and you should set them as follows: for the circles data set (Maximum neighborhood distance=0.2 and Minimal core point density=5) and for the moon data set (Maximum neighborhood distance=0.3 and Minimal core point density=4). You can see the scatter plots of the clustering results in Figure 13 and Figure 14.
+
+> ![data](images/circles_clustering.png "Plot of Clustering Algorithms on Circles Data")
+
+> ![data](images/moon_clustering.png "Plot of Clustering Algorithms on Moon Data")
+
+
+# Conclusion
+
+In this tutorial, we discussed clustering algorithms, which are used to discover structure in unlabeled data. You learned about the hierarchical, k-means and DBSCAN algorithms. By following these steps, we learned how to perform clustering and visualize results using clustering and plotting tools respectively in Galaxy. There are many other clustering approaches which can be tried out on these datasets to find how they perform. Different datasets can also be analysed using these algorithms. The clustering algorithms have some parameters which can be altered while performing the analyses to see if they affect the clustering or not. In using clustering algorithms, we need to take care of some important aspects like treating outliers in our data and making sure each cluster has sufficient population. Some data pre-processors can also be used to clean the datasets.
+
+
+
+
+
+
+
+
+
diff --git a/topics/statistics/tutorials/clustering_machinelearning/workflows/clustering.ga b/topics/statistics/tutorials/clustering_machinelearning/workflows/clustering.ga
new file mode 100644
index 00000000000000..a05745d123c474
--- /dev/null
+++ b/topics/statistics/tutorials/clustering_machinelearning/workflows/clustering.ga
@@ -0,0 +1,885 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Clustering in Machine Learning",
+ "tags":[
+ "statistics",
+ "clustering",
+ "ml"
+ ],
+ "format-version": "0.1",
+ "name": "Clustering in Machine Learning",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "iris"
+ }
+ ],
+ "label": "iris",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 200
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"iris\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "8ea6762f-5566-4e0d-85f2-f6ff2db47c1b",
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "circles"
+ }
+ ],
+ "label": "circles",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 320
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"circles\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "3eb71a88-7eab-4f3c-9956-9a1cf50c4dff",
+ "workflow_outputs": []
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 1040
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 1, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"eps\\\": \\\"0.3\\\", \\\"leaf_size\\\": \\\"30\\\", \\\"metric\\\": \\\"euclidean\\\", \\\"min_samples\\\": \\\"4\\\"}, \\\"selected_algorithm\\\": \\\"DBSCAN\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "fbaf39ad-cf3a-40a0-ad65-5eda71381360",
+ "workflow_outputs": []
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 1160
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 0, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"copy_x\\\": \\\"true\\\", \\\"init\\\": \\\"k-means++\\\", \\\"max_iter\\\": \\\"300\\\", \\\"n_clusters\\\": \\\"2\\\", \\\"n_init\\\": \\\"10\\\", \\\"random_state\\\": \\\"\\\", \\\"tol\\\": \\\"0.0001\\\"}, \\\"selected_algorithm\\\": \\\"KMeans\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "20d7b0cf-c234-441d-9b16-1f640e57acab",
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 3,
+ "output_name": "tabular"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 7, \\\"options\\\": {\\\"affinity\\\": \\\"euclidean\\\", \\\"linkage\\\": \\\"ward\\\", \\\"n_clusters\\\": \\\"2\\\"}, \\\"selected_algorithm\\\": \\\"AgglomerativeClustering\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 3, \\\"col\\\": \\\"Species\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "b25334f4-01d0-433a-8a20-7f6f96a407af",
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 3,
+ "output_name": "tabular"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 0, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"copy_x\\\": \\\"true\\\", \\\"init\\\": \\\"k-means++\\\", \\\"max_iter\\\": \\\"300\\\", \\\"n_clusters\\\": \\\"2\\\", \\\"n_init\\\": \\\"10\\\", \\\"random_state\\\": \\\"\\\", \\\"tol\\\": \\\"0.0001\\\"}, \\\"selected_algorithm\\\": \\\"KMeans\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 3, \\\"col\\\": \\\"Species\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "0a23c938-2d2b-4edf-93d2-5ca0fee61ea4",
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 3,
+ "output_name": "tabular"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 1, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"eps\\\": \\\"0.5\\\", \\\"leaf_size\\\": \\\"30\\\", \\\"metric\\\": \\\"euclidean\\\", \\\"min_samples\\\": \\\"5\\\"}, \\\"selected_algorithm\\\": \\\"DBSCAN\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 3, \\\"col\\\": \\\"Species\\\", \\\"selected_column_selector_option\\\": \\\"all_but_by_header_name\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "9b17d7ba-7d03-44aa-a6b1-8105f55a4b0f",
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input1": {
+ "id": 5,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 680
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Hierarchical Clustering on Circles Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "86da9f0e-b190-4def-b4a5-6e62fb2b6fd3",
+ "workflow_outputs": []
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input1": {
+ "id": 6,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 800
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"K-means Clustering on Circles Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "0df119aa-bdfb-4fd3-9062-98faae31ed04",
+ "workflow_outputs": []
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "input1": {
+ "id": 7,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 1040
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"DBSCAN Clustering on Circles Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "16c3f360-fccf-4638-884a-307f91c7c7bb",
+ "workflow_outputs": []
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "input1": {
+ "id": 9,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Hierarchical Clustering on Moon Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "70b1ca6c-19ba-41c8-8707-fe25a0f83f5f",
+ "workflow_outputs": []
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "input1": {
+ "id": 10,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 1160
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"DBSCAN Clustering on Moon Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "06c68d29-3008-4c83-b8db-5076dc67da11",
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "moon"
+ }
+ ],
+ "label": "moon",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 440
+ },
+ "tool_id": null,
+ "tool_state": "{\"name\": \"moon\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "84e54369-42be-439e-993c-c211e27c1b80",
+ "workflow_outputs": []
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "input1": {
+ "id": 11,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 640,
+ "top": 920
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"3\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"K-means Clustering on Moon Data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "d62fb208-c3ba-472e-af50-d757cde6a837",
+ "workflow_outputs": []
+ },
+ "21": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 21,
+ "input_connections": {
+ "input1": {
+ "id": 12,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"6\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"Sepal length\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Hierarchical Clustering in Iris data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Sepal width\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "fa31d028-44bc-4ebf-9123-1fdb5bddb477",
+ "workflow_outputs": []
+ },
+ "22": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 22,
+ "input_connections": {
+ "input1": {
+ "id": 13,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"6\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"Sepal length\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"K-means Clustering in Iris data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Sepal width\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "497d818f-d5a8-43fd-b8ee-163497561918",
+ "workflow_outputs": []
+ },
+ "23": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 23,
+ "input_connections": {
+ "input1": {
+ "id": 14,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 860,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 1, \\\"colororder\\\": \\\"1\\\", \\\"colors\\\": \\\"Set2\\\", \\\"factorcol\\\": \\\"6\\\", \\\"factoring\\\": \\\"Single\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 1, \\\"alpha\\\": \\\"1.0\\\", \\\"pointcolor\\\": \\\"black\\\", \\\"pointoptions\\\": \\\"defined\\\", \\\"size\\\": \\\"2.0\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"Sepal length\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"DBSCAN Clustering in Iris data\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Sepal width\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "27049b9b-3f88-4bc0-95e1-42e6866bd593",
+ "workflow_outputs": []
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "csv_to_tabular",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "csv": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Convert CSV to tabular",
+ "outputs": [
+ {
+ "name": "tabular",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "csv_to_tabular",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"csv\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "9ade0904-a651-424a-b058-2bebf4cf7a6d",
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input1": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 320
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 0, \\\"factoring\\\": \\\"Default\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 0, \\\"pointoptions\\\": \\\"default\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Scatter Plot\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "1e1131ee-189a-4913-9844-cecf1e5b8cae",
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 7, \\\"options\\\": {\\\"affinity\\\": \\\"euclidean\\\", \\\"linkage\\\": \\\"ward\\\", \\\"n_clusters\\\": \\\"2\\\"}, \\\"selected_algorithm\\\": \\\"AgglomerativeClustering\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "24e6e79d-e4e5-4ba7-8e9b-cb7177b096ba",
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 800
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 0, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"copy_x\\\": \\\"true\\\", \\\"init\\\": \\\"k-means++\\\", \\\"max_iter\\\": \\\"300\\\", \\\"n_clusters\\\": \\\"2\\\", \\\"n_init\\\": \\\"10\\\", \\\"random_state\\\": \\\"\\\", \\\"tol\\\": \\\"0.0001\\\"}, \\\"selected_algorithm\\\": \\\"KMeans\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "c186cbdf-7e09-4e0e-9df1-25c84117af04",
+ "workflow_outputs": []
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 920
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 1, \\\"options\\\": {\\\"algorithm\\\": \\\"auto\\\", \\\"eps\\\": \\\"0.2\\\", \\\"leaf_size\\\": \\\"30\\\", \\\"metric\\\": \\\"euclidean\\\", \\\"min_samples\\\": \\\"5\\\"}, \\\"selected_algorithm\\\": \\\"DBSCAN\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "4623fe42-e4ac-4af0-88fc-e811e3fc1799",
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input1": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scatterplot with ggplot2",
+ "outputs": [
+ {
+ "name": "output1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 440
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_point/ggplot2_point/2.2.1+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "87908c76ca8d",
+ "name": "ggplot2_point",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": \"{\\\"axis_text_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"axis_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"factor\\\": {\\\"__current_case__\\\": 0, \\\"factoring\\\": \\\"Default\\\"}, \\\"gridlinecust\\\": \\\"default\\\", \\\"legend\\\": \\\"yes\\\", \\\"plot_title_customization\\\": {\\\"__current_case__\\\": 0, \\\"axis_customization\\\": \\\"default\\\"}, \\\"points\\\": {\\\"__current_case__\\\": 0, \\\"pointoptions\\\": \\\"default\\\"}, \\\"scaling\\\": {\\\"__current_case__\\\": 0, \\\"plot_scaling\\\": \\\"Automatic\\\"}, \\\"theme\\\": \\\"bw\\\", \\\"transform\\\": \\\"none\\\", \\\"type\\\": \\\"points\\\"}\", \"xlab\": \"\\\"X\\\"\", \"input1\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"title\": \"\\\"Scatter Plot\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"xplot\": \"\\\"1\\\"\", \"yplot\": \"\\\"2\\\"\", \"ylab\": \"\\\"Y\\\"\", \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"out\": \"{\\\"additional_output_format\\\": \\\"none\\\", \\\"dpi_output_dim\\\": \\\"300.0\\\", \\\"height_output_dim\\\": \\\"7.0\\\", \\\"unit_output_dim\\\": \\\"in\\\", \\\"width_output_dim\\\": \\\"7.0\\\"}\"}",
+ "tool_version": "2.2.1+galaxy2",
+ "type": "tool",
+ "uuid": "caba26fb-d235-4ad3-83a5-2a02a6821f57",
+ "workflow_outputs": []
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input_types|infile": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Numeric Clustering",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 420,
+ "top": 680
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering/sklearn_numeric_clustering/1.0.8.1",
+ "tool_shed_repository": {
+ "changeset_revision": "1dd433d2c92c",
+ "name": "sklearn_numeric_clustering",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"input_types\": \"{\\\"__current_case__\\\": 1, \\\"algorithm_options\\\": {\\\"__current_case__\\\": 7, \\\"options\\\": {\\\"affinity\\\": \\\"euclidean\\\", \\\"linkage\\\": \\\"ward\\\", \\\"n_clusters\\\": \\\"2\\\"}, \\\"selected_algorithm\\\": \\\"AgglomerativeClustering\\\"}, \\\"column_selector_options\\\": {\\\"__current_case__\\\": 4, \\\"selected_column_selector_option\\\": \\\"all_columns\\\"}, \\\"header\\\": \\\"true\\\", \\\"infile\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"selected_input_type\\\": \\\"tabular\\\"}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": null}",
+ "tool_version": "1.0.8.1",
+ "type": "tool",
+ "uuid": "1abf8463-f8f6-4313-894b-d371baeeacbf",
+ "workflow_outputs": []
+ }
+ },
+ "uuid": "14f73bbe-9ae6-4c09-a8a4-4b15391044bb",
+ "version": 1
+}
diff --git a/topics/statistics/tutorials/clustering_machinelearning/workflows/index.md b/topics/statistics/tutorials/clustering_machinelearning/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/statistics/tutorials/clustering_machinelearning/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/statistics/tutorials/intro_deep_learning/data-library.yaml b/topics/statistics/tutorials/intro_deep_learning/data-library.yaml
new file mode 100644
index 00000000000000..064a820fab8ba0
--- /dev/null
+++ b/topics/statistics/tutorials/intro_deep_learning/data-library.yaml
@@ -0,0 +1,32 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Statistics and machine learning
+ description: Statistical Analyses for omics data and machine learning using Galaxy
+ tools
+ items:
+ - name: Introduction to deep learning
+ items:
+ - name: 'DOI: 10.5281/zenodo.3706539'
+ description: latest
+ items:
+ - url: https://zenodo.org/record/3706539/files/X_test.tsv
+ src: url
+ ext: tsv
+ info: https://zenodo.org/record/3706539#.XmjDYHVKg5k
+ - url: https://zenodo.org/record/3706539/files/X_train.tsv
+ src: url
+ ext: tsv
+ info: https://zenodo.org/record/3706539#.XmjDYHVKg5k
+ - url: https://zenodo.org/record/3706539/files/y_test.tsv
+ src: url
+ ext: tsv
+ info: https://zenodo.org/record/3706539#.XmjDYHVKg5k
+ - url: https://zenodo.org/record/3706539/files/y_train.tsv
+ src: url
+ ext: tsv
+ info: https://zenodo.org/record/3706539#.XmjDYHVKg5k
diff --git a/topics/statistics/tutorials/intro_deep_learning/tutorial.md b/topics/statistics/tutorials/intro_deep_learning/tutorial.md
new file mode 100755
index 00000000000000..fcbaa4be25e6f5
--- /dev/null
+++ b/topics/statistics/tutorials/intro_deep_learning/tutorial.md
@@ -0,0 +1,254 @@
+---
+layout: tutorial_hands_on
+
+title: Introduction to deep learning
+zenodo_link: https://zenodo.org/record/3706539#.XmjDYHVKg5k
+questions:
+- What are deep learning and neural networks?
+- Why is it useful?
+- How to create a neural network architecture for classification?
+objectives:
+- Learn basic principles of deep learning
+- Learn about how to create an end-to-end neural network architecture
+- Learn about Galaxy deep learning tools
+- Learn how to interpret predictions
+key_points:
+- Multiple tools to constitute a neural network architecture
+- Interpretation of predictions using visualisation tools
+time_estimation: 1H
+contributors:
+- anuprulez
+- khanteymoori
+---
+
+## Introduction
+
+### Deep learning and neural networks
+[Deep learning](https://en.wikipedia.org/wiki/Deep_learning), a branch of artificial intelligence, provides a collection of learning methods to model data with complex architectures to perform different non-linear transformations of data. Using these transformations, patterns are recognised in large volumes of data and new data can be categorised using these patterns extracted from existing data. These patterns are learned by computational models devised using different architectures of neural networks. In recent years, neural network architectures such as convolutional networks, long short-term memory networks and deep belief networks have become increasingly popular as machine learning tools in the fields of computer vision, image analysis, bioinformatics, speech recognition, natural language processing and so on, achieving state-of-the-art performance, sometimes exceeding human performance. The availability of greater computational resources, more data, new algorithms for training deep models and easy to use libraries for implementation and training of neural networks are the drivers of this development. Deep learning works by approximating the mathematical function which maps data to its output and it has been shown that it can [approximate](https://arxiv.org/pdf/1910.03344.pdf) any function, making it widely popular across multiple fields to analyse data. A neural network is a web of artificial neurons which are also called processing units. The idea of a neural network is inspired by [biological neural networks](https://en.wikipedia.org/wiki/Neural_circuit) where neuronal circuits are used to process information and learn. An artificial neural network is structured into multiple layers where each layer contains several neurons. The neurons from adjacent layers are interconnected ([feed-forward neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network)) allowing the exchange of information between layers of neurons.
+
+![data](../../images/neuron.svg "Structure of an artificial neuron. The input x (x1, x2, ..., xn) and its corresponding weight w (w1, w2, ..., wn) are vectors. The input and its weight are transformed to produce an output y")
+
+An artificial neuron is shown in Figure 1. The neuron, shown in orange, takes input `x` (only `x1` and `x2` are shown for simplicity) and computes output `y`. The entities `w1`, `w2` are the weights of the connections (between inputs and neuron). The weights and inputs are combined following the basic principles of mathematics to produce output `y` (shown in Figures 2, 3 and 4).
+
+![data](../../images/eq1.png "Transformation of a component (x1) of the input vector (x).")
+
+![data](../../images/eq2.png "Transformation of a component (x2) of the input vector (x).")
+
+Weights denote the significance of a particular input to produce the observed output. When it is large, the input is significant and when small, the input is less significant to produce the output. These weights can be initialised randomly and they are modified throughout the learning by a neural network. Using the updated inputs (as shown in the above equations), the output is computed:
+
+![data](../../images/eq3.png "Computation of output y using input x, weight w and activation function f.")
+
+where *f* is an activation function. An [activation function](https://keras.io/activations/) is a mathematical function which translates the combination of inputs to an output. The choices of these functions are many - sigmoid, linear, tanh, ReLU and so on. For example, sigmoid is:
+
+![data](../../images/eq4.png "Sigmoid activation function.")
+
+The above equation will return a real number between 0 and 1.
+
+Rectified linear unit (ReLU) is given by:
+
+![data](../../images/eq5.png "Rectified linear unit (ReLU) activation function.")
+
+As discussed earlier, neurons are the building blocks of a neural network and are arranged in several layers. A typical neural network is shown in Figure 7.
+
+#### Input layer
+In the neural network (Figure 7), the input layer is shown in green. This layer receives input data and passes it on to the next layer. The number of neurons in this layer depends on the number of dimensions of input data. For example, if input data (matrix) is of size (500, 10), 500 rows (samples) and 10 columns (features), then the number of neurons in the input layer should be 10. Each neuron in input layer is connected to all neurons in the next layer. All these connections have a separate weight (denoted by `w`).
+
+#### Hidden layer
+The next two layers after the input layer are called hidden layers. In the first hidden layer too, all the neurons are connected to all other neurons in the adjacent (hidden) layer. The number of hidden layers determines if the resulting neural network is deep (2 or more hidden layers) or shallow. When the number of hidden layers is 2 or more, the structure or architecture of the neural network is deep and overall learning is called deep learning. The greater the number of hidden layers, the more complex the architecture is. A complex architecture is beneficial for learning unique patterns from big data. However, a complex architecture is prone to [overfitting](https://en.wikipedia.org/wiki/Overfitting), where a neural network starts memorising data without learning unique and general patterns.
+
+![data](../../images/neural_network.svg "A neural network consisting of 4 layers - 1 input, 2 hidden and 1 output. The neurons in each layer are connected to all neurons in the adjacent layer. Each connection between a pair of neurons contains a weight.")
+
+The number of hidden layers and the size of each hidden layer is not fixed as it completely depends on the data. If the dataset is small (say only 1,000 samples), then it is sufficient to choose a less complex architecture (fewer hidden layers) to avoid the danger of overfitting. However, if the dataset is large (say > 100,000 samples), more complex architecture can be chosen. In short, the architecture of the hidden layer is completely dependent on the nature and size of data.
+
+#### Output layer
+This layer collects output computed using input data and weights which are optimised during learning. An activation function is chosen to transform the combination of input and weight to an output. Some examples of activation functions have been discussed above.
+
+#### Optimisation
+The computed or predicted output, collected at the output layer, and the actual output are compared to find the error (or loss). Neural network learning aims to minimise this error so that the predicted output gets as close to the actual output as possible. This process of minimising the error between predicted and actual output is called optimisation. Several optimisers are available, such as [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent), root mean square propagation ([RMSProp](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#RMSProp)), [adadelta](https://arxiv.org/pdf/1212.5701.pdf) and so on. These optimisers work primarily by adjusting the weights of connections so that the error is minimised. Once a set of weights is achieved which provides the best accuracy or minimum error, the learning is terminated because the weights cannot be updated any further to reduce the error.
+
+#### Neural network training
+Training is a process where input data is passed to a neural network at input layer and when finished, a trained model is created containing all the learned parameters such as weights of all connections in the neural network. Usually, a portion of data is extracted and saved as test data which is not used for training. It is used only for evaluating the trained model to get an unbiased estimate of learning and prediction strength. The partitioning of data into training and test can be set by deep learning practitioners. An example of partition can be - 70% training and 30% test data.
+
+#### Batch and Epoch
+While training a neural network, input data is passed in small batches. A batch is a subset of training data. An epoch is one iteration when all the training data is used for training in multiple batches. For example, if there is training data of size (500, 10) and batch size is fixed at 50, then there would be 10 batches (50 * 10 = 500) in each epoch. Each batch will have 50 samples and they are passed to the input layer of neural network. The loss computed at the output layer is propagated back and the weights are adjusted. The newly adjusted weights are used for the second batch of samples and so on. When all batches are finished, then one epoch of learning is done. The number of epochs and the size of a batch are parameters to be set by deep learning practitioners. These parameters depend on the size of data and should be tuned according to the data for optimum results.
+
+![data](../../images/mse.png "Mean squared error loss function.")
+
+#### Loss function
+The error between the computed and actual output is calculated using a loss function which is necessary to evaluate the strength of learning. Learning is good when loss decreases with training epochs; otherwise, training should be stopped and the architecture should be carefully adjusted. There are several choices of loss functions too. Functions such as root mean squared error (RMSE) and absolute error (AE) are used for regression problems while cross-entropy error functions such as binary cross-entropy and categorical cross-entropy are used in classification problems. An example of a loss function is shown in Figure 8.
+
+> ### {% icon question %} Questions
+>
+> 1. What do you understand by an architecture of a neural network?
+> 2. How does a neural network learn?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Architecture of a neural network consists of multiple layers such as input, hidden, convolutional, output and their number of respective neurons, optimiser, loss and activation functions etc.
+> > 2. The learning happens by minimising the loss between the computed and actual output. The weights of all neuronal connections are adjusted (increased or decreased) to achieve the minimum loss. To ascertain the amount of change for weights, a technique known as backpropagation is used. Using this technique, the loss computed at the output layer is "propagated" back in the neural network (from output to input layer) and each neuronal connection is assigned a share of the total loss. In other words, how much each neuron is contributing to the total accumulated loss. For example, w1 is adjusted according to equation:
+> > ![data](../../images/partial_derivative.png "Weight w1 is updated by computing a partial derivative of loss L with respect to weight. The derivative is multiplied with learning rate n.")
+> > In the above equation, `L` is the total loss, `w1` is the weight of a connection between an input neuron and a hidden neuron. Similarly, all weights are adjusted and in the subsequent iteration, the updated weights are used to compute loss at the output layer. Parameter `n` is the learning rate which determines how small or big changes are needed in weights. It can either be a fixed quantity or a variable one. In case of a variable learning rate, it usually starts with a large number (say 1.0) and subsequently decays to a small number (say 0.001) as the training epochs proceed because initially a large learning rate helps to reach close to the minimum error quickly and then it is decayed to slow down the learning so that it stabilises at the minimum. More on backpropagation can be read [here](http://neuralnetworksanddeeplearning.com/chap2.html).
+> >
+> {: .solution}
+>
+{: .question}
+
+### Relevance of deep learning in Bioinformatics
+Deep learning is an established tool in finding patterns in big data for multiple fields of research such as computer vision, image analysis, drug response prediction, protein structure prediction and so on. Different research areas use different architectures of neural network which are suitable to their respective data. For example - in computer vision and image analysis, convolutional neural network (CNN) is popular, graph convolutional neural network is often used for drug response prediction, recurrent neural network is useful for identifying motifs in protein sequences and so on. The table below shows more examples of neural networks which are popular with different fields of bioinformatics. These use-cases of deep learning in bioinformatics prove that it is essential to explore deep learning algorithms to find patterns in big data in biology. More details can be found in [Deep learning in bioinformatics: Introduction, application, and perspective in the big data era](https://www.sciencedirect.com/science/article/pii/S1046202318303256).
+
+![data](../../images/dl_bioinformatics.png "Different architectures of neural networks for different fields of bioinformatics.")
+
+## Get training and test datasets
+The datasets used for this tutorial contain gene expression profiles of humans suffering from two types of cancer - [acute myeloid leukemia (AML)](https://en.wikipedia.org/wiki/Acute_myeloid_leukemia) and [acute lymphoblastic leukemia (ALL)](https://en.wikipedia.org/wiki/Acute_lymphoblastic_leukemia). The tutorial aims to differentiate between these two cancer types, predicting a cancer type for each patient, by learning unique patterns in gene expression profiles of patients. The data is divided into 2 parts - one for training and another for prediction. Each part contains two datasets - one has the gene expression profiles and another has labels (the types of cancer). The size of the training data (`X_train`) is (38, 7129) where 38 is the number of patients and 7129 is the number of genes. The label dataset (`y_train`) is of size (38, 1) and contains the information of the type of cancer for each patient (label encoding is 0 for ALL and 1 for AML). The test dataset (`X_test`) is of size (34, 7129) and contains the same genes for 34 different patients. The label dataset for test is `y_test` and is of size (34, 1). The neural network, which will be formulated in the remaining part of the tutorial, learns on the training data and its labels to create a trained model. The prediction ability of this model is evaluated on the test data (which is unseen during training to get an unbiased estimate of prediction ability). These datasets are uploaded to Galaxy by following the steps defined below:
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial
+>
+> {% include snippets/create_new_history.md %}
+>
+> 2. Import the files from [Zenodo](https://zenodo.org/record/3706539#.XmjDYHVKg5k)
+>
+> ```
+> https://zenodo.org/record/3706539/files/X_test.tsv
+> https://zenodo.org/record/3706539/files/X_train.tsv
+> https://zenodo.org/record/3706539/files/y_test.tsv
+> https://zenodo.org/record/3706539/files/y_train.tsv
+> ```
+>
+> 3. Rename the datasets as `X_test`, `X_train`, `y_test` and `y_train` respectively.
+>
+> {% include snippets/rename_dataset.md %}
+> {% include snippets/import_via_link.md %}
+>
+> 4. Check that the datatype is `tabular`.
+>
+> {% include snippets/change_datatype.md datatype="tabular" %}
+>
+{: .hands_on}
+
+
+## Neural network architecture
+Defining a neural network architecture requires ascertaining the types and number of layers, the number of neurons for each layer, activation functions for all layers, type of optimiser and loss function. Choosing these parameters may require many experiments with data as there is no golden rule to choose the best combination of these parameters. The neural network used in this tutorial has an input layer, 2 hidden layers and one output layer. The input layer has a parameter `input_shape` which is set according to the number of dimensions of data. It is set to (7129,) which is the number of genes present in data. The hidden layers have 16 neurons (units) each and the output layer has only one because a scalar output is expected (0 or 1). This partial architecture (having input shape, types and size of layers, and activation functions) of the neural network is defined as follows:
+
+### Create architecture: Choose layers
+
+> ### {% icon hands_on %} Hands-on: Create a deep learning model architecture using Keras
+>
+> 1. **Create a deep learning model architecture using Keras** {% icon tool %} with the following parameters:
+> - *"Select keras model type"*: `Sequential`
+> - *"input_shape"*: `(7129, )`
+>
+> - In *"LAYER"*:
+> - {% icon param-repeat %} *"1: LAYER"*:
+> - *"Choose the type of layer"*: `Core -- Dense`
+> - *"units"*: `16`
+> - *"Activation function"*: `elu`
+> - {% icon param-repeat %} *"2: LAYER"*:
+> - *"Choose the type of layer"*: `Core -- Dense`
+> - *"units"*: `16`
+> - *"Activation function"*: `elu`
+> - {% icon param-repeat %} *"3: LAYER"*:
+> - *"Choose the type of layer"*: `Core -- Dense`
+> - *"units"*: `1`
+> - *"Activation function"*: `sigmoid`
+>
+>
+{: .hands_on}
+
+The tool returns a JSON output file containing data about the neural network layers and their attributes like their types, number of units they have and their activation functions. This file is used as an input to the next step where the architecture of the neural network is completed by adding optimiser, loss function, and training parameters such as the number of epochs and batch size. The loss function is chosen as `binary_crossentropy` as the learning task is the classification of two labels (0 and 1).
+
+### Create architecture: Add training parameters
+
+> ### {% icon hands_on %} Hands-on: Create deep learning model with an optimizer, loss function and fit parameters
+>
+> 1. **Create deep learning model with an optimizer, loss function and fit parameters** {% icon tool %} with the following parameters:
+> - *"Choose a building mode"*: `Build a training model`
+> - *"Select the dataset containing model configurations (JSON)"*: `Keras model config` (output of **Create a deep learning model architecture using Keras** {% icon tool %})
+> - *"Do classification or regression?"*: `KerasGClassifier`
+>
+> `KerasGClassifier` is chosen because the learning task is classification, i.e. assigning each patient a type of cancer.
+> - In *"Compile Parameters"*:
+> - *"Select a loss function"*: `binary_crossentropy`
+>
+> The loss function is `binary_crossentropy` because the labels are discrete and binary (0 and 1).
+> - *"Select an optimizer"*: `RMSprop - RMSProp optimizer`
+> - In *"Fit Parameters"*:
+> - *"epochs"*: `10`
+> - *"batch_size"*: `4`
+>
+> The training data is small (only 38 patients). Therefore the number of epochs and batch size are also small.
+>
+{: .hands_on}
+
+The tool returns a zipped file containing an object of the neural network architecture (defined in the last two steps), which is used as a classifier to be trained on data. Once the architecture is finalised, its associated object is combined with the training data for training as follows:
+
+### Deep learning training
+A neural network is trained on training data to learn hidden representations and a mapping from features (genes) to the two types of cancer. As discussed earlier, the neural network minimises the error, which is given by the loss function, between actual and predicted labels while adjusting the weights of connections among neurons in multiple layers. Once the training is finished, the architecture and learned weights are saved. They are used to predict labels in test data. The deep learning training is set up as follows:
+
+> ### {% icon hands_on %} Hands-on: Deep learning training and evaluation conduct deep training and evaluation either implicitly or explicitly
+>
+> 1. **Deep learning training and evaluation conduct deep training and evaluation** {% icon tool %} with the following parameters:
+> - *"Select a scheme"*: `Train and validate`
+> - *"Choose the dataset containing pipeline/estimator object"*: `Keras model builder` (output of **Create deep learning model** {% icon tool %})
+> - *"Select input type"*: `tabular data`
+> - *"Training samples dataset"*: `X_train`
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `All columns`
+> - *"Dataset containing class labels or target values"*: `y_train`
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `All columns`
+>
+{: .hands_on}
+
+The tool gives 3 files as output - a tabular file containing output (accuracy of cross-validation) of training, a zipped file with the trained model (fitted estimator) and an H5 (HDF5) file containing the weights of neural network layers. The files containing the fitted estimator and weights are used to recreate the model and this recreated model is used to predict labels in test data.
+
+### Prediction on test data
+After training, the saved architecture (fitted estimator) and weights are used to predict labels for the test data. For each patient in the test data, a type of cancer is predicted using the trained model learned in the previous step.
+
+> ### {% icon hands_on %} Hands-on: Model Prediction predicts on new data using a preffited model
+>
+> 1. **Model Prediction predicts on new data using a preffited model** {% icon tool %} with the following parameters:
+> - *"Choose the dataset containing pipeline/estimator object"*: `Fitted estimator or estimator skeleton` (output of **Deep learning training and evaluation** {% icon tool %})
+> - *"Choose the dataset containing weights for the estimator above"*: `Weights trained` (output of **Deep learning training and evaluation** {% icon tool %})
+> - *"Select invocation method"*: `predict`
+> - *"Select input data type for prediction"*: `tabular data`
+> - *"Training samples dataset"*: `X_test`
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `All columns`
+>
+>
+{: .hands_on}
+
+The tool returns the predicted labels (0 for ALL and 1 for AML) for test data in a tabular format. The size of this data is (34,1) where 34 is the number of cancer patients in test data.
+
+## Visualisation
+Visualising the results is important to ascertain the generalisation ability of the trained model on an unseen dataset. Using a dataset with the actual labels for the test data, the performance of the trained model is estimated by comparing the actual labels against the predicted labels using a confusion matrix plot.
+
+> ### {% icon hands_on %} Hands-on: Machine Learning Visualization Extension includes several types of plotting for machine learning
+>
+> 1. **Machine Learning Visualization Extension includes several types of plotting for machine learning** {% icon tool %} with the following parameters:
+> - *"Select a plotting type"*: `Confusion matrix for classes`
+> - *"Select dataset containing true labels"*: `y_test`
+> - *"Does the dataset contain header"*: `Yes`
+> - *"Choose how to select data by column"*: `All columns`
+> - *"Select dataset containing predicted labels"*: `Model prediction` (output of **Model Prediction predicts on new data using a preffited model** {% icon tool %})
+> - *"Does the dataset contain header"*: `Yes`
+>
+>
+{: .hands_on}
+
+> ### {% icon comment %} Comment
+> Please note that your predictions could be different from the plot shown in Figure 11 because the training data is small and the predictions may vary. Stability in predictions can be achieved if the deep learning model is trained on large data. But, for this tutorial, it is kept small to reduce the training time as the aim is to showcase how to create a pipeline for deep learning training. Generally, deep learning models are trained on large data and may keep running for a few hours to a few days.
+{: .comment}
+
+The image below shows a [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix), which is a square matrix. It contains actual labels on the y-axis and predicted labels on the x-axis. Each cell in the matrix plot gives the number of cancer patients who got predicted correctly or incorrectly. For example, the number in the top-left cell (0, 0) denotes how many of these patients are predicted correctly for ALL (17/20). The higher the number in this cell, the better the model is for this class. The top-right cell shows the 3 patients who have ALL but are predicted as having AML. Similarly, the bottom-right cell denotes how many patients are predicted correctly for AML (10/14). The bottom-left cell shows the 4 patients who have AML but are predicted as ALL.
+
+![data](../../images/confusion_matrix_dl.png "Confusion matrix for true and predicted classes")
+
+
+## Conclusion
+The tutorial presents a case-study to predict labels (ALL and AML) of 34 new cancer patients after learning gene expression profiles of 38 cancer patients through multiple steps of a deep learning pipeline. All these steps show how to create a neural network architecture using Galaxy's deep learning tools and analyse results using a confusion matrix visualisation. Similarly, many different neural network architectures can be created to suit the dataset and aim of a particular experiment. Moreover, it should be noted that a neural network architecture giving promising results on one dataset may not work at all with another dataset. It is essential to perform multiple experiments with a dataset to formulate an optimal neural network architecture.
+
+{:.no_toc}
+
diff --git a/topics/statistics/tutorials/machinelearning/workflows/machine_learning-test.yml b/topics/statistics/tutorials/machinelearning/workflows/machine_learning-test.yml
new file mode 100644
index 00000000000000..daedd370a3a379
--- /dev/null
+++ b/topics/statistics/tutorials/machinelearning/workflows/machine_learning-test.yml
@@ -0,0 +1,16 @@
+---
+- doc: Test Basics of Machine Learning workflow
+ job:
+ breast-w_train:
+ class: File
+ location: https://zenodo.org/api/files/0d468136-5025-4c0f-bf8b-a8277a513a93/breast-w_train.tsv
+ filetype: tabular
+ breast-w_test:
+ class: File
+ location: https://zenodo.org/api/files/0d468136-5025-4c0f-bf8b-a8277a513a93/breast-w_test.tsv
+ filetype: tabular
+ outputs:
+ outfile_predict:
+ asserts:
+ has_text:
+ text: 'Mitoses'
diff --git a/topics/statistics/tutorials/machinelearning/workflows/machine_learning.ga b/topics/statistics/tutorials/machinelearning/workflows/machine_learning.ga
index 2670588b4c2e54..bebcdb65996739 100644
--- a/topics/statistics/tutorials/machinelearning/workflows/machine_learning.ga
+++ b/topics/statistics/tutorials/machinelearning/workflows/machine_learning.ga
@@ -111,7 +111,13 @@
"name": "outfile_predict"
}
],
- "workflow_outputs": [],
+ "workflow_outputs": [
+ {
+ "label": "outfile_predict",
+ "output_name": "outfile_predict",
+ "uuid": "03e30beb-fc01-4184-8649-057c13bb626a"
+ }
+ ],
"input_connections": {
"selected_tasks|infile_data": {
"output_name": "output",
diff --git a/topics/transcriptomics/images/ref-based/rna-seq-reads.png b/topics/transcriptomics/images/ref-based/rna-seq-reads.png
new file mode 100644
index 00000000000000..7eaed3e444b38a
Binary files /dev/null and b/topics/transcriptomics/images/ref-based/rna-seq-reads.png differ
diff --git a/topics/transcriptomics/images/ref-based/rna-seq-reads.svg b/topics/transcriptomics/images/ref-based/rna-seq-reads.svg
new file mode 100644
index 00000000000000..39c38c1a2649d9
--- /dev/null
+++ b/topics/transcriptomics/images/ref-based/rna-seq-reads.svg
@@ -0,0 +1,273 @@
+
+
+
+
diff --git a/topics/transcriptomics/images/rna-interactome/chira.png b/topics/transcriptomics/images/rna-interactome/chira.png
new file mode 100644
index 00000000000000..860e13308562f0
Binary files /dev/null and b/topics/transcriptomics/images/rna-interactome/chira.png differ
diff --git a/topics/transcriptomics/images/rna-interactome/chira_fastqc_seq_content.png b/topics/transcriptomics/images/rna-interactome/chira_fastqc_seq_content.png
new file mode 100644
index 00000000000000..0dbe3c24b5aa44
Binary files /dev/null and b/topics/transcriptomics/images/rna-interactome/chira_fastqc_seq_content.png differ
diff --git a/topics/transcriptomics/images/rna-interactome/chiraviz_choose.png b/topics/transcriptomics/images/rna-interactome/chiraviz_choose.png
new file mode 100644
index 00000000000000..815e2e8645205b
Binary files /dev/null and b/topics/transcriptomics/images/rna-interactome/chiraviz_choose.png differ
diff --git a/topics/transcriptomics/images/rna-interactome/chiraviz_home.png b/topics/transcriptomics/images/rna-interactome/chiraviz_home.png
new file mode 100644
index 00000000000000..1d39e944fe1950
Binary files /dev/null and b/topics/transcriptomics/images/rna-interactome/chiraviz_home.png differ
diff --git a/topics/transcriptomics/images/rna-interactome/chiraviz_single.png b/topics/transcriptomics/images/rna-interactome/chiraviz_single.png
new file mode 100644
index 00000000000000..2e1feb6a43a421
Binary files /dev/null and b/topics/transcriptomics/images/rna-interactome/chiraviz_single.png differ
diff --git a/topics/transcriptomics/images/wab96wellplate.svg b/topics/transcriptomics/images/wab96wellplate.svg
new file mode 100644
index 00000000000000..c6bcb28e06fdc6
--- /dev/null
+++ b/topics/transcriptomics/images/wab96wellplate.svg
@@ -0,0 +1,356 @@
+
+
+
diff --git a/topics/transcriptomics/images/wabbalancedbatches.png b/topics/transcriptomics/images/wabbalancedbatches.png
new file mode 100644
index 00000000000000..8480e80d28a5ba
Binary files /dev/null and b/topics/transcriptomics/images/wabbalancedbatches.png differ
diff --git a/topics/transcriptomics/images/wabbatchproblems.png b/topics/transcriptomics/images/wabbatchproblems.png
new file mode 100644
index 00000000000000..4fc585644b4c5c
Binary files /dev/null and b/topics/transcriptomics/images/wabbatchproblems.png differ
diff --git a/topics/transcriptomics/images/wabexampleplates.png b/topics/transcriptomics/images/wabexampleplates.png
new file mode 100644
index 00000000000000..62d743c9b8a30c
Binary files /dev/null and b/topics/transcriptomics/images/wabexampleplates.png differ
diff --git a/topics/transcriptomics/images/wablanesbad.png b/topics/transcriptomics/images/wablanesbad.png
new file mode 100644
index 00000000000000..ac5deba2c73f26
Binary files /dev/null and b/topics/transcriptomics/images/wablanesbad.png differ
diff --git a/topics/transcriptomics/images/wablanesgood.png b/topics/transcriptomics/images/wablanesgood.png
new file mode 100644
index 00000000000000..38285a3dc10913
Binary files /dev/null and b/topics/transcriptomics/images/wablanesgood.png differ
diff --git a/topics/transcriptomics/images/wabplate.png b/topics/transcriptomics/images/wabplate.png
new file mode 100644
index 00000000000000..25fa8944b12afd
Binary files /dev/null and b/topics/transcriptomics/images/wabplate.png differ
diff --git a/topics/transcriptomics/images/wabreplicates.png b/topics/transcriptomics/images/wabreplicates.png
new file mode 100644
index 00000000000000..5c97e7f62f37f9
Binary files /dev/null and b/topics/transcriptomics/images/wabreplicates.png differ
diff --git a/topics/transcriptomics/images/wabseqlane.png b/topics/transcriptomics/images/wabseqlane.png
new file mode 100644
index 00000000000000..083a299a03ed6b
Binary files /dev/null and b/topics/transcriptomics/images/wabseqlane.png differ
diff --git a/topics/transcriptomics/tutorials/de-novo/tutorial.md b/topics/transcriptomics/tutorials/de-novo/tutorial.md
index a414e80202acee..069739bcb305b7 100755
--- a/topics/transcriptomics/tutorials/de-novo/tutorial.md
+++ b/topics/transcriptomics/tutorials/de-novo/tutorial.md
@@ -101,8 +101,8 @@ For quality control, we use similar tools as described in [NGS-QC tutorial]({{si
> 2. **Trimmomatic** {% icon tool %}: Trim off the low quality bases from the ends of the reads to increase mapping efficiency. Run `Trimmomatic` on each pair of forward and reverse reads with the following settings:
>
> - *"Single-end or paired-end reads?"*: `Paired-end (two separate input files)`
-> - {% icon param-file %} *"Input FASTQ file (R1/first of pair)"*: `G1E forward read (R1)`
-> - {% icon param-file %} *"Input FASTQ file (R2/second of pair)"*: `G1E reverse read (R1)`
+> - {% icon param-file %} *"Input FASTQ file (R1/first of pair)"*: `G1E_rep1 forward read`
+> - {% icon param-file %} *"Input FASTQ file (R2/second of pair)"*: `G1E_rep1 reverse read`
> - *"Perform initial ILLUMINACLIP step?"*: `No`
>
> 3. **FastQC** {% icon tool %}: Re-run `FastQC` on trimmed reads and inspect the differences.
@@ -118,6 +118,9 @@ For quality control, we use similar tools as described in [NGS-QC tutorial]({{si
> > {: .solution }
> {: .question}
> ![Before and after trimming comparison](../../images/BeforeAndAfterTrimming.png)
+>
+> 4. **Trimmomatic** {% icon tool %}: Run `Trimmomatic` on the remaining forward/reverse read pairs with the same parameters.
+>
{: .hands_on}
Now that we have trimmed our reads and are fortunate that there is a reference genome assembly for mouse, we will align our trimmed reads to the genome.
@@ -162,8 +165,8 @@ Spliced mappers have been developed to efficiently map transcript-derived reads
> - *"Source for the reference genome"*: `Use a built-in genome`
> - {% icon param-file %} *"Select a reference genome"*: `Mouse (Mus Musculus): mm10`
> - *"Single-end or paired-end reads?"*: `Paired-end`
-> - {% icon param-file %} *"FASTA/Q file #1"*: trimmed G1E forward read (R1)
-> - {% icon param-file %} *"FASTA/Q file #2"*: trimmed G1E reverse read (R1)
+> - {% icon param-file %} *"FASTA/Q file #1"*: Trimmomatic on G1E_rep1 forward read (R1 paired)
+> - {% icon param-file %} *"FASTA/Q file #2"*: Trimmomatic on G1E_rep1 reverse read (R2 paired)
> - *"Specify strand information"*: `Forward(FR)`
> - *"Advanced options"*
> - *"Spliced alignment options"*
@@ -195,13 +198,14 @@ We just generated four transcriptomes with `Stringtie` representing each of the
> ### {% icon hands_on %} Hands-on: Transcriptome assembly
>
> 1. **Stringtie-merge** {% icon tool %}: Run `Stringtie-merge` on the `Stringtie` assembled transcripts along with the RefSeq annotation file we imported earlier.
-> - {% icon param-file %} *"input_gtf"*: `all four `Stringtie` assemblies`
-> - {% icon param-file %} *"guide_gff"*: `RefSeq GTF mm10`
+> - {% icon param-file %} *"Transcripts"*: `all four `Stringtie` assemblies`
+> - {% icon param-file %} *"Reference annotation to include in the merging"*: `RefSeq_reference_GTF`
>
> 2. **GFFCompare** {% icon tool %}: Run `GFFCompare` on the `Stringtie-merge` generated transcriptome along with the RefSeq annotation file.
> - {% icon param-file %} *"GTF inputs for comparison"*: `output of Stringtie-merge`
> - *"Use Reference Annotation"*: `Yes`
-> - {% icon param-file %} *"Reference Annotation"*: `RefSeq GTF mm10`
+> - *"Choose the source for the reference annotation"*: `History`
+> - {% icon param-file %} *"Reference Annotation"*: `RefSeq_reference_GTF`
> - *"Use Sequence Data"*: `Yes`
> - *"Choose the source for the reference list"*: `Locally cached`
> - *"Using reference genome"*: 'Mouse (Mus Musculus): mm10'
diff --git a/topics/transcriptomics/tutorials/de-novo/workflows/transcriptomics-denovo-workflow.ga b/topics/transcriptomics/tutorials/de-novo/workflows/transcriptomics-denovo-workflow.ga
index ff283cf28934c2..fc144f11ee0194 100644
--- a/topics/transcriptomics/tutorials/de-novo/workflows/transcriptomics-denovo-workflow.ga
+++ b/topics/transcriptomics/tutorials/de-novo/workflows/transcriptomics-denovo-workflow.ga
@@ -1,2234 +1,3002 @@
{
- "a_galaxy_workflow": "true",
- "annotation": "De novo transcriptome reconstruction with RNA-Seq",
- "format-version": "0.1",
- "name": "Transcriptomics Denovo Workflow",
- "steps": {
- "0": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 0,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "G1E_rep1_forward_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 10
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"G1E_rep1_forward_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "cbc6590c-d6de-4abf-b53f-6c83b4ab4120",
- "workflow_outputs": []
- },
- "1": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 1,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "G1E_rep1_reverse_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 130
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"G1E_rep1_reverse_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "c2ba4aba-060a-40ba-88d6-e6fef8a1f2eb",
- "workflow_outputs": []
- },
- "10": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 10,
- "input_connections": {
- "input_file": {
- "id": 1,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "a4db4907-e8e9-4534-a37c-7c1d6c34de20",
- "workflow_outputs": []
- },
- "11": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "errors": null,
- "id": 11,
- "input_connections": {
- "readtype|fastq_r1_in": {
- "id": 0,
- "output_name": "output"
- },
- "readtype|fastq_r2_in": {
- "id": 1,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Trimmomatic",
- "outputs": [
- {
- "name": "fastq_out_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out",
- "type": "input"
- }
- ],
- "position": {
- "left": 230,
- "top": 250
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "tool_shed_repository": {
- "changeset_revision": "dfa082f84068",
- "name": "trimmomatic",
- "owner": "pjbriggs",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"operation\\\": {\\\"window_size\\\": \\\"4\\\", \\\"name\\\": \\\"SLIDINGWINDOW\\\", \\\"__current_case__\\\": 0, \\\"required_quality\\\": \\\"20\\\"}}]\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pair_of_files\\\", \\\"fastq_r1_in\\\": null, \\\"__current_case__\\\": 1, \\\"fastq_r2_in\\\": null}\", \"illuminaclip\": \"{\\\"do_illuminaclip\\\": \\\"false\\\", \\\"__current_case__\\\": 1}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.36.5",
- "type": "tool",
- "uuid": "d7a54fe5-9cd2-4209-9a02-b7d2d636b337",
- "workflow_outputs": []
- },
- "12": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 12,
- "input_connections": {
- "input_file": {
- "id": 2,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 370
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "c96f55e9-c898-4e79-abe2-0795b54d3a05",
- "workflow_outputs": []
- },
- "13": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 13,
- "input_connections": {
- "input_file": {
- "id": 3,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 490
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "d30af66b-0ab0-4bb9-8009-ce96a6c16993",
- "workflow_outputs": []
- },
- "14": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "errors": null,
- "id": 14,
- "input_connections": {
- "readtype|fastq_r1_in": {
- "id": 2,
- "output_name": "output"
- },
- "readtype|fastq_r2_in": {
- "id": 3,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Trimmomatic",
- "outputs": [
- {
- "name": "fastq_out_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out",
- "type": "input"
- }
- ],
- "position": {
- "left": 230,
- "top": 610
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "tool_shed_repository": {
- "changeset_revision": "dfa082f84068",
- "name": "trimmomatic",
- "owner": "pjbriggs",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"operation\\\": {\\\"window_size\\\": \\\"4\\\", \\\"name\\\": \\\"SLIDINGWINDOW\\\", \\\"__current_case__\\\": 0, \\\"required_quality\\\": \\\"20\\\"}}]\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pair_of_files\\\", \\\"fastq_r1_in\\\": null, \\\"__current_case__\\\": 1, \\\"fastq_r2_in\\\": null}\", \"illuminaclip\": \"{\\\"do_illuminaclip\\\": \\\"false\\\", \\\"__current_case__\\\": 1}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.36.5",
- "type": "tool",
- "uuid": "3d3cfa70-c027-40aa-b01c-52c36035fe92",
- "workflow_outputs": []
- },
- "15": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 15,
- "input_connections": {
- "input_file": {
- "id": 4,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 730
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "a6295400-a0ee-4790-9294-44e39e5df96c",
- "workflow_outputs": []
- },
- "16": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 16,
- "input_connections": {
- "input_file": {
- "id": 5,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 850
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "d7866331-a1c1-4c4b-8f6b-f7feb78fa78c",
- "workflow_outputs": []
- },
- "17": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "errors": null,
- "id": 17,
- "input_connections": {
- "readtype|fastq_r1_in": {
- "id": 4,
- "output_name": "output"
- },
- "readtype|fastq_r2_in": {
- "id": 5,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Trimmomatic",
- "outputs": [
- {
- "name": "fastq_out_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out",
- "type": "input"
- }
- ],
- "position": {
- "left": 230,
- "top": 970
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "tool_shed_repository": {
- "changeset_revision": "dfa082f84068",
- "name": "trimmomatic",
- "owner": "pjbriggs",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"operation\\\": {\\\"window_size\\\": \\\"4\\\", \\\"name\\\": \\\"SLIDINGWINDOW\\\", \\\"__current_case__\\\": 0, \\\"required_quality\\\": \\\"20\\\"}}]\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pair_of_files\\\", \\\"fastq_r1_in\\\": null, \\\"__current_case__\\\": 1, \\\"fastq_r2_in\\\": null}\", \"illuminaclip\": \"{\\\"do_illuminaclip\\\": \\\"false\\\", \\\"__current_case__\\\": 1}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.36.5",
- "type": "tool",
- "uuid": "c7ff55a4-e3df-4fde-93a3-ab0e175d5d6e",
- "workflow_outputs": []
- },
- "18": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 18,
- "input_connections": {
- "input_file": {
- "id": 6,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 1090
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "e56ae1f1-b6ee-4f0a-873b-fae2c345c216",
- "workflow_outputs": []
- },
- "19": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 19,
- "input_connections": {
- "input_file": {
- "id": 7,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 1210
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "d1068d72-f84b-431c-9137-38eb42606147",
- "workflow_outputs": []
- },
- "2": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 2,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "G1E_rep2_forward_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 250
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"G1E_rep2_forward_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "c24cf053-248c-4922-874e-240f187853ca",
- "workflow_outputs": []
- },
- "20": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "errors": null,
- "id": 20,
- "input_connections": {
- "readtype|fastq_r1_in": {
- "id": 6,
- "output_name": "output"
- },
- "readtype|fastq_r2_in": {
- "id": 7,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Trimmomatic",
- "outputs": [
- {
- "name": "fastq_out_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_paired",
- "type": "input"
- },
- {
- "name": "fastq_out_r1_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out_r2_unpaired",
- "type": "input"
- },
- {
- "name": "fastq_out",
- "type": "input"
- }
- ],
- "position": {
- "left": 230,
- "top": 1330
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.36.5",
- "tool_shed_repository": {
- "changeset_revision": "dfa082f84068",
- "name": "trimmomatic",
- "owner": "pjbriggs",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"operations\": \"[{\\\"__index__\\\": 0, \\\"operation\\\": {\\\"window_size\\\": \\\"4\\\", \\\"name\\\": \\\"SLIDINGWINDOW\\\", \\\"__current_case__\\\": 0, \\\"required_quality\\\": \\\"20\\\"}}]\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pair_of_files\\\", \\\"fastq_r1_in\\\": null, \\\"__current_case__\\\": 1, \\\"fastq_r2_in\\\": null}\", \"illuminaclip\": \"{\\\"do_illuminaclip\\\": \\\"false\\\", \\\"__current_case__\\\": 1}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.36.5",
- "type": "tool",
- "uuid": "81433b00-0996-44b0-ae73-db880a8c68c0",
- "workflow_outputs": []
- },
- "21": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "errors": null,
- "id": 21,
- "input_connections": {
- "library|input_1": {
- "id": 11,
- "output_name": "fastq_out_r1_paired"
- },
- "library|input_2": {
- "id": 11,
- "output_name": "fastq_out_r2_paired"
- }
- },
- "inputs": [],
- "label": null,
- "name": "HISAT2",
- "outputs": [
- {
- "name": "output_alignments",
- "type": "bam"
- },
- {
- "name": "output_unaligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_unaligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "summary_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 450,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "tool_shed_repository": {
- "changeset_revision": "6ab42baa56e9",
- "name": "hisat2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"alignment_options\\\": {\\\"__current_case__\\\": 0, \\\"alignment_options_selector\\\": \\\"defaults\\\"}, \\\"output_options\\\": {\\\"output_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"other_options\\\": {\\\"other_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"scoring_options\\\": {\\\"scoring_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"spliced_options\\\": {\\\"coefficient\\\": \\\"0.0\\\", \\\"canonical_penalty\\\": \\\"0\\\", \\\"no_spliced_alignment_options\\\": {\\\"no_spliced_alignment\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"nc_function_type\\\": \\\"C\\\", \\\"constant_term\\\": \\\"0.0\\\", \\\"nc_coefficient\\\": \\\"1.0\\\", \\\"noncanonical_penalty\\\": \\\"3\\\", \\\"known_splice_gtf\\\": null, \\\"nc_constant_term\\\": \\\"-8.0\\\", \\\"min_intron\\\": \\\"20\\\", \\\"function_type\\\": \\\"C\\\", \\\"__current_case__\\\": 1, \\\"notmplen\\\": \\\"false\\\", \\\"tma\\\": \\\"--dta\\\", \\\"max_intron\\\": \\\"500000\\\", \\\"spliced_options_selector\\\": \\\"advanced\\\"}, \\\"reporting_options\\\": {\\\"reporting_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"input_options\\\": {\\\"input_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"__page__\": null, \"sum\": \"{\\\"new_summary\\\": \\\"false\\\", \\\"summary_file\\\": \\\"false\\\"}\", \"library\": \"{\\\"rna_strandness\\\": \\\"FR\\\", \\\"input_2\\\": null, \\\"__current_case__\\\": 1, \\\"input_1\\\": null, \\\"type\\\": \\\"paired\\\", \\\"paired_options\\\": {\\\"paired_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"reference_genome\": \"{\\\"source\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0, \\\"index\\\": \\\"mm10\\\"}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": 
\"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "2.1.0",
- "type": "tool",
- "uuid": "817cc498-44f1-4813-92f0-3b88fed3451f",
- "workflow_outputs": []
- },
- "22": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "errors": null,
- "id": 22,
- "input_connections": {
- "library|input_1": {
- "id": 14,
- "output_name": "fastq_out_r1_paired"
- },
- "library|input_2": {
- "id": 14,
- "output_name": "fastq_out_r2_paired"
- }
- },
- "inputs": [],
- "label": null,
- "name": "HISAT2",
- "outputs": [
- {
- "name": "output_alignments",
- "type": "bam"
- },
- {
- "name": "output_unaligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_unaligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "summary_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 450,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "tool_shed_repository": {
- "changeset_revision": "6ab42baa56e9",
- "name": "hisat2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"alignment_options\\\": {\\\"__current_case__\\\": 0, \\\"alignment_options_selector\\\": \\\"defaults\\\"}, \\\"output_options\\\": {\\\"output_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"other_options\\\": {\\\"other_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"scoring_options\\\": {\\\"scoring_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"spliced_options\\\": {\\\"coefficient\\\": \\\"0.0\\\", \\\"canonical_penalty\\\": \\\"0\\\", \\\"no_spliced_alignment_options\\\": {\\\"no_spliced_alignment\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"nc_function_type\\\": \\\"C\\\", \\\"constant_term\\\": \\\"0.0\\\", \\\"nc_coefficient\\\": \\\"1.0\\\", \\\"noncanonical_penalty\\\": \\\"3\\\", \\\"known_splice_gtf\\\": null, \\\"nc_constant_term\\\": \\\"-8.0\\\", \\\"min_intron\\\": \\\"20\\\", \\\"function_type\\\": \\\"C\\\", \\\"__current_case__\\\": 1, \\\"notmplen\\\": \\\"false\\\", \\\"tma\\\": \\\"--dta\\\", \\\"max_intron\\\": \\\"500000\\\", \\\"spliced_options_selector\\\": \\\"advanced\\\"}, \\\"reporting_options\\\": {\\\"reporting_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"input_options\\\": {\\\"input_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"__page__\": null, \"sum\": \"{\\\"new_summary\\\": \\\"false\\\", \\\"summary_file\\\": \\\"false\\\"}\", \"library\": \"{\\\"rna_strandness\\\": \\\"FR\\\", \\\"input_2\\\": null, \\\"__current_case__\\\": 1, \\\"input_1\\\": null, \\\"type\\\": \\\"paired\\\", \\\"paired_options\\\": {\\\"paired_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"reference_genome\": \"{\\\"source\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0, \\\"index\\\": \\\"mm10\\\"}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": 
\"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "2.1.0",
- "type": "tool",
- "uuid": "1e92db79-135e-4a5e-9d3f-ea0d7f5045da",
- "workflow_outputs": []
- },
- "23": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "errors": null,
- "id": 23,
- "input_connections": {
- "library|input_1": {
- "id": 17,
- "output_name": "fastq_out_r1_paired"
- },
- "library|input_2": {
- "id": 17,
- "output_name": "fastq_out_r2_paired"
- }
- },
- "inputs": [],
- "label": null,
- "name": "HISAT2",
- "outputs": [
- {
- "name": "output_alignments",
- "type": "bam"
- },
- {
- "name": "output_unaligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_unaligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "summary_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 450,
- "top": 250
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "tool_shed_repository": {
- "changeset_revision": "6ab42baa56e9",
- "name": "hisat2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"alignment_options\\\": {\\\"__current_case__\\\": 0, \\\"alignment_options_selector\\\": \\\"defaults\\\"}, \\\"output_options\\\": {\\\"output_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"other_options\\\": {\\\"other_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"scoring_options\\\": {\\\"scoring_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"spliced_options\\\": {\\\"coefficient\\\": \\\"0.0\\\", \\\"canonical_penalty\\\": \\\"0\\\", \\\"no_spliced_alignment_options\\\": {\\\"no_spliced_alignment\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"nc_function_type\\\": \\\"C\\\", \\\"constant_term\\\": \\\"0.0\\\", \\\"nc_coefficient\\\": \\\"1.0\\\", \\\"noncanonical_penalty\\\": \\\"3\\\", \\\"known_splice_gtf\\\": null, \\\"nc_constant_term\\\": \\\"-8.0\\\", \\\"min_intron\\\": \\\"20\\\", \\\"function_type\\\": \\\"C\\\", \\\"__current_case__\\\": 1, \\\"notmplen\\\": \\\"false\\\", \\\"tma\\\": \\\"--dta\\\", \\\"max_intron\\\": \\\"500000\\\", \\\"spliced_options_selector\\\": \\\"advanced\\\"}, \\\"reporting_options\\\": {\\\"reporting_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"input_options\\\": {\\\"input_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"__page__\": null, \"sum\": \"{\\\"new_summary\\\": \\\"false\\\", \\\"summary_file\\\": \\\"false\\\"}\", \"library\": \"{\\\"rna_strandness\\\": \\\"FR\\\", \\\"input_2\\\": null, \\\"__current_case__\\\": 1, \\\"input_1\\\": null, \\\"type\\\": \\\"paired\\\", \\\"paired_options\\\": {\\\"paired_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"reference_genome\": \"{\\\"source\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0, \\\"index\\\": \\\"mm10\\\"}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": 
\"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "2.1.0",
- "type": "tool",
- "uuid": "c488973c-6fe1-4f11-9268-1f108e3e1aae",
- "workflow_outputs": []
- },
- "24": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "errors": null,
- "id": 24,
- "input_connections": {
- "library|input_1": {
- "id": 20,
- "output_name": "fastq_out_r1_paired"
- },
- "library|input_2": {
- "id": 20,
- "output_name": "fastq_out_r2_paired"
- }
- },
- "inputs": [],
- "label": null,
- "name": "HISAT2",
- "outputs": [
- {
- "name": "output_alignments",
- "type": "bam"
- },
- {
- "name": "output_unaligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_l",
- "type": "fastqsanger"
- },
- {
- "name": "output_unaligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "output_aligned_reads_r",
- "type": "fastqsanger"
- },
- {
- "name": "summary_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 450,
- "top": 370
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0",
- "tool_shed_repository": {
- "changeset_revision": "6ab42baa56e9",
- "name": "hisat2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"alignment_options\\\": {\\\"__current_case__\\\": 0, \\\"alignment_options_selector\\\": \\\"defaults\\\"}, \\\"output_options\\\": {\\\"output_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"other_options\\\": {\\\"other_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"scoring_options\\\": {\\\"scoring_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"spliced_options\\\": {\\\"coefficient\\\": \\\"0.0\\\", \\\"canonical_penalty\\\": \\\"0\\\", \\\"no_spliced_alignment_options\\\": {\\\"no_spliced_alignment\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"nc_function_type\\\": \\\"C\\\", \\\"constant_term\\\": \\\"0.0\\\", \\\"nc_coefficient\\\": \\\"1.0\\\", \\\"noncanonical_penalty\\\": \\\"3\\\", \\\"known_splice_gtf\\\": null, \\\"nc_constant_term\\\": \\\"-8.0\\\", \\\"min_intron\\\": \\\"20\\\", \\\"function_type\\\": \\\"C\\\", \\\"__current_case__\\\": 1, \\\"notmplen\\\": \\\"false\\\", \\\"tma\\\": \\\"--dta\\\", \\\"max_intron\\\": \\\"500000\\\", \\\"spliced_options_selector\\\": \\\"advanced\\\"}, \\\"reporting_options\\\": {\\\"reporting_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}, \\\"input_options\\\": {\\\"input_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"__page__\": null, \"sum\": \"{\\\"new_summary\\\": \\\"false\\\", \\\"summary_file\\\": \\\"false\\\"}\", \"library\": \"{\\\"rna_strandness\\\": \\\"FR\\\", \\\"input_2\\\": null, \\\"__current_case__\\\": 1, \\\"input_1\\\": null, \\\"type\\\": \\\"paired\\\", \\\"paired_options\\\": {\\\"paired_options_selector\\\": \\\"defaults\\\", \\\"__current_case__\\\": 0}}\", \"reference_genome\": \"{\\\"source\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0, \\\"index\\\": \\\"mm10\\\"}\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": 
\"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "2.1.0",
- "type": "tool",
- "uuid": "614c1c41-fcd0-447e-8514-c0876641809d",
- "workflow_outputs": []
- },
- "25": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "errors": null,
- "id": 25,
- "input_connections": {
- "input_bam": {
- "id": 21,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "StringTie",
- "outputs": [
- {
- "name": "output_gtf",
- "type": "gtf"
- },
- {
- "name": "gene_abundance_estimation",
- "type": "gtf"
- },
- {
- "name": "coverage",
- "type": "gtf"
- },
- {
- "name": "exon_expression",
- "type": "tabular"
- },
- {
- "name": "intron_expression",
- "type": "tabular"
- },
- {
- "name": "transcript_expression",
- "type": "tabular"
- },
- {
- "name": "exon_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "intron_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "gene_counts",
- "type": "tabular"
- },
- {
- "name": "transcript_counts",
- "type": "tabular"
- },
- {
- "name": "legend",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 670,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "tool_shed_repository": {
- "changeset_revision": "eafd5dc95228",
- "name": "stringtie",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"min_bundle_cov\\\": \\\"2\\\", \\\"min_tlen\\\": \\\"200\\\", \\\"bdist\\\": \\\"50\\\", \\\"multi_mapping\\\": \\\"false\\\", \\\"abundance_estimation\\\": \\\"false\\\", \\\"fraction\\\": \\\"0.15\\\", \\\"disable_trimming\\\": \\\"false\\\", \\\"omit_sequences\\\": \\\"\\\", \\\"name_prefix\\\": \\\"\\\", \\\"min_anchor_len\\\": \\\"10\\\", \\\"bundle_fraction\\\": \\\"0.95\\\", \\\"min_anchor_cov\\\": \\\"1\\\"}\", \"__page__\": null, \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"rna_strandness\": \"\\\"--fr\\\"\", \"input_bam\": \"null\", \"guide\": \"{\\\"use_guide\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\"}",
- "tool_version": "1.3.3.2",
- "type": "tool",
- "uuid": "5fbbf39b-c5da-435e-a7cf-4d0d3ed9e05e",
- "workflow_outputs": []
- },
- "26": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 26,
- "input_connections": {
- "bamInput": {
- "id": 21,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 490
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"forward\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "d61115e2-0a95-4b80-93bb-a14183d0e219",
- "workflow_outputs": []
- },
- "27": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 27,
- "input_connections": {
- "bamInput": {
- "id": 21,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 970
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"reverse\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "72051e92-7fc3-447f-b289-e9d7e7043397",
- "workflow_outputs": []
- },
- "28": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "errors": null,
- "id": 28,
- "input_connections": {
- "input_bam": {
- "id": 22,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "StringTie",
- "outputs": [
- {
- "name": "output_gtf",
- "type": "gtf"
- },
- {
- "name": "gene_abundance_estimation",
- "type": "gtf"
- },
- {
- "name": "coverage",
- "type": "gtf"
- },
- {
- "name": "exon_expression",
- "type": "tabular"
- },
- {
- "name": "intron_expression",
- "type": "tabular"
- },
- {
- "name": "transcript_expression",
- "type": "tabular"
- },
- {
- "name": "exon_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "intron_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "gene_counts",
- "type": "tabular"
- },
- {
- "name": "transcript_counts",
- "type": "tabular"
- },
- {
- "name": "legend",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 670,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "tool_shed_repository": {
- "changeset_revision": "eafd5dc95228",
- "name": "stringtie",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"min_bundle_cov\\\": \\\"2\\\", \\\"min_tlen\\\": \\\"200\\\", \\\"bdist\\\": \\\"50\\\", \\\"multi_mapping\\\": \\\"false\\\", \\\"abundance_estimation\\\": \\\"false\\\", \\\"fraction\\\": \\\"0.15\\\", \\\"disable_trimming\\\": \\\"false\\\", \\\"omit_sequences\\\": \\\"\\\", \\\"name_prefix\\\": \\\"\\\", \\\"min_anchor_len\\\": \\\"10\\\", \\\"bundle_fraction\\\": \\\"0.95\\\", \\\"min_anchor_cov\\\": \\\"1\\\"}\", \"__page__\": null, \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"rna_strandness\": \"\\\"--fr\\\"\", \"input_bam\": \"null\", \"guide\": \"{\\\"use_guide\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\"}",
- "tool_version": "1.3.3.2",
- "type": "tool",
- "uuid": "c96d2ba9-5fb2-444f-afa1-8311a54460c0",
- "workflow_outputs": []
- },
- "29": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 29,
- "input_connections": {
- "bamInput": {
- "id": 22,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 610
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"forward\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "e2b0c494-a05a-4fd7-9d32-97f466fe3bcf",
- "workflow_outputs": []
- },
- "3": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 3,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "G1E_rep2_reverse_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 370
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"G1E_rep2_reverse_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "4d89fd77-a8ae-41c8-8dfe-e9446fdeaddb",
- "workflow_outputs": []
- },
- "30": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 30,
- "input_connections": {
- "bamInput": {
- "id": 22,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 1090
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"reverse\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "b4c95c39-273b-4138-bbad-8e46a3adfaae",
- "workflow_outputs": []
- },
- "31": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "errors": null,
- "id": 31,
- "input_connections": {
- "input_bam": {
- "id": 23,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "StringTie",
- "outputs": [
- {
- "name": "output_gtf",
- "type": "gtf"
- },
- {
- "name": "gene_abundance_estimation",
- "type": "gtf"
- },
- {
- "name": "coverage",
- "type": "gtf"
- },
- {
- "name": "exon_expression",
- "type": "tabular"
- },
- {
- "name": "intron_expression",
- "type": "tabular"
- },
- {
- "name": "transcript_expression",
- "type": "tabular"
- },
- {
- "name": "exon_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "intron_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "gene_counts",
- "type": "tabular"
- },
- {
- "name": "transcript_counts",
- "type": "tabular"
- },
- {
- "name": "legend",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 670,
- "top": 250
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "tool_shed_repository": {
- "changeset_revision": "eafd5dc95228",
- "name": "stringtie",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"min_bundle_cov\\\": \\\"2\\\", \\\"min_tlen\\\": \\\"200\\\", \\\"bdist\\\": \\\"50\\\", \\\"multi_mapping\\\": \\\"false\\\", \\\"abundance_estimation\\\": \\\"false\\\", \\\"fraction\\\": \\\"0.15\\\", \\\"disable_trimming\\\": \\\"false\\\", \\\"omit_sequences\\\": \\\"\\\", \\\"name_prefix\\\": \\\"\\\", \\\"min_anchor_len\\\": \\\"10\\\", \\\"bundle_fraction\\\": \\\"0.95\\\", \\\"min_anchor_cov\\\": \\\"1\\\"}\", \"__page__\": null, \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"rna_strandness\": \"\\\"--fr\\\"\", \"input_bam\": \"null\", \"guide\": \"{\\\"use_guide\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\"}",
- "tool_version": "1.3.3.2",
- "type": "tool",
- "uuid": "4c15f586-8fe3-4dca-94fe-2ca0205ec76e",
- "workflow_outputs": []
- },
- "32": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 32,
- "input_connections": {
- "bamInput": {
- "id": 23,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 730
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"forward\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "9a7a8ac0-a980-4b19-91d8-a775af979957",
- "workflow_outputs": []
- },
- "33": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 33,
- "input_connections": {
- "bamInput": {
- "id": 23,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 1210
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"reverse\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "cac5762b-01b2-492e-ab4a-ed6ca63b65de",
- "workflow_outputs": []
- },
- "34": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "errors": null,
- "id": 34,
- "input_connections": {
- "input_bam": {
- "id": 24,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "StringTie",
- "outputs": [
- {
- "name": "output_gtf",
- "type": "gtf"
- },
- {
- "name": "gene_abundance_estimation",
- "type": "gtf"
- },
- {
- "name": "coverage",
- "type": "gtf"
- },
- {
- "name": "exon_expression",
- "type": "tabular"
- },
- {
- "name": "intron_expression",
- "type": "tabular"
- },
- {
- "name": "transcript_expression",
- "type": "tabular"
- },
- {
- "name": "exon_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "intron_transcript_mapping",
- "type": "tabular"
- },
- {
- "name": "gene_counts",
- "type": "tabular"
- },
- {
- "name": "transcript_counts",
- "type": "tabular"
- },
- {
- "name": "legend",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 670,
- "top": 370
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.3.2",
- "tool_shed_repository": {
- "changeset_revision": "eafd5dc95228",
- "name": "stringtie",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"adv\": \"{\\\"min_bundle_cov\\\": \\\"2\\\", \\\"min_tlen\\\": \\\"200\\\", \\\"bdist\\\": \\\"50\\\", \\\"multi_mapping\\\": \\\"false\\\", \\\"abundance_estimation\\\": \\\"false\\\", \\\"fraction\\\": \\\"0.15\\\", \\\"disable_trimming\\\": \\\"false\\\", \\\"omit_sequences\\\": \\\"\\\", \\\"name_prefix\\\": \\\"\\\", \\\"min_anchor_len\\\": \\\"10\\\", \\\"bundle_fraction\\\": \\\"0.95\\\", \\\"min_anchor_cov\\\": \\\"1\\\"}\", \"__page__\": null, \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"rna_strandness\": \"\\\"--fr\\\"\", \"input_bam\": \"null\", \"guide\": \"{\\\"use_guide\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\"}",
- "tool_version": "1.3.3.2",
- "type": "tool",
- "uuid": "1538c29a-305e-454e-801c-e4501fb326b1",
- "workflow_outputs": []
- },
- "35": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 35,
- "input_connections": {
- "bamInput": {
- "id": 24,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 850
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"forward\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "63c35755-3978-4c47-9063-bd06737ccafc",
- "workflow_outputs": []
- },
- "36": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "errors": null,
- "id": 36,
- "input_connections": {
- "bamInput": {
- "id": 24,
- "output_name": "output_alignments"
- }
- },
- "inputs": [],
- "label": null,
- "name": "bamCoverage",
- "outputs": [
- {
- "name": "outFileName",
- "type": "bigwig"
- }
- ],
- "position": {
- "left": 670,
- "top": 1330
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.0.2.0",
- "tool_shed_repository": {
- "changeset_revision": "3033c3fba046",
- "name": "deeptools_bam_coverage",
- "owner": "bgruening",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"outFileFormat\": \"\\\"bigwig\\\"\", \"region\": \"\\\"\\\"\", \"bamInput\": \"null\", \"binSize\": \"\\\"1\\\"\", \"scaling\": \"{\\\"effectiveGenomeSize\\\": {\\\"effectiveGenomeSize_opt\\\": \\\"2304947926\\\", \\\"__current_case__\\\": 6}, \\\"type\\\": \\\"1x\\\", \\\"__current_case__\\\": 4}\", \"advancedOpt\": \"{\\\"ignoreDuplicates\\\": \\\"false\\\", \\\"centerReads\\\": \\\"false\\\", \\\"ignoreForNormalization\\\": \\\"\\\", \\\"minFragmentLength\\\": \\\"0\\\", \\\"minMappingQuality\\\": \\\"1\\\", \\\"MNase\\\": \\\"false\\\", \\\"samFlagInclude\\\": \\\"\\\", \\\"filterRNAstrand\\\": \\\"reverse\\\", \\\"Offset\\\": \\\"\\\", \\\"smoothLength\\\": \\\"\\\", \\\"showAdvancedOpt\\\": \\\"yes\\\", \\\"__current_case__\\\": 1, \\\"samFlagExclude\\\": \\\"\\\", \\\"doExtendCustom\\\": {\\\"__current_case__\\\": 0, \\\"doExtend\\\": \\\"no\\\"}, \\\"skipNAs\\\": \\\"false\\\", \\\"scaleFactor\\\": \\\"1.0\\\", \\\"blackListFileName\\\": null, \\\"maxFragmentLength\\\": \\\"0\\\"}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"__rerun_remap_job_id__\": null}",
- "tool_version": "3.0.2.0",
- "type": "tool",
- "uuid": "dbe2b8a3-bc6a-461c-8ca3-06b3966edbdd",
- "workflow_outputs": []
- },
- "37": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie_merge/1.3.3",
- "errors": null,
- "id": 37,
- "input_connections": {
- "guide_gff": {
- "id": 8,
- "output_name": "output"
- },
- "input_gtf": [
- {
- "id": 31,
- "output_name": "output_gtf"
- },
- {
- "id": 34,
- "output_name": "output_gtf"
- },
- {
+ "a_galaxy_workflow": "true",
+ "annotation": "De novo transcriptome reconstruction with RNA-Seq",
+ "format-version": "0.1",
+ "name": "De novo transcriptome reconstruction with RNA-Seq",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "G1E_rep1_forward_read"
+ }
+ ],
+ "label": "G1E_rep1_forward_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 290
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "1c887335-aab6-4cba-b42c-245709f52480",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "58b0d678-aab8-4e89-9f22-2638b2b587e9"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "G1E_rep1_reverse_read"
+ }
+ ],
+ "label": "G1E_rep1_reverse_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 419
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "3722470f-b8c8-4def-9cb1-8a151b4013e7",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "8cfef7f7-d925-4f92-bb80-d5ec340f0524"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "G1E_rep2_forward_read"
+ }
+ ],
+ "label": "G1E_rep2_forward_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 548
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "4e1331e5-07c7-4156-8b13-5f2622bbc01e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "c843315e-60a3-46a7-ae29-ed9d97270cff"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 3,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "G1E_rep2_reverse_read"
+ }
+ ],
+ "label": "G1E_rep2_reverse_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 677
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "06814797-9497-436e-bddc-82b22dd2873c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "5fd7967a-4a00-4d7b-9a62-7a8b0cf0a478"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 4,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Megakaryocyte_rep1_forward_read"
+ }
+ ],
+ "label": "Megakaryocyte_rep1_forward_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 806
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "92aa4ae0-48ec-4bf3-a67d-f6ec80df2684",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "2a6f1fbe-2895-4083-85f8-93237ecce594"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 5,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Megakaryocyte_rep1_reverse_read"
+ }
+ ],
+ "label": "Megakaryocyte_rep1_reverse_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 935
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "997ad557-f4ad-4438-bf9b-5febe42df714",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "2c37da60-ac5e-4149-8905-53be33257156"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 6,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Megakaryocyte_rep2_forward_read"
+ }
+ ],
+ "label": "Megakaryocyte_rep2_forward_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 1064
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "724e4a6c-1f6c-493e-b994-7a2a2978c675",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "9edf754e-c3d8-499b-9269-4686133c640f"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 7,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "Megakaryocyte_rep2_reverse_read"
+ }
+ ],
+ "label": "Megakaryocyte_rep2_reverse_read",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 1193
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "5a3c7b9a-ed6b-434e-b8b4-a001229ea40c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "1fc42c4e-b960-44cd-b1e5-600341e1988f"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 8,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "RefSeq_reference_GTF"
+ }
+ ],
+ "label": "RefSeq_reference_GTF",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 1322
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "e828db47-2ad6-471d-b20e-a12b417adf5f",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "8bdd01c7-48df-41f1-aaf1-87f318e4e380"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input_file": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 290
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "6d2ea23f-a8a9-499b-929c-47fce09e47a8",
+ "workflow_outputs": []
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "input_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 590
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "f281aa57-2179-4e83-964c-d55dc7604459",
+ "workflow_outputs": []
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "readtype|fastq_r1_in": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "readtype|fastq_r2_in": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Trimmomatic",
+ "outputs": [
+ {
+ "name": "fastq_out_r1_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r1_unpaired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_unpaired",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 890
+ },
+ "post_job_actions": {
+ "HideDatasetActionfastq_out_r1_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_paired"
+ },
+ "HideDatasetActionfastq_out_r1_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_unpaired"
+ },
+ "HideDatasetActionfastq_out_r2_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_paired"
+ },
+ "HideDatasetActionfastq_out_r2_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_unpaired"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "tool_shed_repository": {
+ "changeset_revision": "898b67846b47",
+ "name": "trimmomatic",
+ "owner": "pjbriggs",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"operations\": [{\"__index__\": 0, \"operation\": {\"window_size\": \"4\", \"name\": \"SLIDINGWINDOW\", \"__current_case__\": 0, \"required_quality\": \"20\"}}], \"__page__\": null, \"output_logs\": \"false\", \"output_err\": \"false\", \"__rerun_remap_job_id__\": null, \"readtype\": {\"single_or_paired\": \"pair_of_files\", \"fastq_r1_in\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"fastq_r2_in\": {\"__class__\": \"ConnectedValue\"}}, \"illuminaclip\": {\"do_illuminaclip\": \"false\", \"__current_case__\": 1}}",
+ "tool_version": "0.38.0",
+ "type": "tool",
+ "uuid": "16c06f57-e6d0-4c25-97be-d65076624070",
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input_file": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 1150
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "05a92089-56b4-4a06-8220-36007afe9d57",
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input_file": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 1450
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "7211e7e0-fefb-474d-8e26-f282d9e886cd",
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "readtype|fastq_r1_in": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "readtype|fastq_r2_in": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Trimmomatic",
+ "outputs": [
+ {
+ "name": "fastq_out_r1_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r1_unpaired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_unpaired",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 1750
+ },
+ "post_job_actions": {
+ "HideDatasetActionfastq_out_r1_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_paired"
+ },
+ "HideDatasetActionfastq_out_r1_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_unpaired"
+ },
+ "HideDatasetActionfastq_out_r2_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_paired"
+ },
+ "HideDatasetActionfastq_out_r2_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_unpaired"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "tool_shed_repository": {
+ "changeset_revision": "898b67846b47",
+ "name": "trimmomatic",
+ "owner": "pjbriggs",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"operations\": [{\"__index__\": 0, \"operation\": {\"window_size\": \"4\", \"name\": \"SLIDINGWINDOW\", \"__current_case__\": 0, \"required_quality\": \"20\"}}], \"__page__\": null, \"output_logs\": \"false\", \"output_err\": \"false\", \"__rerun_remap_job_id__\": null, \"readtype\": {\"single_or_paired\": \"pair_of_files\", \"fastq_r1_in\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"fastq_r2_in\": {\"__class__\": \"ConnectedValue\"}}, \"illuminaclip\": {\"do_illuminaclip\": \"false\", \"__current_case__\": 1}}",
+ "tool_version": "0.38.0",
+ "type": "tool",
+ "uuid": "3b018b22-9e7c-408f-b2ac-ca314c51f62b",
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input_file": {
+ "id": 4,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 2010
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "d2d8062f-98ac-469c-b204-d5601d3c9a5c",
+ "workflow_outputs": []
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input_file": {
+ "id": 5,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 2310
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "01813945-e169-49a0-a2c4-11de9cbee79e",
+ "workflow_outputs": []
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "readtype|fastq_r1_in": {
+ "id": 4,
+ "output_name": "output"
+ },
+ "readtype|fastq_r2_in": {
+ "id": 5,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Trimmomatic",
+ "outputs": [
+ {
+ "name": "fastq_out_r1_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r1_unpaired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_unpaired",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 2610
+ },
+ "post_job_actions": {
+ "HideDatasetActionfastq_out_r1_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_paired"
+ },
+ "HideDatasetActionfastq_out_r1_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_unpaired"
+ },
+ "HideDatasetActionfastq_out_r2_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_paired"
+ },
+ "HideDatasetActionfastq_out_r2_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_unpaired"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "tool_shed_repository": {
+ "changeset_revision": "898b67846b47",
+ "name": "trimmomatic",
+ "owner": "pjbriggs",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"operations\": [{\"__index__\": 0, \"operation\": {\"window_size\": \"4\", \"name\": \"SLIDINGWINDOW\", \"__current_case__\": 0, \"required_quality\": \"20\"}}], \"__page__\": null, \"output_logs\": \"false\", \"output_err\": \"false\", \"__rerun_remap_job_id__\": null, \"readtype\": {\"single_or_paired\": \"pair_of_files\", \"fastq_r1_in\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"fastq_r2_in\": {\"__class__\": \"ConnectedValue\"}}, \"illuminaclip\": {\"do_illuminaclip\": \"false\", \"__current_case__\": 1}}",
+ "tool_version": "0.38.0",
+ "type": "tool",
+ "uuid": "7c82667c-816f-4018-8cda-5664cd17194f",
+ "workflow_outputs": []
+ },
+ "18": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 18,
+ "input_connections": {
+ "input_file": {
+ "id": 6,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 2870
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "80a6fe76-67da-454b-b8bd-8797236c7946",
+ "workflow_outputs": []
+ },
+ "19": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 19,
+ "input_connections": {
+ "input_file": {
+ "id": 7,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 3170
+ },
+ "post_job_actions": {
+ "HideDatasetActionhtml_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "html_file"
+ },
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "331cbef9-727e-4a74-93f2-d3ca9b6197a3",
+ "workflow_outputs": []
+ },
+ "20": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "errors": null,
+ "id": 20,
+ "input_connections": {
+ "readtype|fastq_r1_in": {
+ "id": 6,
+ "output_name": "output"
+ },
+ "readtype|fastq_r2_in": {
+ "id": 7,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Trimmomatic",
+ "outputs": [
+ {
+ "name": "fastq_out_r1_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_paired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r1_unpaired",
+ "type": "input"
+ },
+ {
+ "name": "fastq_out_r2_unpaired",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 450,
+ "top": 3470
+ },
+ "post_job_actions": {
+ "HideDatasetActionfastq_out_r1_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_paired"
+ },
+ "HideDatasetActionfastq_out_r1_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r1_unpaired"
+ },
+ "HideDatasetActionfastq_out_r2_paired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_paired"
+ },
+ "HideDatasetActionfastq_out_r2_unpaired": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "fastq_out_r2_unpaired"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/pjbriggs/trimmomatic/trimmomatic/0.38.0",
+ "tool_shed_repository": {
+ "changeset_revision": "898b67846b47",
+ "name": "trimmomatic",
+ "owner": "pjbriggs",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"operations\": [{\"__index__\": 0, \"operation\": {\"window_size\": \"4\", \"name\": \"SLIDINGWINDOW\", \"__current_case__\": 0, \"required_quality\": \"20\"}}], \"__page__\": null, \"output_logs\": \"false\", \"output_err\": \"false\", \"__rerun_remap_job_id__\": null, \"readtype\": {\"single_or_paired\": \"pair_of_files\", \"fastq_r1_in\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"fastq_r2_in\": {\"__class__\": \"ConnectedValue\"}}, \"illuminaclip\": {\"do_illuminaclip\": \"false\", \"__current_case__\": 1}}",
+ "tool_version": "0.38.0",
+ "type": "tool",
+ "uuid": "c6cb852e-2ef3-4351-b4d5-10054347dc30",
+ "workflow_outputs": []
+ },
+ "21": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "errors": null,
+ "id": 21,
+ "input_connections": {
+ "library|input_1": {
+ "id": 11,
+ "output_name": "fastq_out_r1_paired"
+ },
+ "library|input_2": {
+ "id": 11,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "HISAT2",
+ "outputs": [
+ {
+ "name": "output_alignments",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 290
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "tool_shed_repository": {
+ "changeset_revision": "0c16cad5e03b",
+ "name": "hisat2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"alignment_options\": {\"__current_case__\": 0, \"alignment_options_selector\": \"defaults\"}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"coefficient\": \"0.0\", \"canonical_penalty\": \"0\", \"no_spliced_alignment_options\": {\"no_spliced_alignment\": \"\", \"__current_case__\": 1}, \"nc_function_type\": \"C\", \"constant_term\": \"0.0\", \"nc_coefficient\": \"1.0\", \"noncanonical_penalty\": \"3\", \"known_splice_gtf\": {\"__class__\": \"RuntimeValue\"}, \"nc_constant_term\": \"-8.0\", \"min_intron\": \"20\", \"function_type\": \"C\", \"__current_case__\": 1, \"notmplen\": \"false\", \"tma\": \"--dta\", \"max_intron\": \"500000\", \"spliced_options_selector\": \"advanced\", \"novel_splicesite_outfile\": \"false\"}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"__page__\": null, \"sum\": {\"new_summary\": \"false\", \"summary_file\": \"false\"}, \"library\": {\"rna_strandness\": \"FR\", \"input_2\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"input_1\": {\"__class__\": \"ConnectedValue\"}, \"type\": \"paired\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"mm10\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "2.1.0+galaxy5",
+ "type": "tool",
+ "uuid": "47e82c2c-8015-4f9b-b52e-744dfa71663b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_alignments",
+ "uuid": "5bb58c35-9471-4d5d-8641-8ed9ff17f2bc"
+ }
+ ]
+ },
+ "22": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 22,
+ "input_connections": {
+ "input_file": {
+ "id": 11,
+ "output_name": "fastq_out_r1_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 1090
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "cf6c24f8-d0da-4ee3-b2b6-35fe2b713cdc",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "41b1e0fe-cc76-4c56-a314-e605644ec7c8"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "b45e4668-08bb-4357-b365-3d3fc0192cd2"
+ }
+ ]
+ },
+ "23": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 23,
+ "input_connections": {
+ "input_file": {
+ "id": 11,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 1390
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "c768bbe0-3965-4da4-b049-1ddd225d2f7e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "93faa429-8ff4-412a-9092-754e44a122a5"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "d70e0a34-5f4d-48e2-bced-272445267269"
+ }
+ ]
+ },
+ "24": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "errors": null,
+ "id": 24,
+ "input_connections": {
+ "library|input_1": {
+ "id": 14,
+ "output_name": "fastq_out_r1_paired"
+ },
+ "library|input_2": {
+ "id": 14,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "HISAT2",
+ "outputs": [
+ {
+ "name": "output_alignments",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 490
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "tool_shed_repository": {
+ "changeset_revision": "0c16cad5e03b",
+ "name": "hisat2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"alignment_options\": {\"__current_case__\": 0, \"alignment_options_selector\": \"defaults\"}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"coefficient\": \"0.0\", \"canonical_penalty\": \"0\", \"no_spliced_alignment_options\": {\"no_spliced_alignment\": \"\", \"__current_case__\": 1}, \"nc_function_type\": \"C\", \"constant_term\": \"0.0\", \"nc_coefficient\": \"1.0\", \"noncanonical_penalty\": \"3\", \"known_splice_gtf\": {\"__class__\": \"RuntimeValue\"}, \"nc_constant_term\": \"-8.0\", \"min_intron\": \"20\", \"function_type\": \"C\", \"__current_case__\": 1, \"notmplen\": \"false\", \"tma\": \"--dta\", \"max_intron\": \"500000\", \"spliced_options_selector\": \"advanced\", \"novel_splicesite_outfile\": \"false\"}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"__page__\": null, \"sum\": {\"new_summary\": \"false\", \"summary_file\": \"false\"}, \"library\": {\"rna_strandness\": \"FR\", \"input_2\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"input_1\": {\"__class__\": \"ConnectedValue\"}, \"type\": \"paired\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"mm10\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "2.1.0+galaxy5",
+ "type": "tool",
+ "uuid": "7ced6c99-3501-4f2e-9109-30b1db74d056",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_alignments",
+ "uuid": "fb406cfe-34be-4c6a-be06-c5efd6f785ba"
+ }
+ ]
+ },
+ "25": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
"id": 25,
- "output_name": "output_gtf"
- },
- {
+ "input_connections": {
+ "input_file": {
+ "id": 14,
+ "output_name": "fastq_out_r1_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 1690
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "f5a2c061-1560-4c48-b34e-6f23d62e6a80",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "dc04a264-a7ff-444a-bb74-9448d9fd9311"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "811c9a2c-ebc4-4399-9fa8-186d15fadf04"
+ }
+ ]
+ },
+ "26": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 26,
+ "input_connections": {
+ "input_file": {
+ "id": 14,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 1990
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "adb607ce-5be3-4dcf-a981-d38144fa1679",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "4da3a94f-7d68-4c93-989d-a95b2286a343"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "854c4e5f-b2af-4c12-aefe-78f90e96f4d5"
+ }
+ ]
+ },
+ "27": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "errors": null,
+ "id": 27,
+ "input_connections": {
+ "library|input_1": {
+ "id": 17,
+ "output_name": "fastq_out_r1_paired"
+ },
+ "library|input_2": {
+ "id": 17,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "HISAT2",
+ "outputs": [
+ {
+ "name": "output_alignments",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 690
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "tool_shed_repository": {
+ "changeset_revision": "0c16cad5e03b",
+ "name": "hisat2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"alignment_options\": {\"__current_case__\": 0, \"alignment_options_selector\": \"defaults\"}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"coefficient\": \"0.0\", \"canonical_penalty\": \"0\", \"no_spliced_alignment_options\": {\"no_spliced_alignment\": \"\", \"__current_case__\": 1}, \"nc_function_type\": \"C\", \"constant_term\": \"0.0\", \"nc_coefficient\": \"1.0\", \"noncanonical_penalty\": \"3\", \"known_splice_gtf\": {\"__class__\": \"RuntimeValue\"}, \"nc_constant_term\": \"-8.0\", \"min_intron\": \"20\", \"function_type\": \"C\", \"__current_case__\": 1, \"notmplen\": \"false\", \"tma\": \"--dta\", \"max_intron\": \"500000\", \"spliced_options_selector\": \"advanced\", \"novel_splicesite_outfile\": \"false\"}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"__page__\": null, \"sum\": {\"new_summary\": \"false\", \"summary_file\": \"false\"}, \"library\": {\"rna_strandness\": \"FR\", \"input_2\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"input_1\": {\"__class__\": \"ConnectedValue\"}, \"type\": \"paired\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"mm10\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "2.1.0+galaxy5",
+ "type": "tool",
+ "uuid": "49664070-ec4d-4077-82ae-856976b09f16",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_alignments",
+ "uuid": "0ef710de-cb6c-4502-83c5-f3c1fa3f4f77"
+ }
+ ]
+ },
+ "28": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
"id": 28,
- "output_name": "output_gtf"
- }
- ]
- },
- "inputs": [],
- "label": null,
- "name": "StringTie merge",
- "outputs": [
- {
- "name": "out_gtf",
- "type": "gtf"
- }
- ],
- "position": {
- "left": 890,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie_merge/1.3.3",
- "tool_shed_repository": {
- "changeset_revision": "eafd5dc95228",
- "name": "stringtie",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"keep_introns\": \"\\\"false\\\"\", \"min_fpkm\": \"\\\"1.0\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"min_len\": \"\\\"50\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"guide_gff\": \"null\", \"min_cov\": \"\\\"0\\\"\", \"min_iso\": \"\\\"0.01\\\"\", \"min_tpm\": \"\\\"1.0\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"input_gtf\": \"null\", \"gap_len\": \"\\\"250\\\"\"}",
- "tool_version": "1.3.3",
- "type": "tool",
- "uuid": "47307af1-9fd1-4936-a01e-ca9d660173e7",
- "workflow_outputs": []
- },
- "38": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.9.8",
- "errors": null,
- "id": 38,
- "input_connections": {
- "annotation|reference_annotation": {
- "id": 8,
- "output_name": "output"
- },
- "inputs": {
- "id": 37,
- "output_name": "out_gtf"
- }
- },
- "inputs": [],
- "label": null,
- "name": "GffCompare",
- "outputs": [
- {
- "name": "transcripts_stats",
- "type": "txt"
- },
- {
- "name": "transcripts_loci",
- "type": "tabular"
- },
- {
- "name": "transcripts_tracking",
- "type": "tabular"
- },
- {
- "name": "transcripts_combined",
- "type": "gtf"
- },
- {
- "name": "transcripts_annotated",
- "type": "gtf"
- }
- ],
- "position": {
- "left": 1110,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.9.8",
- "tool_shed_repository": {
- "changeset_revision": "3c97c841a443",
- "name": "gffcompare",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"seq_data\": \"{\\\"use_seq_data\\\": \\\"Yes\\\", \\\"seq_source\\\": {\\\"index_source\\\": \\\"cached\\\", \\\"index\\\": \\\"mm10\\\", \\\"__current_case__\\\": 0}, \\\"__current_case__\\\": 1}\", \"inputs\": \"null\", \"max_dist_group\": \"\\\"100\\\"\", \"__page__\": null, \"max_dist_exon\": \"\\\"100\\\"\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"discard_single_exon\": \"\\\"\\\"\", \"discard_intron_redundant_transfrags\": \"\\\"false\\\"\", \"annotation\": \"{\\\"reference_annotation\\\": null, \\\"use_ref_annotation\\\": \\\"Yes\\\", \\\"ignore_nonoverlapping_reference\\\": \\\"false\\\", \\\"__current_case__\\\": 0, \\\"ignore_nonoverlapping_transfrags\\\": \\\"false\\\"}\"}",
- "tool_version": "0.9.8",
- "type": "tool",
- "uuid": "6292c087-6c93-435e-ab1f-348ce4bff2bb",
- "workflow_outputs": []
- },
- "39": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "errors": null,
- "id": 39,
- "input_connections": {
- "alignment": {
- "id": 21,
- "output_name": "output_alignments"
- },
- "anno|reference_gene_sets": {
- "id": 38,
- "output_name": "transcripts_annotated"
- }
- },
- "inputs": [],
- "label": null,
- "name": "featureCounts",
- "outputs": [
- {
- "name": "output_medium",
- "type": "tabular"
- },
- {
- "name": "output_short",
- "type": "tabular"
- },
- {
- "name": "output_full",
- "type": "tabular"
- },
- {
- "name": "output_summary",
- "type": "tabular"
- },
- {
- "name": "output_feature_lengths",
- "type": "tabular"
- },
- {
- "name": "output_jcounts",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 1330,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "tool_shed_repository": {
- "changeset_revision": "92808b865dfb",
- "name": "featurecounts",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"pe_parameters\": \"{\\\"only_both_ends\\\": \\\"false\\\", \\\"exclude_chimerics\\\": \\\"true\\\", \\\"fragment_counting_enabled\\\": {\\\"fragment_counting\\\": \\\"\\\", \\\"__current_case__\\\": 1}}\", \"format\": \"\\\"tabdel_short\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"strand_specificity\": \"\\\"1\\\"\", \"include_feature_length_file\": \"\\\"false\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"anno\": \"{\\\"anno_select\\\": \\\"history\\\", \\\"reference_gene_sets\\\": null, \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"extended_parameters\": \"{\\\"gff_feature_attribute\\\": \\\"transcript_id\\\", \\\"exon_exon_junction_read_counting_enabled\\\": {\\\"__current_case__\\\": 1, \\\"count_exon_exon_junction_reads\\\": \\\"false\\\"}, \\\"read_extension_3p\\\": \\\"0\\\", \\\"frac_overlap_feature\\\": \\\"0\\\", \\\"min_overlap\\\": \\\"1\\\", \\\"read_extension_5p\\\": \\\"0\\\", \\\"contribute_to_multiple_features\\\": \\\"false\\\", \\\"frac_overlap\\\": \\\"0\\\", \\\"primary\\\": \\\"false\\\", \\\"count_split_alignments_only\\\": \\\"false\\\", \\\"by_read_group\\\": \\\"false\\\", \\\"ignore_dup\\\": \\\"false\\\", \\\"mapping_quality\\\": \\\"12\\\", \\\"summarization_level\\\": \\\"false\\\", \\\"largest_overlap\\\": \\\"false\\\", \\\"read_reduction\\\": \\\"\\\", \\\"multimapping_enabled\\\": {\\\"multimapping_counts\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"long_reads\\\": \\\"false\\\", \\\"gff_feature_type\\\": \\\"exon\\\"}\", \"alignment\": \"null\"}",
- "tool_version": "1.6.0.6",
- "type": "tool",
- "uuid": "fc054e7c-a603-4aa6-b3cf-d2e1cd4d7be9",
- "workflow_outputs": []
- },
- "4": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 4,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "Megakaryocyte_rep1_forward_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 490
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"Megakaryocyte_rep1_forward_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "41e16899-910c-4750-84cf-4bddc17437c2",
- "workflow_outputs": []
- },
- "40": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "errors": null,
- "id": 40,
- "input_connections": {
- "alignment": {
- "id": 22,
- "output_name": "output_alignments"
- },
- "anno|reference_gene_sets": {
- "id": 38,
- "output_name": "transcripts_annotated"
- }
- },
- "inputs": [],
- "label": null,
- "name": "featureCounts",
- "outputs": [
- {
- "name": "output_medium",
- "type": "tabular"
- },
- {
- "name": "output_short",
- "type": "tabular"
- },
- {
- "name": "output_full",
- "type": "tabular"
- },
- {
- "name": "output_summary",
- "type": "tabular"
- },
- {
- "name": "output_feature_lengths",
- "type": "tabular"
- },
- {
- "name": "output_jcounts",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 1330,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "tool_shed_repository": {
- "changeset_revision": "92808b865dfb",
- "name": "featurecounts",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"pe_parameters\": \"{\\\"only_both_ends\\\": \\\"false\\\", \\\"exclude_chimerics\\\": \\\"true\\\", \\\"fragment_counting_enabled\\\": {\\\"fragment_counting\\\": \\\"\\\", \\\"__current_case__\\\": 1}}\", \"format\": \"\\\"tabdel_short\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"strand_specificity\": \"\\\"1\\\"\", \"include_feature_length_file\": \"\\\"false\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"anno\": \"{\\\"anno_select\\\": \\\"history\\\", \\\"reference_gene_sets\\\": null, \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"extended_parameters\": \"{\\\"gff_feature_attribute\\\": \\\"transcript_id\\\", \\\"exon_exon_junction_read_counting_enabled\\\": {\\\"__current_case__\\\": 1, \\\"count_exon_exon_junction_reads\\\": \\\"false\\\"}, \\\"read_extension_3p\\\": \\\"0\\\", \\\"frac_overlap_feature\\\": \\\"0\\\", \\\"min_overlap\\\": \\\"1\\\", \\\"read_extension_5p\\\": \\\"0\\\", \\\"contribute_to_multiple_features\\\": \\\"false\\\", \\\"frac_overlap\\\": \\\"0\\\", \\\"primary\\\": \\\"false\\\", \\\"count_split_alignments_only\\\": \\\"false\\\", \\\"by_read_group\\\": \\\"false\\\", \\\"ignore_dup\\\": \\\"false\\\", \\\"mapping_quality\\\": \\\"12\\\", \\\"summarization_level\\\": \\\"false\\\", \\\"largest_overlap\\\": \\\"false\\\", \\\"read_reduction\\\": \\\"\\\", \\\"multimapping_enabled\\\": {\\\"multimapping_counts\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"long_reads\\\": \\\"false\\\", \\\"gff_feature_type\\\": \\\"exon\\\"}\", \"alignment\": \"null\"}",
- "tool_version": "1.6.0.6",
- "type": "tool",
- "uuid": "a29ab182-8103-425e-86e8-d71a08277f7c",
- "workflow_outputs": []
- },
- "41": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "errors": null,
- "id": 41,
- "input_connections": {
- "alignment": {
- "id": 23,
- "output_name": "output_alignments"
- },
- "anno|reference_gene_sets": {
- "id": 38,
- "output_name": "transcripts_annotated"
- }
- },
- "inputs": [],
- "label": null,
- "name": "featureCounts",
- "outputs": [
- {
- "name": "output_medium",
- "type": "tabular"
- },
- {
- "name": "output_short",
- "type": "tabular"
- },
- {
- "name": "output_full",
- "type": "tabular"
- },
- {
- "name": "output_summary",
- "type": "tabular"
- },
- {
- "name": "output_feature_lengths",
- "type": "tabular"
- },
- {
- "name": "output_jcounts",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 1330,
- "top": 250
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "tool_shed_repository": {
- "changeset_revision": "92808b865dfb",
- "name": "featurecounts",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"pe_parameters\": \"{\\\"only_both_ends\\\": \\\"false\\\", \\\"exclude_chimerics\\\": \\\"true\\\", \\\"fragment_counting_enabled\\\": {\\\"fragment_counting\\\": \\\"\\\", \\\"__current_case__\\\": 1}}\", \"format\": \"\\\"tabdel_short\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"strand_specificity\": \"\\\"1\\\"\", \"include_feature_length_file\": \"\\\"false\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"anno\": \"{\\\"anno_select\\\": \\\"history\\\", \\\"reference_gene_sets\\\": null, \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"extended_parameters\": \"{\\\"gff_feature_attribute\\\": \\\"transcript_id\\\", \\\"exon_exon_junction_read_counting_enabled\\\": {\\\"__current_case__\\\": 1, \\\"count_exon_exon_junction_reads\\\": \\\"false\\\"}, \\\"read_extension_3p\\\": \\\"0\\\", \\\"frac_overlap_feature\\\": \\\"0\\\", \\\"min_overlap\\\": \\\"1\\\", \\\"read_extension_5p\\\": \\\"0\\\", \\\"contribute_to_multiple_features\\\": \\\"false\\\", \\\"frac_overlap\\\": \\\"0\\\", \\\"primary\\\": \\\"false\\\", \\\"count_split_alignments_only\\\": \\\"false\\\", \\\"by_read_group\\\": \\\"false\\\", \\\"ignore_dup\\\": \\\"false\\\", \\\"mapping_quality\\\": \\\"12\\\", \\\"summarization_level\\\": \\\"false\\\", \\\"largest_overlap\\\": \\\"false\\\", \\\"read_reduction\\\": \\\"\\\", \\\"multimapping_enabled\\\": {\\\"multimapping_counts\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"long_reads\\\": \\\"false\\\", \\\"gff_feature_type\\\": \\\"exon\\\"}\", \"alignment\": \"null\"}",
- "tool_version": "1.6.0.6",
- "type": "tool",
- "uuid": "8b73ac2b-9e21-4cd3-bff2-ce0215d3a213",
- "workflow_outputs": []
- },
- "42": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "errors": null,
- "id": 42,
- "input_connections": {
- "alignment": {
- "id": 24,
- "output_name": "output_alignments"
- },
- "anno|reference_gene_sets": {
- "id": 38,
- "output_name": "transcripts_annotated"
- }
- },
- "inputs": [],
- "label": null,
- "name": "featureCounts",
- "outputs": [
- {
- "name": "output_medium",
- "type": "tabular"
- },
- {
- "name": "output_short",
- "type": "tabular"
- },
- {
- "name": "output_full",
- "type": "tabular"
- },
- {
- "name": "output_summary",
- "type": "tabular"
- },
- {
- "name": "output_feature_lengths",
- "type": "tabular"
- },
- {
- "name": "output_jcounts",
- "type": "tabular"
- }
- ],
- "position": {
- "left": 1330,
- "top": 370
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.0.6",
- "tool_shed_repository": {
- "changeset_revision": "92808b865dfb",
- "name": "featurecounts",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"pe_parameters\": \"{\\\"only_both_ends\\\": \\\"false\\\", \\\"exclude_chimerics\\\": \\\"true\\\", \\\"fragment_counting_enabled\\\": {\\\"fragment_counting\\\": \\\"\\\", \\\"__current_case__\\\": 1}}\", \"format\": \"\\\"tabdel_short\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"strand_specificity\": \"\\\"1\\\"\", \"include_feature_length_file\": \"\\\"false\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"anno\": \"{\\\"anno_select\\\": \\\"history\\\", \\\"reference_gene_sets\\\": null, \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"extended_parameters\": \"{\\\"gff_feature_attribute\\\": \\\"transcript_id\\\", \\\"exon_exon_junction_read_counting_enabled\\\": {\\\"__current_case__\\\": 1, \\\"count_exon_exon_junction_reads\\\": \\\"false\\\"}, \\\"read_extension_3p\\\": \\\"0\\\", \\\"frac_overlap_feature\\\": \\\"0\\\", \\\"min_overlap\\\": \\\"1\\\", \\\"read_extension_5p\\\": \\\"0\\\", \\\"contribute_to_multiple_features\\\": \\\"false\\\", \\\"frac_overlap\\\": \\\"0\\\", \\\"primary\\\": \\\"false\\\", \\\"count_split_alignments_only\\\": \\\"false\\\", \\\"by_read_group\\\": \\\"false\\\", \\\"ignore_dup\\\": \\\"false\\\", \\\"mapping_quality\\\": \\\"12\\\", \\\"summarization_level\\\": \\\"false\\\", \\\"largest_overlap\\\": \\\"false\\\", \\\"read_reduction\\\": \\\"\\\", \\\"multimapping_enabled\\\": {\\\"multimapping_counts\\\": \\\"\\\", \\\"__current_case__\\\": 1}, \\\"long_reads\\\": \\\"false\\\", \\\"gff_feature_type\\\": \\\"exon\\\"}\", \"alignment\": \"null\"}",
- "tool_version": "1.6.0.6",
- "type": "tool",
- "uuid": "02a66170-4c0c-4c1e-8073-ec626190390e",
- "workflow_outputs": []
- },
- "43": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/deseq2/deseq2/2.11.40.2",
- "errors": null,
- "id": 43,
- "input_connections": {
- "rep_factorName_0|rep_factorLevel_0|countsFile": [
- {
+ "input_connections": {
+ "input_file": {
+ "id": 17,
+ "output_name": "fastq_out_r1_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 2290
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "a7e3b025-5fb9-4c9c-a232-a10a2c80d835",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "63e1f1dd-70ab-469e-8bf6-966cfc7a84c6"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "2d555968-dc97-450f-99df-25eaa2368d9b"
+ }
+ ]
+ },
+ "29": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 29,
+ "input_connections": {
+ "input_file": {
+ "id": 17,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 2590
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "57898d1b-fd1d-452d-a78f-5f42d4961fcb",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "e0d035b4-c63f-4117-b15b-6106996e2546"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "f20d8a11-0740-47b6-94c2-3bbbb3bf56d9"
+ }
+ ]
+ },
+ "30": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "errors": null,
+ "id": 30,
+ "input_connections": {
+ "library|input_1": {
+ "id": 20,
+ "output_name": "fastq_out_r1_paired"
+ },
+ "library|input_2": {
+ "id": 20,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "HISAT2",
+ "outputs": [
+ {
+ "name": "output_alignments",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 890
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.1.0+galaxy5",
+ "tool_shed_repository": {
+ "changeset_revision": "0c16cad5e03b",
+ "name": "hisat2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"alignment_options\": {\"__current_case__\": 0, \"alignment_options_selector\": \"defaults\"}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"coefficient\": \"0.0\", \"canonical_penalty\": \"0\", \"no_spliced_alignment_options\": {\"no_spliced_alignment\": \"\", \"__current_case__\": 1}, \"nc_function_type\": \"C\", \"constant_term\": \"0.0\", \"nc_coefficient\": \"1.0\", \"noncanonical_penalty\": \"3\", \"known_splice_gtf\": {\"__class__\": \"RuntimeValue\"}, \"nc_constant_term\": \"-8.0\", \"min_intron\": \"20\", \"function_type\": \"C\", \"__current_case__\": 1, \"notmplen\": \"false\", \"tma\": \"--dta\", \"max_intron\": \"500000\", \"spliced_options_selector\": \"advanced\", \"novel_splicesite_outfile\": \"false\"}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"__page__\": null, \"sum\": {\"new_summary\": \"false\", \"summary_file\": \"false\"}, \"library\": {\"rna_strandness\": \"FR\", \"input_2\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 1, \"input_1\": {\"__class__\": \"ConnectedValue\"}, \"type\": \"paired\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"mm10\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "2.1.0+galaxy5",
+ "type": "tool",
+ "uuid": "e2ab9d1e-6b3e-43b8-9c62-57218aa7bc49",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_alignments",
+ "uuid": "5525e4aa-5ef7-46fe-bfbf-61eeff7471d6"
+ }
+ ]
+ },
+ "31": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 31,
+ "input_connections": {
+ "input_file": {
+ "id": 20,
+ "output_name": "fastq_out_r1_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 2890
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "38e24d6f-fe1b-4939-953a-452be1f51d09",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "21c23f17-d675-4e2a-8d1b-1ac6e167c556"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "c4a3e0be-83e2-4cb3-a9fc-4487f66eb7f4"
+ }
+ ]
+ },
+ "32": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 32,
+ "input_connections": {
+ "input_file": {
+ "id": 20,
+ "output_name": "fastq_out_r2_paired"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 778,
+ "top": 3190
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"adapters\": {\"__class__\": \"RuntimeValue\"}, \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "e6a19a1f-e2cc-4160-911b-860d09cc1b8d",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "c11f2b2f-2859-40bd-83c5-162cf3986c0c"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "f77d32d9-c7c9-44f8-a268-e4020e322a17"
+ }
+ ]
+ },
+ "33": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "errors": null,
+ "id": 33,
+ "input_connections": {
+ "input_bam": {
+ "id": 21,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "StringTie",
+ "outputs": [
+ {
+ "name": "output_gtf",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 290
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "tool_shed_repository": {
+ "changeset_revision": "eba36e001f45",
+ "name": "stringtie",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"min_bundle_cov\": \"2\", \"min_tlen\": \"200\", \"bdist\": \"50\", \"multi_mapping\": \"false\", \"abundance_estimation\": \"false\", \"fraction\": \"0.15\", \"disable_trimming\": \"false\", \"omit_sequences\": \"\", \"name_prefix\": \"\", \"min_anchor_len\": \"10\", \"bundle_fraction\": \"0.95\", \"min_anchor_cov\": \"1\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"rna_strandness\": \"--fr\", \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"guide\": {\"use_guide\": \"no\", \"__current_case__\": 0}}",
+ "tool_version": "1.3.6",
+ "type": "tool",
+ "uuid": "05be1a79-4ee1-4205-9f53-1e59cc483797",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_gtf",
+ "uuid": "8df32560-faae-49df-898a-02f664bda16a"
+ }
+ ]
+ },
+ "34": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 34,
+ "input_connections": {
+ "bamInput": {
+ "id": 21,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 850
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"forward\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "9df5e150-d61c-4859-b1f8-edfcbef9ec49",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "a8de282b-03c0-4ead-939d-8f337804b292"
+ }
+ ]
+ },
+ "35": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 35,
+ "input_connections": {
+ "bamInput": {
+ "id": 21,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1530
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"reverse\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "1a9a4e17-62a8-40c5-b9bd-f0992e7cb134",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "8b7bb1f6-2ae9-4deb-a902-7769b1657ecf"
+ }
+ ]
+ },
+ "36": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "errors": null,
+ "id": 36,
+ "input_connections": {
+ "input_bam": {
+ "id": 24,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "StringTie",
+ "outputs": [
+ {
+ "name": "output_gtf",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 430
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "tool_shed_repository": {
+ "changeset_revision": "eba36e001f45",
+ "name": "stringtie",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"min_bundle_cov\": \"2\", \"min_tlen\": \"200\", \"bdist\": \"50\", \"multi_mapping\": \"false\", \"abundance_estimation\": \"false\", \"fraction\": \"0.15\", \"disable_trimming\": \"false\", \"omit_sequences\": \"\", \"name_prefix\": \"\", \"min_anchor_len\": \"10\", \"bundle_fraction\": \"0.95\", \"min_anchor_cov\": \"1\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"rna_strandness\": \"--fr\", \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"guide\": {\"use_guide\": \"no\", \"__current_case__\": 0}}",
+ "tool_version": "1.3.6",
+ "type": "tool",
+ "uuid": "6feba9a1-0cdb-41e6-9f7e-d2ab4b3f6adc",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_gtf",
+ "uuid": "7496fd31-357e-4884-b95e-d3e6ddf47c51"
+ }
+ ]
+ },
+ "37": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 37,
+ "input_connections": {
+ "bamInput": {
+ "id": 24,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1020
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"forward\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "dfb2f1aa-f121-4dba-9105-a177ef5722ff",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "0b03495f-c57b-4a4b-959e-59bfcb3dafd0"
+ }
+ ]
+ },
+ "38": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 38,
+ "input_connections": {
+ "bamInput": {
+ "id": 24,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1700
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"reverse\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "d00922af-38a0-450b-81e9-5f94a8f4f5ba",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "ac07c391-dfc6-4fa7-bac9-2ac77095814a"
+ }
+ ]
+ },
+ "39": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "errors": null,
"id": 39,
- "output_name": "output_short"
- },
- {
+ "input_connections": {
+ "input_bam": {
+ "id": 27,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "StringTie",
+ "outputs": [
+ {
+ "name": "output_gtf",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1107,
+ "top": 570
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "tool_shed_repository": {
+ "changeset_revision": "eba36e001f45",
+ "name": "stringtie",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"min_bundle_cov\": \"2\", \"min_tlen\": \"200\", \"bdist\": \"50\", \"multi_mapping\": \"false\", \"abundance_estimation\": \"false\", \"fraction\": \"0.15\", \"disable_trimming\": \"false\", \"omit_sequences\": \"\", \"name_prefix\": \"\", \"min_anchor_len\": \"10\", \"bundle_fraction\": \"0.95\", \"min_anchor_cov\": \"1\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"rna_strandness\": \"--fr\", \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"guide\": {\"use_guide\": \"no\", \"__current_case__\": 0}}",
+ "tool_version": "1.3.6",
+ "type": "tool",
+ "uuid": "d9a5d396-a2ec-4c32-9522-946cea6b83d3",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_gtf",
+ "uuid": "1552601b-7419-40fd-9a1a-37b240f6c982"
+ }
+ ]
+ },
+ "40": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
"id": 40,
- "output_name": "output_short"
- }
- ],
- "rep_factorName_0|rep_factorLevel_1|countsFile": [
- {
+ "input_connections": {
+ "bamInput": {
+ "id": 27,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1190
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"forward\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "a433db0d-a73f-4be1-8c9a-cb9dffbeb4e2",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "3fcfd3a2-7025-471b-bd48-bb54dbae0ca8"
+ }
+ ]
+ },
+ "41": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
"id": 41,
- "output_name": "output_short"
- },
- {
+ "input_connections": {
+ "bamInput": {
+ "id": 27,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1870
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"reverse\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "441aaaaa-306b-445d-a0eb-49d7d197797f",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "fa0df65c-1398-409b-8dce-f6f6fb4cf433"
+ }
+ ]
+ },
+ "42": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "errors": null,
"id": 42,
- "output_name": "output_short"
- }
- ]
- },
- "inputs": [],
- "label": null,
- "name": "DESeq2",
- "outputs": [
- {
- "name": "split_output",
- "type": "input"
- },
- {
- "name": "deseq_out",
- "type": "tabular"
- },
- {
- "name": "plots",
- "type": "pdf"
- },
- {
- "name": "counts_out",
- "type": "tabular"
+ "input_connections": {
+ "input_bam": {
+ "id": 30,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "StringTie",
+ "outputs": [
+ {
+ "name": "output_gtf",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1107,
+ "top": 710
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/1.3.6",
+ "tool_shed_repository": {
+ "changeset_revision": "eba36e001f45",
+ "name": "stringtie",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"adv\": {\"min_bundle_cov\": \"2\", \"min_tlen\": \"200\", \"bdist\": \"50\", \"multi_mapping\": \"false\", \"abundance_estimation\": \"false\", \"fraction\": \"0.15\", \"disable_trimming\": \"false\", \"omit_sequences\": \"\", \"name_prefix\": \"\", \"min_anchor_len\": \"10\", \"bundle_fraction\": \"0.95\", \"min_anchor_cov\": \"1\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null, \"rna_strandness\": \"--fr\", \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"guide\": {\"use_guide\": \"no\", \"__current_case__\": 0}}",
+ "tool_version": "1.3.6",
+ "type": "tool",
+ "uuid": "8b977c7a-1b77-42c7-a2db-e3c5956ee6ad",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_gtf",
+ "uuid": "11730597-b79b-4c58-978c-44d3f352a899"
+ }
+ ]
+ },
+ "43": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 43,
+ "input_connections": {
+ "bamInput": {
+ "id": 30,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 1360
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"forward\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "a0b2660b-eba8-4e48-9855-980a45969237",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFileName",
+ "uuid": "b322012e-acd7-44f4-a4e2-ee4eb4e4bf64"
+ }
+ ]
+ },
+ "44": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "errors": null,
+ "id": 44,
+ "input_connections": {
+ "bamInput": {
+ "id": 30,
+ "output_name": "output_alignments"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool bamCoverage",
+ "name": "advancedOpt"
+ }
+ ],
+ "label": null,
+ "name": "bamCoverage",
+ "outputs": [
+ {
+ "name": "outFileName",
+ "type": "bigwig"
+ }
+ ],
+ "position": {
+ "left": 1106,
+ "top": 2040
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutFileName": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "outFileName"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/3.3.0.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "7a7fd0f5f15d",
+ "name": "deeptools_bam_coverage",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"exactScaling\": \"false\", \"outFileFormat\": \"bigwig\", \"__page__\": null, \"region\": \"\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"binSize\": \"1\", \"scaling\": {\"effectiveGenomeSize\": {\"effectiveGenomeSize_opt\": \"2304947926\", \"__current_case__\": 6}, \"type\": \"1x\", \"__current_case__\": 4}, \"advancedOpt\": {\"ignoreDuplicates\": \"false\", \"centerReads\": \"false\", \"ignoreForNormalization\": \"\", \"minFragmentLength\": \"0\", \"minMappingQuality\": \"1\", \"MNase\": \"false\", \"samFlagInclude\": \"\", \"filterRNAstrand\": \"reverse\", \"Offset\": \"\", \"smoothLength\": \"\", \"showAdvancedOpt\": \"yes\", \"__current_case__\": 1, \"samFlagExclude\": \"\", \"doExtendCustom\": {\"__current_case__\": 0, \"doExtend\": \"no\"}, \"skipNAs\": \"false\", \"scaleFactor\": \"1.0\", \"blackListFileName\": {\"__class__\": \"RuntimeValue\"}, \"maxFragmentLength\": \"0\"}, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.0.0.0",
+ "type": "tool",
+ "uuid": "7189ed72-d46f-49af-b8e1-f8db46e1ba2e",
+ "workflow_outputs": []
+ },
+ "45": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie_merge/1.3.6",
+ "errors": null,
+ "id": 45,
+ "input_connections": {
+ "guide_gff": {
+ "id": 8,
+ "output_name": "output"
+ },
+ "input_gtf": [
+ {
+ "id": 33,
+ "output_name": "output_gtf"
+ },
+ {
+ "id": 36,
+ "output_name": "output_gtf"
+ },
+ {
+ "id": 42,
+ "output_name": "output_gtf"
+ },
+ {
+ "id": 39,
+ "output_name": "output_gtf"
+ }
+ ]
+ },
+ "inputs": [],
+ "label": null,
+ "name": "StringTie merge",
+ "outputs": [
+ {
+ "name": "out_gtf",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1441,
+ "top": 298
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie_merge/1.3.6",
+ "tool_shed_repository": {
+ "changeset_revision": "eba36e001f45",
+ "name": "stringtie",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_iso\": \"0.01\", \"min_fpkm\": \"1.0\", \"keep_introns\": \"false\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"min_len\": \"50\", \"guide_gff\": {\"__class__\": \"ConnectedValue\"}, \"min_cov\": \"0\", \"min_tpm\": \"1.0\", \"input_gtf\": {\"__class__\": \"ConnectedValue\"}, \"gap_len\": \"250\"}",
+ "tool_version": "1.3.6",
+ "type": "tool",
+ "uuid": "d4d8f2e1-7ac2-46d0-b541-a6a5cd488a33",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_gtf",
+ "uuid": "9b43230c-7595-46a5-96bf-15894bb56666"
+ }
+ ]
+ },
+ "46": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.11.2",
+ "errors": null,
+ "id": 46,
+ "input_connections": {
+ "annotation|ref_source|reference_annotation": {
+ "id": 8,
+ "output_name": "output"
+ },
+ "gffinputs": {
+ "id": 45,
+ "output_name": "out_gtf"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "GffCompare",
+ "outputs": [
+ {
+ "name": "refmap_output",
+ "type": "input"
+ },
+ {
+ "name": "tmap_output",
+ "type": "input"
+ },
+ {
+ "name": "transcripts_stats",
+ "type": "txt"
+ },
+ {
+ "name": "transcripts_loci",
+ "type": "tabular"
+ },
+ {
+ "name": "transcripts_tracking",
+ "type": "tabular"
+ },
+ {
+ "name": "transcripts_annotated",
+ "type": "gtf"
+ }
+ ],
+ "position": {
+ "left": 1762,
+ "top": 290
+ },
+ "post_job_actions": {
+ "HideDatasetActionrefmap_output": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "refmap_output"
+ },
+ "HideDatasetActiontmap_output": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "tmap_output"
+ },
+ "HideDatasetActiontranscripts_annotated": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "transcripts_annotated"
+ },
+ "HideDatasetActiontranscripts_loci": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "transcripts_loci"
+ },
+ "HideDatasetActiontranscripts_stats": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "transcripts_stats"
+ },
+ "HideDatasetActiontranscripts_tracking": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "transcripts_tracking"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.11.2",
+ "tool_shed_repository": {
+ "changeset_revision": "0f710191a66d",
+ "name": "gffcompare",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"seq_data\": {\"use_seq_data\": \"Yes\", \"seq_source\": {\"index_source\": \"cached\", \"index\": \"mm10\", \"__current_case__\": 0}, \"__current_case__\": 1}, \"__page__\": null, \"max_dist_group\": \"100\", \"adv_output\": {\"A\": \"false\", \"p\": \"TCONS\", \"C\": \"false\", \"K\": \"false\", \"X\": \"false\"}, \"max_dist_exon\": \"100\", \"__rerun_remap_job_id__\": null, \"no_merge\": \"false\", \"discard_duplicates\": \"\", \"gffinputs\": {\"__class__\": \"ConnectedValue\"}, \"discard_single_exon\": \"\", \"chr_stats\": \"false\", \"annotation\": {\"strict_match\": \"false\", \"ref_source\": {\"reference_annotation\": {\"__class__\": \"ConnectedValue\"}, \"ref_source_sel\": \"history\", \"__current_case__\": 1}, \"__current_case__\": 0, \"ignore_nonoverlapping_transfrags\": \"false\", \"use_ref_annotation\": \"Yes\", \"ignore_nonoverlapping_reference\": \"false\", \"refmap_tmap\": [\"refmap\", \"tmap\"]}}",
+ "tool_version": "0.11.2",
+ "type": "tool",
+ "uuid": "7198ef30-af2d-4152-899c-b2ba7dc99d56",
+ "workflow_outputs": []
+ },
+ "47": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "errors": null,
+ "id": 47,
+ "input_connections": {
+ "alignment": {
+ "id": 21,
+ "output_name": "output_alignments"
+ },
+ "anno|reference_gene_sets": {
+ "id": 46,
+ "output_name": "transcripts_annotated"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "featureCounts",
+ "outputs": [
+ {
+ "name": "output_short",
+ "type": "tabular"
+ },
+ {
+ "name": "output_summary",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2090,
+ "top": 290
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput_short": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_short"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a37612abf7f9",
+ "name": "featurecounts",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"pe_parameters\": {\"only_both_ends\": \"false\", \"exclude_chimerics\": \"true\", \"fragment_counting_enabled\": {\"fragment_counting\": \"\", \"__current_case__\": 1}}, \"strand_specificity\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"format\": \"tabdel_short\", \"include_feature_length_file\": \"false\", \"anno\": {\"anno_select\": \"history\", \"reference_gene_sets\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 2}, \"extended_parameters\": {\"gff_feature_attribute\": \"transcript_id\", \"exon_exon_junction_read_counting_enabled\": {\"__current_case__\": 1, \"count_exon_exon_junction_reads\": \"false\"}, \"read_extension_3p\": \"0\", \"frac_overlap_feature\": \"0\", \"min_overlap\": \"1\", \"read_extension_5p\": \"0\", \"contribute_to_multiple_features\": \"false\", \"frac_overlap\": \"0\", \"primary\": \"false\", \"count_split_alignments_only\": \"false\", \"by_read_group\": \"false\", \"ignore_dup\": \"false\", \"mapping_quality\": \"12\", \"summarization_level\": \"false\", \"R\": \"false\", \"largest_overlap\": \"false\", \"read_reduction\": \"\", \"multimapping_enabled\": {\"multimapping_counts\": \"\", \"__current_case__\": 1}, \"long_reads\": \"false\", \"gff_feature_type\": \"exon\"}, \"alignment\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.6.4+galaxy1",
+ "type": "tool",
+ "uuid": "95abe7d8-6d49-41dd-8147-e5c7c3764db7",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_summary",
+ "uuid": "03051c5b-afcc-467c-95f2-116fa87c9a79"
+ }
+ ]
+ },
+ "48": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "errors": null,
+ "id": 48,
+ "input_connections": {
+ "alignment": {
+ "id": 24,
+ "output_name": "output_alignments"
+ },
+ "anno|reference_gene_sets": {
+ "id": 46,
+ "output_name": "transcripts_annotated"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "featureCounts",
+ "outputs": [
+ {
+ "name": "output_short",
+ "type": "tabular"
+ },
+ {
+ "name": "output_summary",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2085,
+ "top": 508
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput_short": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_short"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a37612abf7f9",
+ "name": "featurecounts",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"pe_parameters\": {\"only_both_ends\": \"false\", \"exclude_chimerics\": \"true\", \"fragment_counting_enabled\": {\"fragment_counting\": \"\", \"__current_case__\": 1}}, \"strand_specificity\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"format\": \"tabdel_short\", \"include_feature_length_file\": \"false\", \"anno\": {\"anno_select\": \"history\", \"reference_gene_sets\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 2}, \"extended_parameters\": {\"gff_feature_attribute\": \"transcript_id\", \"exon_exon_junction_read_counting_enabled\": {\"__current_case__\": 1, \"count_exon_exon_junction_reads\": \"false\"}, \"read_extension_3p\": \"0\", \"frac_overlap_feature\": \"0\", \"min_overlap\": \"1\", \"read_extension_5p\": \"0\", \"contribute_to_multiple_features\": \"false\", \"frac_overlap\": \"0\", \"primary\": \"false\", \"count_split_alignments_only\": \"false\", \"by_read_group\": \"false\", \"ignore_dup\": \"false\", \"mapping_quality\": \"12\", \"summarization_level\": \"false\", \"R\": \"false\", \"largest_overlap\": \"false\", \"read_reduction\": \"\", \"multimapping_enabled\": {\"multimapping_counts\": \"\", \"__current_case__\": 1}, \"long_reads\": \"false\", \"gff_feature_type\": \"exon\"}, \"alignment\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.6.4+galaxy1",
+ "type": "tool",
+ "uuid": "9ae13bea-d205-488c-8c9e-6727e69d7a12",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_summary",
+ "uuid": "90818597-3cff-42ee-8c5d-9e73babac7a3"
+ }
+ ]
+ },
+ "49": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "errors": null,
+ "id": 49,
+ "input_connections": {
+ "alignment": {
+ "id": 27,
+ "output_name": "output_alignments"
+ },
+ "anno|reference_gene_sets": {
+ "id": 46,
+ "output_name": "transcripts_annotated"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "featureCounts",
+ "outputs": [
+ {
+ "name": "output_short",
+ "type": "tabular"
+ },
+ {
+ "name": "output_summary",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2090,
+ "top": 730
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput_short": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_short"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a37612abf7f9",
+ "name": "featurecounts",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"pe_parameters\": {\"only_both_ends\": \"false\", \"exclude_chimerics\": \"true\", \"fragment_counting_enabled\": {\"fragment_counting\": \"\", \"__current_case__\": 1}}, \"strand_specificity\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"format\": \"tabdel_short\", \"include_feature_length_file\": \"false\", \"anno\": {\"anno_select\": \"history\", \"reference_gene_sets\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 2}, \"extended_parameters\": {\"gff_feature_attribute\": \"transcript_id\", \"exon_exon_junction_read_counting_enabled\": {\"__current_case__\": 1, \"count_exon_exon_junction_reads\": \"false\"}, \"read_extension_3p\": \"0\", \"frac_overlap_feature\": \"0\", \"min_overlap\": \"1\", \"read_extension_5p\": \"0\", \"contribute_to_multiple_features\": \"false\", \"frac_overlap\": \"0\", \"primary\": \"false\", \"count_split_alignments_only\": \"false\", \"by_read_group\": \"false\", \"ignore_dup\": \"false\", \"mapping_quality\": \"12\", \"summarization_level\": \"false\", \"R\": \"false\", \"largest_overlap\": \"false\", \"read_reduction\": \"\", \"multimapping_enabled\": {\"multimapping_counts\": \"\", \"__current_case__\": 1}, \"long_reads\": \"false\", \"gff_feature_type\": \"exon\"}, \"alignment\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.6.4+galaxy1",
+ "type": "tool",
+ "uuid": "db762d9c-4d9f-427a-a5d0-0c1bff0ee11b",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_summary",
+ "uuid": "bfb857ae-181f-40e5-a85c-45ea832609ee"
+ }
+ ]
+ },
+ "50": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "errors": null,
+ "id": 50,
+ "input_connections": {
+ "alignment": {
+ "id": 30,
+ "output_name": "output_alignments"
+ },
+ "anno|reference_gene_sets": {
+ "id": 46,
+ "output_name": "transcripts_annotated"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "featureCounts",
+ "outputs": [
+ {
+ "name": "output_short",
+ "type": "tabular"
+ },
+ {
+ "name": "output_summary",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2090,
+ "top": 950
+ },
+ "post_job_actions": {
+ "HideDatasetActionoutput_short": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "output_short"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/1.6.4+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "a37612abf7f9",
+ "name": "featurecounts",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"pe_parameters\": {\"only_both_ends\": \"false\", \"exclude_chimerics\": \"true\", \"fragment_counting_enabled\": {\"fragment_counting\": \"\", \"__current_case__\": 1}}, \"strand_specificity\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"format\": \"tabdel_short\", \"include_feature_length_file\": \"false\", \"anno\": {\"anno_select\": \"history\", \"reference_gene_sets\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 2}, \"extended_parameters\": {\"gff_feature_attribute\": \"transcript_id\", \"exon_exon_junction_read_counting_enabled\": {\"__current_case__\": 1, \"count_exon_exon_junction_reads\": \"false\"}, \"read_extension_3p\": \"0\", \"frac_overlap_feature\": \"0\", \"min_overlap\": \"1\", \"read_extension_5p\": \"0\", \"contribute_to_multiple_features\": \"false\", \"frac_overlap\": \"0\", \"primary\": \"false\", \"count_split_alignments_only\": \"false\", \"by_read_group\": \"false\", \"ignore_dup\": \"false\", \"mapping_quality\": \"12\", \"summarization_level\": \"false\", \"R\": \"false\", \"largest_overlap\": \"false\", \"read_reduction\": \"\", \"multimapping_enabled\": {\"multimapping_counts\": \"\", \"__current_case__\": 1}, \"long_reads\": \"false\", \"gff_feature_type\": \"exon\"}, \"alignment\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.6.4+galaxy1",
+ "type": "tool",
+ "uuid": "16d0abf6-f6e0-4b5c-a857-114cd7b9b319",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_summary",
+ "uuid": "35019ed7-acfd-410b-86f4-7f75b84f737e"
+ }
+ ]
+ },
+ "51": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/deseq2/deseq2/2.11.40.6",
+ "errors": null,
+ "id": 51,
+ "input_connections": {
+ "select_data|rep_factorName_0|rep_factorLevel_0|countsFile": [
+ {
+ "id": 47,
+ "output_name": "output_short"
+ },
+ {
+ "id": 48,
+ "output_name": "output_short"
+ }
+ ],
+ "select_data|rep_factorName_0|rep_factorLevel_1|countsFile": [
+ {
+ "id": 49,
+ "output_name": "output_short"
+ },
+ {
+ "id": 50,
+ "output_name": "output_short"
+ }
+ ]
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool DESeq2",
+ "name": "batch_factors"
+ }
+ ],
+ "label": null,
+ "name": "DESeq2",
+ "outputs": [
+ {
+ "name": "deseq_out",
+ "type": "tabular"
+ },
+ {
+ "name": "plots",
+ "type": "pdf"
+ },
+ {
+ "name": "counts_out",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2418,
+ "top": 290
+ },
+ "post_job_actions": {
+ "HideDatasetActioncounts_out": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "counts_out"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/deseq2/deseq2/2.11.40.6",
+ "tool_shed_repository": {
+ "changeset_revision": "0696db066a5b",
+ "name": "deseq2",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"fit_type\": \"1\", \"__page__\": null, \"normVST\": \"false\", \"tximport\": {\"tximport_selector\": \"count\", \"__current_case__\": 1}, \"esf\": \"\", \"outlier_replace_off\": \"false\", \"__rerun_remap_job_id__\": null, \"auto_mean_filter_off\": \"false\", \"header\": \"true\", \"normCounts\": \"true\", \"outlier_filter_off\": \"false\", \"many_contrasts\": \"false\", \"batch_factors\": {\"__class__\": \"RuntimeValue\"}, \"pdf\": \"true\", \"normRLog\": \"false\", \"select_data\": {\"how\": \"datasets_per_level\", \"rep_factorName\": [{\"__index__\": 0, \"factorName\": \"FactorName\", \"rep_factorLevel\": [{\"__index__\": 0, \"factorLevel\": \"GIE\", \"countsFile\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"factorLevel\": \"Mega\", \"countsFile\": {\"__class__\": \"ConnectedValue\"}}]}], \"__current_case__\": 1}}",
+ "tool_version": "2.11.40.6",
+ "type": "tool",
+ "uuid": "31920f6a-7735-48ea-b477-e7b2933b4f97",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "plots",
+ "uuid": "ba05745d-db6f-4077-a524-713fa808be82"
+ },
+ {
+ "label": null,
+ "output_name": "deseq_out",
+ "uuid": "5fab25c2-11d8-424e-9877-4d91fd75c60f"
+ }
+ ]
+ },
+ "52": {
+ "annotation": "",
+ "content_id": "Filter1",
+ "errors": null,
+ "id": 52,
+ "input_connections": {
+ "input": {
+ "id": 51,
+ "output_name": "deseq_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Filter",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2746,
+ "top": 290
+ },
+ "post_job_actions": {},
+ "tool_id": "Filter1",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"c7<0.05\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"header_lines\": \"0\", \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\"}",
+ "tool_version": "1.1.0",
+ "type": "tool",
+ "uuid": "d3f69676-a4ab-422d-9477-e48469e7eb0e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "ed66aa96-1b4f-4307-8ddf-a9a8ff50412c"
+ }
+ ]
+ },
+ "53": {
+ "annotation": "",
+ "content_id": "Filter1",
+ "errors": null,
+ "id": 53,
+ "input_connections": {
+ "input": {
+ "id": 52,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Filter",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2996,
+ "top": 290
+ },
+ "post_job_actions": {},
+ "tool_id": "Filter1",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"c3>0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"header_lines\": \"0\", \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\"}",
+ "tool_version": "1.1.0",
+ "type": "tool",
+ "uuid": "8addb367-a444-40b4-899d-224ac94c3ab5",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "4bb7c046-c14f-448c-9d71-fe78fb90db94"
+ }
+ ]
+ },
+ "54": {
+ "annotation": "",
+ "content_id": "Filter1",
+ "errors": null,
+ "id": 54,
+ "input_connections": {
+ "input": {
+ "id": 52,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Filter",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 2996,
+ "top": 410
+ },
+ "post_job_actions": {},
+ "tool_id": "Filter1",
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"c3<0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"header_lines\": \"0\", \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\"}",
+ "tool_version": "1.1.0",
+ "type": "tool",
+ "uuid": "ed3d5b8f-82ef-4992-9528-ad9238b30d2f",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "d7dc54e1-74ec-4105-81f0-e205538bbb6f"
+ }
+ ]
}
- ],
- "position": {
- "left": 1550,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/deseq2/deseq2/2.11.40.2",
- "tool_shed_repository": {
- "changeset_revision": "9a616afdbda5",
- "name": "deseq2",
- "owner": "iuc",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"fit_type\": \"\\\"1\\\"\", \"__page__\": null, \"tximport\": \"{\\\"tximport_selector\\\": \\\"count\\\", \\\"__current_case__\\\": 1}\", \"outlier_replace_off\": \"\\\"false\\\"\", \"__rerun_remap_job_id__\": null, \"auto_mean_filter_off\": \"\\\"false\\\"\", \"header\": \"\\\"true\\\"\", \"rep_factorName\": \"[{\\\"__index__\\\": 0, \\\"factorName\\\": \\\"FactorName\\\", \\\"rep_factorLevel\\\": [{\\\"__index__\\\": 0, \\\"factorLevel\\\": \\\"GIE\\\", \\\"countsFile\\\": null}, {\\\"__index__\\\": 1, \\\"factorLevel\\\": \\\"Mega\\\", \\\"countsFile\\\": null}]}]\", \"normCounts\": \"\\\"true\\\"\", \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"many_contrasts\": \"\\\"false\\\"\", \"pdf\": \"\\\"true\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\", \"outlier_filter_off\": \"\\\"false\\\"\"}",
- "tool_version": "2.11.40.2",
- "type": "tool",
- "uuid": "73d67b36-4316-446b-85f6-5b1e2b64f684",
- "workflow_outputs": []
},
- "44": {
- "annotation": "",
- "content_id": "Filter1",
- "errors": null,
- "id": 44,
- "input_connections": {
- "input": {
- "id": 43,
- "output_name": "deseq_out"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Filter",
- "outputs": [
- {
- "name": "out_file1",
- "type": "input"
- }
- ],
- "position": {
- "left": 1770,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "Filter1",
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c7<0.05\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\"}",
- "tool_version": "1.1.0",
- "type": "tool",
- "uuid": "3581c4be-1732-47fc-ad14-73f5e3a42b23",
- "workflow_outputs": []
- },
- "45": {
- "annotation": "",
- "content_id": "Filter1",
- "errors": null,
- "id": 45,
- "input_connections": {
- "input": {
- "id": 44,
- "output_name": "out_file1"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Filter",
- "outputs": [
- {
- "name": "out_file1",
- "type": "input"
- }
- ],
- "position": {
- "left": 1990,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "Filter1",
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\"}",
- "tool_version": "1.1.0",
- "type": "tool",
- "uuid": "c9d279df-d6bb-4b48-a743-250e9fb30548",
- "workflow_outputs": []
- },
- "46": {
- "annotation": "",
- "content_id": "Filter1",
- "errors": null,
- "id": 46,
- "input_connections": {
- "input": {
- "id": 44,
- "output_name": "out_file1"
- }
- },
- "inputs": [],
- "label": null,
- "name": "Filter",
- "outputs": [
- {
- "name": "out_file1",
- "type": "input"
- }
- ],
- "position": {
- "left": 1990,
- "top": 130
- },
- "post_job_actions": {},
- "tool_id": "Filter1",
- "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3<0\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/mm10.len\\\"\"}",
- "tool_version": "1.1.0",
- "type": "tool",
- "uuid": "aa0f881b-5524-4a0c-9cc5-c8fdfe2e3b89",
- "workflow_outputs": []
- },
- "5": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 5,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "Megakaryocyte_rep1_reverse_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 610
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"Megakaryocyte_rep1_reverse_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "e7b2ddb1-99fc-4898-9c10-e5ac8fa9fd3a",
- "workflow_outputs": []
- },
- "6": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 6,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "Megakaryocyte_rep2_forward_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 730
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"Megakaryocyte_rep2_forward_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "8eff105a-d216-4399-bac1-a67d3f09514f",
- "workflow_outputs": []
- },
- "7": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 7,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "Megakaryocyte_rep2_reverse_read"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 850
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"Megakaryocyte_rep2_reverse_read\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "f22827e1-68d3-4ae0-9d4a-67bca6a8b288",
- "workflow_outputs": []
- },
- "8": {
- "annotation": "",
- "content_id": null,
- "errors": null,
- "id": 8,
- "input_connections": {},
- "inputs": [
- {
- "description": "",
- "name": "RefSeq_reference_GTF"
- }
- ],
- "label": null,
- "name": "Input dataset",
- "outputs": [],
- "position": {
- "left": 10,
- "top": 970
- },
- "tool_id": null,
- "tool_state": "{\"name\": \"RefSeq_reference_GTF\"}",
- "tool_version": null,
- "type": "data_input",
- "uuid": "758d9ac9-4e4a-45fa-a5ad-2fb4f5f7f87d",
- "workflow_outputs": []
- },
- "9": {
- "annotation": "",
- "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "errors": null,
- "id": 9,
- "input_connections": {
- "input_file": {
- "id": 0,
- "output_name": "output"
- }
- },
- "inputs": [],
- "label": null,
- "name": "FastQC",
- "outputs": [
- {
- "name": "html_file",
- "type": "html"
- },
- {
- "name": "text_file",
- "type": "txt"
- }
- ],
- "position": {
- "left": 230,
- "top": 10
- },
- "post_job_actions": {},
- "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72",
- "tool_shed_repository": {
- "changeset_revision": "c15237684a01",
- "name": "fastqc",
- "owner": "devteam",
- "tool_shed": "toolshed.g2.bx.psu.edu"
- },
- "tool_state": "{\"__page__\": null, \"limits\": \"null\", \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"__workflow_invocation_uuid__\": \"\\\"fde8952eb77711e8a468005056ba55fb\\\"\", \"contaminants\": \"null\", \"chromInfo\": \"\\\"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len\\\"\"}",
- "tool_version": "0.72",
- "type": "tool",
- "uuid": "32c35846-2972-4d71-8af6-03350acd490a",
- "workflow_outputs": []
- }
- },
- "tags": [
- "transcriptomics"
- ],
- "uuid": "dc8abeee-46fa-4965-9a58-396b3ac0c310"
-}
\ No newline at end of file
+ "tags": [
+ "transcriptomics"
+ ],
+ "uuid": "42a843bd-6438-482b-8ef9-88978509e013",
+ "version": 7
+}
diff --git a/topics/transcriptomics/tutorials/ref-based/tutorial.bib b/topics/transcriptomics/tutorials/ref-based/tutorial.bib
index 90615bc5be0a92..bed40db9567a0b 100644
--- a/topics/transcriptomics/tutorials/ref-based/tutorial.bib
+++ b/topics/transcriptomics/tutorials/ref-based/tutorial.bib
@@ -13,7 +13,7 @@ @article{anders2015htseq
@article{brooks2011conservation,
title={Conservation of an RNA regulatory map between Drosophila and mammals},
author={Brooks, Angela N and Yang, Li and Duff, Michael O and Hansen, Kasper D and Park, Jung W and Dudoit, Sandrine and Brenner, Steven E and Graveley, Brenton R},
- journal={Genome research},
+ journal={Genome Research},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3032923/},
volume={21},
number={2},
@@ -34,10 +34,22 @@ @article{dobin2013star
publisher={Oxford University Press}
}
+@article{ewels2016multiqc,
+ title={MultiQC: summarize analysis results for multiple tools and samples in a single report},
+ author={Ewels, Philip and Magnusson, M{\aa}ns and Lundin, Sverker and K{\"a}ller, Max},
+ journal={Bioinformatics},
+ volume={32},
+ number={19},
+ pages={3047--3048},
+ year={2016},
+ publisher={Oxford University Press},
+ url={https://academic.oup.com/bioinformatics/article/32/19/3047/2196507}
+}
+
@article{kim2013tophat2,
title={TopHat2: accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions},
author={Kim, Daehwan and Pertea, Geo and Trapnell, Cole and Pimentel, Harold and Kelley, Ryan and Salzberg, Steven L},
- journal={Genome biology},
+ journal={Genome Biology},
url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-4-r36},
volume={14},
number={4},
@@ -49,7 +61,7 @@ @article{kim2013tophat2
@article{kim2015hisat,
title={HISAT: a fast spliced aligner with low memory requirements},
author={Kim, Daehwan and Langmead, Ben and Salzberg, Steven L},
- journal={Nature methods},
+ journal={Nature Methods},
url = {https://www.nature.com/articles/nmeth.3317},
volume={12},
number={4},
@@ -61,7 +73,7 @@ @article{kim2015hisat
@article{kim2019graph,
title={Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype},
author={Kim, Daehwan and Paggi, Joseph M and Park, Chanhee and Bennett, Christopher and Salzberg, Steven L},
- journal={Nature biotechnology},
+ journal={Nature Biotechnology},
url = {https://www.nature.com/articles/s41587-019-0201-4},
volume={37},
number={8},
@@ -70,11 +82,10 @@ @article{kim2019graph
publisher={Nature Publishing Group}
}
-
@article{levin2010comprehensive,
title={Comprehensive comparative analysis of strand-specific RNA sequencing methods},
author={Levin, Joshua Z and Yassour, Moran and Adiconis, Xian and Nusbaum, Chad and Thompson, Dawn Anne and Friedman, Nir and Gnirke, Andreas and Regev, Aviv},
- journal={Nature methods},
+ journal={Nature Methods},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3005310/},
volume={7},
number={9},
@@ -98,7 +109,7 @@ @article{liao2013featurecounts
@article{love2014moderated,
title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
- journal={Genome biology},
+ journal={Genome Biology},
url = {https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8},
volume={15},
number={12},
@@ -132,7 +143,7 @@ @article{marcel2011cutadapt
@article{robinson2011integrative,
title={Integrative genomics viewer},
author={Robinson, James T and Thorvaldsd{\'o}ttir, Helga and Winckler, Wendy and Guttman, Mitchell and Lander, Eric S and Getz, Gad and Mesirov, Jill P},
- journal={Nature biotechnology},
+ journal={Nature Biotechnology},
url = {https://www.nature.com/nbt/journal/v29/n1/abs/nbt.1754.html},
volume={29},
number={1},
@@ -141,7 +152,6 @@ @article{robinson2011integrative
publisher={Nature Publishing Group}
}
-
@article{trapnell2009tophat,
title={TopHat: discovering splice junctions with RNA-Seq},
author={Trapnell, Cole and Pachter, Lior and Salzberg, Steven L},
@@ -154,6 +164,18 @@ @article{trapnell2009tophat
publisher={Oxford University Press}
}
+@article{thurmond2018flybase,
+ title={FlyBase 2.0: the next generation},
+ author={Thurmond, Jim and Goodman, Joshua L and Strelets, Victor B and Attrill, Helen and Gramates, L Sian and Marygold, Steven J and Matthews, Beverley B and Millburn, Gillian and Antonazzo, Giulia and Trovisco, Vitor and others},
+ journal={Nucleic Acids Research},
+ volume={47},
+ number={D1},
+ pages={D759--D765},
+ year={2018},
+ publisher={Oxford University Press},
+ url={https://academic.oup.com/nar/article-abstract/47/D1/D759/5144957}
+}
+
@article{wang2012rseqc,
title={RSeQC: quality control of RNA-seq experiments},
author={Wang, Liguo and Wang, Shengqin and Li, Wei},
@@ -169,7 +191,7 @@ @article{wang2012rseqc
@article{young2010gene,
title={Gene ontology analysis for RNA-seq: accounting for selection bias},
author={Young, Matthew D and Wakefield, Matthew J and Smyth, Gordon K and Oshlack, Alicia},
- journal={Genome biology},
+ journal={Genome Biology},
url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2010-11-2-r14},
volume={11},
number={2},
diff --git a/topics/transcriptomics/tutorials/ref-based/tutorial.md b/topics/transcriptomics/tutorials/ref-based/tutorial.md
index 8748d378a027e8..cca7ea68a9fce2 100644
--- a/topics/transcriptomics/tutorials/ref-based/tutorial.md
+++ b/topics/transcriptomics/tutorials/ref-based/tutorial.md
@@ -87,8 +87,8 @@ In the second part of the tutorial, read counts of all 7 samples are used to ide
> {% include snippets/create_new_history.md %}
>
> 2. Import the FASTQ file pairs from [Zenodo]({{ page.zenodo_link }}) or a data library:
-> - `GSM461177` (untreated): `GSM461177_1` and `GSM461177_2`
-> - `GSM461180` (treated): `GSM461180_1` and `GSM461180_2`
+> - `GSM461177` (untreated): `GSM461177_1` and `GSM461177_2`
+> - `GSM461180` (treated): `GSM461180_1` and `GSM461180_2`
>
> ```
> {{ page.zenodo_link }}/files/GSM461177_1.fastqsanger
@@ -126,14 +126,14 @@ The reads are raw data from the sequencing machine without any pretreatments. Th
# Quality control
-During sequencing, errors are introduced, such as incorrect nucleotides being called. These are due to the technical limitations of each sequencing platform. Sequencing errors might bias the analysis and can lead to a misinterpretation of the data.
+During sequencing, errors are introduced, such as incorrect nucleotides being called. These are due to the technical limitations of each sequencing platform. Sequencing errors might bias the analysis and can lead to a misinterpretation of the data. Adapters may also be present if the reads are longer than the fragments sequenced and trimming these may improve the number of reads mapped.
-Sequence quality control is therefore an essential first step in your analysis. We will use similar tools as described in the ["Quality control" training]({% link topics/sequence-analysis/tutorials/quality-control/tutorial.md %}): [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) and [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) ({% cite marcel2011cutadapt %}).
+Sequence quality control is therefore an essential first step in your analysis. We will use similar tools as described in the ["Quality control" tutorial]({% link topics/sequence-analysis/tutorials/quality-control/tutorial.md %}): [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) to create a report of sequence quality, [MultiQC](https://multiqc.info/) ({% cite ewels2016multiqc %}) to aggregate generated reports and [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) ({% cite marcel2011cutadapt %}) to improve the quality of sequences via trimming and filtering.
> ### {% icon hands_on %} Hands-on: Quality control
>
> 1. **FastQC** {% icon tool %} with the following parameters:
-> - {% icon param-files %} *"Short read data from your current history"*: input datasets selected with **Multiple datasets**
+> - {% icon param-files %} *"Short read data from your current history"*: input datasets selected with **Multiple datasets**
>
> {% include snippets/select_multiple_datasets.md %}
>
@@ -152,11 +152,11 @@ Sequence quality control is therefore an essential first step in your analysis.
> {: .question}
>
> 3. **MultiQC** {% icon tool %} with the following parameters to aggregate the FastQC reports:
-> - In *"Results"*
-> - *"Which tool was used generate logs?"*: `FastQC`
-> - In *"FastQC output"*
-> - *"Type of FastQC output?"*: `Raw data`
-> - {% icon param-files %} *"FastQC output"*: `Raw data` files (output of **FastQC** {% icon tool %})
+> - In *"Results"*
+> - *"Which tool was used generate logs?"*: `FastQC`
+> - In *"FastQC output"*
+> - *"Type of FastQC output?"*: `Raw data`
+> - {% icon param-files %} *"FastQC output"*: `Raw data` files (output of **FastQC** {% icon tool %})
>
> 4. Inspect the webpage output from MultiQC for each FASTQ
>
@@ -167,32 +167,33 @@ Sequence quality control is therefore an essential first step in your analysis.
> >
> > > ### {% icon solution %} Solution
> > >
-> > > 1. Everything seems OK for 3 of the files, but for `GSM461180_2` the quality decreases quite a lot at the end of the sequences:
-> > > - The `GSM461177` have 10.3 millions of sequences and `GSM461180` 12.3 millions
-> > > - All except `GSM461180_2` have a high proportion of duplicated reads (expected in RNA-Seq data)
+> > > 1. Everything seems good for 3 of the files. The `GSM461177` files have 10.3 million sequences and the `GSM461180` files 12.3 million sequences. But in `GSM461180_2` (reverse reads of GSM461180) the quality decreases quite a lot at the end of the sequences.
+> > >
+> > > All files except `GSM461180_2` have a high proportion of duplicated reads (expected in RNA-Seq data)
+> > >
+> > > ![Sequence Counts](../../images/ref-based/fastqc_sequence_counts_plot.png "Sequence Counts")
> > >
-> > > ![Sequence Counts](../../images/ref-based/fastqc_sequence_counts_plot.png "Sequence Counts")
+> > > The "Per base sequence quality" is globally good with a slight decrease at the end of the sequences. For `GSM461180_2`, the decrease is quite large.
> > >
-> > > - The "Per base sequence quality" is globally good with a slight decrease at the end of the sequences. For `GSM461180_2`, the decrease is quite large.
+> > > ![Sequence Quality](../../images/ref-based/fastqc_per_base_sequence_quality_plot.png "Sequence Quality")
> > >
-> > > ![Sequence Quality](../../images/ref-based/fastqc_per_base_sequence_quality_plot.png "Sequence Quality")
+> > > The mean quality score over the reads is quite high, but the distribution is slightly different for `GSM461180_2`
> > >
-> > > - The mean quality score over the reads is quite high, but the distribution is slightly different for `GSM461180_2`
+> > > ![Per Sequence Quality Scores](../../images/ref-based/fastqc_per_sequence_quality_scores_plot.png "Per Sequence Quality Scores")
> > >
-> > > ![Per Sequence Quality Scores](../../images/ref-based/fastqc_per_sequence_quality_scores_plot.png "Per Sequence Quality Scores")
+> > > Reads are not really following a normal distribution of GC content, except `GSM461180_2`
> > >
-> > > - Reads are not really following a normal distribution of GC content, except `GSM461180_2`
+> > > ![Per Sequence GC Content](../../images/ref-based/fastqc_per_sequence_gc_content_plot.png "Per Sequence GC Content")
> > >
-> > > ![Per Sequence GC Content](../../images/ref-based/fastqc_per_sequence_gc_content_plot.png "Per Sequence GC Content")
+> > > Few N in the reads
> > >
-> > > - Few N in the reads
+> > > ![Per base N content](../../images/ref-based/fastqc_per_base_n_content_plot.png "Per base N content")
> > >
-> > > ![Per base N content](../../images/ref-based/fastqc_per_base_n_content_plot.png "Per base N content")
+> > > Duplicated sequences: >10 to >500
> > >
-> > > - Duplicated sequences: >10 to >500
+> > > ![Sequence Duplication Levels](../../images/ref-based/fastqc_sequence_duplication_levels_plot.png "Sequence Duplication Levels")
> > >
-> > > ![Sequence Duplication Levels](../../images/ref-based/fastqc_sequence_duplication_levels_plot.png "Sequence Duplication Levels")
-> > > - Almost no known adapters and overrepresented sequences
+> > > Almost no known adapters and overrepresented sequences
> > >
> > > 2. If the quality of the reads is not good, we should:
> > > 1. Check what is wrong and think about it: it may come from the type of sequencing or what we sequenced (high quantity of overrepresented sequences in transcriptomics data, biaised percentage of bases in HiC data)
@@ -204,7 +205,7 @@ Sequence quality control is therefore an essential first step in your analysis.
>
{: .hands_on}
-We should trim sequenced read to get rid of bases that were sequenced with high uncertainty (= low quality bases) at the read ends but also remove the reads of overall bad quality.
+We should trim the reads to get rid of bases that were sequenced with high uncertainty (i.e. low quality bases) at the read ends, and also remove the reads of overall bad quality.
{% include topics/sequence-analysis/tutorials/quality-control/paired_end_question.md forward="GSM461177_1" reverse="GSM461177_2" %}
@@ -234,7 +235,7 @@ We should trim sequenced read to get rid of bases that were sequenced with high
> > 2. How many sequence pairs have been removed because at least one read was shorter than the length cutoff?
> >
> > > ### {% icon solution %} Solution
-> > > 1. For `GSM461177`, 5,072,810 bp has been trimmed for the forward reads (read 1) and 8,648,619 bp on the reverse (read 2) because of quality. For `GSM461180`, 10,224,537 bp on forward and 51,746,850 bp on the reverse. It is not a surprise: we saw that at the end of the sequences the quality was dropping more for the reverse reads than for the forward reads, specially for `GSM461180_2`.
+> > > 1. For `GSM461177`, 5,072,810 bp has been trimmed for the forward reads (read 1) and 8,648,619 bp on the reverse (read 2) because of quality. For `GSM461180`, 10,224,537 bp on forward and 51,746,850 bp on the reverse. It is not a surprise: we saw that at the end of the reads the quality was dropping more for the reverse reads than for the forward reads, especially for `GSM461180`.
> > > 2. 147,810 (1.4%) reads were too short for `GSM461177` and 1,101,875 (9%) for `GSM461180`.
> > {: .solution }
> {: .question}
@@ -242,27 +243,22 @@ We should trim sequenced read to get rid of bases that were sequenced with high
# Mapping
-To make sense of the reads, we need to first figure out where the sequenced DNA fragments originated from in the genome, so we can then determine to which genes they belong.
-
-This process is known as aligning or 'mapping' the reads to a reference. This is equivalent to solving a jigsaw puzzle, but unfortunately, not all pieces are unique.
+To make sense of the reads, we need to first figure out where the sequences originated from in the genome, so we can then determine to which genes they belong. When a reference genome for the organism is available, this process is known as aligning or "mapping" the reads to the reference. This is equivalent to solving a jigsaw puzzle, but unfortunately, not all pieces are unique.
> ### {% icon comment %} Comment
>
> Do you want to learn more about the principles behind mapping? Follow our [training]({% link topics/sequence-analysis/tutorials/mapping/tutorial.md %}).
{: .comment}
-As a reference genome for *Drosophila melanogaster* is available, we can map the sequences to this genome in order to identify which genes are affected by the *Pasilla* gene depletion.
+In this study, the authors used *Drosophila melanogaster* cells. We should then map the quality-controlled sequences to the reference genome of *Drosophila melanogaster*.
{% include topics/sequence-analysis/tutorials/mapping/ref_genome_explanation.md answer_3="The genome of *Drosophila melanogaster* is known and assembled and it can be used as the reference genome in this analysis. Note that new versions of reference genomes may be released if the assembly improves, for this tutorial we are going to use the release 6 of the *Drosophila melanogaster* reference genome assembly [(dm6)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4383921/)."%}
-With eukaryotic transcriptomes most reads originate from processed mRNAs lacking introns, therefore they cannot be simply mapped back to the genome as we normally do for DNA data. Instead the reads must be separated into two categories:
-
-- Reads that can be mapped entirely within an exon
-- Reads spanning two or more exons
+With eukaryotic transcriptomes most reads originate from processed mRNAs lacking introns:
-![Five types of RNA-Seq reads](../../images/five_type_rna_seq_reads.png "The five types of RNA-seq reads (Figure 1a from {% cite kim2015hisat %})")
+![Types of RNA-Seq reads](../../images/ref-based/rna-seq-reads.png "The types of RNA-seq reads (adapted from Figure 1a of {% cite kim2015hisat %}): reads that map entirely within an exon (in red), reads spanning 2 exons (in blue), reads spanning more than 2 exons (in purple)")
-Spliced mappers have been developed to efficiently map transcript-derived reads against genomes:
+Therefore they cannot be simply mapped back to the genome as we normally do for DNA data. Splice-aware mappers have been developed to efficiently map transcript-derived reads against a reference genome:
![Splice-aware alignment](../../images/splice_aware_alignment.png "Principle of spliced mappers: (1) identification of the reads spanning a single exon, (2) identification of the splicing junctions on the unmapped reads")
@@ -296,7 +292,7 @@ Spliced mappers have been developed to efficiently map transcript-derived reads
## Mapping
-We will map our RNA reads to the *Drosophila melanogaster* genome using **STAR** ({% cite dobin2013star %}).
+We will map our reads to the *Drosophila melanogaster* genome using **STAR** ({% cite dobin2013star %}).
> ### {% icon hands_on %} Hands-on: Spliced mapping
>
@@ -342,7 +338,7 @@ We will map our RNA reads to the *Drosophila melanogaster* genome using **STAR**
> >
> > > ### {% icon solution %} Solution
> > >
-> > > 1. More than 83% for GSM461177 and more than 79% for GSM461180
+> > > 1. More than 83% for GSM461177 and more than 79% for GSM461180. We can proceed with the analysis since only percentages below 70% should be investigated for potential contamination.
> > > 2. We also have access to the number and percentage of reads that are mapped at several location, mapped at too many different location, not mapped because too short.
> > >
> > > ![STAR Alignment Scores](../../images/ref-based/star_alignment_plot.png "Alignment scores")
@@ -353,15 +349,15 @@ We will map our RNA reads to the *Drosophila melanogaster* genome using **STAR**
> {: .question}
{: .hands_on}
-**STAR** generates a BAM file with the mapped reads.
+According to the **MultiQC** report, more than 80% of reads for both samples are mapped exactly once to the reference genome. We can proceed with the analysis since only percentages below 70% should be investigated for potential contamination. Both samples have a low (less than 10%) percentage of reads that mapped to multiple locations on the reference genome. This is in the normal range for Illumina short-read sequencing, but may be lower for newer long-read sequencing datasets that can span larger repeated regions in the reference genome.
+
+The main output of **STAR** is a BAM file.
{% include topics/sequence-analysis/tutorials/mapping/bam_explanation.md mapper="RNA STAR" %}
## Inspection of the mapping results
-The BAM file contains information about where the reads are mapped on the reference genome. But as it is a binary file containing information for many reads (several million for these samples), it is difficult to inspect and explore the file.
-
-A powerful tool to visualize the content of BAM files is the Integrative Genomics Viewer (**IGV**, {% cite robinson2011integrative %}).
+The BAM file contains information for all our reads, making it difficult to inspect and explore in text format. A powerful tool to visualize the content of BAM files is the Integrative Genomics Viewer (**IGV**, {% cite robinson2011integrative %}).
> ### {% icon hands_on %} Hands-on: Inspection of mapping results
>
@@ -571,18 +567,18 @@ A powerful tool to visualize the content of BAM files is the Integrative Genomic
> Now that we have checked the results of the read mapping, we can proceed to the next phase of the analysis.
{: .details}
-After the mapping, we have the information on where the reads are located on the reference genome. We also know how well they were mapped. The next step in RNA-Seq data analysis is quantification of the number of reads mapped to genomic features (genes, transcripts, exons, ...).
+After the mapping, we now have the information on where the reads are located on the reference genome and how well they were mapped. The next step in RNA-Seq data analysis is quantification of the number of reads mapped to genomic features (genes, transcripts, exons, ...).
> ### {% icon comment %} Comment
>
> The quantification depends on both the reference genome (the FASTA file) and its associated annotations (the GTF file). It is extremely important to use an annotation file that corresponds to the same version of the reference genome you used for the mapping (e.g. `dm6` here), as the chromosomal coordinates of genes are usually different amongst different reference genome versions.
{: .comment}
-In order to identify genes that are regulated by the *Pasilla* gene, we need to identify genes which are differentially expressed between samples with PS gene depletion (treated) and control (untreated) samples.
+Here we will focus on the genes, as we would like to identify the ones that are differentially expressed because of the *Pasilla* gene knockdown.
# Counting the number of reads per annotated gene
-To compare the expression of single genes between different conditions (*e.g.* with or without PS depletion), an essential first step is to quantify the number of reads per gene.
+To compare the expression of single genes between different conditions (*e.g.* with or without PS depletion), an essential first step is to quantify the number of reads per gene, or more specifically the number of reads mapping to the exons of each gene.
![Counting the number of reads per annotated gene](../../images/gene_counting.png "Counting the number of reads per annotated gene")
@@ -609,13 +605,13 @@ To compare the expression of single genes between different conditions (*e.g.* w
> {: .solution}
{: .question}
-Two main tools could be used for that: [**HTSeq-count**](http://htseq.readthedocs.io/en/release_0.9.1/count.html) ({% cite anders2015htseq %}) or **featureCounts** ({% cite liao2013featurecounts %}). FeatureCounts is considerably faster and requires far less computational resources, so we will use it here.
+Two main tools could be used for that: [**HTSeq-count**](http://htseq.readthedocs.io/en/release_0.9.1/count.html) ({% cite anders2015htseq %}) or **featureCounts** ({% cite liao2013featurecounts %}). **featureCounts** is considerably faster and requires far less computational resources, so we will use it here.
-In principle, the counting of reads overlapping with genomic features is a fairly simple task. But there are some details that need to be given to **featureCounts**: for example the strandness.
+In principle, the counting of reads overlapping with genomic features is a fairly simple task. But there are some details that need to be given to **featureCounts**, e.g. the strandness.
## Estimation of the strandness
-RNAs that are typically targeted in RNA-Seq experiments are single stranded (*e.g.*, mRNAs) and thus have polarity (5' and 3' ends that are functionally distinct). During a typical RNA-Seq experiment the information about strandness is lost after both strands of cDNA are synthesized, size selected, and converted into a sequencing library. However, this information can be quite useful for the read counting step:
+RNAs that are typically targeted in RNA-Seq experiments are single stranded (*e.g.*, mRNAs) and thus have polarity (5' and 3' ends that are functionally distinct). During a typical RNA-Seq experiment the information about strandness is lost after both strands of cDNA are synthesized, size selected, and converted into a sequencing library. However, this information can be quite useful for the read counting step, especially for reads located on the overlap of 2 genes that are on different strands.
![Why strandness?](../../images/ref-based/strandness_why.png "Read1 will be assigned to gene1 located on the forward strand but Read2 could be assigned to gene1 (forward strand) or gene2 (reverse strand) depending if the strandness information is conserved.")
@@ -639,7 +635,7 @@ Some library preparation protocols create so called *stranded* RNA-Seq libraries
This information should be provided with your FASTQ files, ask your sequencing facility! If not, try to find it on the site where you downloaded the data or in the corresponding publication.
-Another option is to estimate these parameters with a tool called **Infer Experiment** from the RSeQC ({% cite wang2012rseqc %}) tool suite. This tool takes the output of your mappings (BAM files), selects a subsample of your reads and compares their genome coordinates and strands with those of the reference gene model (from an annotation file). Based on the strand of the genes, it can gauge whether sequencing is strand-specific, and if so, how reads are stranded (forward or reverse):
+Another option is to estimate these parameters with a tool called **Infer Experiment** from the RSeQC ({% cite wang2012rseqc %}) tool suite. This tool takes the BAM files from the mapping, selects a subsample of the reads and compares their genome coordinates and strands with those of the reference gene model (from an annotation file). Based on the strand of the genes, it can gauge whether sequencing is strand-specific, and if so, how reads are stranded (forward or reverse):
![How to estimate the strandness?](../../images/ref-based/strandness_cases.png "In a stranded forward library, reads map mostly on the genes located on forward strand (here gene1). With stranded reverse library, reads map mostly on genes on the reverse strand (here gene2). With unstranded library, reads maps on genes on both strands.")
@@ -656,6 +652,7 @@ Another option is to estimate these parameters with a tool called **Infer Experi
{: .hands_on}
**Infer Experiment** {% icon tool %} tool generates one file with information on:
+
- Paired-end or single-end library
- Fraction of reads failed to determine
- 2 lines
@@ -750,17 +747,15 @@ We now run **featureCounts** to count the number of reads per annotated gene.
>
{: .hands_on}
-The main output of **featureCounts** is a table with the counts for each genes in the provided annotation.
+The main output of **featureCounts** is a table with the counts, i.e. the number of reads (or fragments in the case of paired-end reads) mapped to each gene (in rows, with their ID in the first column) in the provided annotation. **featureCounts** also generates the **feature length** output datasets. We will need this file later on when we run the **goseq** tool.
> ### {% icon question %} Question
>
-> 1. What information does the generated table contain?
-> 2. Which feature has the most counts for both samples? (Hint: Use the Sort tool)
+> Which feature has the most counts for both samples? (Hint: Use the Sort tool)
>
> > ### {% icon solution %} Solution
> >
-> > 1. The table has two columns: the gene ID and the number of reads (or fragments in the case of paired-end reads) mapped to the gene
-> > 2. To display the most abundantly detected feature, we need to sort the table of counts. This can be done using the **Sort** {% icon tool %} tool:
+> > To display the most abundantly detected feature, we need to sort the table of counts. This can be done using the **Sort** {% icon tool %} tool:
> > - {% icon param-file %} *"Sort Query"*: count file, output of by **featureCounts**
> > - *"Number of header"*: `1`
> > - In *"1: Column selections"*:
@@ -805,8 +800,6 @@ The main output of **featureCounts** is a table with the counts for each genes i
> {: .solution}
{: .question}
-**FeatureCount** generates also the **feature length** output datasets. We will need this file later on when we will run the **goseq** tool.
-
Here we counted reads mapped to genes for two samples. It is really interesting to redo the same procedure on the other datasets, especially to check how parameters differ given the different type of data (single-end versus paired-end).
> ### {% icon hands_on %} (Optional) Hands-on: Re-run on the other datasets
@@ -1113,7 +1106,7 @@ This expression analysis is estimated from read counts and attempts are made to
> We recommend to combine the count tables for different technical replicates (but not for biological replicates) before a differential expression analysis (see [DESeq2 documentation](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#collapsing-technical-replicates))
{: .details}
-Multiple factors with several levels can then be incorporated in the analysis. After normalization we can compare the response of the expression of any gene to the presence of different levels of a factor in a statistically reliable way.
+Multiple factors describing known sources of variation (e.g. treatment, tissue type, gender, batches) can then be incorporated in the analysis, each with several levels representing the conditions for that factor. After normalization we can compare the response of the expression of any gene to the presence of different levels of a factor in a statistically reliable way.
In our example, we have samples with two varying factors that can contribute to differences in gene expression:
@@ -1131,27 +1124,23 @@ Here, treatment is the primary factor that we are interested in. The sequencing
>
> 1. **DESeq2** {% icon tool %} with the following parameters:
> - *"how"*: `Select datasets per level`
-> - In *"Factor"*:
-> - In "1: Factor"
-> - *"Specify a factor name"*: `Treatment`
-> - In *"Factor level"*:
-> - In *"1: Factor level"*:
-> - *"Specify a factor level"*: `treated`
-> - {% icon param-files %} *"Counts file(s)"*: the 3 gene count files with `treat` in their name
-> - In *"2: Factor level"*:
-> - *"Specify a factor level"*: `untreated`
-> - {% icon param-files %} *"Counts file(s)"*: the 4 gene count files with `untreat` in their name
->
-> - Click on {% icon param-repeat %} *"Insert Factor"* (not on "Insert Factor level")
-> - In "2: Factor"
-> - "Specify a factor name" to `Sequencing`
-> - In *"Factor level"*:
-> - In *"1: Factor level"*:
-> - *"Specify a factor level"*: `PE`
-> - {% icon param-files %} *"Counts file(s)"*: the 4 gene count files with `paired` in their name
-> - In *"2: Factor level"*:
-> - *"Specify a factor level"*: `SE`
-> - {% icon param-files %} *"Counts file(s)"*: the 3 gene count files with `single` in their name
+> - In "1: Factor"
+> - *"Specify a factor name"*: `Treatment`
+> - In *"1: Factor level"*:
+> - *"Specify a factor level"*: `treated`
+> - {% icon param-files %} *"Counts file(s)"*: the 3 gene count files with `treat` in their name
+> - In *"2: Factor level"*:
+> - *"Specify a factor level"*: `untreated`
+> - {% icon param-files %} *"Counts file(s)"*: the 4 gene count files with `untreat` in their name
+> - Click on {% icon param-repeat %} *"Insert Factor"* (not on "Insert Factor level")
+> - In "2: Factor"
+> - "Specify a factor name" to `Sequencing`
+> - In *"1: Factor level"*:
+> - *"Specify a factor level"*: `PE`
+> - {% icon param-files %} *"Counts file(s)"*: the 4 gene count files with `paired` in their name
+> - In *"2: Factor level"*:
+> - *"Specify a factor level"*: `SE`
+> - {% icon param-files %} *"Counts file(s)"*: the 3 gene count files with `single` in their name
> - *"Files have header?"*: `No`
> - *"Visualising the analysis results"*: `Yes`
> - *"Output normalized counts table"*: `Yes`
@@ -1320,7 +1309,7 @@ Now we would like to extract the most differentially expressed genes due to the
We now have a table with 130 lines corresponding to the most differentially expressed genes. For each gene, we have its ID, its mean normalized counts (averaged over all samples from both conditions), its $$log_{2} FC$$ and other information.
-The ID for each gene is something like FBgn0003360, which is an ID from the corresponding database, here Flybase. These IDs are unique but sometimes we prefer to have the gene names, even if they may not reference an unique gene (e.g. duplicated after re-annotation). But gene names may hint already to a function or they help you to search for desired candidates. We would also like to have the location of these genes within the genome. To do that, we need do add extra annotations from the annotation file which we used for mapping and counting.
+The ID for each gene is something like FBgn0003360, which is an ID from the corresponding database, here FlyBase ({% cite thurmond2018flybase %}). These IDs are unique but sometimes we prefer to have the gene names, even if they may not reference a unique gene (e.g. duplicated after re-annotation). But gene names may already hint at a function, or help you to search for desired candidates. We would also like to display the location of these genes within the genome. We can extract such information from the annotation file which we used for mapping and counting.
> ### {% icon hands_on %} Hands-on: Annotation of the differentially expressed genes
>
@@ -1338,6 +1327,7 @@ The ID for each gene is something like FBgn0003360, which is an ID from the corr
{: .hands_on}
The generated output is an extension of the previous file:
+
1. Gene identifiers
2. Mean normalized counts over all samples
3. Log2 fold change
@@ -1391,6 +1381,7 @@ The column names may not be precise so we would like to add them before going fu
We could plot the $$log_{2} FC$$ for the extracted genes, but here we would like to look at a heatmap of expression for these genes in the different samples. So we need to extract the normalized counts for these genes.
We proceed in several steps:
+
- Extract and plot the normalized counts for these genes for each sample with a heatmap, using the normalized count file generated by DESeq2
- Compute, extract and plot the Z-score of the normalized counts
@@ -1406,7 +1397,7 @@ We proceed in several steps:
To extract the normalized counts for the interesting genes, we join the normalized count table generated by DESeq2 with the table we just generated. We will then keep only the lines corresponding to the most differentially expressed genes.
> ### {% icon hands_on %} Hands-on: Extract the normalized counts of the most differentially expressed genes
-> 3. **Join two Datasets** {% icon tool %} with the following parameters:
+> 1. **Join two Datasets** {% icon tool %} with the following parameters:
> - {% icon param-file %} *"Join"*: the `Normalized counts` file (output of **DESeq2** {% icon tool %})
> - *"using column"*: `Column: 1`
> - {% icon param-file %} *"with"*: `Genes with significant adj p-value & abs(FC) > 2`
@@ -1416,7 +1407,7 @@ To extract the normalized counts for the interesting genes, we join the normaliz
>
> The generated file has more columns than we need for the heatmap: mean normalized counts, $$log_{2} FC$$ and other annotation information. We need to remove the extra columns.
>
-> 4. **Cut** {% icon tool %} to extract the columns with the gene IDs and normalized counts:
+> 2. **Cut** {% icon tool %} to extract the columns with the gene IDs and normalized counts:
> - *"Cut columns"*: `c1-c8`
> - *"Delimited by"*: `Tab`
> - {% icon param-file %} *"From"*: the joined dataset (output of **Join two Datasets** {% icon tool %})
@@ -1469,6 +1460,7 @@ The Z-score $$z_{i,j}$$ for a gene $$i$$ in a sample $$j$$ given the normalized
{: .comment}
To compute the Z-score, we break the process into 2 steps:
+
1. Substract each value by the mean of values in the row (i.e. $$x_{i,j}- \overline{x_i}$$) using the normalized count table
2. Divide the previous values by the standard deviation of values of row, using 2 tables (the normalized counts and the table computed in the previous step)
@@ -1535,18 +1527,19 @@ We would like now to plot a heatmap for the Z-scores:
# Functional enrichment analysis of the DE genes
-We have extracted genes that are differentially expressed in treated (PS gene-depleted) samples compared to untreated samples. Now, we would like to know if the differentially expressed genes are enriched transcripts of genes which belong to more common or specific categories to classify their potential function a bit better. These categories are called Gene Ontologies (GO).
+We have extracted genes that are differentially expressed in treated (PS gene-depleted) samples compared to untreated samples. Now, we would like to know if the differentially expressed genes are enriched transcripts of genes which belong to more common or specific categories in order to identify biological functions that might be impacted.
## Gene Ontology analysis
[Gene Ontology (GO)](http://www.geneontology.org/) analysis is widely used to reduce complexity and highlight biological processes in genome-wide expression studies. However, standard methods give biased results on RNA-Seq data due to over-detection of differential expression for long and highly-expressed transcripts.
-The [**goseq**](https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf) tool ({% cite young2010gene %}) provides methods for performing GO analysis of RNA-Seq data, taking length bias into account. The methods and software used by goseq are equally applicable to other category based tests of RNA-Seq data, such as KEGG pathway analysis.
+[**goseq**](https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf) ({% cite young2010gene %}) provides methods for performing GO analysis of RNA-Seq data while taking length bias into account. **goseq** could also be applied to other category based tests of RNA-Seq data, such as KEGG pathway analysis, as discussed in a further section.
**goseq** needs 2 files as inputs:
+
- A tabular file with the differentially expressed genes from all genes assayed in the RNA-Seq experiment with 2 columns:
- the Gene IDs (unique within the file), in uppercase letters
- - a boolean telling if the gene is differentially expressed or not: `True` if differentially expressed or `False` if not
+ - a boolean indicating whether the gene is differentially expressed or not (`True` if differentially expressed or `False` if not)
- A file with information about the length of a gene to correct for potential length bias in differentially expressed genes
> ### {% icon hands_on %} Hands-on: Prepare the datasets for goseq
@@ -1600,7 +1593,8 @@ We have now the two required input files for goseq.
**goseq** generates with these parameters 3 outputs:
-1. A big table (`Ranked category list - Wallenius method`) with the following columns for each GO term:
+1. A table (`Ranked category list - Wallenius method`) with the following columns for each GO term:
+
1. `category`: GO category
2. `over_rep_pval`: *p*-value for over-representation of the term in the differentially expressed genes
3. `under_rep_pval`: *p*-value for under-representation of the term in the differentially expressed genes
@@ -1648,7 +1642,7 @@ We have now the two required input files for goseq.
3. A table with the differentially expressed genes (from the list we provided) associated to the GO terms (`DE genes for categories (GO/KEGG terms)`)
-> ### {% icon comment %} Advanced tutorial on visualization
+> ### {% icon comment %} Advanced tutorial on enrichment analysis
>
> In this tutorial, we covered GO enrichment analysis with **goseq**. To learn other gene set enrichment analysis, please have a look at the ["RNA-Seq genes to pathways"]({% link topics/transcriptomics/tutorials/rna-seq-genes-to-pathways/tutorial.md %}) tutorial.
{: .comment}
@@ -1701,6 +1695,7 @@ As for the GO terms, 2 files are generated:
We could investigate which genes are involved in which pathways by looking at the second file generated by **goseq**. But it can be cumbersome and we would like to see the pathways as represented in the previous image. **Pathview** ({% cite luo2013pathview %}) can help to generate automatically similar images as the previous one but also add extra information about the genes (e.g. expression) in our study.
This tool needs 2 main information as inputs:
+
- Pathway ID(s) to plot, either as just one ID or as a file with one column with the pathway IDs
- A tabular file with the genes in the RNA-Seq experiment with 2 (or more) columns:
- the gene IDs (unique within the file)
diff --git a/topics/transcriptomics/tutorials/rna-interactome/data-library.yaml b/topics/transcriptomics/tutorials/rna-interactome/data-library.yaml
new file mode 100644
index 00000000000000..40c9a28babfc27
--- /dev/null
+++ b/topics/transcriptomics/tutorials/rna-interactome/data-library.yaml
@@ -0,0 +1,35 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: RNA interactome
+ description: RNA interactome data analysis
+ items:
+ - name: Chimeric Read Analysis from RNA-RNA interactome data
+ items:
+ - name: 'DOI: 10.5281/zenodo.3709188'
+ description: latest
+ items:
+ - url: https://zenodo.org/api/files/be3bc8ed-75bd-4470-b662-bca1a503b28d/miRNA_mature.fa.gz
+ src: url
+ ext: fasta.gz
+ info: https://zenodo.org/record/3709188
+ - url: https://zenodo.org/api/files/be3bc8ed-75bd-4470-b662-bca1a503b28d/Mus_musculus.GRCm38.dna.fa.gz
+ src: url
+ ext: fasta.gz
+ info: https://zenodo.org/record/3709188
+ - url: https://zenodo.org/api/files/be3bc8ed-75bd-4470-b662-bca1a503b28d/SRR2413302.100k.fastq.gz
+ src: url
+ ext: fastqsanger.gz
+ info: https://zenodo.org/record/3709188
+ - url: https://zenodo.org/api/files/be3bc8ed-75bd-4470-b662-bca1a503b28d/transcriptome.fa.gz
+ src: url
+ ext: fasta.gz
+ info: https://zenodo.org/record/3709188
+ - url: https://zenodo.org/api/files/be3bc8ed-75bd-4470-b662-bca1a503b28d/whole_transcriptome.gff.gz
+ src: url
+ ext: gff.gz
+ info: https://zenodo.org/record/3709188
diff --git a/topics/transcriptomics/tutorials/rna-interactome/tutorial.md b/topics/transcriptomics/tutorials/rna-interactome/tutorial.md
new file mode 100644
index 00000000000000..03215e206c912e
--- /dev/null
+++ b/topics/transcriptomics/tutorials/rna-interactome/tutorial.md
@@ -0,0 +1,335 @@
+---
+layout: tutorial_hands_on
+
+title: RNA-RNA interactome data analysis
+zenodo_link: https://zenodo.org/record/3696889
+questions:
+- What are the difficulties in mapping chimeric reads from RNA interactome data?
+- Why is multi-mapping a big problem in these datasets?
+- How to filter for meaningful results from large analysis output files?
+objectives:
+- Quality control and data preparation
+- Mapping chimeric reads
+- Quantification of the mapped loci
+- Visualization and filtering of results
+time_estimation: 2H
+contributors:
+- pavanvidem
+
+---
+
+
+# Introduction
+{:.no_toc}
+
+
+
+With the advances in the next-generation sequencing technologies, genome-wide RNA-RNA interaction predictions are now
+possible. The most recent line of development has been to ligate the microRNA to the site-specific interaction region of
+ the target, selecting these interactions via cross-linking to one of the Argonaute proteins required for microRNA-based
+ regulation, and to sequence the resulting chimeric RNA molecule, for example, the CLASH and CLEAR-CLIP protocols.
+Going beyond microRNAs, these protocols can be applied to RNA interactions that involve a regulatory protein
+other than Argonaute. To generalize even further, researchers have applied the same idea to the detection of all
+transcriptome-wide RNA-RNA interactions, which include both inter- and intramolecular base pairing without the necessity
+ of choosing a specific regulatory protein for cross-linking. These protocols include LIGR-Seq that maps the human
+ RNA-RNA interactome and PARIS that focused on long-range structures in human and mouse.
+
+The reads from these experiments are chimeric with each arm generated from one of the interaction
+partners. Due to short lengths, often these sequenced arms ambiguously map to multiple locations and inferring the
+origin of these can be quite complicated. Theoretically, alignment tools like `HISAT2` and `STAR` can be used to align
+chimeric reads, but they are not efficient at this task. The other alignment tools like `BWA-MEM` or `Bowtie2` can be
+used in local alignment settings to map these chimeric reads. In this case, the user needs to adjust the alignment parameters
+to match the read lengths, and a lot of post-processing is needed to choose the best hits. Recently,
+an alignment tool called `CLAN` was also published to specifically map the chimeric reads from CLASH experiments.
+
+In this tutorial, we will learn the analysis of a CLEAR-CLIP data set using a tool suite called `ChiRA`. The data used
+is a random subsampling (100k reads) of a mouse cortex sample
+([GSM1881541](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM1881541)) prepared using the CLEAR-CLIP protocol. `ChiRA` is
+a complete analysis framework that can be used starting from raw sequencing reads to analysis and visualization of
+results. `ChiRA` uses `BWA-MEM` or `CLAN` to map the reads. Subsequently, it also merges the overlapping alignments and
+ chooses the best alignments per read by quantifying all the loci that reads map to. In the end, it scores each
+ alignment and outputs only the best alignments per read. The final part of this tutorial gives an insight into how to
+ filter, export and visualize your results using the visualization framework `ChiRAViz`.
+
+
+![ChiRA workflow](../../images/rna-interactome/chira.png "ChiRA workflow. First, the reads are deduplicated and mapped to the transcriptome. Then the mapped loci are merged based on their overlap. The merged loci are quantified and the interactions are scored and reported.")
+
+> ### Agenda
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Get data
+
+> ### {% icon hands_on %} Hands-on: Data upload
+>
+> 1. Create a new history for this tutorial
+> 2. Import the files from [Zenodo](https://zenodo.org/record/3709188) or from the shared data library
+>
+> ```
+> https://zenodo.org/record/3709188/files/miRNA_mature.fa.gz
+> https://zenodo.org/record/3709188/files/Mus_musculus.GRCm38.dna.fa.gz
+> https://zenodo.org/record/3709188/files/SRR2413302.100k.fastq.gz
+> https://zenodo.org/record/3709188/files/transcriptome.fa.gz
+> https://zenodo.org/record/3709188/files/whole_transcriptome.gff.gz
+> ```
+>
+> {% include snippets/import_via_link.md %}
+> {% include snippets/import_from_data_library.md %}
+>
+> 3. Rename the datasets
+> 4. Check that the datatype of each dataset is correct
+>
+> {% include snippets/change_datatype.md datatype="datatypes" %}
+>
+> 5. Add to each database a tag corresponding to ...
+>
+> {% include snippets/add_tag.md %}
+>
+{: .hands_on}
+
+# Preprocessing
+
+Before starting with the analysis of data it is always good to check the sequenced reads for low quality bases and
+adapters.
+
+## Quality control
+
+> ### {% icon hands_on %} Hands-on: Quality check
+First use `FastQC` to assess the read quality
+>
+> 1. **FastQC** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Short read data from your current history"*: `SRR2413302.100k.fastq.gz` (Input dataset)
+>
+{: .hands_on}
+
+> ### {% icon question %} Questions
+>
+> 1. Why do you think `FastQC` failed to find any adapters?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Because `FastQC` uses a set of standard adapters to screen for adapters. The "special" adapters used in this
+library preparation are not present in the `FastQC` standard adapters list.
+> >
+> {: .solution}
+>
+{: .question}
+
+## Adapter trimming
+Due to the inefficiency of the current RNA interactome protocols, not all reads are made up of RNA hybrids. In some
+cases, reads contain single RNA fragments with adapters or nothing but only adapters. Hence adapter removal is a very important step
+in this analysis. In this step, we use `cutadapt` to trim the adapters. As the adapters used in this library are not
+standard Illumina adapters, we need to provide them manually.
+
+> ### {% icon hands_on %} Hands-on: Adapter trimming
+We use `cutadapt` to trim the adapter content
+>
+> 1. **cutadapt** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"FASTQ/A file"*: `SRR2413302.100k.fastq.gz` (Input dataset)
+> - In *"Read 1 Options"*
+> - *"3' (End) Adapters"* -> *"Insert 3' (End) Adapters"*
+> - *"Source"*: `Enter Custom sequence`
+> - *"Enter custom 3' adapter sequence"*: `GTGTCAGTCACTTCCAGCGG`
+> - *"5' (Front) Adapters"* -> *"Insert 5' (Front) Adapters"*
+> - *"Source"*: `Enter Custom sequence`
+> - *"Enter custom 5' adapter sequence"*: `NNNNAGGGAGGACGATGCGG`
+> - In *"Filter Options"*
+> - *"Minimum length"*: `10`
+>
+{: .hands_on}
+
+
+> ### {% icon hands_on %} Hands-on: Post adapter trimming quality check
+It is interesting to see whether our manually entered adapters were trimmed
+>
+> 1. **FastQC** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Short read data from your current history"*: `Read 1 Output` (output of **cutadapt** {% icon tool %})
+> - Observe the **Per base sequence content**
+> ![FastQC per base sequence content](../../images/rna-interactome/chira_fastqc_seq_content.png)
+{: .hands_on}
+> ### {% icon question %} Questions
+>
+> 1. Would you be concerned about the abnormal "Per base sequence content" towards the end?
+>
+> > ### {% icon solution %} Solution
+> >
+> > 1. Normally yes, but in this case not. Always look at this plot in combination with "Sequence Length Distribution"
+plot. It looks like there is a huge difference in base composition between the 56th and 57th bases. But the number of
+sequences that contribute to this is very important. From the sequence length distribution, almost all the sequences have a
+ length of 55 bases. Hence the abnormality in the per base sequence content arises only because it comes from
+ very few (if not only 1) sequences.
+> >
+> {: .solution}
+>
+{: .question}
+
+# Analysis of interactome data using `ChiRA` tool suite
+
+The analysis includes several steps that deal with deduplication, mapping, quantification and extraction of interacting
+partners.
+
+## Remove duplicate sequences
+
+First, we eliminate the duplicate sequences from the library to reduce the computational effort. This will also have an
+impact on the quantification of the loci because often these identical sequences might be PCR duplicates.
+
+> ### {% icon hands_on %} Hands-on
+>
+> 1. **ChiRA collapse** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input FASTQ file"*: `Read 1 Output` (output of **cutadapt** {% icon tool %})
+>
+> > ### {% icon tip %} Tip: Dealing with UMIs
+> >
+> > * If you have UMIs (at the 5' end) in the sequenced reads, please set *"Length of the UMI if present at the 5' end of your reads"*.
+> > * The UMI will be trimmed and put in the unique sequence id.
+> {: .tip}
+>
+{: .hands_on}
+
+
+## Map reads to the reference transcriptome
+
+> ### {% icon hands_on %} Hands-on: Map chimeric reads from fasta file
+Here we use `BWA-MEM` aligner in local alignment mode to locate the chimeric arms on the
+transcriptome. Your reference can be single or split in two. Two references are
+ideal for example if you have CLASH experimental data where you have separate
+miRNA and target references.
+>
+> 1. **ChiRA map** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input FASTA file"*: `fasta file` (output of **ChiRA collapse** {% icon tool %})
+> - *"Single or split reference?"*: `Split reference`
+> - {% icon param-file %} *"Reference FASTA file"*: `miRNA_mature.fa` (Input dataset)
+> - {% icon param-file %} *"Second reference FASTA file"*: `transcriptome.fa` (Input dataset)
+> - *"aligner"*: `BWA-MEM`
+>
+{: .hands_on}
+
+
+## Merge overlapping alignment information
+
+In this step, we merge the overlapping aligned positions to define the read-concentrated loci. If an annotation GTF file
+is provided, the transcriptomic alignment positions are first converted to their corresponding genomic positions. The
+merging is also done on the reads to define which parts of each read are mapped, which indicates the potential interacting segments
+of the read.
+
+> ### {% icon hands_on %} Hands-on:
+>
+> 1. **ChiRA merge** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"Input BED file of alignments"*: `ChiRA aligned BED` (output of **ChiRA map** {% icon tool %})
+> - *"Do you have an annotation in GTF format?"*: `Yes`
+> - {% icon param-file %} *"Annotations in GTF format"*: `whole_transcriptome.gff.gz` (Input dataset)
+>
+> > ### {% icon tip %} Tip: Parameters for interactome/structurome data like SPLASH, PARIS
+> >
+> > * If you have data that contains large interaction regions, the default `Overlap based` merging may not capture whole interaction regions.
+> > * In that case use `blockbuster` merging mode and adjust the parameters accordingly.
+> - From *"Select the mode of merging"*: `Gaussian based (blockbuster)`
+> {: .tip}
+>
+{: .hands_on}
+
+## Quantify aligned loci to score the alignments
+
+Now we have the loci where the potential interacting read segments are mapped to. Due to the small length of these arms, there is a
+very high chance of multi mapping. Another reason for this is the lenient mapping parameters that are used to increase
+the mapping sensitivity. Quantification needs 2 files containing the read segments and the loci they map to. From
+this information, `ChiRA quantify` tries to infer the correct origin of reads and calculates the expression of the loci
+using a simple expectation-maximization algorithm.
+
+> ### {% icon hands_on %} Hands-on: Task description
+>
+> 1. **ChiRA qauntify** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"BED file of aligned segments"*: `ChiRA aligned read segments` (output of **ChiRA merge** {% icon tool %})
+> - {% icon param-file %} *"Tabular file of merged alignments"*: `ChiRA merged alignments` (output of **ChiRA merge** {% icon tool %})
+>
+{: .hands_on}
+
+## Extract the best scoring chimeras
+
+After having the information about the loci expression, the final step extracts only the best scoring interacting
+partners for each read. All the combinations of the transcripts that are overlapping with the interacting loci are
+reported. If there is more than one locus with the equal best score then all the best hits are reported. If you have the
+genomic fasta file the tool can hybridize the interacting loci sequences using `IntaRNA`.
+
+> ### {% icon hands_on %} Hands-on: Task description
+>
+> 1. **ChiRA extract** {% icon tool %} with the following parameters:
+> - {% icon param-file %} *"File containing CRLs information"*: `ChiRA quantified loci` (output of **ChiRA qauntify** {% icon tool %})
+> - *"Have genomic information?"*: `Yes`
+> - {% icon param-file %} *"Annotations in GTF format"*: `whole_transcriptome.gff.gz` (Input dataset)
+> - *"Choose the source for the FASTA file"*: `History`
+> - {% icon param-file %} *"FASTA file"*: `Mus_musculus.GRCm38.dna.fa.gz` (Input dataset)
+> - *"Did you use single or split reference for alignment?"*: `Split reference`
+> - {% icon param-file %} *"Reference FASTA file"*: `miRNA_mature.fa` (Input dataset)
+> - {% icon param-file %} *"Second reference FASTA file"*: `transcriptome.fa` (Input dataset)
+> - *"Hybridize"*: `Yes`
+>
+{: .hands_on}
+
+# Visualization
+
+The output tabular file generated in the above step can be huge with up to some millions of rows depending on the
+library size and more than 30 columns. Extracting useful data from this can be very tedious. For example, extracting and
+visualizing the distribution of target biotypes of your favorite miRNA can be very tricky and might need more than a
+handful of Galaxy tools to achieve. For this reason, there exists a visualization and filtering tool for this data along
+with `ChiRA` known as `ChiRAViz`. It is a Galaxy visualization framework to work with the output of `ChiRA`. But it does
+not directly work with the tabular output we have. Rather it needs a "sqlite" database. For this reason, we first build a
+sqlite database from the `ChiRA` output.
+
+> ### {% icon hands_on %} Hands-on: Data preparation
+>
+> 1. **Query Tabular** {% icon tool %} with the following parameters:
+> - In *"Database Table"*:
+> - {% icon param-repeat %} *"Insert Database Table"*
+> - {% icon param-file %} *"Tabular Dataset for Table"*: `ChiRA chimeric reads` (output of **ChiRA extract** {% icon tool %})
+> - In *"Table Options"*
+> - *"Use first line as column names"*: `Yes`
+> - *"Save the sqlite database in your history"*: `Yes`
+>
+> 2. Change the datatype to `chira.sqlite`
+>
+> {% include snippets/change_datatype.md datatype="chira.sqlite" %}
+>
+{: .hands_on}
+
+> ### {% icon hands_on %} Hands-on: Visualize chimeras
+>
+> 1. Please click on {% icon galaxy-barchart %} *"Visualize this data"*. Then click on the `ChiRAViz` visualization.
+> This loads the data into the visualization framework and shows some basic plots from the data.
+> - The visualization is split into two to show the information for the left and the right arms.
+> - On the home page, pie charts of the left and right chimeric arms, the types of interactions and the top 50 expressed RNAs are shown.
+>![ChiRAViz home page](../../images/rna-interactome/chiraviz_home.png)
+> 2. Then choose the kind of interactions information you want to see. For example, choose the most abundant `miRNA` and `3_prime_UTR` and click on **"Get interactions"**.
+>
+>![ChiRAViz selector](../../images/rna-interactome/chiraviz_choose.png)
+>
+> - **"Chimera"** panel in the middle depicts the mapping positions on the read with read length.
+> - **"Interacting partners"** panel shows the information on which transcripts the left and right arm are mapping to with their alignment positions on the transcripts.
+> - **"Alignment Information"** panel shows the alignment if present with a possibility to download the alignment.
+>![ChiRAViz single interaction](../../images/rna-interactome/chiraviz_single.png)
+>
+{: .hands_on}
+
+> ### {% icon hands_on %} Hands-on: Filter interactions and export results
+> `ChiRAViz` provides filters to search for keywords like gene symbols, sort interactions by score, filter by score or hybridization energy. Then the filtered interactions can be summarized or exported to a file. In this step, we filter the interactions with most abundant miRNA and consider those which have an `IntaRNA` predicted hybrid.
+> - Go to the home page by clicking **Home** on the top.
+> - Go to the bottom of the home page to the plot showing top 50 symbols based on their expression.
+> - Click on the most abundant miRNA `mmu-miR-466i-5p`. This will populate the left panel with all the interactions containing `mmu-miR-466i-5p`.
+> - Filter the entries that contain an `IntaRNA` hybrid. If there is no hybrid predicted, the value in that field is `NA`
+> - From **"Filter by..."** choose `Hybrid`
+> - From **"Choose operator..."** choose `<>`
+> - Enter `NA` in the value field and hit the enter key.
+> - At this point, you can click on **Summary** to view the summary plots for this subset of interactions.
+> - Tick **"Check all"** on the bottom left corner and then click on **Export** to export the resulting interactions.
+> - This will export all the results to a file.
+{: .hands_on}
+
+# Conclusion
+{:.no_toc}
+
+Though chimeric reads look normal when inspected in a FASTQ file, the origin of each read is from two different RNA fragments. Limitations of the current sequencing protocols limit the length of each sequenced interacting RNA fragment. These smaller RNA fragments are often harder to map considering that the boundaries of each RNA fragment in the read are unknown. In this tutorial, we have seen how to map these reads and infer the true origins of them by quantifying the mapped loci. The visualization framework gives flexibility in filtering and searching output files, visualizing the summaries of filtered data, as well as exporting them.
diff --git a/topics/transcriptomics/tutorials/rna-interactome/workflows/RNA-interactome-analysis.ga b/topics/transcriptomics/tutorials/rna-interactome/workflows/RNA-interactome-analysis.ga
new file mode 100644
index 00000000000000..fe166ee8f6670b
--- /dev/null
+++ b/topics/transcriptomics/tutorials/rna-interactome/workflows/RNA-interactome-analysis.ga
@@ -0,0 +1,598 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "RNA-RNA interactome analysis using ChiRA tool suite",
+ "format-version": "0.1",
+ "name": "RNA-RNA interactome analysis",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [],
+ "label": "FASTQ file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 200
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "3903dadd-157d-46cc-aeda-382d801f8402",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "11ea3f49-0078-4f7e-88f3-8fed6487765d"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [],
+ "label": "1st reference FASTA file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 289
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "712d65a9-072c-4255-9e6a-8ef132609153",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "f297f71b-4c13-4d70-b3fa-32c9eaadfbe6"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_quantify/chira_quantify/1.1.2+galaxy0",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "merged": {
+ "id": 9,
+ "output_name": "merged_bed"
+ },
+ "segments": {
+ "id": 9,
+ "output_name": "segments_bed"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "ChiRA qauntify",
+ "outputs": [
+ {
+ "name": "loci",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 1772,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_quantify/chira_quantify/1.1.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "903f7a4cfbf0",
+ "name": "chira_quantify",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"segments\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"min_locus_size\": \"\\\"5\\\"\", \"merged\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"crl\": \"\\\"true\\\"\", \"crl_share\": \"\\\"0.7\\\"\", \"__rerun_remap_job_id__\": null, \"em_threshold\": \"\\\"1.0\\\"\"}",
+ "tool_version": "1.1.2+galaxy0",
+ "type": "tool",
+ "uuid": "0c8c33aa-b6b8-4a03-a201-f08befb15060",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "loci",
+ "uuid": "c4bc0ce3-cfe2-4812-ab9a-18a8bc238da3"
+ }
+ ]
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_extract/chira_extract/1.1.2+galaxy0",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "annotation|fasta_source|fasta": {
+ "id": 4,
+ "output_name": "output"
+ },
+ "annotation|gtf": {
+ "id": 3,
+ "output_name": "output"
+ },
+ "loci": {
+ "id": 10,
+ "output_name": "loci"
+ },
+ "reference|ref_fasta1": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "reference|ref_fasta2": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "ChiRA extract",
+ "outputs": [
+ {
+ "name": "chimeras",
+ "type": "tabular"
+ },
+ {
+ "name": "singletons",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2100,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_extract/chira_extract/1.1.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "dea6bc47a47e",
+ "name": "chira_extract",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"reference\": \"{\\\"__current_case__\\\": 0, \\\"ref_fasta1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"ref_fasta2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"ref_type\\\": \\\"split\\\"}\", \"tpm_cutoff\": \"\\\"0.0\\\"\", \"chimeric_overlap\": \"\\\"2\\\"\", \"loci\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"hybridize\": \"\\\"true\\\"\", \"__rerun_remap_job_id__\": null, \"score_cutoff\": \"\\\"0.0\\\"\", \"annotation\": \"{\\\"__current_case__\\\": 0, \\\"annot_choice\\\": \\\"yes\\\", \\\"fasta_source\\\": {\\\"__current_case__\\\": 0, \\\"fasta\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"fasta_source_selector\\\": \\\"history\\\"}, \\\"gtf\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}\"}",
+ "tool_version": "1.1.2+galaxy0",
+ "type": "tool",
+ "uuid": "ce6df2a0-1499-400a-9d59-ae87afbcdd61",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "singletons",
+ "uuid": "a0dbc038-4cf6-475f-b2c7-71e4871c2303"
+ },
+ {
+ "label": null,
+ "output_name": "chimeras",
+ "uuid": "7cb1d67a-2c73-4f37-822d-3d089236e938"
+ }
+ ]
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.0.0",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "tables_0|table": {
+ "id": 11,
+ "output_name": "chimeras"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Query Tabular",
+ "name": "add_to_database"
+ }
+ ],
+ "label": null,
+ "name": "Query Tabular",
+ "outputs": [
+ {
+ "name": "sqlitedb",
+ "type": "sqlite"
+ }
+ ],
+ "position": {
+ "left": 2428,
+ "top": 200
+ },
+ "post_job_actions": {
+ "HideDatasetActionsqlitedb": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "sqlitedb"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.0.0",
+ "tool_shed_repository": {
+ "changeset_revision": "33d61c89fb8d",
+ "name": "query_tabular",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"tables\": \"[{\\\"__index__\\\": 0, \\\"input_opts\\\": {\\\"linefilters\\\": []}, \\\"table\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"tbl_opts\\\": {\\\"col_names\\\": \\\"\\\", \\\"column_names_from_first_line\\\": \\\"true\\\", \\\"indexes\\\": [], \\\"load_named_columns\\\": \\\"false\\\", \\\"pkey_autoincr\\\": \\\"\\\", \\\"table_name\\\": \\\"\\\"}}]\", \"save_db\": \"\\\"true\\\"\", \"addqueries\": \"{\\\"queries\\\": []}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"sqlquery\": \"\\\"\\\"\", \"add_to_database\": \"{\\\"withdb\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}}\", \"query_result\": \"{\\\"__current_case__\\\": 0, \\\"header\\\": \\\"yes\\\", \\\"header_prefix\\\": \\\"35\\\"}\", \"modify_database\": \"{\\\"sql_stmts\\\": []}\", \"workdb\": \"\\\"workdb.sqlite\\\"\"}",
+ "tool_version": "3.0.0",
+ "type": "tool",
+ "uuid": "0e5f7bba-78f6-4493-877d-77810e634d5d",
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [],
+ "label": "2nd reference FASTA file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 398
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "9a1ad53b-13d2-4165-9363-139e71ab4362",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "a5dd02f4-d338-4e29-9487-073de07db0d2"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 3,
+ "input_connections": {},
+ "inputs": [],
+ "label": "Annotation GTF file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 507
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "912457cd-b29e-4d31-9371-f7079518cabc",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "f0dbac2f-0662-4eea-9d79-90455e6b8122"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 4,
+ "input_connections": {},
+ "inputs": [],
+ "label": "Whole genome FASTA file",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 616
+ },
+ "tool_id": null,
+ "tool_state": "{}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "3f5597ea-26fd-4a87-b730-6586e25e74c0",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "e3f87532-5000-42ff-862d-8cdbae711bc3"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "library|input_1": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cutadapt",
+ "outputs": [
+ {
+ "name": "out1",
+ "type": "fastqsanger"
+ },
+ {
+ "name": "report",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 460,
+ "top": 200
+ },
+ "post_job_actions": {
+ "HideDatasetActionreport": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "report"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/1.16.5",
+ "tool_shed_repository": {
+ "changeset_revision": "49370cb85f0f",
+ "name": "cutadapt",
+ "owner": "lparsons",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"output_options\": \"{\\\"info_file\\\": \\\"false\\\", \\\"multiple_output\\\": \\\"false\\\", \\\"report\\\": \\\"true\\\", \\\"rest_file\\\": \\\"false\\\", \\\"too_long_file\\\": \\\"false\\\", \\\"too_short_file\\\": \\\"false\\\", \\\"untrimmed_file\\\": \\\"false\\\", \\\"wildcard_file\\\": \\\"false\\\"}\", \"read_mod_options\": \"{\\\"length\\\": \\\"0\\\", \\\"length_tag\\\": \\\"\\\", \\\"nextseq_trim\\\": \\\"0\\\", \\\"prefix\\\": \\\"\\\", \\\"quality_cutoff\\\": \\\"0\\\", \\\"strip_suffix\\\": \\\"\\\", \\\"suffix\\\": \\\"\\\", \\\"trim_n\\\": \\\"false\\\"}\", \"adapter_options\": \"{\\\"count\\\": \\\"1\\\", \\\"error_rate\\\": \\\"0.1\\\", \\\"mask_adapter\\\": \\\"false\\\", \\\"match_read_wildcards\\\": \\\" \\\", \\\"no_indels\\\": \\\"false\\\", \\\"no_trim\\\": \\\"false\\\", \\\"overlap\\\": \\\"3\\\"}\", \"library\": \"{\\\"__current_case__\\\": 0, \\\"input_1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"r1\\\": {\\\"adapters\\\": [{\\\"__index__\\\": 0, \\\"adapter_source\\\": {\\\"__current_case__\\\": 0, \\\"adapter\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"adapter_name\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"adapter_source_list\\\": \\\"user\\\"}}], \\\"anywhere_adapters\\\": [], \\\"cut\\\": \\\"0\\\", \\\"front_adapters\\\": [{\\\"__index__\\\": 0, \\\"front_adapter_source\\\": {\\\"__current_case__\\\": 0, \\\"front_adapter\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"front_adapter_name\\\": {\\\"__class__\\\": \\\"RuntimeValue\\\"}, \\\"front_adapter_source_list\\\": \\\"user\\\"}}]}, \\\"type\\\": \\\"single\\\"}\", \"filter_options\": \"{\\\"discard\\\": \\\"false\\\", \\\"discard_untrimmed\\\": \\\"false\\\", \\\"max\\\": \\\"0\\\", \\\"max_n\\\": \\\"\\\", \\\"min\\\": \\\"10\\\", \\\"pair_filter\\\": \\\"any\\\"}\", \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.16.5",
+ "type": "tool",
+ "uuid": "144ba49e-64fe-42d7-8a48-42e65c143a6c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out1",
+ "uuid": "43730f51-c3cc-4a3c-b52c-4868b729f278"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_collapse/chira_collapse/1.1.2+galaxy0",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "in": {
+ "id": 5,
+ "output_name": "out1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "ChiRA collapse",
+ "outputs": [
+ {
+ "name": "out",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 788,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_collapse/chira_collapse/1.1.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "2a57d2ef04c6",
+ "name": "chira_collapse",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"umi_len\": \"\\\"0\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"in\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\"}",
+ "tool_version": "1.1.2+galaxy0",
+ "type": "tool",
+ "uuid": "b2d0228a-c9e6-4135-93d3-2320899e04a4",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out",
+ "uuid": "437b9634-e708-4726-80d6-9895821a2fd1"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input_file": {
+ "id": 5,
+ "output_name": "out1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "limits"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "contaminants"
+ },
+ {
+ "description": "runtime parameter for tool FastQC",
+ "name": "adapters"
+ }
+ ],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 788,
+ "top": 340
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\\\"\\\"\", \"kmers\": \"\\\"7\\\"\", \"limits\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"input_file\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"adapters\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"nogroup\": \"\\\"false\\\"\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "7e08144c-3e94-48a7-b216-4fc9b510ad0c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "efabbe41-e1bd-4868-b354-448601dba564"
+ },
+ {
+ "label": null,
+ "output_name": "text_file",
+ "uuid": "f3bf1924-e981-4fd6-8229-4558eb69e0ff"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_map/chira_map/1.1.2+galaxy0",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "query": {
+ "id": 6,
+ "output_name": "out"
+ },
+ "reference|ref_fasta1": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "reference|ref_fasta2": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "ChiRA map",
+ "outputs": [
+ {
+ "name": "mapped_bed",
+ "type": "bed"
+ },
+ {
+ "name": "unmapped_fasta",
+ "type": "fasta"
+ }
+ ],
+ "position": {
+ "left": 1116,
+ "top": 200
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_map/chira_map/1.1.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "39bb70c2764e",
+ "name": "chira_map",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"reference\": \"{\\\"__current_case__\\\": 0, \\\"ref_fasta1\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"ref_fasta2\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}, \\\"ref_type\\\": \\\"split\\\"}\", \"chimeric_overlap\": \"\\\"2\\\"\", \"query\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"__rerun_remap_job_id__\": null, \"alignment\": \"{\\\"__current_case__\\\": 0, \\\"align_score1\\\": \\\"18\\\", \\\"align_score2\\\": \\\"10\\\", \\\"aligner\\\": \\\"bwa\\\", \\\"seed_length1\\\": \\\"12\\\", \\\"seed_length2\\\": \\\"6\\\", \\\"stranded\\\": \\\"fw\\\"}\"}",
+ "tool_version": "1.1.2+galaxy0",
+ "type": "tool",
+ "uuid": "3d5fd509-97e4-41bf-b4d8-9e8f27efa75c",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "unmapped_fasta",
+ "uuid": "9a841054-24b3-47fb-a415-a64be71e82af"
+ },
+ {
+ "label": null,
+ "output_name": "mapped_bed",
+ "uuid": "e441e28c-2498-483d-a903-39102006babe"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_merge/chira_merge/1.1.2+galaxy0",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "alignments": {
+ "id": 8,
+ "output_name": "mapped_bed"
+ },
+ "annotation|gtf": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "ChiRA merge",
+ "outputs": [
+ {
+ "name": "segments_bed",
+ "type": "bed"
+ },
+ {
+ "name": "merged_bed",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 1444,
+ "top": 200
+ },
+ "post_job_actions": {
+ "HideDatasetActionsegments_bed": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "segments_bed"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/chira_merge/chira_merge/1.1.2+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "0170de5072d4",
+ "name": "chira_merge",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"merge\": \"{\\\"__current_case__\\\": 0, \\\"alignment_overlap\\\": \\\"0.7\\\", \\\"mode\\\": \\\"overlap\\\"}\", \"segment_overlap\": \"\\\"0.7\\\"\", \"alignments\": \"{\\\"__class__\\\": \\\"ConnectedValue\\\"}\", \"annotation\": \"{\\\"__current_case__\\\": 0, \\\"choice\\\": \\\"yes\\\", \\\"gtf\\\": {\\\"__class__\\\": \\\"ConnectedValue\\\"}}\"}",
+ "tool_version": "1.1.2+galaxy0",
+ "type": "tool",
+ "uuid": "512af86c-54c5-4a3b-ac1c-4f332f6b4bf9",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "merged_bed",
+ "uuid": "add0d308-e195-44cf-ba56-b10caa656210"
+ }
+ ]
+ }
+ },
+ "tags": [
+ "RNA",
+ "transcriptomics"
+ ],
+ "uuid": "26ec4dda-2693-4cfe-9fd9-9932315aeaa5",
+ "version": 4
+}
diff --git a/topics/transcriptomics/tutorials/rna-interactome/workflows/index.md b/topics/transcriptomics/tutorials/rna-interactome/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/transcriptomics/tutorials/rna-interactome/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html b/topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html
index 4897ccc237d802..8fcd40ede8785f 100644
--- a/topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html
+++ b/topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html
@@ -6,22 +6,17 @@
tags:
- single-cell
questions:
- - "What are barcodes and how are they applied to batches?"
- - "What is the difference between a plate and a batch?"
- - "Are batches and lanes the same?"
- - "Why are plating setups important to know?"
- - "Is it necessary to check barcodes that don't exist in a batch?"
+ - "What is a batch when it comes to single cells?"
+ - "What’s the difference between a barcode and an index?"
+ - "What is batch effect and how do you prevent it?"
+ - "What is a lane?"
objectives:
- - "How to describe a plating setup"
- - "Proper naming conventions when dealing with multiple batches"
- - "Checking for false positives across batches"
+ - "How to set up plates to prevent batch effect"
+ - "Proper naming conventions when dealing with scRNA-seq samples"
time_estimation: "1h"
key_points:
- - "Plates are split into Lanes"
- - "Batches are Lanes, but names are not reused"
- - "Multiple batches can exist in many Plates"
- - "Checking all barcodes across Batches eliminates false positives / cross-contamination"
- - "Intelligent plating strategies can guard against cross-contamination"
+ - "Balanced batches and replicates allow bioinformatic batch correction"
+ - "A sequencing lane often contains multiple batches, and is itself a batch effect!"
# (This will be split out of the pre-processing later)
# requirements:
@@ -39,6 +34,7 @@
- scrna-preprocessing
contributors:
+ - nomadscientist
- mtekman
- astrovsky01
@@ -48,7 +44,7 @@
### Sorting Plates
-.image-100[![slide5](../../images/scrna-plates-batches-barcodes_slide5.svg)]
+.image-100[![slide5](../../images/wab96wellplate.svg)]
.left[Plates are *N x M* arrays of wells that cells are sorted, to then be individually amplified and sequenced.]
@@ -56,72 +52,88 @@
### Sorting Plates
-.image-100[![slide6](../../images/scrna-plates-batches-barcodes_slide6.svg)]
+.image-80[![slide6](../../images/wabexampleplates.png)]
-.left[Not all wells need to be occupied by cells, some wells are left empty.]
-.center[Why is this the case?]
---
-### Sorting Plates
+### Setting up Plates
-.image-100[![slide7](../../images/scrna-plates-batches-barcodes_slide7.svg)]
+.image-75[![slide7](../../images/wabbatchproblems.png)]
-.center[Library preparation is not always perfect]
-.center[Sample **contamination** from neighbouring wells]
+.center[What is the problem with this plate setup?]
---
-### Plates and Lanes
+### Setting up Plates
+
+.image-50[![slide7](../../images/wabbatchproblems.png)]
+
+.center[Batch effect (plate vs plate) cannot be separated from treatment effect in either scenario.]
+---
+### Setting up Plates
+
+.image-50[![slide7](../../images/wabbalancedbatches.png)]
-.image-50[![slide5](../../images/scrna-plates-batches-barcodes_slide5.svg)]
+.left[Either of these is a better set-up. Mixing columns is good, but not required. Ultimately, batch effect can now be separated from variable effect.]
-.center[Sorting Plates are divided into *lanes*:]
+---
+### Setting up Plates
-.image-50[![slide9](../../images/scrna-plates-batches-barcodes_slide9.svg)]
+.image-90[![slide7](../../images/wabreplicates.png)]
+
+.center[Can't mix samples on plates? Separate replicates evenly and process together.]
???
-Plates are $$N \times M$$ arrays or wells that cells are sorted into and then individually amplified and sequenced. The way these slot are filled depends entirely on the protocol, but usually not all slots are filled. The reason for this will become clear momentarily.
-* Lanes demarcate evenly-sized *n x m* rectangular regions on a plate (where *n < N and m < M*).
-* Multiple lanes can exist on plate, lanes ideally do not overlap within a plate.
+If putting multiple treatments on the same plate is not an option, then having enough separated replicates will also allow for batch correction. Here, you can assess the variation between replicates (and thus batches) as well as between treatments.
+---
+
+### What about sequencing lanes?
+
+.center[So now it's time to sequence our samples! How do we combine samples into sequencing lanes?]
+
+.image-90[![slide7](../../images/wabseqlane.png)]
+
+.center[This works well, but what if you have too many samples for one lane?]
---
-### Plates and Lanes
+### What about sequencing lanes?
+
+.center[So now it's time to sequence our samples! How do we combine samples into sequencing lanes?]
+
+.image-90[![slide7](../../images/wablanesbad.png)]
+
+.center[Does this look ok?]
+
+--
-.image-100[![slide9](../../images/scrna-plates-batches-barcodes_slide9.svg)]
-* All slots within a lane are sequenced at the same time.
-* All lanes within a plate *may or may not* be sequenced at the same time.
-* Lanes can therefore be thought of as different *batches*.
+.center[No! You've turned each treatment (A & B) into a batch!]
---
-### Lanes and Batches
+### What about sequencing lanes?
-.image-100[![slide10](../../images/scrna-plates-batches-barcodes_slide10.svg)]
-???
-* Lanes and batches describe the same spaces, but not the same samples.
-* For this reason, batch numbers should *not* persist across different plates.
+.image-75[![slide7](../../images/wablanesgood.png)]
+
+.center[This is the way to balance your batches at the lane-level.]
---
### Distinguishing cells in a plate
-.image-100[![slide11](../../images/scrna-plates-batches-barcodes_slide11.svg)]
+.image-60[![slide11](../../images/wabplate.png)]
* Cells are selected from a plate by their *barcodes*
* Barcodes must be unique
- + e.g. *4 x 18 = 72* slots in the plate, need 72 unique cell barcodes
-
-???
-The way these slots are filled depends entirely on the protocol, but usually not all slots are filled. The reason for this will become clear momentarily
+ + e.g. 96 wells in a plate, need 96 barcodes to sequence them together
---
@@ -149,19 +161,19 @@
### Questions about Cell Barcodes
-.image-75[![slide14](../../images/scrna-plates-batches-barcodes_slide14.svg)]
+.image-50[![slide14](../../images/wabplate.png)]
-.left[Assuming a lane is a 20x5 array:]
+.left[Assuming you sequence one 96-well plate:]
1. How many cell barcodes are needed for a single lane?
-1. How many cell barcodes are needed for a plate with 10 lanes?
+1. How many cell barcodes are needed if you combine 10 plates into a single sequencing lane?
1. What would be the minimum length of the barcodes for each of the previous questions?
--
.footnote[
-1. *20 x 5 = 100 unique barcodes per lane*
-1. *20 x 5 x 10 = 1,000 unique barcodes per plate*
+1. *96 unique barcodes per lane*
+1. *96 x 10 = 960 unique barcodes per lane*
]
---
@@ -173,26 +185,26 @@
A single lane?
- * 100 barcodes, 4 nucleotides for each base of a barcode
+ * 96 barcodes, 4 nucleotides for each base of a barcode
| Barcodes | Result |
|---------|------|
| $$4^2 = 16$$ | No, 2 bases is not enough |
| $$4^3 = 64$$ | No, 3 bases is not enough |
-| $$4^4 = 256$$ | Yes, 4 bases is enough to cover 100 barcodes (and more!) |
+| $$4^4 = 256$$ | Yes, 4 bases is enough to cover 96 barcodes (and more!) |
]
--
.pull-right[
-A plate with 10 lanes?
+10 plates in a single lane?
- * 1000 barcodes, 4 nucleotides for each base of a barcode
+ * 960 barcodes, 4 nucleotides for each base of a barcode
| Barcodes | Result |
|---------|------|
|$$4^4 = 256$$ | No, 4 bases is not enough |
-|$$4^5 = 1024$$ | Yes, 5 bases is enough to cover 1,000 barcodes (just barely!) |
+|$$4^5 = 1024$$ | Yes, 5 bases is enough to cover 960 barcodes (just barely!) |
]
@@ -206,7 +218,7 @@
-->
.pull-left[
-* Is 5 nucleotides really enough to capture 1000 cells?
+* Is 5 nucleotides really enough to capture 960 cells?
* What could go wrong if all barcodes are separated by 1 bp, as shown?
]
@@ -342,9 +354,9 @@
* Every transcript in a specific cell has the same cell barcode
-* Barcodes are *designed* and are not random oligonucleotides
+* Barcodes are *designed* for smaller plate-based protocols, while for split-pool and similar techniques they are randomised.
-* Barcode use is *limited* by length and plate size
+* Barcode use is *limited* by length and read depth
]
--
@@ -360,271 +372,10 @@
]
---
-.pull-right[* How?]
-
-
----
-
-### Intelligent Plating Strategies
-
-* The main contending questions are:
-
- 1. How large is each batch?
-
-???
-How many slots in a batch that we need to barcode for
-
---
-
- 2. How many batches on a plate?
-
---
-
- 3. Should each batch use the same barcodes?
-
---
-
- 4. What **constraints** are there on the plate?
-
-
-
-???
-A technician always has to balance quality against cost, and this is illustrated in the following examples:
-
----
-
-### Example Setup
-
-.pull-left[
-* Barcodes:
- * 24 unique barcodes, with an edit distance of E=2:
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
---
-.pull-right[
-* Plates and Lanes:
- * 12 slots per lane (3x4)
- * 4 lanes per plate
-
-* Constraints
- * Only 2 lanes sequenced at the same time
- * i.e. half the plate is sequenced at the same time
-]
-
-???
-Here we use N as an extra base just for example purposes, but you do sometimes see this in other barcodes.
-2 lanes at a time, only half
-
----
-
-### Example 1: Single Plate with a Single Lane
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
---
-
-.image-75[![slide36](../../images/scrna-plates-batches-barcodes_slide36.svg)]
-
-.pull-left[
-* 12 total slots in Plate 1
-
-* 12 slots in Lane 1
-
-* All slots filled
-]
-
---
-.pull-right[
-* We only need to use half of our barcodes
-
-* Why is this wasteful?
-]
???
-* Half of the barcodes used for that lane, and the other half we can ignore.
-* Wasteful because we are not getting full use of our 24 barcodes in a single sequencing run
-
----
-
-### Example 2: Single Plate with 2 Lanes
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
-
-* 24 total slots in Plate 1
-* 12 slots in Lane 1, and 12 slots in Lane 2
-* All slots filled
-
-.image-75[![slide38](../../images/scrna-plates-batches-barcodes_slide38.svg)]
-
---
-* We can use all of our barcodes
-* Maximum number of cells can be sequenced in a single run
-* Why might this be too optimal?
-
-
-???
-Here we use all barcodes since these lanes will be sequenced at the same time
-Let's look at one final example to see why using all our barcodes on a plate might not be optimal.
-
----
-
-### Example 3: Single Plate with 2 Lanes, only 1 active
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
-
-* 24 total slots in Plate 1
-* 12 slots in Lane 1, and 12 slots in Lane 2
-* Only slots in Lane 1 filled
-
-.image-75[![slide40](../../images/scrna-plates-batches-barcodes_slide40.svg)]
-
-
---
-* Why are we still using all barcodes?
-* Why have we not filled in Lane 2?
-
-
-???
-All barcodes used, why leave one lane empty?
-
----
-
-### Example 3: Single Plate with 2 Lanes, only 1 active
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
-
-* What would it mean if we sequenced reads with {TTT,TAA,...,TNN} as their Cell Barcodes?
-
-.image-75[![slide40](../../images/scrna-plates-batches-barcodes_slide40.svg)]
-
-
---
-* **Cross-Contamination**
- * There should be *no cells in that lane*!
- * These are contaminants from Lane 1
- * We can ignore these reads and select for the {AAA,ACC,...,GTT}
-
-
-???
-If we see any reads in the Plate which contain barcodes {TTT,TAA,TCC, etc} then we know that some contamination has occurred *because there should be no cells there*. One reason is that the second lane was not completely cleaned before being used.
-
----
-
-### Example 4: 2 Plates with 2 Lanes, only 1 active (alternate)
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
-
-
-* Same as before but extra plate. A single lane used to check for contaminants.
-* Alternate barcodes used for each plate
-
-.image-50[![slide44](../../images/scrna-plates-batches-barcodes_slide44.svg)]
-
-
-
-* Why alternate active lanes across plates?
-
-
---
-
- * Plate 1 and Plate 2 are sometimes the *same plate*
- * Contaminants might carry over *undetected* if same lane is used!
-
-
-???
-* Here we have repeated previous example, but with an extra plate. In the first plate, the first half of the barcodes are used, and in the second plate, the second half of the barcodes are used.
-
-* Why alternate the barcodes between plates? The full set of barcodes does not change, so why not keep the same format?
-
-* Loaded at different times, washed clean, re-used.
-
-* Again, the answer is to reduce cross-contamination. Plate2 will be loaded after Plate1 (and perhaps Plate2 and Plate1 will use the same plate!) If we see any reads in Plate2 that should not be there, we can now surmise where they came from. We also have the added benefit of protecting the cells in Plate2 from those that may have been used in Plate1, since they are in completely different positions across plates.
-
----
-
-### Example 5: 1 Plate with 2 lanes, both active
-
-.bottom-info-box[
-Available Barcodes
-
- AAA ACC AGG TTT TAA TCC
- ATT CCC CAA TGG NAA NCC
- CGG CTT GGG NGG NTT ANN
- GAA GCC GTT CNN GNN TNN
-
-]
-
-* Both lanes filled. *All barcodes* are applied individually to *each* lane
-
-.image-100[![slide46](../../images/scrna-plates-batches-barcodes_slide46.svg)]
-
-.pull-left[
-
-* Why check all barcodes against each lane?
-
-* Why not separate lanes across different plates?
-
-]
---
-.pull-right[
-* If {TTT,TAA,...,TNN} is detected in Lane 1, or vice versa → **Contamination!**
-
-* *Benefit of* **detecting cross-contamination** *whilst still* **maximising plate usage**
-]
-???
-1. Why apply the full set of barcodes to each lane, when only half will actually label?
-2. What benefit does this serve, instead of separating them over different plates as in the previous example?
-
-A1. This setup is actually the same as example 4, but with the two plates merged. Here we can check for cross-contamination in each lane by measuring the real cell labels against the false barcodes. If in lane 1, we detect a significant number of reads with cell barcodes of `TAA` or `ANN`, we can assume that some cross-contamination has occurred since we should not be able to detect these barcodes in that lane. The converse is also true of lane 2.
-
-A2. We have the benefit of detecting cross-contamination with the same advantages as example 4, but with the cost advantage of sequencing two batches at the same time.
-
+The number of reads you want per cell determines how many cells you run in a sequencing lane, which in turn tells you how many barcodes you need.
---
# Summary
@@ -643,6 +394,4 @@
* Reduce sequencing errors
-* Check for cross contamination
-
]
diff --git a/topics/transcriptomics/tutorials/scrna-preprocessing/slides.html b/topics/transcriptomics/tutorials/scrna-preprocessing/slides.html
new file mode 100644
index 00000000000000..b5fcdda57785fc
--- /dev/null
+++ b/topics/transcriptomics/tutorials/scrna-preprocessing/slides.html
@@ -0,0 +1,304 @@
+---
+layout: tutorial_slides
+logo: "GTN"
+title: "Dealing with Cross-Contamination in Fixed Barcode Protocols"
+zenodo_link: ""
+tags:
+ - single-cell
+questions:
+ - "What is cross-contamination?"
+ - "What are fixed barcodes?"
+ - "Why are plating setups important to know?"
+ - "Is it necessary to check barcodes that don't exist in a batch?"
+objectives:
+ - "How to describe a plating setup"
+ - "Proper naming conventions when dealing with multiple batches"
+ - "Checking for false positives across batches"
+time_estimation: "1h"
+key_points:
+ - "Eliminating false positives"
+ - "Intelligent plating strategies can guard against cross-contamination"
+
+requirements:
+ -
+ type: "internal"
+ topic_name: transcriptomics
+ tutorials:
+ - scrna-plates-batches-barcodes
+
+contributors:
+ - mtekman
+ - astrovsky01
+
+
+---
+### Fixed Barcode Protocols and Multiplexing
+
+
+The main contending questions are:
+
+ 1. How large is each batch?
+
+???
+How many slots in a batch do we need to barcode?
+
+--
+
+ 2. How many batches on a plate?
+
+--
+
+ 3. Should each batch use the same barcodes?
+
+--
+
+ 4. What **constraints** are there on the plate?
+
+
+
+???
+A technician always has to balance quality against cost, and this is illustrated in the following examples:
+
+---
+
+### Example Setup
+
+.pull-left[
+* Barcodes:
+ * 24 unique barcodes, with an edit distance of E=2:
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+--
+.pull-right[
+* Plates and Batches:
+ * 12 slots per lane (3x4)
+ * 4 batches per plate
+
+* Constraints
+ * Only 2 batches sequenced at the same time
+ * i.e. half the plate is sequenced at the same time
+]
+
+???
+Here we use N as an extra base just for example purposes, but you do sometimes see this in other barcodes.
+2 batches at a time, only half
+
+---
+
+### Example 1: Single Plate with a Single Lane
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+--
+
+.image-75[![slide36](../../images/scrna-plates-batches-barcodes_slide36.svg)]
+
+.pull-left[
+* 12 total slots in Plate 1
+
+* 12 slots in Lane 1
+
+* All slots filled
+]
+
+--
+.pull-right[
+* We only need to use half of our barcodes
+
+* Why is this wasteful?
+]
+
+
+???
+* Half of the barcodes used for that lane, and the other half we can ignore.
+* Wasteful because we are not getting full use of our 24 barcodes in a single sequencing run
+
+---
+
+### Example 2: Single Plate with 2 Batches
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+
+* 24 total slots in Plate 1
+* 12 slots in Lane 1, and 12 slots in Lane 2
+* All slots filled
+
+.image-75[![slide38](../../images/scrna-plates-batches-barcodes_slide38.svg)]
+
+--
+* We can use all of our barcodes
+* Maximum number of cells can be sequenced in a single run
+* Why might this be too optimal?
+
+
+???
+Here we use all barcodes since these batches will be sequenced at the same time
+Let's look at the next example to see why using all our barcodes on a plate might not be optimal.
+
+---
+
+### Example 3: Single Plate with 2 Batches, only 1 active
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+
+* 24 total slots in Plate 1
+* 12 slots in Lane 1, and 12 slots in Lane 2
+* Only slots in Lane 1 filled
+
+.image-75[![slide40](../../images/scrna-plates-batches-barcodes_slide40.svg)]
+
+
+--
+* Why are we still using all barcodes?
+* Why have we not filled in Lane 2?
+
+
+???
+All barcodes used, why leave one lane empty?
+
+---
+
+### Example 3: Single Plate with 2 Batches, only 1 active
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+
+* What would it mean if we sequenced reads with {TTT,TAA,...,TNN} as their Cell Barcodes?
+
+.image-75[![slide40](../../images/scrna-plates-batches-barcodes_slide40.svg)]
+
+
+--
+* **Cross-Contamination**
+ * There should be *no cells in that lane*!
+ * These are contaminants from Lane 1
+ * We can ignore these reads and select for the {AAA,ACC,...,GTT}
+
+
+???
+If we see any reads in the Plate which contain barcodes {TTT,TAA,TCC, etc} then we know that some contamination has occurred *because there should be no cells there*. One reason is that the second lane was not completely cleaned before being used.
+
+---
+
+### Example 4: 2 Plates with 2 Batches, only 1 active (alternate)
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+
+
+* Same as before, but with an extra plate. A single lane is used to check for contaminants.
+* Alternate barcodes used for each plate
+
+.image-50[![slide44](../../images/scrna-plates-batches-barcodes_slide44.svg)]
+
+
+
+* Why alternate active batches across plates?
+
+
+--
+
+ * Plate 1 and Plate 2 are sometimes the *same plate*
+ * Contaminants might carry over *undetected* if same lane is used!
+
+
+???
+* Here we have repeated previous example, but with an extra plate. In the first plate, the first half of the barcodes are used, and in the second plate, the second half of the barcodes are used.
+
+* Why alternate the barcodes between plates? The full set of barcodes does not change, so why not keep the same format?
+
+* Loaded at different times, washed clean, re-used.
+
+* Again, the answer is to reduce cross-contamination. Plate2 will be loaded after Plate1 (and perhaps Plate2 and Plate1 will use the same plate!). If we see any reads in Plate2 that should not be there, we can now surmise where they came from. We also have the added benefit of protecting the cells in Plate2 from those that may have been used in Plate1, since they are in completely different positions across plates.
+
+---
+
+### Example 5: 1 Plate with 2 Batches, both active
+
+.bottom-info-box[
+Available Barcodes
+
+ AAA ACC AGG TTT TAA TCC
+ ATT CCC CAA TGG NAA NCC
+ CGG CTT GGG NGG NTT ANN
+ GAA GCC GTT CNN GNN TNN
+
+]
+
+* Both batches filled. *All barcodes* are applied individually to *each* lane
+
+.image-100[![slide46](../../images/scrna-plates-batches-barcodes_slide46.svg)]
+
+.pull-left[
+
+* Why check all barcodes against each lane?
+
+* Why not separate batches across different plates?
+
+]
+--
+.pull-right[
+* If {TTT,TAA,...,TNN} is detected in Lane 1, or vice versa → **Contamination!**
+
+* *Benefit of* **detecting cross-contamination** *whilst still* **maximising plate usage**
+]
+???
+1. Why apply the full set of barcodes to each lane, when only half will actually label?
+2. What benefit does this serve, instead of separating them over different plates as in the previous example?
+
+A1. This setup is actually the same as example 4, but with the two plates merged. Here we can check for cross-contamination in each lane by measuring the real cell labels against the false barcodes. If in lane 1, we detect a significant number of reads with cell barcodes of `TAA` or `ANN`, we can assume that some cross-contamination has occurred since we should not be able to detect these barcodes in that lane. The converse is also true of lane 2.
+
+A2. We have the benefit of detecting cross-contamination with the same advantages as example 4, but with the cost advantage of sequencing two batches at the same time.
+
+---
+
+# Summary
+
+* Barcodes are often reused across batches if there is a fixed and limited number of them
+
+* Intelligent plating strategies reduce sequencing errors
+
+* Cross-contamination can be detected by examining false positives across plates
+
diff --git a/topics/transcriptomics/tutorials/scrna-preprocessing/tutorial.md b/topics/transcriptomics/tutorials/scrna-preprocessing/tutorial.md
index d746314a460f69..d44d02c3ec99e6 100644
--- a/topics/transcriptomics/tutorials/scrna-preprocessing/tutorial.md
+++ b/topics/transcriptomics/tutorials/scrna-preprocessing/tutorial.md
@@ -148,15 +148,11 @@ The size of scRNA files (.fastq) are typically in the gigabyte range and are som
{: .hands_on}
-
-
## Barcode Extraction
> ### {% icon comment %} Note
>
-> Before performing the barcode extraction process, it is recommended that you familiarise yourself with the concepts of designing cell barcodes as given by the [*Plates, Batches, and Barcodes*]({% link topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html %}) slides 1-26, as well as the [*Understanding Barcodes*]({% link topics/transcriptomics/tutorials/scrna-umis/tutorial.md %}) hands-on material for an introduction into transcript barcodes.
+> Before performing the barcode extraction process, it is recommended that you familiarise yourself with the concepts of designing cell barcodes as given by the [*Plates, Batches, and Barcodes*]({% link topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html %}), as well as the [*Understanding Barcodes*]({% link topics/transcriptomics/tutorials/scrna-umis/tutorial.md %}) hands-on material for an introduction to transcript barcodes.
>
{: .comment}
@@ -636,7 +632,7 @@ Once the merge is complete, we can now peek at our full combined matrix by once
In the new combined matrix we see that we have 1536 cells, but this number is greatly overestimated. This is because *not all batches use the same barcodes*, and yet we applied the full set of 192 barcodes against our FASTQ data during the [*Barcode Extraction*](#barcode-extraction) stage previously.
-The reason we do this is to test for cross-contamination between batches, the details of which are better explained in the [*Plates, Batches, and Barcodes*]({% link topics/transcriptomics/tutorials/scrna-plates-batches-barcodes/slides.html %}) slides 26-52.
+The reason we do this is to test for cross-contamination between batches, the details of which are better explained in the [*accompanying slides*]({% link topics/transcriptomics/tutorials/scrna-preprocessing/slides.html %}).
## Guarding against Cross-Contamination
diff --git a/topics/variant-analysis/images/mt_bam_merging.png b/topics/variant-analysis/images/mt_bam_merging.png
index c6060253df7795..e52192635ddfad 100644
Binary files a/topics/variant-analysis/images/mt_bam_merging.png and b/topics/variant-analysis/images/mt_bam_merging.png differ
diff --git a/topics/variant-analysis/images/mt_bwa_mem.png b/topics/variant-analysis/images/mt_bwa_mem.png
index 7cdd037e9ff1cd..2d91b7e9f6fd6c 100644
Binary files a/topics/variant-analysis/images/mt_bwa_mem.png and b/topics/variant-analysis/images/mt_bwa_mem.png differ
diff --git a/topics/variant-analysis/images/mt_cut.png b/topics/variant-analysis/images/mt_cut.png
index b2dcd1eaa7cb02..d0b25a7516ceb1 100644
Binary files a/topics/variant-analysis/images/mt_cut.png and b/topics/variant-analysis/images/mt_cut.png differ
diff --git a/topics/variant-analysis/images/mt_dedup.png b/topics/variant-analysis/images/mt_dedup.png
index c712b8c2af15b4..96adddf093b085 100644
Binary files a/topics/variant-analysis/images/mt_dedup.png and b/topics/variant-analysis/images/mt_dedup.png differ
diff --git a/topics/variant-analysis/images/mt_filtering.png b/topics/variant-analysis/images/mt_filtering.png
index 14881e1aa55e82..07120975257abc 100644
Binary files a/topics/variant-analysis/images/mt_filtering.png and b/topics/variant-analysis/images/mt_filtering.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_allelic_scope.png b/topics/variant-analysis/images/mt_freebayes_allelic_scope.png
index e138639ea02a07..22d17559967c0a 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_allelic_scope.png and b/topics/variant-analysis/images/mt_freebayes_allelic_scope.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_alloptions.png b/topics/variant-analysis/images/mt_freebayes_alloptions.png
index ac7c146dc5163e..b5bc6fc7dad0bd 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_alloptions.png and b/topics/variant-analysis/images/mt_freebayes_alloptions.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_genome.png b/topics/variant-analysis/images/mt_freebayes_genome.png
index 7ef9d1f5b020f9..43599f568899aa 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_genome.png and b/topics/variant-analysis/images/mt_freebayes_genome.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_inputfilters.png b/topics/variant-analysis/images/mt_freebayes_inputfilters.png
index 2fcabf43b91510..6328c5c2df712c 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_inputfilters.png and b/topics/variant-analysis/images/mt_freebayes_inputfilters.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_popmodel.png b/topics/variant-analysis/images/mt_freebayes_popmodel.png
index ad89ffa63322a1..f24854c0a0a86a 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_popmodel.png and b/topics/variant-analysis/images/mt_freebayes_popmodel.png differ
diff --git a/topics/variant-analysis/images/mt_freebayes_regions.png b/topics/variant-analysis/images/mt_freebayes_regions.png
index 695b6efa9564a1..9fd85812538e93 100644
Binary files a/topics/variant-analysis/images/mt_freebayes_regions.png and b/topics/variant-analysis/images/mt_freebayes_regions.png differ
diff --git a/topics/variant-analysis/images/mt_imported_data.png b/topics/variant-analysis/images/mt_imported_data.png
index ed31bbb912ec3d..a6608d6656be95 100644
Binary files a/topics/variant-analysis/images/mt_imported_data.png and b/topics/variant-analysis/images/mt_imported_data.png differ
diff --git a/topics/variant-analysis/images/mt_left_align.png b/topics/variant-analysis/images/mt_left_align.png
index 9548cc378ffbf5..5a2895c708580a 100644
Binary files a/topics/variant-analysis/images/mt_left_align.png and b/topics/variant-analysis/images/mt_left_align.png differ
diff --git a/topics/variant-analysis/images/mt_lib.png b/topics/variant-analysis/images/mt_lib.png
index 93aaa30699e084..057f084430ea49 100644
Binary files a/topics/variant-analysis/images/mt_lib.png and b/topics/variant-analysis/images/mt_lib.png differ
diff --git a/topics/variant-analysis/images/mt_qc.png b/topics/variant-analysis/images/mt_qc.png
index 16ad10ba009eb0..46a1143437c9cd 100644
Binary files a/topics/variant-analysis/images/mt_qc.png and b/topics/variant-analysis/images/mt_qc.png differ
diff --git a/topics/variant-analysis/images/mt_tab.png b/topics/variant-analysis/images/mt_tab.png
index dce524c5670851..3fab14b5c08341 100644
Binary files a/topics/variant-analysis/images/mt_tab.png and b/topics/variant-analysis/images/mt_tab.png differ
diff --git a/topics/variant-analysis/images/mt_vcfToTab.png b/topics/variant-analysis/images/mt_vcfToTab.png
index 982ad347d469f2..459b3f9e68719c 100644
Binary files a/topics/variant-analysis/images/mt_vcfToTab.png and b/topics/variant-analysis/images/mt_vcfToTab.png differ
diff --git a/topics/variant-analysis/images/mt_vcf_dataset_collapsed.png b/topics/variant-analysis/images/mt_vcf_dataset_collapsed.png
index f886ebb0d69350..10920617f54aad 100644
Binary files a/topics/variant-analysis/images/mt_vcf_dataset_collapsed.png and b/topics/variant-analysis/images/mt_vcf_dataset_collapsed.png differ
diff --git a/topics/variant-analysis/images/mt_vcf_dataset_expanded.png b/topics/variant-analysis/images/mt_vcf_dataset_expanded.png
index a1fc00d42a79fc..aa84f0f3a1e04b 100644
Binary files a/topics/variant-analysis/images/mt_vcf_dataset_expanded.png and b/topics/variant-analysis/images/mt_vcf_dataset_expanded.png differ
diff --git a/topics/variant-analysis/images/mt_vcffilter.png b/topics/variant-analysis/images/mt_vcffilter.png
index d535eb0b18ac86..3feb1362d0c3d9 100644
Binary files a/topics/variant-analysis/images/mt_vcffilter.png and b/topics/variant-analysis/images/mt_vcffilter.png differ
diff --git a/topics/variant-analysis/images/mt_vcfiobio.png b/topics/variant-analysis/images/mt_vcfiobio.png
index 4cc4a697c5ad75..88a46728ec303c 100644
Binary files a/topics/variant-analysis/images/mt_vcfiobio.png and b/topics/variant-analysis/images/mt_vcfiobio.png differ
diff --git a/topics/variant-analysis/tutorials/non-dip/tutorial.md b/topics/variant-analysis/tutorials/non-dip/tutorial.md
index 41191a230c0204..e64fc354d3f767 100644
--- a/topics/variant-analysis/tutorials/non-dip/tutorial.md
+++ b/topics/variant-analysis/tutorials/non-dip/tutorial.md
@@ -17,39 +17,63 @@ key_points:
- "FreeBayes can be effectively used to call variants in haploid systems."
contributors:
- nekrut
+ - astrovsky01
---
# Introduction
-The majority of life on Earth is non-diploid and represented by prokaryotes, viruses and their derivatives such as our own mitochondria or plant's chloroplasts. In non-diploid systems allele frequencies can range anywhere between 0 and 100% and there could be multiple (not just two) alleles per locus. The main challenge associated with non-diploid variant calling is the difficulty in distinguishing between sequencing noise (abundant in all NGS platforms) and true low frequency variants. Some of the early attempts to do this well have been accomplished on human mitochondrial DNA although the same approaches will work equally good on viral and bacterial genomes:
+The majority of life on Earth is non-diploid and represented by prokaryotes, viruses, and their derivatives, such as our own mitochondria or plants' chloroplasts. In non-diploid systems, allele frequencies can range anywhere between 0 and 100% and there could be multiple (not just two) alleles per locus. The main challenge associated with non-diploid variant calling is the difficulty in distinguishing between the sequencing noise (abundant in all NGS platforms) and true low frequency variants. Some of the early attempts to do this well have been accomplished on human mitochondrial DNA although the same approaches will work equally well on viral and bacterial genomes:
* 2014 - [Maternal age effect and severe germ-line bottleneck in the inheritance of human mitochondrial DNA](http://www.pnas.org/content/111/43/15474.abstract)
* 2015 - [Extensive tissue-related and allele-related mtDNA heteroplasmy suggests positive selection for somatic mutations](http://www.pnas.org/content/112/8/2491.abstract).
-As an example of non-diploid system we will be using human mitochondrial genome as an example. However, this approach will also work for most bacterial and viral genomes as well.
+As an example of a non-diploid system, we will be using the human mitochondrial genome. However, this approach will also work for most bacterial and viral genomes.
There are two ways one can call variants:
1. By comparing reads against an existing genome assembly
-2. By assembling genome first and then mapping against that assembly
+2. By assembling a genome first and then mapping against that assembly
| |
|--------------------------|
| ![2 approaches](../../images/ref_vs_assembly.jpg) |
| This figure from a manuscript by [Olson:2015](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4493402/) contrasts the two approaches. |
-In this tutorials we will take the *first* path is which we map reads against an existing assembly. Later in the course (after we learn about assembly approaches) we will try the second approach as well.
+In this tutorial we will take the *first* path, in which we map reads against an existing assembly. Later in the course (after we learn about assembly approaches) we will try the second approach as well.
-The goal of this example is to detect heteroplasmies (variants within mitochondrial DNA). Mitochondria is transmitted maternally and heteroplasmy frequencies may change dramatically and unpredictably during the transmission, due to a germ-line bottleneck [Cree:2008](https://www.nature.com/ng/journal/v40/n2/abs/ng.2007.63.html). As we mentioned above the procedure for finding variants in bacterial or viral genomes will be essentially the same.
+The goal of this example is to detect heteroplasmies (variants within mitochondrial DNA). Mitochondria are transmitted maternally, and heteroplasmy frequencies may change dramatically and unpredictably during the transmission due to a germ-line bottleneck [Cree:2008](https://www.nature.com/ng/journal/v40/n2/abs/ng.2007.63.html). As we mentioned above, the procedure for finding variants in bacterial or viral genomes will be essentially the same.
-Zenodo contains [datasets representing a child and a mother](https://doi.org/10.5281/zenodo.1251112). These datasets are obtained by paired-end Illumina sequencing of human genomic DNA enriched for mitochondria. The enrichment was performed using long-range PCR with two primer pairs that amplify the entire mitochondrial genome. This means that these samples still contain a lot of DNA from the nuclear genome, which, in this case, is a contaminant.
+Zenodo contains [datasets representing a child and a mother](https://doi.org/10.5281/zenodo.1251112). These datasets were obtained by paired-end Illumina sequencing of human genomic DNA enriched for mitochondria. The enrichment was performed using long-range PCR with two primer pairs that amplify the entire mitochondrial genome. Samples will therefore still contain a lot of DNA from the nuclear genome, which, in this case, is a contaminant.
# Importing example datasets
-For this tutorial we have prepared a subset of data previously [published](http://www.pnas.org/content/111/43/15474.abstract) by our group. Let's import these data into Galaxy.
+For this tutorial we have prepared a subset of data previously [published](http://www.pnas.org/content/111/43/15474.abstract) by our group. Let's import these data into Galaxy. They are available from [this Galaxy Library](https://usegalaxy.org/library/list#folders/Fe4842bd0c37b03a7) or via [Zenodo](https://zenodo.org/record/582600).
-> ### Data upload from a Galaxy Library
+> ### {% icon hands_on %} Hands-on: Getting the data
+
+> ### Option 1: Data upload from Zenodo
+>
+>
+>
+> * Create and name a new history for this tutorial.
+>
+> {% include snippets/create_new_history.md %}
+>
+> * Import from [Zenodo](https://zenodo.org/record/1251112) or from the data library the files:
+>
+> ```
+> https://zenodo.org/record/1251112/files/raw_child-ds-1.fq
+> https://zenodo.org/record/1251112/files/raw_child-ds-2.fq
+> https://zenodo.org/record/1251112/files/raw_mother-ds-1.fq
+> https://zenodo.org/record/1251112/files/raw_mother-ds-2.fq
+> ```
+>
+{: .hands_on}
+
+> ### Option 2: Data upload from a Galaxy Library
+>
+>
>
> ![Data upload from a Galaxy Library](../../images/mt_lib.png)
>
@@ -58,13 +82,15 @@ For this tutorial we have prepared a subset of data previously [published](http:
> * Click **to History** button.
> * Galaxy will prompt you to ask whether you want to import these data into already existing or new history.
> * It is better to create a new history, so type some descriptive name within `or create new` text field
-> * click **Import**
+> * Click **Import**
> * A green message will appear once the import is done. Click on it and will see the history you have just created. It will be populated with the four datasets as shown below:
>
> ![Imported data in history](../../images/mt_imported_data.png)
>
{: .hands_on}
+
+
# QC'ing the data
Before proceeding with the analysis, we need to find out how good the data actually is. For this will use `FastQC` tool that can be found in **NGS: QC and manipulation → FastQC** section of Galaxy tools:
@@ -118,7 +144,7 @@ Preparation of sequencing libraries (at least at the time of writing) for techno
Duplicates can be identified based on their outer alignment coordinates or using sequence-based clustering. One of the common ways for identification of duplicate reads is the `MarkDuplicates` utility from [Picard](https://broadinstitute.github.io/picard/command-line-overview.html) package. It is designed to identify both PCR and optical duplicates (the following is an excerpt from Picard documentation):
-*Duplicates are identified as read pairs having identical 5' positions (coordinate and strand) for both reads in a mate pair (and optionally, matching unique molecular identifier reads; see BARCODE_TAG option). Optical, or more broadly Sequencing, duplicates are duplicates that appear clustered together spatially during sequencing and can arise from optical/imagine-processing artifacts or from bio-chemical processes during clonal amplification and sequencing; they are identified using the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options. The tool's main output is a new SAM or BAM file in which duplicates have been identified in the SAM flags field, or optionally removed (see REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES), and optionally marked with a duplicate type in the 'DT' optional attribute. In addition, it also outputs a metrics file containing the numbers of READ_PAIRS_EXAMINED, UNMAPPED_READS, UNPAIRED_READS, UNPAIRED_READ DUPLICATES, READ_PAIR_DUPLICATES, and READ_PAIR_OPTICAL_DUPLICATES. Usage example: java -jar picard.jar MarkDuplicates I=input.bam \ O=marked_duplicates.bam M=marked_dup_metrics.txt.*
+*Duplicates are identified as read pairs having identical 5' positions (coordinate and strand) for both reads in a mate pair (and optionally, matching unique molecular identifier reads; see BARCODE_TAG option). Optical, or more broadly Sequencing, duplicates are duplicates that appear clustered together spatially during sequencing and can arise from optical/imagine-processing artifacts or from bio-chemical processes during clonal amplification and sequencing; they are identified using the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options. The tool's main output is a new SAM or BAM file in which duplicates have been identified in the SAM flags field, or optionally removed (see REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES), and optionally marked with a duplicate type in the 'DT' optional attribute. In addition, it also outputs a metrics file containing the numbers of READ_PAIRS_EXAMINED, SECONDARY_OR_SUPPLEMENTARY_RDS, UNMAPPED_READS, UNPAIRED_READS, UNPAIRED_READ DUPLICATES, READ_PAIR_DUPLICATES, and READ_PAIR_OPTICAL_DUPLICATES. Usage example: java -jar picard.jar MarkDuplicates I=input.bam \ O=marked_duplicates.bam M=marked_dup_metrics.txt.*
Let's use **NGS: Picard** → **MarkDuplicates** tool:
@@ -132,8 +158,8 @@ Let's use **NGS: Picard** → **MarkDuplicates** tool:
**MarkDuplicates** produces a BAM dataset with duplicates removed and also a metrics file. Let's take a look at the metrics data:
```
-raw_child-ds- 55 27551 55 50 1658 0 0.061026 219628
-raw_mother-ds- 96 54972 96 90 4712 0 0.086459 302063
+raw_child-ds- 55 27551 849 55 50 1658 1 0.061026 219750
+raw_mother-ds- 95 54973 1951 95 89 4712 2 0.08645 302188
```
where columns are:
@@ -141,6 +167,7 @@ where columns are:
- LIBRARY (read group in our case)
- UNPAIRED_READS_EXAMINED
- READ_PAIRS_EXAMINED-
+- SECONDARY_OR_SUPPLEMENTARY_RDS
- UNMAPPED_READS
- UNPAIRED_READ_DUPLICATES
- READ_PAIR_DUPLICATES
@@ -196,7 +223,7 @@ Remember that we are trying to call variants in mitochondrial genome. Let focus
>
>Filtering reads. There are several important point to note here:
>
->- **mapQuality** is set to ⋝ 20 Mapping quality reflects the probability that the read is placed *incorrectly*. It uses [phred scale](https://en.wikipedia.org/wiki/Phred_quality_score). Thus 20 is 1/100 or 1% chance that the read is incorrectly mapped. By setting this parameter to ⋝ 20 we will keep all reads that have 1% or less probability of being mapped incorrectly.
+>- **mapQuality** is set to ≥20. Mapping quality reflects the probability that the read is placed *incorrectly*. It uses [phred scale](https://en.wikipedia.org/wiki/Phred_quality_score). Thus 20 is 1/100 or 1% chance that the read is incorrectly mapped. By setting this parameter to ≥20, we will keep all reads that have 1% or less probability of being mapped incorrectly.
>- *isPaired* will eliminate singleton (unpaired) reads (make sure **Yes** is clicked on)
>- *isProperPair* will only keep reads that map to the same chromosome and are properly placed (again, make sure **Yes** is clicked)
>- *reference* is set to *chrM*
@@ -204,7 +231,7 @@ Remember that we are trying to call variants in mitochondrial genome. Let focus
# Calling non-diploid variants with FreeBayes
-FreeBayes is widely used for calling variants in diploid systems. However, it can also be used for calling variants in pooled samples where the number of samples is not known. This is the exact scenario we have here: in our sample we have multiple mitochondrial (or bacterial or viral) genomes but we do not know exactly how many. Thus we will use the `--pooled-continuous` option of FreeBayes to generate *frequency-based* variant calls as well as some other options highlighted below (the tool is in **NGS: Variant Analysis** → **FreeBayes**):
+FreeBayes is widely used for calling variants in diploid systems. However, it can also be used for calling variants in pooled samples where the number of samples is not known. This is the exact scenario we have here: in our sample we have multiple mitochondrial (or bacterial or viral) genomes, but we do not know exactly how many. Thus we will use the `--pooled-continuous` option of FreeBayes to generate *frequency-based* variant calls as well as some other options highlighted below (the tool is in **NGS: Variant Analysis** → **FreeBayes**):
> ### Running `FreeBayes`
>
@@ -245,24 +272,49 @@ FreeBayes is widely used for calling variants in diploid systems. However, it ca
>* **Exclude alleles from analysis if their supporting base quality less than** to `30` (phred score of 30). This will make FreeBayes to only consider high quality bases.
{: .hands_on}
-This will produce a [VCF dataset](https://samtools.github.io/hts-specs/VCFv4.2.pdf) shows below (you may need to scroll sideways to see it in full). It lists 30 sites of interest (everything starting with a `#` is a comment):
+This will produce a [VCF dataset](https://samtools.github.io/hts-specs/VCFv4.2.pdf) shown below (you may need to scroll sideways to see it in full). It lists 25 sites of interest (everything starting with a `#` is a comment):
```
Chrom Pos ID Ref Alt Qual Filter Info Format data
-##fileformat=VCFv4.1
-##fileDate=20161108
-##source=freeBayes v0.9.20
+##fileformat=VCFv4.2
+##fileDate=20200211
+##source=freeBayes v1.1.0-46-g8d2b3a0-dirty
##reference=/galaxy/data/hg38/sam_index/hg38.fa
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
+##contig=
##phasing=none
-##commandline="freebayes --bam localbam_0.bam --fasta-reference /galaxy/data/hg38/sam_index/hg38.fa --vcf /galaxy-repl/main/files/017/782/dataset_17782376.dat --region chrM:1..16000"
+##commandline="freebayes --region chrM:1..16000 --bam b_0.bam --fasta-reference /galaxy/data/hg38/sam_index/hg38.fa --vcf ./vcf_output/part_chrM:1..16000.vcf --theta 0.001 --ploidy 1 -J -K -X -u -n 0 --haplotype-length 3 --min-repeat-size 5 --min-repeat-entropy 0 -m 20 -q 30 -R 0 -Y 0 -e 1000 -F 0.2 -C 2 -G 1 --min-coverage 0 --min-alternate-qsum 0"
##INFO=
##INFO=
##INFO=
##INFO=
##INFO=
##INFO=
-##INFO=
-##INFO=
+##INFO=
+##INFO=
##INFO=
##INFO=
##INFO=
@@ -287,64 +339,63 @@ Chrom Pos ID Ref Alt Qual Filter Info Format data
##INFO=
##INFO=
##INFO=
-##INFO=
+##INFO=
##INFO=
-##INFO=
+##INFO=
##INFO=
##INFO=
##INFO=
##INFO=
##INFO=
##INFO=
-##INFO=
+##INFO=
+##INFO=
+##INFO=
##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
+##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
##FORMAT=
+##FORMAT=
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT raw_child-ds- raw_mother-ds-
-chrM 73 . A G 33368.5 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1095;CIGAR=1X;DP=1098;DPB=1098;DPRA=0;EPP=107.005;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1.5;MQM=55.7744;MQMR=60;NS=2;NUMALT=1;ODDS=509.945;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=37602;QR=37;RO=1;RPL=359;RPP=284.863;RPPR=5.18177;RPR=736;RUN=1;SAF=507;SAP=16.0213;SAR=588;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:273:0:0:273:9187:-827.167,-82.1812,0 1/1:825:1:37:822:28415:-2554.14,-241.429,0
-chrM 263 . A G 13774.9 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=508;CIGAR=1X;DP=524;DPB=524;DPRA=0;EPP=39.1901;EPPR=0;GTI=0;LEN=1;MEANALT=2.5;MQM=60;MQMR=0;NS=2;NUMALT=1;ODDS=255.818;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=15693;QR=0;RO=0;RPL=373;RPP=245.138;RPPR=0;RPR=135;RUN=1;SAF=219;SAP=23.9556;SAR=289;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:154:0:0:150:4661:-419.661,-45.1545,0 1/1:370:0:0:358:11032:-993.047,-107.769,0
-chrM 309 . CT CCTC,CC 4399.1 . AB=0.56535,0.285714;ABP=15.2141,134.229;AC=2,2;AF=0.5,0.5;AN=4;AO=186,94;CIGAR=1M2I1X,1M1X;DP=329;DPB=555.5;DPRA=0,0;EPP=23.6043,97.6311;EPPR=31.9633;GTI=0;LEN=3,1;MEANALT=6,6;MQM=60,59.8085;MQMR=60;NS=2;NUMALT=2;ODDS=89.3381;PAIRED=1,1;PAIREDR=1;PAO=13.3333,13.3333;PQA=339,339;PQR=290;PRO=11.3333;QA=4084,2577;QR=686;RO=30;RPL=114,78;RPP=23.6043,91.8097;RPPR=38.0434;RPR=72,16;RUN=1,1;SAF=0,63;SAP=406.904,26.6655;SAR=186,31;SRF=21;SRP=13.4334;SRR=9;TYPE=complex,snp;technology.ILLUMINA=1,1 GT:DP:RO:QR:AO:QA:GL 1/2:93:6:123:59,23:1308,638:-159.543,-53.5005,-52.9104,-105.161,0,-113.176 1/2:236:24:563:127,71:2776,1939:-368.987,-136.961,-169.835,-200.763,0,-245.13
-chrM 513 . GCACACACACAC GCACACACACACAC 3522.72 . AB=0.647399;ABP=35.6577;AC=3;AF=0.75;AN=4;AO=156;CIGAR=1M2I11M;DP=231;DPB=321.083;DPRA=0;EPP=75.17;EPPR=5.48477;GTI=1;LEN=2;MEANALT=13.5;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=3.87694;PAIRED=1;PAIREDR=1;PAO=46.5;PQA=1383.5;PQR=1383.5;PRO=46.5;QA=4585;QR=1403;RO=43;RPL=39;RPP=87.6977;RPPR=3.0608;RPR=117;RUN=1;SAF=111;SAP=63.6445;SAR=45;SRF=26;SRP=7.10075;SRR=17;TYPE=ins;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:58:7:225:44:1251:-105.403,0,-13.0389 0/1:173:36:1178:112:3334:-290.141,0,-96.0932
-chrM 750 . A G 51447.4 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1722;CIGAR=1X;DP=1736;DPB=1736;DPRA=0;EPP=3.03048;EPPR=20.3821;GTI=0;LEN=1;MEANALT=3;MQM=59.8868;MQMR=60;NS=2;NUMALT=1;ODDS=753.623;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=57871;QR=122;RO=8;RPL=720;RPP=103.291;RPPR=12.7819;RPR=1002;RUN=1;SAF=1151;SAP=427.217;SAR=571;SRF=1;SRP=12.7819;SRR=7;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:436:4:53:429:13615:-1220.76,-116.422,0 1/1:1300:4:69:1293:44256:-3977.02,-373.134,0
-chrM 1438 . A G 79172.1 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2474;CIGAR=1X;DP=2480;DPB=2480;DPRA=0;EPP=7.56039;EPPR=3.44459;GTI=0;LEN=1;MEANALT=1.5;MQM=59.8319;MQMR=58;NS=2;NUMALT=1;ODDS=1085.01;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=88621;QR=102;RO=5;RPL=1546;RPP=338.232;RPPR=3.44459;RPR=928;RUN=1;SAF=1055;SAP=119.304;SAR=1419;SRF=3;SRP=3.44459;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:551:1:16:550:19482:-1752.13,-161.526,0 1/1:1929:4:86:1924:69139:-6214.93,-560.827,0
-chrM 2487 . A C 4432.57 . AB=0.278634;ABP=775.959;AC=1;AF=0.25;AN=4;AO=621;CIGAR=1X;DP=2340;DPB=2340;DPRA=0;EPP=15.1824;EPPR=99.2128;GTI=1;LEN=1;MEANALT=2.5;MQM=59.3285;MQMR=59.9115;NS=2;NUMALT=1;ODDS=63.7254;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=9402;QR=56614;RO=1707;RPL=281;RPP=15.1824;RPPR=127.637;RPR=340;RUN=1;SAF=0;SAP=1351.49;SAR=621;SRF=1352;SRP=1267.49;SRR=355;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:524:405:13274:115:1754:-119.206,0,-1156.18 0/1:1816:1302:43340:506:7648:-607.816,0,-3820.28
-chrM 2706 . A G 49482.2 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1889;CIGAR=1X;DP=1969;DPB=1969;DPRA=0;EPP=11.3157;EPPR=6.95112;GTI=0;LEN=1;MEANALT=2.5;MQM=59.9645;MQMR=59.5926;NS=2;NUMALT=1;ODDS=759.158;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=55923;QR=722;RO=27;RPL=810;RPP=86.1918;RPPR=5.02092;RPR=1079;RUN=1;SAF=802;SAP=96.3813;SAR=1087;SRF=18;SRP=9.52472;SRR=9;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:408:5:99:391:11529:-1028.82,-99.3894,0 1/1:1561:22:623:1498:44394:-3939.48,-352.569,0
-chrM 3197 . T C 135699 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=4208;CIGAR=1X;DP=4241;DPB=4241;DPRA=0;EPP=168.325;EPPR=3.13803;GTI=0;LEN=1;MEANALT=3;MQM=59.9943;MQMR=60;NS=2;NUMALT=1;ODDS=2145.76;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=152221;QR=558;RO=17;RPL=2378;RPP=157.977;RPPR=6.20364;RPR=1830;RUN=1;SAF=1641;SAP=445.497;SAR=2567;SRF=8;SRP=3.13803;SRR=9;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:1499:6:179:1486:53236:-4775.26,-416.785,0 1/1:2742:11:379:2722:98985:-8874.65,-758.304,0
-chrM 3243 . A G 46067 . AB=0.612338;ABP=290.859;AC=2;AF=0.5;AN=4;AO=1608;CIGAR=1X;DP=2626;DPB=2626;DPRA=0;EPP=31.0126;EPPR=64.3549;GTI=0;LEN=1;MEANALT=2;MQM=59.9627;MQMR=59.815;NS=2;NUMALT=1;ODDS=1288.98;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=53165;QR=35336;RO=1011;RPL=974;RPP=159.119;RPPR=763.402;RPR=634;RUN=1;SAF=558;SAP=329.898;SAR=1050;SRF=383;SRP=131.935;SRR=628;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:1068:221:7574:841:27395:-2380.4,0,-596.524 0/1:1558:790:27762:767:25770:-2317.69,0,-2496.98
-chrM 3483 . G C 685.467 . AB=0.254386;ABP=182.214;AC=1;AF=0.25;AN=4;AO=127;CIGAR=1X;DP=550;DPB=550;DPRA=0;EPP=37.6342;EPPR=22.2028;GTI=1;LEN=1;MEANALT=1.5;MQM=59.4646;MQMR=59.8504;NS=2;NUMALT=1;ODDS=25.0865;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2032;QR=13200;RO=421;RPL=87;RPP=40.7802;RPPR=245.89;RPR=40;RUN=1;SAF=1;SAP=270.17;SAR=126;SRF=321;SRP=254.927;SRR=100;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:208:166:5264:40:608:-35.5966,0,-454.8 0/1:342:255:7936:87:1424:-108.297,0,-694.524
-chrM 3488 . T A 682.097 . AB=0.264706;ABP=166.509;AC=1;AF=0.25;AN=4;AO=130;CIGAR=1X;DP=546;DPB=546;DPRA=0;EPP=44.7694;EPPR=34.7681;GTI=1;LEN=1;MEANALT=1;MQM=59.4231;MQMR=59.7139;NS=2;NUMALT=1;ODDS=17.5994;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2069;QR=13578;RO=416;RPL=90;RPP=44.7694;RPPR=211.806;RPR=40;RUN=1;SAF=0;SAP=285.302;SAR=130;SRF=315;SRP=242.06;SRR=101;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:206:166:5535:40:650:-39.5324,0,-479.353 0/1:340:250:8043:90:1419:-109.544,0,-705.868
-chrM 3511 . A C 434.289 . AB=0.260394;ABP=230.901;AC=1;AF=0.25;AN=4;AO=185;CIGAR=1X;DP=752;DPB=752;DPRA=0;EPP=322.569;EPPR=29.1769;GTI=1;LEN=1;MEANALT=3;MQM=59.7838;MQMR=59.7348;NS=2;NUMALT=1;ODDS=57.1305;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2698;QR=16673;RO=558;RPL=11;RPP=314.869;RPPR=115.475;RPR=174;RUN=1;SAF=1;SAP=396.094;SAR=184;SRF=292;SRP=5.64097;SRR=266;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:295:226:6735:66:923:-61.5611,0,-584.793 0/1:457:332:9938:119:1775:-135.644,0,-870.462
-chrM 4769 . A G 54711.1 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1746;CIGAR=1X;DP=1752;DPB=1752;DPRA=0;EPP=85.7949;EPPR=5.18177;GTI=0;LEN=1;MEANALT=2.5;MQM=51.2801;MQMR=58;NS=2;NUMALT=1;ODDS=911.774;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=61628;QR=15;RO=1;RPL=549;RPP=525.238;RPPR=5.18177;RPR=1197;RUN=1;SAF=1003;SAP=87.0833;SAR=743;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:604:1:15:601:20766:-1867.77,-177.095,0 1/1:1148:0:0:1145:40862:-3677.79,-344.679,0
-chrM 5539 . A G 11837 . AB=0.479167;ABP=6.26751;AC=2;AF=0.5;AN=4;AO=414;CIGAR=1X;DP=864;DPB=864;DPRA=0;EPP=192.358;EPPR=179.441;GTI=0;LEN=1;MEANALT=1.5;MQM=54.1957;MQMR=53.5924;NS=2;NUMALT=1;ODDS=622.768;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=14380;QR=15965;RO=449;RPL=85;RPP=315.283;RPPR=358.189;RPR=329;RUN=1;SAF=309;SAP=221.29;SAR=105;SRF=337;SRP=247.845;SRR=112;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:338:249:8721:89:3010:-252.807,0,-766.809 0/1:526:200:7244:325:11370:-1015.56,0,-644.23
-chrM 7028 . C T 76141.7 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2473;CIGAR=1X;DP=2499;DPB=2499;DPRA=0;EPP=3.74876;EPPR=34.9902;GTI=0;LEN=1;MEANALT=2.5;MQM=55.905;MQMR=57.6364;NS=2;NUMALT=1;ODDS=1210.59;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=85103;QR=439;RO=22;RPL=1260;RPP=4.94996;RPPR=4.58955;RPR=1213;RUN=1;SAF=1102;SAP=66.5485;SAR=1371;SRF=9;SRP=4.58955;SRR=13;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:827:6:107:820:28371:-2543.94,-224.334,0 1/1:1672:16:332:1653:56732:-5076.14,-434.286,0
-chrM 7269 . G A 62196.6 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1937;CIGAR=1X;DP=1947;DPB=1947;DPRA=0;EPP=54.8308;EPPR=5.18177;GTI=0;LEN=1;MEANALT=3;MQM=58.2685;MQMR=60;NS=2;NUMALT=1;ODDS=1033.49;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=69240;QR=16;RO=1;RPL=1011;RPP=11.1099;RPPR=5.18177;RPR=926;RUN=1;SAF=933;SAP=8.66151;SAR=1004;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:704:1:16:698:24364:-2191.49,-206.139,0 1/1:1243:0:0:1239:44876:-4039.06,-372.976,0
-chrM 8557 . G C 2590.97 . AB=0.267066;ABP=790.051;AC=2;AF=0.5;AN=4;AO=446;CIGAR=1X;DP=1670;DPB=1670;DPRA=0;EPP=44.2196;EPPR=97.7883;GTI=0;LEN=1;MEANALT=3;MQM=57.6951;MQMR=59.5256;NS=2;NUMALT=1;ODDS=125.064;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=6303;QR=38747;RO=1212;RPL=177;RPP=44.2196;RPPR=385.426;RPR=269;RUN=1;SAF=2;SAP=954.193;SAR=444;SRF=906;SRP=648.002;SRR=306;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:724:538:17225:181:2490:-182.373,0,-1508.7 0/1:946:674:21522:265:3813:-301.57,0,-1895.55
-chrM 8860 . A G 55525 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1846;CIGAR=1X;DP=1861;DPB=1861;DPRA=0;EPP=5.72052;EPPR=6.91895;GTI=0;LEN=1;MEANALT=3;MQM=47.1728;MQMR=58.6;NS=2;NUMALT=1;ODDS=1039.99;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=61929;QR=160;RO=5;RPL=984;RPP=20.5185;RPPR=6.91895;RPR=862;RUN=1;SAF=987;SAP=22.283;SAR=859;SRF=2;SRP=3.44459;SRR=3;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:729:0:0:726:24114:-2170.46,-218.548,0 1/1:1132:5:160:1120:37815:-3389.07,-311.012,0
-chrM 9477 . G A 34109.5 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1099;CIGAR=1X;DP=1104;DPB=1104;DPRA=0;EPP=9.42988;EPPR=3.0103;GTI=0;LEN=1;MEANALT=2;MQM=59.3794;MQMR=60;NS=2;NUMALT=1;ODDS=565.855;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=38032;QR=67;RO=2;RPL=598;RPP=21.6012;RPPR=7.35324;RPR=501;RUN=1;SAF=542;SAP=3.45487;SAR=557;SRF=1;SRP=3.0103;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:401:2:67:398:13308:-1191.76,-109.337,0 1/1:703:0:0:701:24724:-2225.39,-211.022,0
-chrM 9548 . G A 26846.1 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=942;CIGAR=1X;DP=970;DPB=970;DPRA=0;EPP=3.04718;EPPR=3.73412;GTI=0;LEN=1;MEANALT=3;MQM=59.6921;MQMR=60;NS=2;NUMALT=1;ODDS=502.835;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=29956;QR=66;RO=3;RPL=524;RPP=28.9112;RPPR=3.73412;RPR=418;RUN=1;SAF=487;SAP=5.3708;SAR=455;SRF=3;SRP=9.52472;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:364:1:38:350:10822:-970.712,-99.6786,0 1/1:606:2:28:592:19134:-1719.73,-171.045,0
-chrM 11467 . A G 164822 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=5200;CIGAR=1X;DP=5225;DPB=5225;DPRA=0;EPP=350.339;EPPR=4.78696;GTI=0;LEN=1;MEANALT=2.5;MQM=59.9342;MQMR=53.6364;NS=2;NUMALT=1;ODDS=2859.91;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=187277;QR=283;RO=11;RPL=3887;RPP=2769.75;RPPR=3.20771;RPR=1313;RUN=1;SAF=2257;SAP=199.527;SAR=2943;SRF=6;SRP=3.20771;SRR=5;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:2016:2:46:2008:71984:-6474.61,-594.606,0 1/1:3209:9:237:3192:115293:-10355.2,-916.222,0
-chrM 11719 . G A 95624.7 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=3302;CIGAR=1X;DP=3356;DPB=3356;DPRA=0;EPP=179.466;EPPR=18.4661;GTI=0;LEN=1;MEANALT=2;MQM=59.5924;MQMR=58.2353;NS=2;NUMALT=1;ODDS=1506.69;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=106982;QR=483;RO=17;RPL=1766;RPP=37.7986;RPPR=6.20364;RPR=1536;RUN=1;SAF=1728;SAP=18.6065;SAR=1574;SRF=3;SRP=18.4661;SRR=14;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:911:4:122:891:28560:-2559.58,-247.982,0 1/1:2445:13:361:2411:78422:-7025.63,-662.959,0
-chrM 12308 . A G 67204.7 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2144;CIGAR=1X;DP=2161;DPB=2161;DPRA=0;EPP=8.55647;EPPR=3.87889;GTI=0;LEN=1;MEANALT=2;MQM=59.9664;MQMR=59.7;NS=2;NUMALT=1;ODDS=949.477;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=75192;QR=257;RO=10;RPL=1284;RPP=185.09;RPPR=3.87889;RPR=860;RUN=1;SAF=1005;SAP=21.1964;SAR=1139;SRF=4;SRP=3.87889;SRR=6;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:635:7:216:628:21603:-1924.87,-155.503,0 1/1:1526:3:41:1516:53589:-4819.52,-444.815,0
-chrM 12372 . G A 62064 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1984;CIGAR=1X;DP=1992;DPB=1992;DPRA=0;EPP=10.3697;EPPR=4.45795;GTI=0;LEN=1;MEANALT=2;MQM=59.9919;MQMR=60;NS=2;NUMALT=1;ODDS=933.299;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=69281;QR=192;RO=6;RPL=861;RPP=78.1406;RPPR=4.45795;RPR=1123;RUN=1;SAF=1010;SAP=4.42876;SAR=974;SRF=4;SRP=4.45795;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:634:5:155:628:21590:-1929.21,-164.556,0 1/1:1358:1:37:1356:47691:-4288.95,-401.925,0
-chrM 13617 . T C 28593.6 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=901;CIGAR=1X;DP=906;DPB=906;DPRA=0;EPP=251.346;EPPR=3.73412;GTI=0;LEN=1;MEANALT=2;MQM=59.9034;MQMR=60;NS=2;NUMALT=1;ODDS=462.343;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=32868;QR=92;RO=3;RPL=674;RPP=484.564;RPPR=9.52472;RPR=227;RUN=1;SAF=339;SAP=122.861;SAR=562;SRF=2;SRP=3.73412;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:303:1:14:301:10938:-983.358,-87.1961,0 1/1:603:2:78:600:21930:-1966.72,-168.809,0
-chrM 14766 . C T 60668.6 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2022;CIGAR=1X;DP=2039;DPB=2039;DPRA=0;EPP=13.3243;EPPR=19.0002;GTI=0;LEN=1;MEANALT=2.5;MQM=59.9782;MQMR=60;NS=2;NUMALT=1;ODDS=954.02;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=67719;QR=140;RO=11;RPL=989;RPP=5.08941;RPPR=12.6832;RPR=1033;RUN=1;SAF=1199;SAP=154.837;SAR=823;SRF=1;SRP=19.0002;SRR=10;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:637:6:78:628:20191:-1810.35,-169.906,0 1/1:1402:5:62:1394:47528:-4272.15,-401.924,0
-chrM 14793 . A G 58080 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=1967;CIGAR=1X;DP=1998;DPB=1998;DPRA=0;EPP=8.57532;EPPR=5.80219;GTI=0;LEN=1;MEANALT=3;MQM=59.9736;MQMR=59.2857;NS=2;NUMALT=1;ODDS=930.516;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=64876;QR=104;RO=7;RPL=1133;RPP=101.705;RPPR=3.32051;RPR=834;RUN=1;SAF=1124;SAP=90.1794;SAR=843;SRF=1;SRP=10.7656;SRR=6;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:600:4:62:589:19341:-1735.29,-163.219,0 1/1:1398:3:42:1378:45535:-4094.54,-403.304,0
-chrM 15301 . G A 76440.4 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2590;CIGAR=1X;DP=2644;DPB=2644;DPRA=0;EPP=3.76487;EPPR=7.94546;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=1170.15;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=85385;QR=292;RO=11;RPL=1134;RPP=89.9396;RPPR=4.78696;RPR=1456;RUN=1;SAF=1194;SAP=37.2206;SAR=1396;SRF=3;SRP=7.94546;SRR=8;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:726:5:116:709:23434:-2098.78,-192.286,0 1/1:1918:6:176:1881:61951:-5559.91,-535.386,0
-chrM 15326 . A G 79542.1 . AB=0;ABP=0;AC=4;AF=1;AN=4;AO=2574;CIGAR=1X;DP=2586;DPB=2586;DPRA=0;EPP=3.76956;EPPR=5.18177;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=1207.35;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=88636;QR=86;RO=4;RPL=1116;RPP=101.683;RPPR=5.18177;RPR=1458;RUN=1;SAF=1198;SAP=29.7395;SAR=1376;SRF=0;SRP=11.6962;SRR=4;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 1/1:715:2:50:710:24322:-2184.63,-204.389,0 1/1:1871:2:36:1864:64314:-5785.22,-552.228,0
+chrM 73 . A G 30438.6 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=949;CIGAR=1X;DP=950;DPB=950;DPRA=0;EPP=144.879;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=55.8314;MQMR=60;NS=2;NUMALT=1;ODDS=2187.64;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=34948;QR=37;RO=1;RPL=322;RPP=215.867;RPPR=5.18177;RPR=627;RUN=1;SAF=421;SAP=29.2075;SAR=528;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:226:0,226:0:0:226:8314:-739.462,0 1:724:1,723:1:37:723:26634:-2346.86,0
+chrM 263 . A G 11603.3 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=364;CIGAR=1X;DP=364;DPB=364;DPRA=0;EPP=16.755;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=2;NUMALT=1;ODDS=982.556;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=13201;QR=0;RO=0;RPL=276;RPP=213.858;RPPR=0;RPR=88;RUN=1;SAF=172;SAP=5.39653;SAR=192;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:108:0,108:0:0:108:3896:-350.812,0 1:256:0,256:0:0:256:9305:-837.344,0
+chrM 310 . TCC CCC 4471 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=150;CIGAR=1X2M;DP=171;DPB=180.333;DPRA=0;EPP=7.70068;EPPR=4.45795;GTI=0;LEN=1;MEANALT=2.5;MQM=59.88;MQMR=60;NS=2;NUMALT=1;ODDS=407.373;PAIRED=1;PAIREDR=1;PAO=11;PQA=237;PQR=0;PRO=0;QA=5147;QR=194;RO=6;RPL=113;RPP=86.6265;RPPR=8.80089;RPR=37;RUN=1;SAF=35;SAP=95.6598;SAR=115;SRF=1;SRP=8.80089;SRR=5;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:54:0,49:0:0:49:1645:-150.867,0 1:117:6,101:6:194:101:3502:-315.963,0
+chrM 513 . GCACACACACAC GCACACACACACAC 2095.84 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=109;CIGAR=1M2I11M;DP=150;DPB=172.167;DPRA=0;EPP=109.173;EPPR=6.05036;GTI=0;LEN=2;MEANALT=3.5;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=225.991;PAIRED=1;PAIREDR=1;PAO=5;PQA=101.5;PQR=101.5;PRO=5;QA=3824;QR=1196;RO=35;RPL=19;RPP=103.436;RPPR=4.56135;RPR=90;RUN=1;SAF=80;SAP=54.8268;SAR=29;SRF=24;SRP=13.4954;SRR=11;TYPE=ins;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:34:5,28:5:163:28:996:-74.9521,0 1:116:30,81:30:1033:81:2828:-161.475,0
+chrM 750 . A G 47254.6 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1459;CIGAR=1X;DP=1459;DPB=1459;DPRA=0;EPP=48.5904;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=59.8705;MQMR=0;NS=2;NUMALT=1;ODDS=3230.11;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=53335;QR=0;RO=0;RPL=594;RPP=112.315;RPPR=0;RPR=865;RUN=1;SAF=1007;SAP=461.453;SAR=452;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:331:0,331:0:0:331:11988:-1073.7,0 1:1128:0,1128:0:0:1128:41347:-3719.34,0
+chrM 1438 . A G 75374.8 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2237;CIGAR=1X;DP=2238;DPB=2238;DPRA=0;EPP=47.8812;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=59.8605;MQMR=60;NS=2;NUMALT=1;ODDS=5003.07;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=84494;QR=37;RO=1;RPL=1397;RPP=304.171;RPPR=5.18177;RPR=840;RUN=1;SAF=925;SAP=148.392;SAR=1312;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:488:0,488:0:0:488:18416:-1656.39,0 1:1750:1,1749:1:37:1749:66078:-5935.03,0
+chrM 2706 . A G 39098.5 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1222;CIGAR=1X;DP=1235;DPB=1235;DPRA=0;EPP=3.188;EPPR=3.17734;GTI=0;LEN=1;MEANALT=1;MQM=59.9926;MQMR=60;NS=2;NUMALT=1;ODDS=2496.46;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=45333;QR=474;RO=13;RPL=507;RPP=79.8897;RPPR=3.17734;RPR=715;RUN=1;SAF=225;SAP=1062.06;SAR=997;SRF=5;SRP=4.51363;SRR=8;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:246:1,245:1:32:245:9136:-818.843,0 1:989:12,977:12:442:977:36197:-3215.79,0
+chrM 3197 . T C 130583 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=3879;CIGAR=1X;DP=3892;DPB=3892;DPRA=0;EPP=59.2643;EPPR=3.17734;GTI=0;LEN=1;MEANALT=1;MQM=59.9938;MQMR=60;NS=2;NUMALT=1;ODDS=12192.8;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=146498;QR=463;RO=13;RPL=2190;RPP=143.521;RPPR=7.18621;RPR=1689;RUN=1;SAF=1459;SAP=519.999;SAR=2420;SRF=7;SRP=3.17734;SRR=6;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:1358:4,1354:4:150:1354:50893:-4563.63,0 1:2534:9,2525:9:313:2525:95605:-8569.92,0
+chrM 3243 . A G 10397.4 . AB=0;ABP=0;AC=1;AF=0.5;AN=2;AO=1365;CIGAR=1X;DP=3092;DPB=3092;DPRA=0;EPP=116.418;EPPR=46.9792;GTI=0;LEN=1;MEANALT=1;MQM=59.956;MQMR=59.8917;NS=2;NUMALT=1;ODDS=2394.09;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=49004;QR=63273;RO=1727;RPL=849;RPP=179.415;RPPR=105.14;RPR=516;RUN=1;SAF=443;SAP=368.01;SAR=922;SRF=637;SRP=261.033;SRR=1090;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:1035:341,694:341:12439:694:24871:-1118.32,0 0:2057:1386,671:1386:50834:671:24133:0,-2399.65
+chrM 4769 . A G 48890.4 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1545;CIGAR=1X;DP=1545;DPB=1545;DPRA=0;EPP=162.63;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=51.4796;MQMR=0;NS=2;NUMALT=1;ODDS=4468.76;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=57939;QR=0;RO=0;RPL=463;RPP=541.537;RPPR=0;RPR=1082;RUN=1;SAF=890;SAP=80.6281;SAR=655;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:517:0,517:0:0:517:19209:-1665.33,0 1:1028:0,1028:0:0:1028:38730:-3308.01,0
+chrM 5539 . A G 3281.62 . AB=0;ABP=0;AC=1;AF=0.5;AN=2;AO=379;CIGAR=1X;DP=787;DPB=787;DPRA=0;EPP=216.428;EPPR=224.5;GTI=0;LEN=1;MEANALT=1;MQM=54.1504;MQMR=53.777;NS=2;NUMALT=1;ODDS=755.62;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=13803;QR=15250;RO=408;RPL=74;RPP=308.741;RPPR=351.808;RPR=305;RUN=1;SAF=286;SAP=216.428;SAR=93;SRF=318;SRP=279.681;SRR=90;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0:299:221,78:221:8252:78:2824:0,-485.016 1:488:187,301:187:6998:301:10979:-358.012,0
+chrM 7028 . C T 70453.4 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2109;CIGAR=1X;DP=2114;DPB=2114;DPRA=0;EPP=63.8084;EPPR=3.44459;GTI=0;LEN=1;MEANALT=1;MQM=55.9113;MQMR=59.2;NS=2;NUMALT=1;ODDS=6416.42;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=78988;QR=192;RO=5;RPL=1123;RPP=22.3353;RPPR=3.44459;RPR=986;RUN=1;SAF=969;SAP=33.1175;SAR=1140;SRF=3;SRP=3.44459;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:708:1,707:1:39:707:26465:-2367.64,0 1:1406:4,1402:4:153:1402:52523:-4692.5,0
+chrM 7269 . G A 59443.2 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1773;CIGAR=1X;DP=1773;DPB=1773;DPRA=0;EPP=129.209;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=58.3328;MQMR=0;NS=2;NUMALT=1;ODDS=5563.82;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=66393;QR=0;RO=0;RPL=929;RPP=11.8591;RPPR=0;RPR=844;RUN=1;SAF=848;SAP=10.2718;SAR=925;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:619:0,619:0:0:619:23040:-2069.26,0 1:1154:0,1154:0:0:1154:43353:-3894.52,0
+chrM 8860 . A G 48674.5 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1552;CIGAR=1X;DP=1556;DPB=1556;DPRA=0;EPP=36.1924;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=46.5528;MQMR=58.25;NS=2;NUMALT=1;ODDS=4986.6;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=56983;QR=146;RO=4;RPL=845;RPP=29.6556;RPPR=5.18177;RPR=707;RUN=1;SAF=844;SAP=28.8889;SAR=708;SRF=2;SRP=3.0103;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:598:0,598:0:0:598:21921:-1876.69,0 1:958:4,954:4:146:954:35062:-3002.8,0
+chrM 9477 . G A 31596.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=944;CIGAR=1X;DP=946;DPB=946;DPRA=0;EPP=58.9901;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=59.3178;MQMR=60;NS=2;NUMALT=1;ODDS=2909.29;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=35363;QR=67;RO=2;RPL=521;RPP=25.1023;RPPR=7.35324;RPR=423;RUN=1;SAF=469;SAP=3.09311;SAR=475;SRF=1;SRP=3.0103;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:329:2,327:2:67:327:12108:-1080.66,0 1:617:0,617:0:0:617:23255:-2090.4,0
+chrM 9548 . G A 23689.1 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=729;CIGAR=1X;DP=730;DPB=730;DPRA=0;EPP=65.6375;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=59.7133;MQMR=60;NS=2;NUMALT=1;ODDS=2252;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=26539;QR=38;RO=1;RPL=394;RPP=13.3792;RPPR=5.18177;RPR=335;RUN=1;SAF=339;SAP=10.7579;SAR=390;SRF=1;SRP=5.18177;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:261:1,260:1:38:260:9395:-838.704,0 1:469:0,469:0:0:469:17144:-1542,0
+chrM 11467 . A G 157655 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=4755;CIGAR=1X;DP=4759;DPB=4759;DPRA=0;EPP=620.69;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=59.9394;MQMR=42.5;NS=2;NUMALT=1;ODDS=15848.9;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=179486;QR=151;RO=4;RPL=3558;RPP=2548.64;RPPR=3.0103;RPR=1197;RUN=1;SAF=1995;SAP=270.266;SAR=2760;SRF=2;SRP=3.0103;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:1820:0,1820:0:0:1820:68685:-6177.29,0 1:2939:4,2935:4:151:2935:110801:-9948.16,0
+chrM 11719 . G A 86257.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2676;CIGAR=1X;DP=2687;DPB=2687;DPRA=0;EPP=15.4873;EPPR=12.6832;GTI=0;LEN=1;MEANALT=1;MQM=59.6039;MQMR=60;NS=2;NUMALT=1;ODDS=6603.9;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=96531;QR=397;RO=11;RPL=1384;RPP=9.87851;RPPR=4.78696;RPR=1292;RUN=1;SAF=1292;SAP=9.87851;SAR=1384;SRF=2;SRP=12.6832;SRR=9;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:714:3,711:3:109:711:25643:-2282.19,0 1:1973:8,1965:8:288:1965:70888:-6347.71,0
+chrM 12308 . A G 63434.1 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1900;CIGAR=1X;DP=1905;DPB=1905;DPRA=0;EPP=73.302;EPPR=3.44459;GTI=0;LEN=1;MEANALT=1;MQM=59.9621;MQMR=60;NS=2;NUMALT=1;ODDS=5056.51;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=71094;QR=184;RO=5;RPL=1169;RPP=222.265;RPPR=3.44459;RPR=731;RUN=1;SAF=907;SAP=11.463;SAR=993;SRF=3;SRP=3.44459;SRR=2;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:547:5,542:5:184:542:20196:-1799.85,0 1:1358:0,1358:0:0:1358:50898:-4577.25,0
+chrM 12372 . G A 58077.1 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1737;CIGAR=1X;DP=1741;DPB=1741;DPRA=0;EPP=74.4189;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=4910.27;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=64920;QR=149;RO=4;RPL=775;RPP=46.726;RPPR=3.0103;RPR=962;RUN=1;SAF=941;SAP=29.2942;SAR=796;SRF=3;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:539:3,536:3:112:536:19922:-1781.72,0 1:1202:1,1201:1:37:1201:44998:-4043.68,0
+chrM 13617 . T C 27663.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=846;CIGAR=1X;DP=848;DPB=848;DPRA=0;EPP=303.228;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=59.8972;MQMR=60;NS=2;NUMALT=1;ODDS=2549.91;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=31932;QR=78;RO=2;RPL=638;RPP=477.603;RPPR=7.35324;RPR=208;RUN=1;SAF=314;SAP=124.993;SAR=532;SRF=1;SRP=3.0103;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:284:0,284:0:0:284:10656:-956.335,0 1:564:2,562:2:78:562:21276:-1906.39,0
+chrM 14766 . C T 54363.3 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1633;CIGAR=1X;DP=1633;DPB=1633;DPRA=0;EPP=21.2132;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=59.9853;MQMR=0;NS=2;NUMALT=1;ODDS=4408.47;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=61054;QR=0;RO=0;RPL=852;RPP=9.71354;RPPR=0;RPR=781;RUN=1;SAF=1113;SAP=470.614;SAR=520;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:473:0,473:0:0:473:17508:-1575.01,0 1:1160:0,1160:0:0:1160:43546:-3916.74,0
+chrM 14793 . A G 52354.7 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1605;CIGAR=1X;DP=1605;DPB=1605;DPRA=0;EPP=123.965;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=59.9688;MQMR=0;NS=2;NUMALT=1;ODDS=4357.82;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=58638;QR=0;RO=0;RPL=950;RPP=120.75;RPPR=0;RPR=655;RUN=1;SAF=940;SAP=105.327;SAR=665;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:475:0,475:0:0:475:17313:-1557.59,0 1:1130:0,1130:0:0:1130:41325:-3717.26,0
+chrM 15301 . G A 67202.5 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2029;CIGAR=1X;DP=2036;DPB=2036;DPRA=0;EPP=73.6971;EPPR=4.45795;GTI=0;LEN=1;MEANALT=1.5;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=5288.64;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=75132;QR=224;RO=6;RPL=962;RPP=14.8095;RPPR=3.0103;RPR=1067;RUN=1;SAF=1129;SAP=59.1336;SAR=900;SRF=1;SRP=8.80089;SRR=5;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:563:2,561:2:75:561:20714:-1856.3,0 1:1473:4,1468:4:149:1468:54418:-4880.92,0
+chrM 15326 . A G 73749.6 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=2200;CIGAR=1X;DP=2201;DPB=2201;DPRA=0;EPP=68.7112;EPPR=5.18177;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=2;NUMALT=1;ODDS=5718.37;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=82190;QR=38;RO=1;RPL=1013;RPP=32.8937;RPPR=5.18177;RPR=1187;RUN=1;SAF=1088;SAP=3.57883;SAR=1112;SRF=0;SRP=5.18177;SRR=1;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:597:1,596:1:38:596:22276:-2000.05,0 1:1604:0,1604:0:0:1604:59914:-5388.94,0
```
# Filtering VCF data
After filtering the data with stringent input parameters (restricting base quality to a minimum of 30 and mapping quality to a minimum of 20) a considerable amount variants due to read-alignment bias exits. [Erik Garrison](https://github.com/ekg) has a beautiful illustration of various biases potentially affecting called variants (and making a locus sequence-able):
-| |
-|----------------------------|
-|![Various biases potentially affecting called variants](../../images/mt_biases.png)|
-|Here you can see that in an ideal case (indicated with a green star) a variant is evenly represent by different areas of sequencing reads (cycle and placement biases) and is balanced across the two strands (strand bias). Allele imbalance is not applicable in our case as it reflects significant deviation from the diploid (50/50) expectation (see [here](../../images/freebayes.pdf) for more details).|
+
+
+![Various biases potentially affecting called variants](../../images/mt_biases.png)
+Here you can see that in an ideal case (indicated with a green star) a variant is evenly represented by different areas of sequencing reads (cycle and placement biases) and is balanced across the two strands (strand bias). Allele imbalance is not applicable in our case as it reflects significant deviation from the diploid (50/50) expectation (see [here](../../images/freebayes.pdf) for more details).
A robust tool set for processing VCF data is provided by [vcflib](https://github.com/vcflib/vcflib) developed by Erik Garrison, the author of FreeBayes. One way to filter VCF is using `INFO` fields of the VCF dataset. If you look at the VCF dataset shown above you will see all comment lines beginning with `##INFO`. These are `INFO` fields. Each VCF record contains a list of `INFO` tags describing a wide range of properties for each VCF record. You will see that FreeBayes and NVC differ significantly in the number and types of `INFO` fields each of these caller generates. This why the two require different filtering strategies.
@@ -364,15 +415,12 @@ To perform filtering we will use **NGS: VCF Manipulation** → **VCFfilter**
>Filtering FreeBayes VCF for strand bias (`SPR` and `SAP`), placement bias (`EPP`), variant quality (`QUAL`), and depth of coverage (`DP`).
{: .hands_on}
-The resulting VCF only contains five variants (most comments fields are omitted here):
+The resulting VCF only contains two variants (most comment fields are omitted here):
```
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT raw_child-ds- raw_mother-ds-
-chrM 3243 . A G 46067 . AB=0.612338;ABP=290.859;AC=2;AF=0.5;AN=4;AO=1608;CIGAR=1X;DP=2626;DPB=2626;DPRA=0;EPP=31.0126;EPPR=64.3549;GTI=0;LEN=1;MEANALT=2;MQM=59.9627;MQMR=59.815;NS=2;NUMALT=1;ODDS=1288.98;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=53165;QR=35336;RO=1011;RPL=974;RPP=159.119;RPPR=763.402;RPR=634;RUN=1;SAF=558;SAP=329.898;SAR=1050;SRF=383;SRP=131.935;SRR=628;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:1068:221:7574:841:27395:-2380.4,0,-596.524 0/1:1558:790:27762:767:25770:-2317.69,0,-2496.98
-chrM 3483 . G C 685.467 . AB=0.254386;ABP=182.214;AC=1;AF=0.25;AN=4;AO=127;CIGAR=1X;DP=550;DPB=550;DPRA=0;EPP=37.6342;EPPR=22.2028;GTI=1;LEN=1;MEANALT=1.5;MQM=59.4646;MQMR=59.8504;NS=2;NUMALT=1;ODDS=25.0865;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2032;QR=13200;RO=421;RPL=87;RPP=40.7802;RPPR=245.89;RPR=40;RUN=1;SAF=1;SAP=270.17;SAR=126;SRF=321;SRP=254.927;SRR=100;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:208:166:5264:40:608:-35.5966,0,-454.8 0/1:342:255:7936:87:1424:-108.297,0,-694.524
-chrM 3488 . T A 682.097 . AB=0.264706;ABP=166.509;AC=1;AF=0.25;AN=4;AO=130;CIGAR=1X;DP=546;DPB=546;DPRA=0;EPP=44.7694;EPPR=34.7681;GTI=1;LEN=1;MEANALT=1;MQM=59.4231;MQMR=59.7139;NS=2;NUMALT=1;ODDS=17.5994;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2069;QR=13578;RO=416;RPL=90;RPP=44.7694;RPPR=211.806;RPR=40;RUN=1;SAF=0;SAP=285.302;SAR=130;SRF=315;SRP=242.06;SRR=101;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/0:206:166:5535:40:650:-39.5324,0,-479.353 0/1:340:250:8043:90:1419:-109.544,0,-705.868
-chrM 5539 . A G 11837 . AB=0.479167;ABP=6.26751;AC=2;AF=0.5;AN=4;AO=414;CIGAR=1X;DP=864;DPB=864;DPRA=0;EPP=192.358;EPPR=179.441;GTI=0;LEN=1;MEANALT=1.5;MQM=54.1957;MQMR=53.5924;NS=2;NUMALT=1;ODDS=622.768;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=14380;QR=15965;RO=449;RPL=85;RPP=315.283;RPPR=358.189;RPR=329;RUN=1;SAF=309;SAP=221.29;SAR=105;SRF=337;SRP=247.845;SRR=112;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:338:249:8721:89:3010:-252.807,0,-766.809 0/1:526:200:7244:325:11370:-1015.56,0,-644.23
-chrM 8557 . G C 2590.97 . AB=0.267066;ABP=790.051;AC=2;AF=0.5;AN=4;AO=446;CIGAR=1X;DP=1670;DPB=1670;DPRA=0;EPP=44.2196;EPPR=97.7883;GTI=0;LEN=1;MEANALT=3;MQM=57.6951;MQMR=59.5256;NS=2;NUMALT=1;ODDS=125.064;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=6303;QR=38747;RO=1212;RPL=177;RPP=44.2196;RPPR=385.426;RPR=269;RUN=1;SAF=2;SAP=954.193;SAR=444;SRF=906;SRP=648.002;SRR=306;TYPE=snp;technology.ILLUMINA=1 GT:DP:RO:QR:AO:QA:GL 0/1:724:538:17225:181:2490:-182.373,0,-1508.7 0/1:946:674:21522:265:3813:-301.57,0,-1895.55
+chrM 3243 . A G 10397.4 . AB=0;ABP=0;AC=1;AF=0.5;AN=2;AO=1365;CIGAR=1X;DP=3092;DPB=3092;DPRA=0;EPP=116.418;EPPR=46.9792;GTI=0;LEN=1;MEANALT=1;MQM=59.956;MQMR=59.8917;NS=2;NUMALT=1;ODDS=2394.09;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=49004;QR=63273;RO=1727;RPL=849;RPP=179.415;RPPR=105.14;RPR=516;RUN=1;SAF=443;SAP=368.01;SAR=922;SRF=637;SRP=261.033;SRR=1090;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 1:1035:341,694:341:12439:694:24871:-1118.32,0 0:2057:1386,671:1386:50834:671:24133:0,-2399.65
+chrM 5539 . A G 3281.62 . AB=0;ABP=0;AC=1;AF=0.5;AN=2;AO=379;CIGAR=1X;DP=787;DPB=787;DPRA=0;EPP=216.428;EPPR=224.5;GTI=0;LEN=1;MEANALT=1;MQM=54.1504;MQMR=53.777;NS=2;NUMALT=1;ODDS=755.62;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=13803;QR=15250;RO=408;RPL=74;RPP=308.741;RPPR=351.808;RPR=305;RUN=1;SAF=286;SAP=216.428;SAR=93;SRF=318;SRP=279.681;SRR=90;TYPE=snp;technology.ILLUMINA=1 GT:DP:AD:RO:QR:AO:QA:GL 0:299:221,78:221:8252:78:2824:0,-485.016 1:488:187,301:187:6998:301:10979:-358.012,0
```
# Looking at the data
@@ -400,7 +448,7 @@ VCF.IOBIO can be invoked by expanding a VCF dataset in Galaxy's history by click
## IGV
-Similarly to VCF.BIOIO expanding a history item representing a VCF dataset will reveal an IGV link:
+Similarly to VCF.IOBIO, expanding a history item representing a VCF dataset will reveal an IGV link:
> ### Displaying data in IGV
>
@@ -420,7 +468,7 @@ Similarly to VCF.BIOIO expanding a history item representing a VCF dataset will
# Digging into the data
-Visualizing VCF dataset may be a good way to get an overall idea of the data, but it does not tell a lot of details. For example, above we have visualized site 3,243 using IGV. It is interesting but we need to find out more. One thing we can do is to convert VCF dataset into a tab-delimited representation and play a bit more with it.
+Visualizing a VCF dataset may be a good way to get an overall idea of the data, but it does not reveal many details. For example, above we have visualized site 3,243 using IGV. It is interesting, but we need to find out more. One thing we can do is to convert the VCF dataset into a tab-delimited representation and play with it a bit more.
Using **NGS: VCF Manipulation** → **VCFtoTab-delimited** on the filtered VCF dataset:
@@ -434,7 +482,7 @@ Using **NGS: VCF Manipulation** → **VCFtoTab-delimited** on the filtered V
>
>![VCFtoTab-delimited output](../../images/mt_tab.png)
>
->There are 53 columns in this dataset (not all are shown here).
+>There are 62 columns in this dataset (not all are shown here).
{: .hands_on}
The columns in the above dataset represent INFO and Genotype fields on the original VCF dataset. Let's restrict ourselves to just a few:
@@ -442,9 +490,9 @@ The columns in the above dataset represent INFO and Genotype fields on the origi
* 2 `POS` - position along mitochondrial genome
* 4 `REF` - reference allele
* 5 `ALT` - alternative allele
-* 50 `SAMPLE` - name of the sample
-* 51 `AO` - number of alternative observations (how many times do we see the alternative allele at this position in this sample)
-* 52 `DP` - depth of coverage at this site for this sample
+* 52 `SAMPLE` - name of the sample
+* 54 `AO` - number of alternative observations (how many times do we see the alternative allele at this position in this sample)
+* 55 `DP` - depth of coverage at this site for this sample
To cut these columns out we will use **Text Manipulation** → **Cut**
@@ -452,41 +500,34 @@ To cut these columns out we will use **Text Manipulation** → **Cut**
>
>![Cut tool options](../../images/mt_cut.png)
>
->Note that column names are pre-ceded with `c`
+>Note that column names are preceded with `c`
>
>This will generate the following dataset:
>
>```
>POS REF ALT SAMPLE AO DP
>--------------------------------------
->3243 A G raw_child-ds- 841 1068
->3243 A G raw_mother-ds- 767 1558
->3483 G C raw_child-ds- 40 208
->3483 G C raw_mother-ds- 87 342
->3488 T A raw_child-ds- 40 206
->3488 T A raw_mother-ds- 90 340
->5539 A G raw_child-ds- 89 338
->5539 A G raw_mother-ds- 325 526
->8557 G C raw_child-ds- 181 724
->8557 G C raw_mother-ds- 265 946
->
+>3243 A G raw_child-ds- 694 1035
+>3243 A G raw_mother-ds- 671 2057
+>5539 A G raw_child-ds- 78 299
+>5539 A G raw_mother-ds- 301 488
>```
{: .hands_on}
-Let's look at site 4,243. At this site Mother has 841 `G`s (since `G` is an alternative allele) and 1,068-841=227 `A`s. This child has 767 `G`s and 1,558-767=791 `A`s:
+Let's look at site 3,243. At this site Mother has 671 `G`s (since `G` is an alternative allele) and 2,057-671=1,386 `A`s. The child has 694 `G`s and 1,035-694=341 `A`s:
```
Allele A G
-------------------
-Mother 227 841
-Child 791 767
+Mother 1386 671
+Child 341 694
```
-Thus the *major* allele in mother (`G`) becomes the *minor* allele in child -- a remarkable frequency change due to mitochondrial bottleneck!
+Thus the *major* allele in mother (`A`) becomes the *minor* allele in child -- a remarkable frequency change due to mitochondrial bottleneck!
# Take a look at the whole thing
This entire analysis is available as a [Galaxy history](https://usegalaxy.org/u/aun1/h/non-diploid-freebayes) that you can import into your Galaxy account and play with.
-Now you know how to call variants in non-diploid system, so try it on bacteria, viruses etc...
+Now you know how to call variants in non-diploid systems, so try it on bacteria, viruses, etc.
diff --git a/topics/variant-analysis/tutorials/non-dip/workflows/Calling_variants_in_non-diploid_systems.ga b/topics/variant-analysis/tutorials/non-dip/workflows/Calling_variants_in_non-diploid_systems.ga
new file mode 100644
index 00000000000000..2f1c763cb39cf9
--- /dev/null
+++ b/topics/variant-analysis/tutorials/non-dip/workflows/Calling_variants_in_non-diploid_systems.ga
@@ -0,0 +1,848 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "variant-analysis",
+ "format-version": "0.1",
+ "name": "Calling variants in non-diploid systems",
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "raw_child-ds-1.fq"
+ }
+ ],
+ "label": "raw_child-ds-1.fq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 301.5
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "2dcb08e6-5039-4144-a603-47a5c91c3a29",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "bcc1e2df-5a6b-4aaa-856c-ed31856351d2"
+ }
+ ]
+ },
+ "1": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "raw_child-ds-2.fq"
+ }
+ ],
+ "label": "raw_child-ds-2.fq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 383.234375
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "36a31c5b-b283-4009-9bbd-b47ec72a2915",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "97a549ba-b426-48eb-a4bd-249fa8d08b3a"
+ }
+ ]
+ },
+ "2": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 2,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "raw_mother-ds-1.fq"
+ }
+ ],
+ "label": "raw_mother-ds-1.fq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 465
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "699c61d4-2f31-4853-9e1b-a49f11bb1bd3",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "d6550e2e-9e6e-45a8-98b9-374f0d41dc9f"
+ }
+ ]
+ },
+ "3": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 3,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "raw_mother-ds-2.fq"
+ }
+ ],
+ "label": "raw_mother-ds-2.fq",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 200,
+ "top": 546.75
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "3f624810-6b7e-4f63-aa97-6f391b2f8612",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output",
+ "uuid": "ce1307f3-c166-4439-81c3-6fb06eb760d9"
+ }
+ ]
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input_file": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 301.5
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": null, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"adapters\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": null, \"chromInfo\": \"/galaxy/server/database/tool-data/shared/ucsc/chrom/?.len\", \"__input_ext\": \"fastqsanger\", \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "71a54431-4e1a-4472-b1bd-f8f9a7790525",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "f98d02c3-a4f2-4f3a-9bd4-c72e7550a877"
+ }
+ ]
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "input_file": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 526.5
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": null, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"adapters\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": null, \"chromInfo\": \"/galaxy/server/database/tool-data/shared/ucsc/chrom/?.len\", \"__input_ext\": \"fastqsanger\", \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "88a42cbf-c8b6-4cbf-b344-9b3e3c45a045",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "7f939259-0a4e-488e-9b60-4583361487c0"
+ }
+ ]
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "fastq_input|fastq_input1": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "fastq_input|fastq_input2": {
+ "id": 1,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Map with BWA-MEM",
+ "name": "fastq_input"
+ },
+ {
+ "description": "runtime parameter for tool Map with BWA-MEM",
+ "name": "fastq_input"
+ }
+ ],
+ "label": null,
+ "name": "Map with BWA-MEM",
+ "outputs": [
+ {
+ "name": "bam_output",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 1201.5
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.1",
+ "tool_shed_repository": {
+ "changeset_revision": "4f774c1e6049",
+ "name": "bwa",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"rg\": {\"rg_selector\": \"set\", \"KS\": \"\", \"read_group_sm_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"CN\": \"\", \"PU\": \"\", \"read_group_lb_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"read_group_id_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"PG\": \"\", \"__current_case__\": 1, \"DT\": \"\", \"PI\": \"\", \"DS\": \"\", \"PL\": \"ILLUMINA\", \"FO\": \"\"}, \"fastq_input\": {\"iset_stats\": \"\", \"fastq_input2\": {\"__class__\": \"RuntimeValue\"}, \"__current_case__\": 0, \"fastq_input_selector\": \"paired\", \"fastq_input1\": {\"__class__\": \"RuntimeValue\"}}, \"analysis_type\": {\"analysis_type_selector\": \"illumina\", \"__current_case__\": 0}, \"reference_source\": {\"ref_file\": \"hg38canon\", \"reference_source_selector\": \"cached\", \"__current_case__\": 0}}",
+ "tool_version": "0.7.17.1",
+ "type": "tool",
+ "uuid": "dca51976-8bdf-4e4b-82b0-3fe467c0b0d1",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "bam_output",
+ "uuid": "bd5362a4-86c7-41b2-a33b-8726b5f7163c"
+ }
+ ]
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "input_file": {
+ "id": 2,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 751.5
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": null, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"adapters\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": null, \"chromInfo\": \"/galaxy/server/database/tool-data/shared/ucsc/chrom/?.len\", \"__input_ext\": \"fastqsanger\", \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "d310b4c9-c42c-4814-a733-b3ee6b22c97e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "c41f6648-0c71-414c-8274-46c203d096be"
+ }
+ ]
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "input_file": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FastQC",
+ "outputs": [
+ {
+ "name": "html_file",
+ "type": "html"
+ },
+ {
+ "name": "text_file",
+ "type": "txt"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 976.5
+ },
+ "post_job_actions": {
+ "HideDatasetActiontext_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "text_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.72+galaxy1",
+ "tool_shed_repository": {
+ "changeset_revision": "e7b2202befea",
+ "name": "fastqc",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"min_length\": \"\", \"kmers\": \"7\", \"limits\": null, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"adapters\": null, \"__rerun_remap_job_id__\": null, \"contaminants\": null, \"chromInfo\": \"/galaxy/server/database/tool-data/shared/ucsc/chrom/?.len\", \"__input_ext\": \"fastqsanger\", \"nogroup\": \"false\"}",
+ "tool_version": "0.72+galaxy1",
+ "type": "tool",
+ "uuid": "57cbf8b4-5fea-47f0-ac76-36e8fabadbd1",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "html_file",
+ "uuid": "6a0212a5-01af-451d-888a-e8087fa2e306"
+ }
+ ]
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.1",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "fastq_input|fastq_input1": {
+ "id": 2,
+ "output_name": "output"
+ },
+ "fastq_input|fastq_input2": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Map with BWA-MEM",
+ "name": "fastq_input"
+ },
+ {
+ "description": "runtime parameter for tool Map with BWA-MEM",
+ "name": "fastq_input"
+ }
+ ],
+ "label": null,
+ "name": "Map with BWA-MEM",
+ "outputs": [
+ {
+ "name": "bam_output",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 387.5,
+ "top": 1344
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.1",
+ "tool_shed_repository": {
+ "changeset_revision": "4f774c1e6049",
+ "name": "bwa",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"rg\": {\"rg_selector\": \"set\", \"KS\": \"\", \"read_group_sm_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"CN\": \"\", \"PU\": \"\", \"read_group_lb_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"read_group_id_conditional\": {\"do_auto_name\": \"true\", \"__current_case__\": 0}, \"PG\": \"\", \"__current_case__\": 1, \"DT\": \"\", \"PI\": \"\", \"DS\": \"\", \"PL\": \"ILLUMINA\", \"FO\": \"\"}, \"fastq_input\": {\"iset_stats\": \"\", \"fastq_input2\": {\"__class__\": \"RuntimeValue\"}, \"__current_case__\": 0, \"fastq_input_selector\": \"paired\", \"fastq_input1\": {\"__class__\": \"RuntimeValue\"}}, \"analysis_type\": {\"analysis_type_selector\": \"illumina\", \"__current_case__\": 0}, \"reference_source\": {\"ref_file\": \"hg38canon\", \"reference_source_selector\": \"cached\", \"__current_case__\": 0}}",
+ "tool_version": "0.7.17.1",
+ "type": "tool",
+ "uuid": "2c721f3a-bd7d-4ac2-9807-fcea112f45f5",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "bam_output",
+ "uuid": "970e9b52-ade1-40e4-867e-d4972b883640"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MergeSamFiles/2.18.2.1",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "inputFile": [
+ {
+ "id": 9,
+ "output_name": "bam_output"
+ },
+ {
+ "id": 6,
+ "output_name": "bam_output"
+ }
+ ]
+ },
+ "inputs": [],
+ "label": null,
+ "name": "MergeSamFiles",
+ "outputs": [
+ {
+ "name": "outFile",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 633.5,
+ "top": 302.234375
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MergeSamFiles/2.18.2.1",
+ "tool_shed_repository": {
+ "changeset_revision": "7d34178f2812",
+ "name": "picard",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"comments\": [], \"assume_sorted\": \"false\", \"merge_sequence_dictionaries\": \"false\", \"validation_stringency\": \"LENIENT\", \"inputFile\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "2.18.2.1",
+ "type": "tool",
+ "uuid": "3a7b6eae-bc97-4190-a6c9-8b8c455947d6",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFile",
+ "uuid": "a863a01d-af52-42f0-90e1-497e54b8ec22"
+ }
+ ]
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "inputFile": {
+ "id": 10,
+ "output_name": "outFile"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool MarkDuplicates",
+ "name": "inputFile"
+ }
+ ],
+ "label": null,
+ "name": "MarkDuplicates",
+ "outputs": [
+ {
+ "name": "metrics_file",
+ "type": "txt"
+ },
+ {
+ "name": "outFile",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 879.5,
+ "top": 301.5
+ },
+ "post_job_actions": {
+ "HideDatasetActionmetrics_file": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "metrics_file"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2",
+ "tool_shed_repository": {
+ "changeset_revision": "7d34178f2812",
+ "name": "picard",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"duplicate_scoring_strategy\": \"SUM_OF_BASE_QUALITIES\", \"remove_duplicates\": \"false\", \"read_name_regex\": \"\", \"barcode_tag\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"optical_duplicate_pixel_distance\": \"100\", \"comments\": [], \"assume_sorted\": \"true\", \"validation_stringency\": \"LENIENT\", \"inputFile\": {\"__class__\": \"RuntimeValue\"}}",
+ "tool_version": "2.18.2.2",
+ "type": "tool",
+ "uuid": "8f2528f7-0be4-4ea7-9516-ecae820e09bb",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "outFile",
+ "uuid": "9fc964c6-7395-454a-b8c0-f31b50c24d71"
+ }
+ ]
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/bamleftalign/1.1.0.46-0",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "reference_source|input_bam": {
+ "id": 11,
+ "output_name": "outFile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "BamLeftAlign",
+ "outputs": [
+ {
+ "name": "output_bam",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1125.5,
+ "top": 301.5
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/bamleftalign/1.1.0.46-0",
+ "tool_shed_repository": {
+ "changeset_revision": "156b60c1530f",
+ "name": "freebayes",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"iterations\": \"5\", \"reference_source\": {\"ref_file\": \"hg38\", \"reference_source_selector\": \"cached\", \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"__current_case__\": 0}, \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/hg38.len\", \"__input_ext\": \"bam\"}",
+ "tool_version": "1.1.0.46-0",
+ "type": "tool",
+ "uuid": "f5f18be9-bd62-41c6-8af4-fb38e93f3858",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_bam",
+ "uuid": "5a4be808-df52-4766-a1d0-bf95c02f37d7"
+ }
+ ]
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input_bam": {
+ "id": 12,
+ "output_name": "output_bam"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Filter",
+ "outputs": [
+ {
+ "name": "out_file2",
+ "type": "txt"
+ },
+ {
+ "name": "out_file1",
+ "type": "bam"
+ }
+ ],
+ "position": {
+ "left": 1371.5,
+ "top": 301.5
+ },
+ "post_job_actions": {
+ "HideDatasetActionout_file2": {
+ "action_arguments": {},
+ "action_type": "HideDatasetAction",
+ "output_name": "out_file2"
+ }
+ },
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bamtools_filter/bamFilter/2.4.1",
+ "tool_shed_repository": {
+ "changeset_revision": "bd735cae4ce6",
+ "name": "bamtools_filter",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/hg38.len\", \"__rerun_remap_job_id__\": null, \"conditions\": [{\"__index__\": 0, \"filters\": [{\"__index__\": 0, \"bam_property\": {\"bam_property_selector\": \"mapQuality\", \"bam_property_value\": \">=20\", \"__current_case__\": 14}}, {\"__index__\": 1, \"bam_property\": {\"bam_property_selector\": \"isPaired\", \"bam_property_value\": \"true\", \"__current_case__\": 9}}, {\"__index__\": 2, \"bam_property\": {\"bam_property_selector\": \"isProperPair\", \"bam_property_value\": \"true\", \"__current_case__\": 11}}, {\"__index__\": 3, \"bam_property\": {\"bam_property_selector\": \"reference\", \"bam_property_value\": \"chrM\", \"__current_case__\": 20}}]}], \"rule_configuration\": {\"rules_selector\": \"false\", \"__current_case__\": 0}, \"input_bam\": {\"__class__\": \"ConnectedValue\"}, \"__input_ext\": \"bam\"}",
+ "tool_version": "2.4.1",
+ "type": "tool",
+ "uuid": "3daf6f14-257f-4f1d-b07e-a636c672b685",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "93440503-62dd-4e94-9751-14b32c2cc7e8"
+ }
+ ]
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/1.1.0.46-0",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "reference_source|batchmode|input_bams": {
+ "id": 13,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "FreeBayes",
+ "outputs": [
+ {
+ "name": "output_vcf",
+ "type": "vcf"
+ }
+ ],
+ "position": {
+ "left": 1650.5,
+ "top": 291.5
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/1.1.0.46-0",
+ "tool_shed_repository": {
+ "changeset_revision": "156b60c1530f",
+ "name": "freebayes",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"options_type\": {\"options_type_selector\": \"full\", \"allele_scope\": {\"no_partial_observations\": \"false\", \"I\": \"false\", \"n\": \"0\", \"min_repeat_length\": \"5\", \"i\": \"false\", \"min_repeat_entropy\": \"0\", \"u\": \"true\", \"__current_case__\": 0, \"X\": \"true\", \"haplotype_length\": \"3\", \"allele_scope_selector\": \"set\"}, \"reporting\": {\"reporting_selector\": \"do_not_set\", \"__current_case__\": 1}, \"genotype_likelihoods\": {\"__current_case__\": 1, \"genotype_likelihoods_selector\": \"do_not_set\"}, \"O\": \"false\", \"population_model\": {\"K\": \"true\", \"J\": \"true\", \"P\": \"1\", \"T\": \"0.001\", \"__current_case__\": 0, \"population_model_selector\": \"set\"}, \"reference_allele\": {\"reference_allele_selector\": \"do_not_set\", \"__current_case__\": 1}, \"optional_inputs\": {\"optional_inputs_selector\": \"do_not_set\", \"__current_case__\": 1}, \"__current_case__\": 0, \"population_mappability_priors\": {\"__current_case__\": 1, \"population_mappability_priors_selector\": \"do_not_set\"}, \"input_filters\": {\"min_coverage\": \"0\", \"C\": \"2\", \"e\": \"1000\", \"__current_case__\": 0, \"G\": \"1\", \"F\": \"0.2\", \"standard_filters\": \"false\", \"m\": \"20\", \"q\": \"30\", \"mismatch_filters\": {\"mismatch_filters_selector\": \"do_not_set\", \"__current_case__\": 1}, \"R\": \"0\", \"use_duplicate_reads\": \"false\", \"Y\": \"0\", \"input_filters_selector\": \"set\", \"min_alternate_qsum\": \"0\"}, \"algorithmic_features\": {\"algorithmic_features_selector\": \"do_not_set\", \"__current_case__\": 1}}, \"reference_source\": {\"batchmode\": {\"input_bams\": {\"__class__\": \"ConnectedValue\"}, \"processmode\": \"individual\", \"__current_case__\": 0}, \"ref_file\": \"hg38\", \"reference_source_selector\": \"cached\", \"__current_case__\": 0}, \"chromInfo\": \"/cvmfs/data.galaxyproject.org/managed/len/ucsc/hg38.len\", \"__input_ext\": \"bam\", 
\"target_limit_type\": {\"target_limit_type_selector\": \"limit_by_region\", \"region_end\": \"16000\", \"region_chromosome\": \"chrM\", \"region_start\": \"1\", \"__current_case__\": 2}}",
+ "tool_version": "1.1.0.46-0",
+ "type": "tool",
+ "uuid": "729b01c1-b981-4aa7-ba0d-9a7f14121603",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "output_vcf",
+ "uuid": "75edfff2-93ba-4ebb-a2db-ad32111cf78a"
+ }
+ ]
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/vcffilter/vcffilter2/1.0.0_rc1+galaxy3",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "input1": {
+ "id": 14,
+ "output_name": "output_vcf"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "VCFfilter:",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "vcf"
+ }
+ ],
+ "position": {
+ "left": 1920.796875,
+ "top": 294.734375
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/vcffilter/vcffilter2/1.0.0_rc1+galaxy3",
+ "tool_shed_repository": {
+ "changeset_revision": "81972652519c",
+ "name": "vcffilter",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"append_filter\": \"false\", \"__page__\": null, \"filter_sites\": \"false\", \"allele_tag\": \"false\", \"region\": \"\", \"filter_repeat\": [{\"filter_value\": \"SRP > 20\", \"__index__\": 0, \"filter_type\": \"-f\"}, {\"filter_value\": \"SAP > 20\", \"__index__\": 1, \"filter_type\": \"-f\"}, {\"filter_value\": \"EPP > 20\", \"__index__\": 2, \"filter_type\": \"-f\"}, {\"filter_value\": \"QUAL > 30\", \"__index__\": 3, \"filter_type\": \"-f\"}, {\"filter_value\": \"DP > 100\", \"__index__\": 4, \"filter_type\": \"-f\"}], \"tag_pass\": \"false\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"__rerun_remap_job_id__\": null, \"invert\": \"false\", \"or\": \"false\", \"tag_fail\": \"false\"}",
+ "tool_version": "1.0.0_rc1+galaxy3",
+ "type": "tool",
+ "uuid": "e3afc76b-2480-4983-a0af-5c4d1793a597",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "2cbaab24-e113-417d-8000-d86f58056b1e"
+ }
+ ]
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/vcf2tsv/vcf2tsv/1.0.0_rc1+galaxy0",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input": {
+ "id": 15,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "VCFtoTab-delimited:",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2120,
+ "top": 292.5
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/vcf2tsv/vcf2tsv/1.0.0_rc1+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "e92b3c0f9224",
+ "name": "vcf2tsv",
+ "owner": "devteam",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__page__\": null, \"g_option\": \"true\", \"__rerun_remap_job_id__\": null, \"null_filler\": \"\", \"input\": {\"__class__\": \"ConnectedValue\"}}",
+ "tool_version": "1.0.0_rc1+galaxy0",
+ "type": "tool",
+ "uuid": "fead8662-0459-431b-b525-fafe5f6ec51e",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "df78f5bb-ef20-46a6-aa2b-bb163d9a877c"
+ }
+ ]
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "input": {
+ "id": 16,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 2354,
+ "top": 301.5
+ },
+ "post_job_actions": {},
+ "tool_id": "Cut1",
+ "tool_state": "{\"columnList\": \"c2,c4,c5,c52,c54,c55\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"delimiter\": \"T\", \"__rerun_remap_job_id__\": null, \"__page__\": null}",
+ "tool_version": "1.0.2",
+ "type": "tool",
+ "uuid": "9e0ffe7d-7016-49d0-be12-0d2c1f089c60",
+ "workflow_outputs": [
+ {
+ "label": null,
+ "output_name": "out_file1",
+ "uuid": "ef081c3d-223d-4091-8910-7491ffb52e8c"
+ }
+ ]
+ }
+ },
+ "tags": [
+ "variant-analysis"
+ ],
+ "uuid": "041a2dcb-f735-438e-8fa4-3f38cd0145ee",
+ "version": 9
+}
diff --git a/topics/variant-analysis/tutorials/non-dip/workflows/index.md b/topics/variant-analysis/tutorials/non-dip/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/variant-analysis/tutorials/non-dip/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---
diff --git a/topics/visualisation/metadata.yaml b/topics/visualisation/metadata.yaml
index 79a777574f5a91..75452ce97e0725 100644
--- a/topics/visualisation/metadata.yaml
+++ b/topics/visualisation/metadata.yaml
@@ -1,4 +1,4 @@
----
+---
name: "visualisation"
type: "use"
title: "Visualisation"