diff --git a/.nf-core.yml b/.nf-core.yml index 9217204b..d2b352c0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,7 +1,7 @@ lint: files_exist: - conf/igenomes.config - - conf/igenomes_ignored.config + - conf/igenomes_ignored.config - .github/PULL_REQUEST_TEMPLATE.md files_unchanged: - .github/CONTRIBUTING.md diff --git a/README.md b/README.md index 892dab54..94ea2265 100644 --- a/README.md +++ b/README.md @@ -73,10 +73,9 @@ nextflow run nf-core/multiplesequencealign \ ## How to set up an easy run: > [!NOTE] ->We have a lot more of use cases examples under [FAQs]("https://nf-co.re/multiplesequencealign/usage/FAQs) +> We have many more use-case examples under [FAQs](https://nf-co.re/multiplesequencealign/usage/FAQs) > Find some example input data [here](https://github.com/nf-core/test-datasets/tree/multiplesequencealign) - ### CASE 1: One input dataset, one tool. If you only have one dataset and want align it using one specific MSA tool (e.g. FAMSA or FOLDMASON): diff --git a/docs/usage.md b/docs/usage.md index d43a4d37..6ff4d629 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -31,7 +31,6 @@ work # Directory containing the nextflow working files We have a lot of use cases examples under [FAQs]("https://nf-co.re/multiplesequencealign/usage/FAQs) ::: - ## Samplesheet input The sample sheet defines the **input data** that the pipeline will process. @@ -118,28 +117,25 @@ Currently available GUIDE TREE methods are: (Optional): - [FAMSA](https://github.com/refresh-bio/FAMSA) - [MAFFT](https://mafft.cbrc.jp/alignment/server/index.html) - -Here some specific Guide Tree settings: -Use the values in columns `tree` and `args_tree`. The rest of the columns are just explainatory here. 
- -| tree | args_Tree | Distance Measure | Core Algorithm | Speed-up Heuristic | -|-----------|-------------------------|------------------------------------------|--------------------------------------------|--------------------| -| MAFFT | | k-mer-based | UPGMA + single linkage combined | | -| MAFFT | --minimumlinkage | k-mer-based | single linkage | | -| MAFFT | --averagelinkage | k-mer-based | UPGMA | | -| MAFFT | --parttree | k-mer-based | single linkage + UPGMA combined | PartTree | -| MAFFT | --dpparttree | dynamic programming alignment-based | single linkage + UPGMA combined | PartTree | -| MAFFT | --fastaparttree | FASTA alignment-based | single linkage + UPGMA combined | PartTree | -| CLUSTALO | | sequence embedding + approx. alignment | UPGMA | bisecting K-means | -| FAMSA | | longest common subsequence-based | single linkage | | -| FAMSA | -gt upgma | longest common subsequence-based | UPGMA | | -| FAMSA | -gt nj | longest common subsequence-based | neighbour joining | | -| FAMSA | -parttree | longest common subsequence-based | single linkage | PartTree | -| FAMSA | -gt upgma -parttree | longest common subsequence-based | UPGMA | PartTree | -| FAMSA | -medoidtree | longest common subsequence-based | single linkage | MedoidTree | -| FAMSA | -gt upgma -medoidtree | longest common subsequence-based | UPGMA | MedoidTree | - - +Here are some specific Guide Tree settings: +Use the values in columns `tree` and `args_tree`. The rest of the columns are just explanatory here. 
+ +| tree | args_tree | Distance Measure | Core Algorithm | Speed-up Heuristic | +| -------- | --------------------- | -------------------------------------- | ------------------------------- | ------------------ | +| MAFFT | | k-mer-based | UPGMA + single linkage combined | | +| MAFFT | --minimumlinkage | k-mer-based | single linkage | | +| MAFFT | --averagelinkage | k-mer-based | UPGMA | | +| MAFFT | --parttree | k-mer-based | single linkage + UPGMA combined | PartTree | +| MAFFT | --dpparttree | dynamic programming alignment-based | single linkage + UPGMA combined | PartTree | +| MAFFT | --fastaparttree | FASTA alignment-based | single linkage + UPGMA combined | PartTree | +| CLUSTALO | | sequence embedding + approx. alignment | UPGMA | bisecting K-means | +| FAMSA | | longest common subsequence-based | single linkage | | +| FAMSA | -gt upgma | longest common subsequence-based | UPGMA | | +| FAMSA | -gt nj | longest common subsequence-based | neighbour joining | | +| FAMSA | -parttree | longest common subsequence-based | single linkage | PartTree | +| FAMSA | -gt upgma -parttree | longest common subsequence-based | UPGMA | PartTree | +| FAMSA | -medoidtree | longest common subsequence-based | single linkage | MedoidTree | +| FAMSA | -gt upgma -medoidtree | longest common subsequence-based | UPGMA | MedoidTree | ## 3. 
Align @@ -150,7 +146,7 @@ The available assembly methods are listed below (those that accept guide trees i - [CLUSTALO](http://clustal.org/omega/#Documentation) (accepts guide tree) - [FAMSA](https://github.com/refresh-bio/FAMSA) (accepts guide tree) - [KALIGN](https://github.com/TimoLassmann/kalign) -- [LEARNMSA](https://github.com/Gaius-Augustus/learnMSA) *Read note below +- [LEARNMSA](https://github.com/Gaius-Augustus/learnMSA) \*Read note below - [MAFFT](https://mafft.cbrc.jp/alignment/server/index.html) - [MAGUS](https://github.com/vlasmirnov/MAGUS) (accepts guide tree) - [MUSCLE5](https://drive5.com/muscle5/manual/) @@ -158,9 +154,8 @@ The available assembly methods are listed below (those that accept guide trees i - [REGRESSIVE](https://tcoffee.readthedocs.io/en/latest/tcoffee_quickstart_regressive.html) (accepts guide tree) - [UPP](https://github.com/smirarab/sepp) (accepts guide tree) - > [!NOTE] -> LearnMSA can (and should) run on GPUs. If you have GPUs available please turn the GPU run mode on using `--use_gpu`. You might have to update you configuration file if you are running on a cluster with custom queue names. Check the [CRG](https://github.com/nf-core/configs/blob/master/conf/pipeline/multiplesequencealign/crg.config) one to see an example. +> LearnMSA can (and should) run on GPUs. If you have GPUs available please turn the GPU run mode on using `--use_gpu`. You might have to update your configuration file if you are running on a cluster with custom queue names. Check the [CRG](https://github.com/nf-core/configs/blob/master/conf/pipeline/multiplesequencealign/crg.config) one to see an example. **sequence- and structure-based** (require both fasta and structures as input): @@ -231,7 +226,6 @@ outdir: './results/' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
- ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/docs/usage/FAQs.md b/docs/usage/FAQs.md index f9a559a2..70239bbb 100644 --- a/docs/usage/FAQs.md +++ b/docs/usage/FAQs.md @@ -2,24 +2,21 @@ ## TODO: replace main.nf with nf-core/multiplesequencealign and test.fa with <> AND ADD LINK - -### INPUTS +### INPUTS ### USECASES -
Where can I find some example input data? Find some example input data here
-
I want to deploy one tool on one dataset. I am not interested in any evaluation, report etc. You should use the easy_deploy profile! - This will skip all the evaluation, reporting etc. step and keep the deployment to the minimum. + This will skip all the evaluation, reporting etc. steps and keep the deployment to the minimum. The following example: running FAMSA (with arguments -refine_mode on) using the guidetree built using CLUSTALO. @@ -50,27 +47,24 @@ --outdir results You can leave the --tree and --args_aligner and --args_tree empty (just do not use the flags). Default values will be used. - Foldmason is just an example, you can pick any other structural aligner. + Foldmason is just an example, you can pick any other structural aligner. 
-
One dataset, multiple tools. - You should use the toolsheet to specify the tools use. + You should use the toolsheet to specify the tools to use. nextflow run main.nf &\ -profile easy_deploy,docker \ --seqs \ --tools \ - --outdir results + --outdir results - Your input dataset can be passed via the --seqs or --pdbs_dir, as explained in the examples above. +Your input dataset can be passed via the --seqs or --pdbs_dir, as explained in the examples above. 
- -
Can i run the same tool multiple times with different arguments? @@ -78,25 +72,23 @@
-
Can i run a structural evaluation on sequence-based aligners? - Yes, as long as you provide the structures, either via the samplesheet or via the --pdbs_dir flag. + Yes, as long as you provide the structures, either via the samplesheet or via the --pdbs_dir flag. - You can also run proteinfold before to get your structures, in case you do not have them already. - Here instructions on how to do it. + You can also run proteinfold beforehand to get your structures, in case you do not have them already. + Here are instructions on how to do it. # ADD LINK -
- +
What happens if I have the only PDBs and not the corresponding fasta files? - No problem, you can provide the PDBs as input (either via the samplesheet using the optional_data column or via the flag --pdbs_dir). - - The flag --skip_pdbcoversion false will make sure that the fasta file is automatically extracted from the provided PDBs and subsequently used in the pipeline. + No problem, you can provide the PDBs as input (either via the samplesheet using the optional_data column or via the flag --pdbs_dir). + + The flag --skip_pdbconversion false will make sure that the fasta file is automatically extracted from the provided PDBs and subsequently used in the pipeline. nextflow run main.nf &\ -profile easy_deploy,docker \ @@ -105,5 +97,5 @@ --tree CLUSTALO \ --outdir results \ --skip_pdbconversion false -
+ diff --git a/modules.json b/modules.json index df609d87..b22cfff8 100644 --- a/modules.json +++ b/modules.json @@ -8,223 +8,161 @@ "clustalo/align": { "branch": "master", "git_sha": "7b32b09fe7787c0fc6924e7b6f223a0b1daf0d2f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "clustalo/guidetree": { "branch": "master", "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/concat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/join": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/guidetree": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastavalidator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "foldmason/createdb": { "branch": "master", "git_sha": "0270c0fbbbb09456d7823605e4285c4a2c5bbf40", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "foldmason/easymsa": { "branch": "master", "git_sha": "0270c0fbbbb09456d7823605e4285c4a2c5bbf40", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff" }, "foldmason/msa2lddtreport": { "branch": "master", "git_sha": "d3555a4a33ae94269b65f79f7066ac2fcb836005", 
- "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kalign/align": { "branch": "master", "git_sha": "cadb9bbfe56001ac421e0ee87808b0ccc754593a", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/kalign/align/kalign-align.diff" }, "learnmsa/align": { "branch": "master", "git_sha": "ad8452951809634bc5215390cb8ab0c0faaaf519", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mafft/align": { "branch": "master", "git_sha": "868cb0d7fc4862991fb7c2b4cd7289806cd53f81", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mafft/guidetree": { "branch": "master", "git_sha": "968b494e20f439a9ed3d23c89274e6a4a625eb92", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "magus/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mtmalign/align": { "branch": "master", "git_sha": "4eecd9a0c06fa508ae314c06ac952c161c019679", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "muscle5/super5": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pigz/compress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/align": { "branch": "master", "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/alncompare": { "branch": "master", "git_sha": 
"ffa000ab3c33df25a165b5f9a039c4cbb665a77b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/consensus": { "branch": "master", "git_sha": "023e51187884ea6cc7290767486f551565f1b77a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/extractfrompdb": { "branch": "master", "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff" }, "tcoffee/irmsd": { "branch": "master", "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/regressive": { "branch": "master", "git_sha": "66b22564bc1bc0db7292f2073cdef954ead773e7", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/tcs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "3e548877f25a5980a177cc4f81d2d2e8c24164ef", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "upp/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -233,26 +171,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", 
- "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +}