diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index b761e295..cc478c4c 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,2 @@ id,fasta,reference,optional_data -seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz -toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, +seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz \ No newline at end of file diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index 7a3377ff..04e0e679 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,3 +1,2 @@ tree,args_tree,aligner,args_aligner -,,FOLDMASON, FAMSA,,FAMSA, diff --git a/cleaned_trace.csv b/cleaned_trace.csv new file mode 100644 index 00000000..045a600e --- /dev/null +++ b/cleaned_trace.csv @@ -0,0 +1 @@ +[[task_id:8, hash:25/a00b09, native_id:3120660, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:39.2%, rss:3.1 MB, peak_rss:3.1 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:68.4 KB, wchar:832 B, cpus:2, start:2024-12-13 14:51:37.471, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE, process:CLUSTALO_GUIDETREE, 
subworkflow:COMPUTE_TREES], [task_id:10, hash:fb/43b990, native_id:3120996, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:97.0%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:70.7 KB, wchar:2.7 KB, cpus:2, start:2024-12-13 14:51:39.049, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE, process:CLUSTALO_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:12, hash:c0/beddb1, native_id:3121216, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (seatoxin-ref args: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:41.6%, rss:2.9 MB, peak_rss:2.9 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77.7 KB, wchar:3.5 KB, cpus:2, start:2024-12-13 14:51:40.326, tag:seatoxin-ref args: -gt upgma -medoidtree, id:seatoxin-ref, args:-gt upgma -medoidtree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:15, hash:cd/87115e, native_id:3121867, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:46.4%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77 KB, wchar:3.7 KB, cpus:2, start:2024-12-13 14:51:42.670, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:14, hash:2b/424e9a, native_id:3122153, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (toxin-ref args: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:49.2%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:78.8 KB, wchar:3.7 KB, 
cpus:2, start:2024-12-13 14:51:45.916, tag:toxin-ref args: -gt upgma -medoidtree, id:toxin-ref, args:-gt upgma -medoidtree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:13, hash:64/99e6a0, native_id:3122770, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:82.4%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77.7 KB, wchar:3.5 KB, cpus:2, start:2024-12-13 14:51:49.370, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:11, hash:4e/56256c, native_id:3123495, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:1s, %cpu:61.0%, rss:6.2 MB, peak_rss:6.2 MB, vmem:8.7 MB, peak_vmem:8.7 MB, rchar:1010.5 KB, wchar:9.7 KB, cpus:2, start:2024-12-13 14:51:53.106, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE, process:MAFFT_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:9, hash:bb/32bbe3, native_id:3123438, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:1s, %cpu:70.7%, rss:7.4 MB, peak_rss:7.4 MB, vmem:9.6 MB, peak_vmem:9.6 MB, rchar:998.5 KB, wchar:4 KB, cpus:2, start:2024-12-13 14:51:52.950, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE, process:MAFFT_GUIDETREE, subworkflow:COMPUTE_TREES], [task_id:16, hash:02/a68687, native_id:3123916, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE 
(seatoxin-ref tree: CLUSTALO args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln), status:COMPLETED, exit:0, realtime:1s, %cpu:132.0%, rss:19.7 MB, peak_rss:19.7 MB, vmem:25 MB, peak_vmem:25 MB, rchar:739.6 KB, wchar:18 KB, cpus:2, start:2024-12-13 14:51:56.435, tag:seatoxin-ref tree: CLUSTALO args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, id:seatoxin-ref, args:-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:20, hash:e2/ae4d65, native_id:3124382, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN (seatoxin-ref tree: FAMSA argstree: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:35.1%, rss:3.1 MB, peak_rss:3.1 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77.8 KB, wchar:3.6 KB, cpus:2, start:2024-12-13 14:51:58.528, tag:seatoxin-ref tree: FAMSA argstree: -gt upgma -medoidtree, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN, process:FAMSA_ALIGN, subworkflow:ALIGN], [task_id:29, hash:ec/261911, native_id:3124594, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN (toxin-ref tree: FAMSA argstree: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:38.8%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:79.1 KB, wchar:5.1 KB, cpus:2, start:2024-12-13 14:52:00.125, tag:toxin-ref tree: FAMSA argstree: -gt upgma -medoidtree, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN, process:FAMSA_ALIGN, subworkflow:ALIGN], [task_id:19, hash:f0/237036, native_id:3123953, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE (toxin-ref tree: CLUSTALO args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln), status:COMPLETED, exit:0, realtime:5s, 
%cpu:236.5%, rss:28.6 MB, peak_rss:28.6 MB, vmem:541.4 MB, peak_vmem:604.7 MB, rchar:760.3 KB, wchar:35.3 KB, cpus:2, start:2024-12-13 14:51:56.534, tag:toxin-ref tree: CLUSTALO args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, id:toxin-ref, args:-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:41, hash:b9/514cfe, native_id:3125807, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FOLDMASON_EASYMSA (seatoxin-ref tree: MAFFT), status:COMPLETED, exit:0, realtime:466ms, %cpu:55.6%, rss:3.5 MB, peak_rss:3.5 MB, vmem:4.6 MB, peak_vmem:4.6 MB, rchar:478.6 KB, wchar:13.7 KB, cpus:2, start:2024-12-13 14:52:06.780, tag:seatoxin-ref tree: MAFFT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FOLDMASON_EASYMSA, process:FOLDMASON_EASYMSA, subworkflow:ALIGN], [task_id:44, hash:8f/974d84, native_id:3126779, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MTMALIGN_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:1s, %cpu:40.4%, rss:3 MB, peak_rss:3 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:7.4 MB, wchar:1.2 MB, cpus:2, start:2024-12-13 14:52:14.147, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MTMALIGN_ALIGN, process:MTMALIGN_ALIGN, subworkflow:ALIGN], [task_id:27, hash:ae/306059, native_id:3127119, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN (toxin-ref tree: FAMSA), status:COMPLETED, exit:0, realtime:2s, %cpu:151.5%, rss:41.5 MB, peak_rss:41.5 MB, vmem:340.4 MB, peak_vmem:404.3 MB, rchar:18 MB, wchar:566.5 KB, cpus:4, start:2024-12-13 14:52:15.662, tag:toxin-ref tree: FAMSA, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN, process:MAGUS_ALIGN, 
subworkflow:ALIGN], [task_id:30, hash:b0/f779ab, native_id:3127162, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN (seatoxin-ref tree: FAMSA), status:COMPLETED, exit:0, realtime:2s, %cpu:129.1%, rss:44.5 MB, peak_rss:44.5 MB, vmem:416.7 MB, peak_vmem:416.7 MB, rchar:17.5 MB, wchar:198.4 KB, cpus:4, start:2024-12-13 14:52:16.283, tag:seatoxin-ref tree: FAMSA, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN, process:MAGUS_ALIGN, subworkflow:ALIGN], [task_id:42, hash:80/68e61e, native_id:3127291, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FOLDMASON_EASYMSA (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:379ms, %cpu:54.3%, rss:4.8 MB, peak_rss:4.8 MB, vmem:7.4 MB, peak_vmem:7.4 MB, rchar:478.5 KB, wchar:13.7 KB, cpus:2, start:2024-12-13 14:52:18.466, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FOLDMASON_EASYMSA, process:FOLDMASON_EASYMSA, subworkflow:ALIGN], [task_id:49, hash:65/fd280e, native_id:3127774, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MUSCLE5_SUPER5 (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:66.7%, rss:3 MB, peak_rss:3 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:115.8 KB, wchar:2.2 KB, cpus:2, start:2024-12-13 14:52:20.477, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MUSCLE5_SUPER5, process:MUSCLE5_SUPER5, subworkflow:ALIGN], [task_id:53, hash:22/c86cb3, native_id:3127884, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE (seatoxin-ref tree: DEFAULT args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln), status:COMPLETED, exit:0, realtime:1s, %cpu:160.9%, rss:6.9 MB, peak_rss:6.9 MB, vmem:12.5 MB, peak_vmem:12.5 MB, rchar:739.6 KB, wchar:18 KB, cpus:2, start:2024-12-13 14:52:21.051, 
tag:seatoxin-ref tree: DEFAULT args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, id:seatoxin-ref, args:-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:58, hash:39/9220fc, native_id:3127969, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MUSCLE5_SUPER5 (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:95.2%, rss:8 MB, peak_rss:8 MB, vmem:11.3 MB, peak_vmem:11.4 MB, rchar:117.5 KB, wchar:4.1 KB, cpus:2, start:2024-12-13 14:52:21.624, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MUSCLE5_SUPER5, process:MUSCLE5_SUPER5, subworkflow:ALIGN], [task_id:66, hash:16/158971, native_id:3128388, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:58.5%, rss:2.9 MB, peak_rss:2.9 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:78.7 KB, wchar:5.1 KB, cpus:2, start:2024-12-13 14:52:23.041, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN, process:FAMSA_ALIGN, subworkflow:ALIGN], [task_id:50, hash:b2/93c7ae, native_id:3128574, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:KALIGN_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:58.0%, rss:3.2 MB, peak_rss:3.2 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:107.1 KB, wchar:2 KB, cpus:2, start:2024-12-13 14:52:26.062, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:KALIGN_ALIGN, process:KALIGN_ALIGN, subworkflow:ALIGN], [task_id:45, hash:04/013905, native_id:3128851, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN (seatoxin-ref tree: DEFAULT), 
status:COMPLETED, exit:0, realtime:0ms, %cpu:53.3%, rss:2.8 MB, peak_rss:2.8 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:75.8 KB, wchar:3.6 KB, cpus:2, start:2024-12-13 14:52:27.334, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN, process:FAMSA_ALIGN, subworkflow:ALIGN], [task_id:55, hash:71/f243ff, native_id:3129007, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:KALIGN_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:52.2%, rss:3.1 MB, peak_rss:3.1 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:109.4 KB, wchar:4.6 KB, cpus:2, start:2024-12-13 14:52:28.251, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:KALIGN_ALIGN, process:KALIGN_ALIGN, subworkflow:ALIGN], [task_id:48, hash:5e/5e1d97, native_id:3129055, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CLUSTALO_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:51.6%, rss:3.1 MB, peak_rss:3.1 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:100.2 KB, wchar:731 B, cpus:2, start:2024-12-13 14:52:29.054, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CLUSTALO_ALIGN, process:CLUSTALO_ALIGN, subworkflow:ALIGN], [task_id:63, hash:b9/5e763e, native_id:3128334, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE3D_ALIGN (seatoxin-ref tree: DEFAULT args: -method TMalign_pair -output fasta_aln), status:COMPLETED, exit:0, realtime:7s, %cpu:85.5%, rss:107.6 MB, peak_rss:107.6 MB, vmem:128.6 MB, peak_vmem:128.6 MB, rchar:45.2 MB, wchar:6.5 MB, cpus:2, start:2024-12-13 14:52:22.862, tag:seatoxin-ref tree: DEFAULT args: -method TMalign_pair -output fasta_aln, id:seatoxin-ref, args:-method TMalign_pair -output fasta_aln, 
full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE3D_ALIGN, process:TCOFFEE3D_ALIGN, subworkflow:ALIGN], [task_id:70, hash:ec/585d49, native_id:3129161, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_ALIGN (toxin-ref tree: DEFAULT args: -output fasta_aln), status:COMPLETED, exit:0, realtime:2s, %cpu:138.1%, rss:97.4 MB, peak_rss:97.4 MB, vmem:119.1 MB, peak_vmem:119.1 MB, rchar:3.1 MB, wchar:2.8 MB, cpus:2, start:2024-12-13 14:52:29.925, tag:toxin-ref tree: DEFAULT args: -output fasta_aln, id:toxin-ref, args:-output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_ALIGN, process:TCOFFEE_ALIGN, subworkflow:ALIGN], [task_id:65, hash:57/06536c, native_id:3129638, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:UPP_ALIGN (toxin-ref tree: DEFAULT args: -m amino), status:COMPLETED, exit:0, realtime:7.3s, %cpu:82.1%, rss:114.4 MB, peak_rss:114.4 MB, vmem:737.2 MB, peak_vmem:801.2 MB, rchar:22 MB, wchar:418.6 KB, cpus:2, start:2024-12-13 14:52:32.794, tag:toxin-ref tree: DEFAULT args: -m amino, id:toxin-ref, args:-m amino, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:UPP_ALIGN, process:UPP_ALIGN, subworkflow:ALIGN], [task_id:61, hash:00/74f5e7, native_id:3131161, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:UPP_ALIGN (seatoxin-ref tree: DEFAULT args: -m amino), status:COMPLETED, exit:0, realtime:5.1s, %cpu:70.5%, rss:122.2 MB, peak_rss:122.2 MB, vmem:748.2 MB, peak_vmem:811.9 MB, rchar:17.8 MB, wchar:192.7 KB, cpus:2, start:2024-12-13 14:52:43.153, tag:seatoxin-ref tree: DEFAULT args: -m amino, id:seatoxin-ref, args:-m amino, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:UPP_ALIGN, process:UPP_ALIGN, subworkflow:ALIGN], [task_id:51, hash:0e/f9563d, native_id:3129569, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:LEARNMSA_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:2m 40s, 
%cpu:131.3%, rss:1.3 GB, peak_rss:1.3 GB, vmem:4.8 GB, peak_vmem:4.8 GB, rchar:80.4 MB, wchar:290.3 KB, cpus:2, start:2024-12-13 14:52:32.456, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:LEARNMSA_ALIGN, process:LEARNMSA_ALIGN, subworkflow:ALIGN], [task_id:46, hash:6b/099687, native_id:3152735, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:70.6%, rss:7.2 MB, peak_rss:7.2 MB, vmem:9.6 MB, peak_vmem:9.6 MB, rchar:979.3 KB, wchar:4.2 KB, cpus:4, start:2024-12-13 14:55:15.756, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN, process:MAFFT_ALIGN, subworkflow:ALIGN], [task_id:52, hash:98/dac5d4, native_id:3152842, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN (seatoxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:2s, %cpu:142.7%, rss:54.3 MB, peak_rss:54.3 MB, vmem:358.5 MB, peak_vmem:422.5 MB, rchar:18.3 MB, wchar:214.2 KB, cpus:4, start:2024-12-13 14:55:16.918, tag:seatoxin-ref tree: DEFAULT, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN, process:MAGUS_ALIGN, subworkflow:ALIGN], [task_id:47, hash:6e/dde5a6, native_id:3153362, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN (seatoxin-ref tree: DEFAULT args: --dpparttree), status:COMPLETED, exit:0, realtime:0ms, %cpu:57.3%, rss:6.2 MB, peak_rss:6.2 MB, vmem:8.6 MB, peak_vmem:8.6 MB, rchar:979.3 KB, wchar:4.3 KB, cpus:4, start:2024-12-13 14:55:19.898, tag:seatoxin-ref tree: DEFAULT args: --dpparttree, id:seatoxin-ref, args:--dpparttree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN, process:MAFFT_ALIGN, subworkflow:ALIGN], [task_id:56, hash:3a/2b31a1, native_id:3153502, 
name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:3s, %cpu:193.3%, rss:48 MB, peak_rss:48 MB, vmem:622.9 MB, peak_vmem:750.9 MB, rchar:18.8 MB, wchar:615.5 KB, cpus:4, start:2024-12-13 14:55:21.103, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAGUS_ALIGN, process:MAGUS_ALIGN, subworkflow:ALIGN], [task_id:57, hash:99/792bfd, native_id:3153835, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:65.3%, rss:6.3 MB, peak_rss:6.3 MB, vmem:8.6 MB, peak_vmem:8.6 MB, rchar:991.3 KB, wchar:10.4 KB, cpus:4, start:2024-12-13 14:55:24.815, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN, process:MAFFT_ALIGN, subworkflow:ALIGN], [task_id:67, hash:bb/df1b15, native_id:3153974, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE (toxin-ref tree: DEFAULT args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln), status:COMPLETED, exit:0, realtime:5s, %cpu:242.7%, rss:28.1 MB, peak_rss:28.1 MB, vmem:541.6 MB, peak_vmem:544.1 MB, rchar:760.2 KB, wchar:35.3 KB, cpus:2, start:2024-12-13 14:55:26.054, tag:toxin-ref tree: DEFAULT args: -reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, id:toxin-ref, args:-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:62, hash:2b/c18f9c, native_id:3154883, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CLUSTALO_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:0ms, %cpu:101.4%, rss:3.1 MB, peak_rss:3.1 MB, vmem:4.3 MB, peak_vmem:4.3 MB, rchar:102.5 KB, wchar:2.1 KB, cpus:2, 
start:2024-12-13 14:55:32.007, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CLUSTALO_ALIGN, process:CLUSTALO_ALIGN, subworkflow:ALIGN], [task_id:54, hash:ca/ad6951, native_id:3155027, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE (seatoxin-ref tree: DEFAULT args: -reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln), status:COMPLETED, exit:0, realtime:1s, %cpu:177.9%, rss:10.4 MB, peak_rss:10.4 MB, vmem:15.9 MB, peak_vmem:15.9 MB, rchar:1.1 MB, wchar:29.4 KB, cpus:2, start:2024-12-13 14:55:33.008, tag:seatoxin-ref tree: DEFAULT args: -reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln, id:seatoxin-ref, args:-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:60, hash:47/cecc6a, native_id:3132055, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:LEARNMSA_ALIGN (toxin-ref tree: DEFAULT), status:COMPLETED, exit:0, realtime:2m 46s, %cpu:144.5%, rss:1.4 GB, peak_rss:1.4 GB, vmem:4.8 GB, peak_vmem:4.8 GB, rchar:80.4 MB, wchar:293 KB, cpus:2, start:2024-12-13 14:52:49.350, tag:toxin-ref tree: DEFAULT, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:LEARNMSA_ALIGN, process:LEARNMSA_ALIGN, subworkflow:ALIGN], [task_id:69, hash:ab/928d2c, native_id:3155625, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_ALIGN (seatoxin-ref tree: DEFAULT args: -output fasta_aln), status:COMPLETED, exit:0, realtime:1s, %cpu:58.0%, rss:6.9 MB, peak_rss:6.9 MB, vmem:12.5 MB, peak_vmem:12.5 MB, rchar:429.2 KB, wchar:147.5 KB, cpus:2, start:2024-12-13 14:55:36.205, tag:seatoxin-ref tree: DEFAULT args: -output fasta_aln, id:seatoxin-ref, args:-output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_ALIGN, 
process:TCOFFEE_ALIGN, subworkflow:ALIGN], [task_id:64, hash:0e/b8a5b1, native_id:3155416, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE3D_ALIGN (seatoxin-ref tree: DEFAULT args: -method TMalign_pair -output fasta_aln), status:COMPLETED, exit:0, realtime:7s, %cpu:74.1%, rss:107.9 MB, peak_rss:107.9 MB, vmem:128.6 MB, peak_vmem:128.6 MB, rchar:45.2 MB, wchar:6.5 MB, cpus:2, start:2024-12-13 14:55:34.761, tag:seatoxin-ref tree: DEFAULT args: -method TMalign_pair -output fasta_aln, id:seatoxin-ref, args:-method TMalign_pair -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE3D_ALIGN, process:TCOFFEE3D_ALIGN, subworkflow:ALIGN], [task_id:68, hash:0e/d0da82, native_id:3155780, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE (toxin-ref tree: DEFAULT args: -reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln), status:COMPLETED, exit:0, realtime:6s, %cpu:236.5%, rss:49.7 MB, peak_rss:49.7 MB, vmem:541.6 MB, peak_vmem:668.1 MB, rchar:4.8 MB, wchar:52.7 KB, cpus:2, start:2024-12-13 14:55:37.454, tag:toxin-ref tree: DEFAULT args: -reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln, id:toxin-ref, args:-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:TCOFFEE_REGRESSIVE, process:TCOFFEE_REGRESSIVE, subworkflow:ALIGN], [task_id:59, hash:e0/f51956, native_id:3167813, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN (toxin-ref tree: DEFAULT args: --dpparttree), status:COMPLETED, exit:0, realtime:0ms, %cpu:72.1%, rss:6.1 MB, peak_rss:6.1 MB, vmem:8.6 MB, peak_vmem:8.6 MB, rchar:991.4 KB, wchar:11.1 KB, cpus:4, start:2024-12-13 14:56:49.959, tag:toxin-ref tree: DEFAULT args: --dpparttree, id:toxin-ref, args:--dpparttree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:MAFFT_ALIGN, process:MAFFT_ALIGN, subworkflow:ALIGN], [task_id:271, hash:fc/1a56e0, 
native_id:3172125, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CONSENSUS (toxin-ref), status:COMPLETED, exit:0, realtime:770ms, %cpu:60.6%, rss:7.1 MB, peak_rss:7.1 MB, vmem:13 MB, peak_vmem:13 MB, rchar:4.1 MB, wchar:3.4 MB, cpus:2, start:2024-12-13 14:57:14.360, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CONSENSUS, process:CONSENSUS, subworkflow:ALIGN], [task_id:270, hash:64/734dc8, native_id:3172235, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CONSENSUS (seatoxin-ref), status:COMPLETED, exit:0, realtime:651ms, %cpu:57.4%, rss:13.2 MB, peak_rss:13.2 MB, vmem:19 MB, peak_vmem:19 MB, rchar:1 MB, wchar:584.9 KB, cpus:2, start:2024-12-13 14:57:14.990, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:CONSENSUS, process:CONSENSUS, subworkflow:ALIGN]] \ No newline at end of file diff --git a/co2footprint_summary_20241213-53303754.txt b/co2footprint_summary_20241213-53303754.txt deleted file mode 100644 index 301e086f..00000000 --- a/co2footprint_summary_20241213-53303754.txt +++ /dev/null @@ -1,6 +0,0 @@ -Total CO2e footprint measures of this workflow run -CO2e emissions: 126.64 mg -Energy consumption: 266.61 mWh - -The calculation of these values is based on the carbon footprint computation method developed in the Green Algorithms project. -Lannelongue, L., Grealey, J., Inouye, M., Green Algorithms: Quantifying the Carbon Footprint of Computation. Adv. Sci. 2021, 2100707. 
https://doi.org/10.1002/advs.202100707 diff --git a/co2footprint_summary_20241213-53488757.txt b/co2footprint_summary_20241213-53488757.txt deleted file mode 100644 index 2f22885f..00000000 --- a/co2footprint_summary_20241213-53488757.txt +++ /dev/null @@ -1,6 +0,0 @@ -Total CO2e footprint measures of this workflow run -CO2e emissions: 2.4 g -Energy consumption: 5.06 Wh - -The calculation of these values is based on the carbon footprint computation method developed in the Green Algorithms project. -Lannelongue, L., Grealey, J., Inouye, M., Green Algorithms: Quantifying the Carbon Footprint of Computation. Adv. Sci. 2021, 2100707. https://doi.org/10.1002/advs.202100707 diff --git a/nextflow.config b/nextflow.config index 171bcd11..b4613d0a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -280,6 +280,7 @@ def co2_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') co2footprint { traceFile = "${params.outdir}/pipeline_info/co2footprint_trace_${co2_timestamp}.txt" reportFile = "${params.outdir}/pipeline_info/co2footprint_report_${co2_timestamp}.html" + summaryFile = "${params.outdir}/pipeline_info/co2footprint_summary_${co2_timestamp}.txt" } validation { diff --git a/test_merged.csv b/test_merged.csv index 0637a088..450c97d8 100644 --- a/test_merged.csv +++ b/test_merged.csv @@ -1 +1,2 @@ -[] \ No newline at end of file +id,seqlength_mean,seqlength_median,seqlength_max,n_sequences,perc_sim,plddt,tree,args_tree,args_tree_clean,aligner,args_aligner,args_aligner_clean,sp,total_gaps,avg_gaps,TCS,tc,EVALUATED,APDB,iRMSD,NiRMSD,name,realtime_tree,%cpu_tree,rss_tree,peak_rss_tree,vmem_tree,peak_mem_tree,rchar_tree,wchar_tree,cpus_tree,energy_consumption_tree,CO2e_tree,powerdraw_cpu_tree,cpu_model_tree,requested_memory_tree,realtime_aligner,%cpu_aligner,rss_aligner,peak_rss_aligner,vmem_aligner,peak_mem_aligner,rchar_aligner,wchar_aligner,cpus_aligner,energy_consumption_aligner,CO2e_aligner,powerdraw_cpu_aligner,cpu_model_aligner,requested_memory_aligner 
+seatoxin-ref,47.0,48.0,49,5,46.20,0.833333,null,,default,FAMSA,,default,81.0,20,4,835,46.9,84.62,45.97,1.23,1.45,NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:FAMSA_ALIGN (seatoxin-ref tree: FAMSA),0.016666666666666666,36.9%,0.003,3 MB,5.4 MB,null,77.6 KB,3.5 KB,2,4.28 mWh,2.03 mg,12.0,Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz,12.0 GB,0.0,42.1%,0.0031,3.1 MB,5.4 MB,null,77.7 KB,3.6 KB,2,0.0 pWh,0.0 pg,12.0,Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz,12.0 GB \ No newline at end of file diff --git a/test_merging.groovy b/test_merging.groovy index 5066bb6d..945481ae 100644 --- a/test_merging.groovy +++ b/test_merging.groovy @@ -1,10 +1,52 @@ @Grab('com.xlson.groovycsv:groovycsv:1.3') import static com.xlson.groovycsv.CsvParser.parseCsv -def cleanTrace(trace) { + +/** + * Saves a list of maps to a CSV file. + * + * @param data The list of maps to be saved. Each map represents a row in the CSV. + * @param fileName The name of the file to save the CSV data to. + */ +def saveMapToCsv(List data, String fileName) { + if (data.isEmpty()) { + println "No data to write" + return + } + + // Extract headers from the keys of the first map + def headers = data[0].keySet().join(',') + + // Generate CSV content by joining the values of each map with commas + def csvContent = data.collect { row -> + row.values().join(',') + }.join('\n') + + // Write headers and CSV content to the specified file + new File(fileName).withWriter { writer -> + writer.write(headers + '\n' + csvContent) + } +} + + +/** + * Cleans the trace data by converting each row into a mutable map + * and performing necessary transformations. + * + * The following transformations are performed: + * - Extract the tag from the 'name' column using a regex pattern + * - Extract 'id' and 'args' from the tag + * - Process the 'full_name' to extract workflow and process details + * + * @param trace The trace data to be cleaned. + * @return The cleaned trace data. 
+ */ +def cleanTrace(ArrayList trace) { + // Convert each row into a mutable map for dynamic property addition def cleanedTrace = trace.collect { row -> - def mutableRow = row.toMap() + + def mutableRow = new LinkedHashMap(row) // Extract the tag from the 'name' column using a regex pattern def tagMatch = (mutableRow.name =~ /\((.*)\)/) @@ -12,7 +54,7 @@ def cleanTrace(trace) { // Extract 'id' and 'args' from the tag safely mutableRow.id = mutableRow.tag?.tokenize(' ')?.first() - mutableRow.args = mutableRow.tag?.split("args:")?.with { it.size() > 1 ? it[1].trim() : null } + mutableRow.args = mutableRow.tag?.split("args:")?.with { it.size() > 1 ? it[1].trim() : "default" } // Process the 'full_name' to extract workflow and process details mutableRow.full_name = mutableRow.name.split(/\(/)?.first()?.trim() @@ -27,6 +69,11 @@ def cleanTrace(trace) { } } + // if args_tree is null, default + if (mutableRow.args == null) { + mutableRow.args = "default" + } + return mutableRow } @@ -34,8 +81,23 @@ def cleanTrace(trace) { return cleanedTrace.findAll { it != null } } -// Utility function to convert time strings to minutes -def convertTime(String timeStr) { + +/** + * Utility function to convert time strings to minutes. + * + * This function takes a time string in the format of hours, minutes, seconds, and milliseconds, + * and converts it to a total number of minutes. + * + * Example input formats: + * - "1h 30m" + * - "45m 30s" + * - "2h 15m 10s 500ms" + * + * @param timeStr The time string to be converted. + * @return The total time in minutes as a double. + * @throws IllegalArgumentException if the time string is not in the correct format. 
+ */ + def convertTime(String timeStr) { def pattern = /((?\d+(\.\d+)?)h)?\s*((?\d+(\.\d+)?)m)?\s*((?\d+(\.\d+)?)s)?\s*((?\d+(\.\d+)?)ms)?/ def matcher = timeStr.trim() =~ pattern @@ -51,8 +113,20 @@ def convertTime(String timeStr) { return (hours * 60) + minutes + (seconds / 60) + (milliseconds / 60000) } -// Utility function to convert memory to GB -def convertMemory(String memory) { +/** + * Utility function to convert memory to GB. + * + * This function takes a memory string with units (GB, MB, KB) and converts it to gigabytes (GB). + * + * Example input formats: + * - "16GB" + * - "2048MB" + * - "1048576KB" + * + * @param memory The memory string to be converted. + * @return The memory in gigabytes as a double, or null if the input is invalid. + */ + def convertMemory(String memory) { if (!memory) return null if (memory.contains("GB")) { @@ -65,65 +139,198 @@ def convertMemory(String memory) { return null } -// Prepare trace trees -def prepTreeTrace(trace) { - def traceTrees = trace.findAll { it.subworkflow == "COMPUTE_TREES" } - traceTrees.each { row -> - row.args_tree = row.args - row.tree = row.process.replace("_GUIDETREE", "") - row.time_tree = convertTime(row.realtime) - row.memory_tree = convertMemory(row.rss) - row.cpus_tree = row.cpus + +def prepTrace(trace, suffix_to_replace, subworkflow, keys) { + + // Extract the tree and align traces separately + def trace_subworkflow = trace.findAll { it.subworkflow == subworkflow } + + // For each row, create a new row with the necessary keys and values + trace_subworkflow.each { row -> + def newRow = [:] + + // Clean the names (remove the unnecessary suffix) + newRow.tree = row.process.replace(suffix_to_replace, "") + + def suffix = "" + if(subworkflow == "ALIGN") { + suffix = "_aligner" + specific_key = "aligner" + } else if(subworkflow == "COMPUTE_TREES") { + suffix = "_tree" + specific_key = "tree" + } + + + keys.each { key -> + + + def newKey = key + suffix + + if (key in ['id', 'name', "tree", "aligner"]) { 
+ newKey = key + } + + row[specific_key] = row.process.replace(suffix_to_replace, "") + + if ((key == 'realtime' || key == 'rss')) { + newRow[newKey] = (key == 'realtime') ? convertTime(row[key]) : convertMemory(row[key]) + }else if(key == "args") { + newRow[newKey+"_clean"] = row.args + }else { + newRow[newKey] = row[key] + } + } + + row.clear() + row.putAll(newRow) } - return traceTrees + return trace_subworkflow } -// Prepare align traces -def prepAlignTrace(trace) { - def traceAlign = trace.findAll { it.subworkflow == "ALIGN" } - traceAlign.each { row -> - row.args_aligner = row.args - row.aligner = row.process.replace("_ALIGN", "") - row.time_align = convertTime(row.realtime) - row.memory_align = convertMemory(row.rss) - row.cpus_align = row.cpus + + +/** + * Processes the latest trace file in the specified directory based on the given pattern. + * + * This function identifies and parses the latest trace file, filters lines related to evaluation, + * and converts the trace data into CSV format. + * + * @param traceDirPath The path to the directory containing trace files. + * @param filePattern The pattern to identify the trace files. + * @return The parsed CSV data from the trace file. 
+ */ +def latesTraceFileToCSV(String traceDirPath, String filePattern) { + // Identify and parse the latest trace file based on the given pattern + def traceFile = new File(traceDirPath).listFiles().findAll { it.name.startsWith(filePattern) }.sort { -it.lastModified() }.take(1)[0] + + // Keep only the lines that report running times related to evaluation + def header = traceFile.readLines()[0].replaceAll("\t", ",") + def traceFileAlign = traceFile.readLines().findAll { it.contains("COMPLETED") && it.contains("MULTIPLESEQUENCEALIGN:ALIGN") }.collect { it.replaceAll("\t", ",") }.join("\n") + def trace = header + "\n" + traceFileAlign + + // Parse the trace data into CSV format + def traceCsv = parseCsv(trace) + + return traceCsv +} + + +def keepKeysFromIterator(iterator, keysToKeep) { + def modifiedData = iterator.collect { row -> + def mutableRow = row.toMap().findAll { key, value -> + keysToKeep.contains(key) + } + return mutableRow } - return traceAlign + // convert each row back to a Map + modifiedData = modifiedData.collect { it as Map } + return modifiedData } -def merge_summary_and_traces(summary_file, trace_dir_path, outFileName){ - // Read the summary file with the scientific evaluation - def data = new File(summary_file).readLines() +def mergeListsById(list1, list2, idKey) { - // Identify and parse the latest trace file - def trace_file = new File("${trace_dir_path}").listFiles().findAll { it.name.startsWith("execution_trace") }.sort { -it.lastModified() }.take(1)[0] + def map1 = list1.collectEntries { [(it[idKey]): it] } - // Keep only the lines that report running times related to evaluation - def header = trace_file.readLines()[0].replaceAll("\t", ",") - def trace_file_align = trace_file.readLines().findAll { it.contains("CACHED") && it.contains("MULTIPLESEQUENCEALIGN:ALIGN") }.collect { it.replaceAll("\t", ",") }.join("\n") - def trace = header + "\n" + trace_file_align - def trace_csv = parseCsv(trace) + def mergedList = list2.collect { row -> + def id =
row[idKey] + def mergedRow = map1.containsKey(id) ? map1[id] + row : row + return mergedRow + } + + // convert back to iterator + return mergedList +} + +/** + * Processes the latest execution and CO2-footprint trace files in the specified directory. + * + * This function identifies and parses the latest "execution_trace" and "co2footprint_trace" files, filters lines related to evaluation, merges them on the "name" column, + * cleans the trace data, and extracts tree and alignment traces separately. + * + * @param traceDirPath The path to the directory containing trace files. + * Note: the file-name prefixes ("execution_trace", "co2footprint_trace") are fixed inside the function, not passed as a parameter. + * @return A map containing the tree traces and alignment traces. + */ +def processLatestTraceFile(String traceDirPath) { + + def traceCsv = latesTraceFileToCSV(traceDirPath, "execution_trace") + def co2Csv = latesTraceFileToCSV(traceDirPath, "co2footprint_trace") + + co2Csv = keepKeysFromIterator(co2Csv, ["name", "energy_consumption", "CO2e", "powerdraw_cpu", "cpu_model", "requested_memory"]) + trace_co2_csv = mergeListsById(traceCsv.collect { it.toMap() }, co2Csv, "name") + + keys = ["id","name", "args", "tree", "aligner", "realtime", "%cpu", "rss", "peak_rss", "vmem", "peak_mem", "rchar", "wchar", "cpus", "energy_consumption", "CO2e", "powerdraw_cpu", "cpu_model", "requested_memory"] + // Extract the information from the tag + def cleanTraceData = cleanTrace(trace_co2_csv) - def cleanTraceData = cleanTrace(trace_csv) - def traceTrees = prepTreeTrace(cleanTraceData) - def traceAlign = prepAlignTrace(cleanTraceData) + // Extract the tree and align traces separately + def traceTrees = prepTrace(cleanTraceData, suffix_to_replace = "_GUIDETREE", subworkflow = "COMPUTE_TREES", keys) + def traceAlign = prepTrace(cleanTraceData, suffix_to_replace = "_ALIGN", subworkflow = "ALIGN", keys) + + // Return the extracted traces as a map + return [traceTrees: traceTrees, traceAlign: traceAlign] +} + + + +def merge_summary_and_traces(summary_file, trace_dir_path, outFileName){ + + // ------------------- + // TRACE
FILE + // ------------------- + + // 1. Identify and parse the latest trace file + // 2. Clean the trace (only completed tasks, keep only needed columns) + // 3. Extract tree and align traces separately + def trace_file = processLatestTraceFile(trace_dir_path) + + // ------------------- + // SUMMARY FILE + // ------------------- + + // Parse the summary data (scientific accuracy file: SP, TC etc.) + def data = parseCsv(new File(summary_file).readLines().collect { it.replaceAll("\t", ",") }.join("\n")) + data = data.collect { row -> + def mutableRow = row.toMap() + return mutableRow + } + + print("##############################################################") + print("\n") + print(trace_file) + print("\n\n") + // Merge the summary data with the trace data def mergedData = [] data.each { row -> - def treeMatch = traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree == row.args_tree } - def alignMatch = traceAlign.find { it.id == row.id && it.aligner == row.aligner && it.args_aligner == row.args_aligner } + + + print("##### matching -----------------------------------------------------------") + print("\n") + print(row) + print("\n") + def treeMatch = trace_file.traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree_clean == row.args_tree_clean } + print(treeMatch) + print("\n") + def alignMatch = trace_file.traceAlign.find { it.id == row.id && it.aligner == row.aligner && it.args_aligner_clean == row.args_aligner_clean } + print(alignMatch) + print("\n") def mergedRow = row + (treeMatch ?: [:]) + (alignMatch ?: [:]) mergedData << mergedRow } - new File(outFileName).withWriter { writer -> writer.write(mergedData as String) } + + // Save the merged data to a file + saveMapToCsv(mergedData, outFileName) } -outdir = "/home/luisasantus/Desktop/multiplesequencealign/results" +outdir = "/home/luisasantus/Desktop/multiplesequencealign/res2" def summary_file = "${outdir}/summary/complete_summary_stats_eval.csv" def outFileName = 
"${outdir}/../test_merged.csv" def trace_dir_path = "${outdir}/pipeline_info/" +def co2_path = "${outdir}/pipeline_info/execution_trace_CO2" merge_summary_and_traces(summary_file, trace_dir_path, outFileName) \ No newline at end of file diff --git a/trace_trees.csv b/trace_trees.csv new file mode 100644 index 00000000..9faa0313 --- /dev/null +++ b/trace_trees.csv @@ -0,0 +1 @@ +[[task_id:8, hash:25/a00b09, native_id:3120660, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:39.2%, rss:3.1 MB, peak_rss:3.1 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:68.4 KB, wchar:832 B, cpus:2, start:2024-12-13 14:51:37.471, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE, process:CLUSTALO_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:CLUSTALO, time_tree:0.0, memory_tree:0.0031, cpus_tree:2], [task_id:10, hash:fb/43b990, native_id:3120996, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:97.0%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:70.7 KB, wchar:2.7 KB, cpus:2, start:2024-12-13 14:51:39.049, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:CLUSTALO_GUIDETREE, process:CLUSTALO_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:CLUSTALO, time_tree:0.0, memory_tree:0.003, cpus_tree:2], [task_id:12, hash:c0/beddb1, native_id:3121216, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (seatoxin-ref args: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:41.6%, rss:2.9 MB, peak_rss:2.9 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77.7 KB, wchar:3.5 KB, cpus:2, start:2024-12-13 14:51:40.326, tag:seatoxin-ref args: -gt 
upgma -medoidtree, id:seatoxin-ref, args:-gt upgma -medoidtree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:-gt upgma -medoidtree, tree:FAMSA, time_tree:0.0, memory_tree:0.0029, cpus_tree:2], [task_id:15, hash:cd/87115e, native_id:3121867, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:46.4%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77 KB, wchar:3.7 KB, cpus:2, start:2024-12-13 14:51:42.670, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:FAMSA, time_tree:0.0, memory_tree:0.003, cpus_tree:2], [task_id:14, hash:2b/424e9a, native_id:3122153, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (toxin-ref args: -gt upgma -medoidtree), status:COMPLETED, exit:0, realtime:0ms, %cpu:49.2%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:78.8 KB, wchar:3.7 KB, cpus:2, start:2024-12-13 14:51:45.916, tag:toxin-ref args: -gt upgma -medoidtree, id:toxin-ref, args:-gt upgma -medoidtree, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:-gt upgma -medoidtree, tree:FAMSA, time_tree:0.0, memory_tree:0.003, cpus_tree:2], [task_id:13, hash:64/99e6a0, native_id:3122770, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:0ms, %cpu:82.4%, rss:3 MB, peak_rss:3 MB, vmem:5.4 MB, peak_vmem:5.4 MB, rchar:77.7 KB, wchar:3.5 KB, cpus:2, start:2024-12-13 14:51:49.370, tag:seatoxin-ref, id:seatoxin-ref, args:null, 
full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:FAMSA_GUIDETREE, process:FAMSA_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:FAMSA, time_tree:0.0, memory_tree:0.003, cpus_tree:2], [task_id:11, hash:4e/56256c, native_id:3123495, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE (toxin-ref), status:COMPLETED, exit:0, realtime:1s, %cpu:61.0%, rss:6.2 MB, peak_rss:6.2 MB, vmem:8.7 MB, peak_vmem:8.7 MB, rchar:1010.5 KB, wchar:9.7 KB, cpus:2, start:2024-12-13 14:51:53.106, tag:toxin-ref, id:toxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE, process:MAFFT_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:MAFFT, time_tree:0.016666666666666666, memory_tree:0.0062, cpus_tree:2], [task_id:9, hash:bb/32bbe3, native_id:3123438, name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE (seatoxin-ref), status:COMPLETED, exit:0, realtime:1s, %cpu:70.7%, rss:7.4 MB, peak_rss:7.4 MB, vmem:9.6 MB, peak_vmem:9.6 MB, rchar:998.5 KB, wchar:4 KB, cpus:2, start:2024-12-13 14:51:52.950, tag:seatoxin-ref, id:seatoxin-ref, args:null, full_name:NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:ALIGN:COMPUTE_TREES:MAFFT_GUIDETREE, process:MAFFT_GUIDETREE, subworkflow:COMPUTE_TREES, args_tree:null, tree:MAFFT, time_tree:0.016666666666666666, memory_tree:0.0074, cpus_tree:2]] \ No newline at end of file