Merge pull request #29 from ajmaurais/pdc_client_update
PDC client update
mriffle authored Feb 26, 2025
2 parents 66a41ed + 5554bff commit ef243f7
Showing 10 changed files with 54 additions and 24 deletions.
2 changes: 1 addition & 1 deletion container_images.config
@@ -4,7 +4,7 @@ params {
     diann: 'quay.io/protio/diann:1.8.1',
     bibliospec: 'quay.io/protio/bibliospec-linux:3.0',
     panorama_client: 'quay.io/protio/panorama-client:1.1.0',
-    pdc_client: 'quay.io/mauraisa/pdc_client:0.15',
+    pdc_client: 'quay.io/mauraisa/pdc_client:2.0.1',
     encyclopedia: 'quay.io/protio/encyclopedia:2.12.30-2',
     encyclopedia3_mriffle: 'quay.io/protio/encyclopedia:3.0.0-MRIFFLE',
     qc_pipeline: 'quay.io/mauraisa/dia_qc_report:2.3.1',
5 changes: 4 additions & 1 deletion docs/source/workflow_parameters.rst
@@ -103,7 +103,7 @@ The ``params`` Section
      - If starting with raw files, this is the value used by ``msconvert`` for the ``do_simasspectra`` parameter. Default: ``true``.
    * -
      - ``msconvert.mz_shift_ppm``
-     - If starting with raw files, ``msconvert`` will shift all mz values by ``n`` ppm when converting to ``mzML``. If ``null`` the mz values are not shifed. Default: ``null``.
+     - If starting with raw files, ``msconvert`` will shift all mz values by ``n`` ppm when converting to ``mzML``. If ``null`` the mz values are not shifted. Default: ``null``.
    * -
      - ``encyclopedia.chromatogram.params``
      - If you are generating a chromatogram library for quantification, this is the command line options passed to EncyclopeDIA during the chromatogram generation step. Default: ``'-enableAdvancedOptions -v2scoring'`` If you do not wish to pass any options to EncyclopeDIA, this must be set to ``''``.
@@ -142,6 +142,9 @@ The ``params`` Section
    * -
      - ``skyline.protein_parsimony``
      - If ``true``, protein parsimony is performed in Skyline. If ``false`` the protein assignments given by the search engine are used as protein groups. Default is ``false``.
+   * -
+     - ``skyline.fasta``
+     - The fasta file to use as a background proteome in Skyline. If ``null`` the same fasta file (``params.fasta``) used for the DIA search is used. Default is ``null``.
    * -
      - ``skyline.group_by_gene``
      - If ``true``, when protein parsimony is performed in Skyline protein groups are formed by gene instead of by protein. Default is ``false``.
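Note: the new ``skyline.fasta`` parameter is optional. For illustration only (not part of this commit), a user config overriding it might look like the sketch below; the file paths are hypothetical.

    params {
        // FASTA used for the DIA search
        fasta = '/data/human_sp.fasta'                        // hypothetical path

        // Optional separate FASTA for the Skyline background proteome.
        // If left as null (the default), params.fasta is reused.
        skyline.fasta = '/data/background_proteome.fasta'     // hypothetical path
    }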
15 changes: 11 additions & 4 deletions main.nf
@@ -54,7 +54,7 @@ workflow {
     all_elib_ch = null // hold all elibs generated by encyclopedia
     all_diann_file_ch = null // all files generated by diann to upload
 
-    // version file channles
+    // version file channels
     encyclopedia_version = null
     diann_version = null
     proteowizard_version = null
@@ -65,6 +65,7 @@
     // check for old param variable names
     params.skyline.document_name = check_old_param_name('skyline_document_name',
                                                         'skyline.document_name')
+    skyline_document_name = params.skyline.document_name
     params.skyline.skip = check_old_param_name('skip_skyline',
                                                'skyline.skip')
     params.skyline.template_file = check_old_param_name('skyline_template_file',
@@ -100,6 +101,7 @@
         get_pdc_files()
         wide_mzml_ch = get_pdc_files.out.wide_mzml_ch
         pdc_study_name = get_pdc_files.out.study_name
+        skyline_document_name = skyline_document_name == 'final' ? pdc_study_name : skyline_document_name
     } else{
         get_wide_mzmls(params.quant_spectra_dir, params.quant_spectra_glob, aws_secret_id)
         wide_mzml_ch = get_wide_mzmls.out.mzml_ch
@@ -156,6 +158,7 @@
         replicate_metadata = get_input_files.out.replicate_metadata
     }
     fasta = get_input_files.out.fasta
+    skyline_fasta = get_input_files.out.skyline_fasta
     skyline_template_zipfile = get_input_files.out.skyline_template_zipfile
     skyr_file_ch = get_input_files.out.skyr_files
 
@@ -366,10 +369,11 @@ workflow {
     if(skyline_template_zipfile != null) {
         skyline_import(
             skyline_template_zipfile,
-            fasta,
+            skyline_fasta,
             final_elib,
             wide_mzml_ch,
-            replicate_metadata
+            replicate_metadata,
+            skyline_document_name
         )
         proteowizard_version = skyline_import.out.proteowizard_version
     }
@@ -433,13 +437,15 @@
                                 dia_qc_version).splitText()
 
     input_files = fasta.map{ it -> ['Fasta file', it.name] }.concat(
+        fasta.map{ it -> ['Skyline fasta file', it.name] },
         spectral_library.map{ it -> ['Spectra library', it.baseName] },
         all_mzml_ch.map{ it -> ['Spectra file', it.baseName] })
 
     save_run_details(input_files.collect(), version_files.collect())
     run_details_file = save_run_details.out.run_details
 
-    combine_file_hashes(fasta, spectral_library,
+    fasta_files = fasta.concat(skyline_fasta).unique()
+    combine_file_hashes(fasta_files, spectral_library,
                         search_file_stats,
                         final_skyline_file,
                         final_skyline_hash,
@@ -486,6 +492,7 @@ def is_panorama_authentication_required() {
 
     return params.panorama.upload ||
        (params.fasta && panorama_auth_required_for_url(params.fasta)) ||
+       (params.skyline.fasta && panorama_auth_required_for_url(params.skyline.fasta)) ||
       (params.spectral_library && panorama_auth_required_for_url(params.spectral_library)) ||
       (params.replicate_metadata && panorama_auth_required_for_url(params.replicate_metadata)) ||
       (params.skyline.template_file && panorama_auth_required_for_url(params.skyline.template_file)) ||
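For clarity (not part of the diff), the document-name fallback added above behaves as in this minimal Groovy sketch: when ``skyline.document_name`` is left at its default (``'final'`` in this pipeline) and spectra come from a PDC study, the Skyline document is named after the study. The study name below is hypothetical.

    def skyline_document_name = 'final'               // pipeline default for skyline.document_name
    def pdc_study_name        = 'Example_PDC_Study'   // hypothetical normalized study name

    skyline_document_name = skyline_document_name == 'final' ? pdc_study_name : skyline_document_name
    assert skyline_document_name == 'Example_PDC_Study'   // final document: Example_PDC_Study.sky.zip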
19 changes: 11 additions & 8 deletions modules/pdc.nf
@@ -15,8 +15,8 @@ process GET_STUDY_METADATA {
         val pdc_study_id
 
     output:
-        path('study_metadata.tsv'), emit: metadata
-        path('study_metadata_annotations.csv'), emit: skyline_annotations
+        path('*_flat.json'), emit: metadata
+        path('*_skyline_annotations.csv'), emit: skyline_annotations
         env(study_id), emit: study_id
         env(study_name), emit: study_name
         path('pdc_client_version.txt'), emit: version
@@ -28,7 +28,7 @@
         '''
         study_id=$(PDC_client studyID !{pdc_client_args} !{pdc_study_id} | tee study_id.txt)
         study_name=$(PDC_client studyName --normalize !{pdc_client_args} ${study_id} | tee study_name.txt)
-        PDC_client metadata !{pdc_client_args} -f tsv !{n_files_arg} --skylineAnnotations ${study_id}
+        PDC_client metadata !{pdc_client_args} --flatten -f json !{n_files_arg} --skylineAnnotations ${study_id}
         echo "pdc_client_git_repo='$GIT_REPO - $GIT_BRANCH [$GIT_SHORT_HASH]'" > pdc_client_version.txt
         '''
@@ -52,20 +52,23 @@ process METADATA_TO_SKY_ANNOTATIONS {
 
 process GET_FILE {
     storeDir "${params.panorama_cache_directory}"
-    label 'process_low_constant'
+    cpus 1
+    memory 8.GB
+    time 2.h
+    maxForks 10
+    errorStrategy { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
+    maxRetries 3
     container params.images.pdc_client
-    errorStrategy 'retry'
-    maxRetries 1
 
     input:
-        tuple val(url), val(file_name), val(md5)
+        tuple val(url), val(file_name), val(md5), val(file_size)
 
     output:
        path(file_name), emit: downloaded_file
 
     shell:
        '''
-       PDC_client file -o '!{file_name}' -m '!{md5}' '!{url}'
+       PDC_client file -o '!{file_name}' --size '!{file_size}' --md5sum '!{md5}' --url '!{url}'
       '''
 
    stub:
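A side note on the new retry directive (not part of the diff): the ``errorStrategy`` closure sleeps for ``2^attempt * 200`` ms before each retry, so with ``maxRetries 3`` the delays are roughly 400 ms, 800 ms and 1600 ms, as this small Groovy sketch shows.

    // Same backoff arithmetic as the errorStrategy closure above
    (1..3).each { attempt ->
        long delayMs = Math.pow(2, attempt) * 200 as long
        println "retry ${attempt}: wait ${delayMs} ms"   // prints 400, 800, 1600
    }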
9 changes: 5 additions & 4 deletions modules/skyline.nf
@@ -134,9 +134,10 @@ process SKYLINE_MERGE_RESULTS {
         path skyd_files
         val mzml_files
         path fasta
+        val skyline_document_name
 
     output:
-        path("${params.skyline.document_name}.sky.zip"), emit: final_skyline_zipfile
+        path("*.sky.zip"), emit: final_skyline_zipfile
         path("skyline-merge.stdout"), emit: stdout
         path("skyline-merge.stderr"), emit: stderr
         path('output_file_hashes.txt'), emit: output_file_hashes
@@ -158,13 +159,13 @@
             --in="${skyline_zipfile.baseName}" --memstamp \
             ${import_files_params} \
             ${params.skyline.protein_parsimony ? protein_parsimony_args : ''} \
-            --out="${params.skyline.document_name}.sky" \
+            --out="${skyline_document_name}.sky" \
             --save \
-            --share-zip="${params.skyline.document_name}.sky.zip" \
+            --share-zip="${skyline_document_name}.sky.zip" \
             --share-type="complete" \
             > >(tee 'skyline-merge.stdout') 2> >(tee 'skyline-merge.stderr' >&2)
-        md5sum ${params.skyline.document_name}.sky.zip | sed -E 's/([a-f0-9]{32}) [ \\*](.*)/\\1\\t\\2/' > output_file_hashes.txt
+        md5sum ${skyline_document_name}.sky.zip | sed -E 's/([a-f0-9]{32}) [ \\*](.*)/\\1\\t\\2/' > output_file_hashes.txt
         """
 
     stub:
Expand Down
4 changes: 3 additions & 1 deletion nextflow.config
@@ -87,8 +87,10 @@ params {
 
     // Minimize Skyline document?
     skyline.minimize = false
+
     skyline.group_by_gene = false
     skyline.protein_parsimony = false
+    skyline.fasta = null
 
     // Whether or not to use hardlinks with Skyline
     skyline.use_hardlinks = false
@@ -215,7 +217,7 @@ manifest {
     homePage = 'https://github.com/mriffle/nf-skyline-dia-ms'
     description = 'DIA workflows for TEI-REX project'
     mainScript = 'main.nf'
-    nextflowVersion = '!>=21.10.3'
+    nextflowVersion = '!>=23.04.2'
 }
 
 // Capture exit codes from upstream processes when piping
4 changes: 2 additions & 2 deletions workflows/combine_file_hashes.nf
@@ -15,7 +15,7 @@ def get_search_file_dir() {
 
 workflow combine_file_hashes {
     take:
-        fasta
+        fasta_files
         spectral_library
 
         search_file_stats
@@ -55,7 +55,7 @@ workflow combine_file_hashes {
     ).map{ it -> tuple(it[0], it[1], it[3], it[2])}
 
     // Combine files we need to calculate the hash of into a single channel
-    file_stat_files = fasta.concat(spectral_library).map{
+    file_stat_files = fasta_files.concat(spectral_library).map{
         it -> tuple(it.name, it, params.result_dir, it.size())
     }.concat(
         skyline_reports.map{ tuple(it.name, it, params.output_directories.skyline.reports, it.size()) },
12 changes: 12 additions & 0 deletions workflows/get_input_files.nf
@@ -32,6 +32,7 @@ workflow get_input_files {
 
     emit:
         fasta
+        skyline_fasta
         spectral_library
         skyline_template_zipfile
         skyr_files
@@ -51,6 +52,17 @@
         fasta = Channel.empty()
     }
 
+    if(params.skyline.fasta){
+        if(panorama_auth_required_for_url(params.fasta)) {
+            PANORAMA_GET_FASTA(params.skyline.fasta, aws_secret_id)
+            skyline_fasta = PANORAMA_GET_FASTA.out.panorama_file
+        } else {
+            skyline_fasta = Channel.value(file(params.skyline.fasta, checkIfExists: true))
+        }
+    } else {
+        skyline_fasta = fasta
+    }
+
     if(params.spectral_library) {
         if(panorama_auth_required_for_url(params.spectral_library)) {
             PANORAMA_GET_SPECTRAL_LIBRARY(params.spectral_library, aws_secret_id)
4 changes: 2 additions & 2 deletions workflows/get_pdc_files.nf
@@ -38,8 +38,8 @@ workflow get_pdc_files {
     study_name = get_pdc_study_metadata.out.study_name
 
     metadata \
-        | splitCsv(header:true, sep:'\t') \
-        | map{row -> tuple(row.url, row.file_name, row.md5sum)} \
+        | splitJson() \
+        | map{row -> tuple(row['url'], row['file_name'], row['md5sum'], row['file_size'])} \
         | GET_FILE
 
     MSCONVERT(GET_FILE.out.downloaded_file)
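For illustration (not part of the diff): ``splitJson()`` emits one map per record of the flattened metadata file, and the ``map{}`` closure above turns it into the four-element tuple ``GET_FILE`` consumes. A hypothetical record might look like this Groovy sketch; the field values are invented.

    // Hypothetical flattened metadata record
    def row = [
        url      : 'https://example.org/raw/sample_01.raw',
        file_name: 'sample_01.raw',
        md5sum   : 'd41d8cd98f00b204e9800998ecf8427e',
        file_size: 1048576
    ]
    // Field order matches the tuple passed to GET_FILE: (url, file_name, md5sum, file_size)
    def record = [row['url'], row['file_name'], row['md5sum'], row['file_size']]
    assert record.size() == 4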
4 changes: 3 additions & 1 deletion workflows/skyline_import.nf
@@ -14,6 +14,7 @@ workflow skyline_import {
         elib
         wide_mzml_file_ch
         replicate_metadata
+        skyline_document_name
 
     emit:
         skyline_results
@@ -36,7 +37,8 @@
         skyline_zipfile,
         SKYLINE_IMPORT_MZML.out.skyd_file.collect(),
         wide_mzml_file_ch.collect(),
-        fasta
+        fasta,
+        skyline_document_name
     )
 
     if(params.replicate_metadata != null || params.pdc.study_id != null) {
