Skip to content

Commit

Permalink
Merge pull request #1000 from biorack/untargeted_dev
Browse files Browse the repository at this point in the history
Untargeted dev
  • Loading branch information
bkieft-usa authored Dec 10, 2024
2 parents 3aa20c8 + 3da22d4 commit d8bdf89
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 19 deletions.
10 changes: 6 additions & 4 deletions metatlas/untargeted/run_untargeted_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
logging.info(f'Arguments used: {args}')

##### Step 1/7: Syncing LIMS and NERSC to identify new projects with raw data that are not yet in the untargeted task list
new_projects = mzm.update_new_untargeted_tasks(validate_names=args.validate_names, mzmine_batch_params=args.mzmine_batch_params, \
new_projects = mzm.update_new_untargeted_tasks(direct_input=args.direct_input,validate_names=args.validate_names, custom_mzmine_batch_params=args.custom_mzmine_batch_params, \
output_dir=args.output_dir, raw_data_dir=args.raw_data_dir, raw_data_subdir=args.raw_data_subdir, \
skip_blank_filter=args.skip_blank_filter, background_designator=args.background_designator, \
fps_files_only=args.fps_files_only, skip_sync=step_bools[0])
Expand Down Expand Up @@ -83,7 +83,7 @@ def add_arguments(parser):
parser.add_argument('--overwrite_fbmn', action='store_true', help='Overwrite existing fbmn results files that are already in the output directory')
## Step 1 only
parser.add_argument('--validate_names', action='store_true', help='Validate filenames and project names')
parser.add_argument('--mzmine_batch_params', type=str, default=None, help='Add custom mzmine batch parameters xml')
parser.add_argument('--custom_mzmine_batch_params', type=str, default=None, help='Full path to custom mzmine batch parameters xml. If using FPS only mode, supply a csv list of pos and neg parameter files')
parser.add_argument('--skip_blank_filter', action='store_true', help='Do not filter out files with "Blank" in the name from the untargeted task list')
parser.add_argument('--fps_files_only', action='store_true', help='Only FPS files will be input, so do not check for polarity in file name and use custom mzmine batch parameters')
## Step 1.5 only
Expand Down Expand Up @@ -121,10 +121,10 @@ def add_arguments(parser):

def check_args(args):
##### Check if the input arguments are valid
if args.mzmine_batch_params is not None and not os.path.exists(args.mzmine_batch_params):
if args.custom_mzmine_batch_params is not None and not os.path.exists(args.custom_mzmine_batch_params):
logging.error('Custom mzmine batch parameters file does not exist. Please check flag and path.')
sys.exit(1)
if args.fps_files_only and args.mzmine_batch_params is None:
if args.fps_files_only and args.custom_mzmine_batch_params is None:
logging.error('FPS files only flag requires custom mzmine batch parameters. Please check flags.')
sys.exit(1)
if args.direct_input:
Expand All @@ -133,6 +133,8 @@ def check_args(args):
args.background_designator = args.background_designator.split(',')
if args.skip_steps:
args.skip_steps = args.skip_steps.split(',')
if args.custom_mzmine_batch_params:
args.custom_mzmine_batch_params = args.custom_mzmine_batch_params.split(',')
if args.overwrite_drive is True and args.gdrive_upload is False:
logging.error('Incompatible flags. Cannot overwrite google drive if not uploading to google drive.')
sys.exit(1)
Expand Down
60 changes: 45 additions & 15 deletions metatlas/untargeted/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2045,7 +2045,8 @@ def update_new_untargeted_tasks(
skip_sync: bool,
output_dir: str,
raw_data_dir: str,
mzmine_batch_params: Optional[str] = None,
direct_input: Optional[str] = None,
custom_mzmine_batch_params: Optional[str] = None,
raw_data_subdir: Optional[str] = None,
skip_blank_filter: Optional[bool] = False,
fps_files_only: Optional[bool] = False
Expand Down Expand Up @@ -2112,6 +2113,8 @@ def update_new_untargeted_tasks(
logging.info(tab_print("Finding new projects to initate...", 1))
new_folders = np.setdiff1d(all_folders,folders_in_tasks)
new_folders = list(set(new_folders) & set(time_old_folders) & set(dirs_with_m2_files))
if direct_input is not None:
new_folders = [folder for folder in new_folders if folder in direct_input]
if len(new_folders) == 0:
logging.info(tab_print("No new projects to add to untargeted tasks!", 2))
return None
Expand All @@ -2138,8 +2141,8 @@ def update_new_untargeted_tasks(
lims_untargeted_table_updater['output_dir'] = output_dir
_, validate_machine_name, _ = vfn.field_exists(PurePath(project_name), field_num=6)
logging.info(tab_print("Inferred machine name: %s"%(validate_machine_name), 2))
if mzmine_batch_params is None:
if validate_machine_name is None: # Assume more lenient parameters if machine name cannot be validated
if custom_mzmine_batch_params is None: # When there is not a custom input
if validate_machine_name is None: # Assume more lenient parameters if machine name is not going to be validated
logging.warning(tab_print("Warning! Could not validate machine name. Using lenient (IQX) MZmine parameters...", 2))
mzmine_running_parameters = mzine_batch_params_file_iqx
mzmine_parameter = 5
Expand All @@ -2152,12 +2155,15 @@ def update_new_untargeted_tasks(
else: # Assume more lenient parameters if machine name cannot be validated
mzmine_running_parameters = mzine_batch_params_file_iqx
mzmine_parameter = 5
logging.info(tab_print("Using MZmine parameters: %s"%(os.path.basename(mzmine_running_parameters)), 2))
lims_untargeted_table_updater['mzmine_parameter_sheet'] = mzmine_running_parameters
lims_untargeted_table_updater['mzmine_parameter_row'] = mzmine_parameter
else:
mzmine_running_parameters = mzmine_batch_params
mzmine_running_parameters = ','.join(custom_mzmine_batch_params)
mzmine_parameter = 5
logging.info(tab_print("Using MZmine parameters: %s"%(os.path.basename(mzmine_running_parameters)), 2))
lims_untargeted_table_updater['mzmine_parameter_sheet'] = mzmine_running_parameters
lims_untargeted_table_updater['mzmine_parameter_row'] = mzmine_parameter
logging.info(tab_print("Using custom MZmine parameter file(s): %s"%(mzmine_running_parameters), 2))
lims_untargeted_table_updater['mzmine_parameter_sheet'] = mzmine_running_parameters
lims_untargeted_table_updater['mzmine_parameter_row'] = mzmine_parameter

for polarity in ['positive','negative']: # Don't initiate mzmine jobs on polarities that don't have sample mzmls
polarity_short = polarity[:3]
Expand Down Expand Up @@ -2188,14 +2194,38 @@ def update_new_untargeted_tasks(
metadata_filename = os.path.join(basepath,'%s_metadata.tab'%(parent_dir))
metadata_df.to_csv(metadata_filename, sep='\t', index=False)

logging.info(tab_print("%s MZmine parameter file (*_batch-params.xml)"%(polarity), 3))
params_filename = build_untargeted_filename(output_dir,project_name,polarity,'batch-params-mzmine')
with open(mzmine_running_parameters,'r') as fid:
orig_params = fid.read()
new_param_path = os.path.join(basepath,parent_dir)
custom_params = orig_params.replace('/Users/bpb/Downloads/mzmine_outputs',new_param_path)
with open(params_filename,'w') as fid:
fid.write('%s'%custom_params)
if custom_mzmine_batch_params is None:
logging.info(tab_print("%s MZmine parameter file (*_batch-params.xml)"%(polarity), 3))
params_filename = build_untargeted_filename(output_dir,project_name,polarity,'batch-params-mzmine')
with open(mzmine_running_parameters,'r') as fid:
orig_params = fid.read()
new_param_path = os.path.join(basepath,parent_dir)
custom_params = orig_params.replace('/Users/bpb/Downloads/mzmine_outputs',new_param_path)
with open(params_filename,'w') as fid:
fid.write('%s'%custom_params)
elif custom_mzmine_batch_params is not None and len(custom_mzmine_batch_params) == 1:
logging.info(tab_print("%s MZmine parameter file (*_batch-params.xml)"%(polarity), 3))
params_filename = build_untargeted_filename(output_dir,project_name,polarity,'batch-params-mzmine')
with open(mzmine_running_parameters,'r') as fid:
orig_params = fid.read()
new_param_path = os.path.join(basepath,parent_dir)
custom_params = orig_params.replace('/Users/bpb/Downloads/mzmine_outputs',new_param_path)
with open(params_filename,'w') as fid:
fid.write('%s'%custom_params)
else:
logging.info(tab_print("%s MZmine parameter file (CUSTOM *_batch-params.xml)"%(polarity), 3))
custom_params_list = custom_mzmine_batch_params.split(',')
for custom_param in custom_params_list:
if polarity_short.upper()+"-" in custom_param:
mzmine_running_parameters = custom_param
break
params_filename = build_untargeted_filename(output_dir,project_name,polarity,'batch-params-mzmine')
with open(mzmine_running_parameters,'r') as fid:
orig_params = fid.read()
new_param_path = os.path.join(basepath,parent_dir)
custom_params = orig_params.replace('/Users/bpb/Downloads/mzmine_outputs',new_param_path)
with open(params_filename,'w') as fid:
fid.write('%s'%custom_params)

logging.info(tab_print("%s mzML path list file (*_filelist.txt)"%(polarity), 3))
file_list = new_project_dict[polarity]['file_list']
Expand Down

0 comments on commit d8bdf89

Please sign in to comment.