diff --git a/ftpSummaryStatsScript/depo_ftp_to_staging.py b/ftpSummaryStatsScript/depo_ftp_to_staging.py
index 2b7b22c..4c83386 100644
--- a/ftpSummaryStatsScript/depo_ftp_to_staging.py
+++ b/ftpSummaryStatsScript/depo_ftp_to_staging.py
@@ -23,7 +23,8 @@
 
 # in case of latency for writing the file.
 MOD_THRESHOLD_SEC = 3600
-#MOD_THRESHOLD_SEC = 36 # DEV ONLY
+# DEV ONLY
+# MOD_THRESHOLD_SEC = 36
 
 RANGE_SIZE = 1000
 
@@ -55,24 +56,53 @@ def rm_dir(path) -> None:
     """
     logger.info(f"Removing path: {path}")
     shutil.rmtree(path=path)
-    
+
 
 def sync_files(source_dir, staging_dir):
-    dirs_to_sync = get_dirs_to_sync(source_dir)
+    """Move each GCST study directory from source_dir into staging_dir.
+
+    A directory is synced when its basename contains a GCST<digits>
+    identifier; it is rsynced into the staging sub-directory named by
+    get_gcst_range() and then removed from the source. Directories without
+    an identifier are skipped, and any failure is logged so that the
+    remaining studies are still processed.
+    """
+    try:
+        dirs_to_sync = get_dirs_to_sync(source_dir)
+    except Exception as e:
+        logger.error(f"Error getting directories to sync: {e}")
+        return
+
     logger.debug(dirs_to_sync)
     for study in dirs_to_sync:
         basename = os.path.basename(study)
         gcst_regex = re.search(r'GCST[0-9]+', basename)
         gcst = gcst_regex.group(0) if gcst_regex else None
+
         if gcst:
             logger.debug(gcst)
-            gcst_range = get_gcst_range(gcst)
-            gcst_range_dir = os.path.join(staging_dir, gcst_range)
-            dest = gcst_range_dir + "/"
-            make_dir(gcst_range_dir)
-            logger.info("Sync {} --> {}".format(study, dest))
-            subprocess.call(['rsync', '-prvh','--chmod=Du=rwx,Dg=rwx,Do=rx,Fu=rw,Fg=rw,Fo=r', study, dest])
-            rm_dir(path=study)
+            try:
+                gcst_range = get_gcst_range(gcst)
+                gcst_range_dir = os.path.join(staging_dir, gcst_range)
+                dest = gcst_range_dir + "/"
+                make_dir(gcst_range_dir)
+            except Exception as e:
+                # Report the identifier: gcst_range_dir is unbound (or stale
+                # from a previous study) when get_gcst_range() itself fails.
+                logger.error(f"Error preparing staging directory for {gcst}: {e}")
+                continue
+
+            logger.info(f"Sync {study} --> {dest}")
+            try:
+                returncode = subprocess.call(['rsync', '-prvh','--chmod=Du=rwx,Dg=rwx,Do=rx,Fu=rw,Fg=rw,Fo=r', study, dest])
+                if returncode != 0:
+                    # subprocess.call() does not raise on a non-zero exit, so
+                    # check it here and keep the source when rsync failed.
+                    logger.error(f"rsync exited with code {returncode} for {study}; source not removed")
+                    continue
+                rm_dir(path=study)
+            except Exception as e:
+                logger.error(f"Error syncing or removing {study}: {e}")
 
 
 def main():
diff --git a/ftpSummaryStatsScript/ftp_sync.py b/ftpSummaryStatsScript/ftp_sync.py
index 8192157..9591f43 100644
--- a/ftpSummaryStatsScript/ftp_sync.py
+++ b/ftpSummaryStatsScript/ftp_sync.py
@@ -165,9 +165,10 @@ def get_sumstats_status(self, get_curation_status=True):
         self.ftp_studies_dict = self._accessions_from_dirnames(self.get_ftp_contents())
         self.ftp_studies = set(self.ftp_studies_dict.keys())
 
+        # DEV ONLY
+        # self.curation_published = self.staging_studies
         if get_curation_status:
             self.curation_published = set(self.get_curation_published_list())
-            # #self.curation_published = set(api_list.RESP) # LOCAL DEVELOPING ONLY
 
         logger.info("published: {}".format(self.studies_to_release_published))
         #((studies that are published and on staging) - any that already exist on FTP) + (recently modified and published)