Skip to content

Commit

Permalink
Merge pull request #154 from #151
Browse files Browse the repository at this point in the history
Gwas utils#151
  • Loading branch information
jdhayhurst authored Jul 21, 2023
2 parents c5b1c8a + 7abdba0 commit 6e485f1
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 22 deletions.
45 changes: 35 additions & 10 deletions harmonisationUtils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@ def insert_study(self, study: tuple) -> None:
""",
study)
self.commit()

def update_harmonisation_status(self, study_id: str, status: bool) -> None:
self.cur.execute("""
UPDATE studies
SET isHarm = ?
WHERE study = ?
""",
(status, study_id))

def update_in_progress_status(self, study_id: str, status: bool) -> None:
self.cur.execute("""
UPDATE studies
SET inProg = ?
WHERE study = ?
""",
(status, study_id))

def select_studies(self, studies: list) -> list:
sql = f"""
Expand All @@ -63,17 +79,26 @@ def select_studies(self, studies: list) -> list:
def select_by(
self,
study: Union[list, None],
harmonised_only: bool,
harmonisation_type: list,
limit: Union[int, None],
in_progress: bool,
priority: int
harmonised_only: Union[bool, None],
harmonisation_type: Union[list, None],
limit: int,
in_progress: Union[bool, None],
priority: Union[int, None]
) -> list:
args = [[harmonised_only], harmonisation_type, [in_progress], [priority]]
conditions = ["isHarm is ?",
f"harmType in ({','.join(['?']*len(harmonisation_type))})",
"inProg is ?",
f"priority <= ?"]
args = []
conditions = []
if harmonised_only is not None:
args.append([harmonised_only])
conditions.append("isHarm is ?")
if harmonisation_type is not None:
args.append(harmonisation_type)
conditions.append(f"harmType in ({','.join(['?']*len(harmonisation_type))})")
if in_progress is not None:
args.append([in_progress])
conditions.append("inProg is ?")
if priority is not None:
args.append([priority])
conditions.append(f"priority <= ?")
sql = f"""
SELECT * FROM studies
WHERE
Expand Down
52 changes: 40 additions & 12 deletions harmonisationUtils/queue_harmonisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,13 @@ def update_harmonisation_queue(self) -> None:
self.add_studies_to_queue(study_ids=[study_id],
harmonisation_type=self._harmonisation_type_from_metadata(study_id)
)
harmonised: list = [Study(study_id=study, in_progress=False, is_harmonised=True)
for study in self.fs_studies.get_harmonised()]
for study in harmonised:
self.db.update_harmonisation_status(study_id=study.study_id, status=True)
self.db.update_in_progress_status(study_id=study.study_id, status=False)
self.db.reset_last_run(timestamp=generate_datestamp())

def release_files_from_queue(
self,
studies: Union[list, None] = None,
Expand Down Expand Up @@ -214,13 +219,27 @@ def rebuild_harmonisation_queue(self) -> None:
self.db.insert_study(study=astuple(study))
self.db.reset_last_run(timestamp=generate_datestamp())


def get_studies(
self,
studies: Union[list, None] = None
) -> List[Study]:
result = self.db.select_studies(studies=studies)
studies: Union[list, None] = None,
harmonised_only: Union[bool, None] = None,
harmonisation_type: List[HarmonisationType] = [e.value for e in HarmonisationType],
limit: int = 200,
in_progress: Union[bool, None] = None,
priority: int = 3
) -> List[Study]:
result = self.db.select_by(study=studies,
harmonised_only=harmonised_only,
harmonisation_type=harmonisation_type,
limit=limit,
in_progress=in_progress,
priority=priority)
return self._db_study_to_object(result)

def get_harmonised_list(self) -> list:
return self.get_studies(harmonised_only=True)

def _get_from_db(
self,
studies: Union[list, None] = None,
Expand Down Expand Up @@ -306,7 +325,7 @@ def get_folder_contents(parent: Path, pattern: str) -> List[Path]:


def rsync(source: Path, dest: Path, pattern: str = "*") -> bool:
source_str = str(source)
source_str = str(source) + "/"
dest_str = str(dest) + "/"
try:
subprocess.run(['rsync',
Expand Down Expand Up @@ -343,6 +362,7 @@ def arg_checker(args) -> bool:
Returns:
True if arguments are ok.
"""
args_ok = True
if args.action == 'refresh':
args_ok = all([args.source_dir,
args.harmonisation_dir,
Expand All @@ -355,21 +375,20 @@ def arg_checker(args) -> bool:
args_ok = all([args.source_dir,
args.harmonisation_dir,
args.ftp_dir])
if args.action == 'status':
args_ok = all([args.study])
if args.action == 'update':
args_ok = all([args.study])
return args_ok


def main():
parser = argparse.ArgumentParser()
parser.add_argument('--action', type=str, choices=['refresh', 'release', 'add', 'rebuild', 'status', 'update'],
parser.add_argument('--action', type=str, choices=['refresh', 'release', 'rebuild', 'status', 'update', 'harmonised_list'],
help=('refresh: update the harmonisation queue with newly submitted studies; '
'release: release the next batch of files from the queue; '
'rebuild: rebuild the entire harmonisation queue based on the files on the file system; '
'status: get the status of a list of studies; '
'update: add/update a list of studies'
'update: add/update a list of studies; '
'harmonised_list: list of harmonised studies'
),
required=True)
parser.add_argument('--study', nargs='*', help='Specific study accession ids.', default=None)
Expand All @@ -393,7 +412,14 @@ def main():
if args.action == 'rebuild':
queuer.rebuild_harmonisation_queue()
if args.action == 'status':
studies = queuer.get_studies(studies=args.study)
if args.study:
studies = queuer.get_studies(studies=args.study)
else:
studies = queuer.get_studies(priority=args.priority,
harmonisation_type=[args.harmonisation_type],
harmonised_only=args.is_harmonised,
in_progress=args.in_progress,
limit=args.number)
for study in studies:
print(study)
if args.action == 'update':
Expand All @@ -413,8 +439,10 @@ def main():
limit=args.number)
if args.action == 'refresh':
queuer.update_harmonisation_queue()


if args.action == 'harmonised_list':
hl = queuer.get_harmonised_list()
for study in hl:
print(study)


if __name__ == '__main__':
Expand Down

0 comments on commit 6e485f1

Please sign in to comment.