Skip to content

Commit

Permalink
Merge pull request #69 from RIVM-bioinformatics/dev
Browse files Browse the repository at this point in the history
minor update for installation stability
  • Loading branch information
florianzwagemaker authored Apr 4, 2024
2 parents 3c0b5b8 + 749ad4a commit 1d8b5bf
Show file tree
Hide file tree
Showing 17 changed files with 105 additions and 103 deletions.
4 changes: 3 additions & 1 deletion SARS2seq/SARS2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ def currentpath():
)

optional_args.add_argument(
"--skip-updates", action="store_true", help="Skip the update check",
"--skip-updates",
action="store_true",
help="Skip the update check",
)

if len(givenargs) < 1:
Expand Down
2 changes: 1 addition & 1 deletion SARS2seq/runconfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def set_cores(cores):

def get_max_local_mem():
avl_mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
return int(round(avl_mem_bytes / (1024.0 ** 2) - 2000, -3))
return int(round(avl_mem_bytes / (1024.0**2) - 2000, -3))


def SnakemakeConfig(conf, cpus, dryrun):
Expand Down
4 changes: 2 additions & 2 deletions SARS2seq/runreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def WriteReport(workingdir, inpath, startpath, conf, sparams, sconfig, status, t
pdf = analysis_details(
pdf, "\t\t\t\tScorpio version:", pangolin_tags["scorpio"].lstrip("v")
)
#pdf = analysis_details(
# pdf = analysis_details(
# pdf, "\t\t\t\tPangoLEARN version:", pangolin_tags["pangolearn"].lstrip("v")
#)
# )
# pdf = analysis_details(
# pdf,
# "\t\t\t\tPango-designation:",
Expand Down
3 changes: 1 addition & 2 deletions SARS2seq/workflow/envs/Alignment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- python=3.7
- minimap2==2.24
Expand Down
3 changes: 1 addition & 2 deletions SARS2seq/workflow/envs/Clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- python=3.8
- ampligone==1.2.1
Expand Down
8 changes: 4 additions & 4 deletions SARS2seq/workflow/envs/Consensus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- python=3.7
- libffi==3.3
Expand All @@ -16,5 +15,6 @@ dependencies:
- tqdm=4.62
- pip
- pip:
- gffpandas>=1.2
- git+https://github.com/RIVM-bioinformatics/TrueConsense.git@v0.5.0
# This is temporary until gffpandas releases a patched version on conda/PyPI,
# or until the required function is ported from TrueConsense to AminoExtract.
- git+https://github.com/florianzwagemaker/gffpandas.git@main
- git+https://github.com/RIVM-bioinformatics/TrueConsense.git@v0.5.1
3 changes: 1 addition & 2 deletions SARS2seq/workflow/envs/Mutations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- bcftools==1.14
3 changes: 1 addition & 2 deletions SARS2seq/workflow/envs/Typing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- python=3.7
- libffi==3.3
Expand Down
5 changes: 4 additions & 1 deletion SARS2seq/workflow/scripts/Subtypingpicker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
)

arg.add_argument(
"--boc", type=str, metavar="File", required=False,
"--boc",
type=str,
metavar="File",
required=False,
)

arg.add_argument(
Expand Down
135 changes: 65 additions & 70 deletions SARS2seq/workflow/scripts/amplicon_covs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"--primers",
metavar="File",
type=str,
help="input file with primers as given by AmpliGone",
help="input BED file with primers as given by AmpliGone",
required=True,
)

Expand All @@ -35,24 +35,32 @@


def split_frames(df):
left = ["LEFT", "PLUS", "POSITIVE"]
right = ["RIGHT", "MINUS", "NEGATIVE"]
left = ["LEFT", "PLUS", "POSITIVE", "FORWARD"]
right = ["RIGHT", "MINUS", "NEGATIVE", "REVERSE"]

leftdf = pd.DataFrame(columns=df.columns)
rightdf = pd.DataFrame(columns=df.columns)

for x in df.itertuples():
if any(l in x[1] for l in left) is True:
leftdf = leftdf.append(
pd.DataFrame(
{"name": x.name, "start": x.start, "end": x.end}, index=[0]
)
if any(l in x[1] for l in left):
leftdf = pd.concat(
[
leftdf,
pd.DataFrame(
{"name": x.name, "start": x.start, "end": x.end}, index=[0]
),
],
ignore_index=True,
)
if any(r in x[1] for r in right) is True:
rightdf = rightdf.append(
pd.DataFrame(
{"name": x.name, "start": x.start, "end": x.end}, index=[0]
)
if any(r in x[1] for r in right):
rightdf = pd.concat(
[
rightdf,
pd.DataFrame(
{"name": x.name, "start": x.start, "end": x.end}, index=[0]
),
],
ignore_index=True,
)

leftdf.reset_index(inplace=True)
Expand All @@ -65,7 +73,16 @@ def split_frames(df):


def remove_keyword(prname):
keywords = ["LEFT", "RIGHT", "PLUS", "MINUS", "POSITIVE", "NEGATIVE"]
keywords = [
"LEFT",
"RIGHT",
"PLUS",
"MINUS",
"POSITIVE",
"NEGATIVE",
"FORWARD",
"REVERSE",
]
sname = prname.split("_")
for y, z in enumerate(sname):
if z in keywords:
Expand Down Expand Up @@ -108,29 +125,25 @@ def index_to_remove_starts(one, indexone, two, indextwo):
def remove_alt_primer_l(df):
xx = df.to_dict(orient="records")
to_rm = []
lastindex = list(enumerate(xx))[-1][0]
lastindex = list(enumerate(xx))[-1][0] if xx else -1
for a, x in enumerate(xx):
if a != lastindex:
if xx[a].get("name") == xx[a + 1].get("name"):
rm_indx = index_to_remove_ends(xx[a], a, xx[a + 1], a + 1)
if rm_indx is not None:
to_rm.append(rm_indx)
filtereddf = df.drop(to_rm)
return filtereddf
if a != lastindex and xx[a].get("name") == xx[a + 1].get("name"):
rm_indx = index_to_remove_ends(xx[a], a, xx[a + 1], a + 1)
if rm_indx is not None:
to_rm.append(rm_indx)
return df.drop(to_rm)


def remove_alt_primer_r(df):
xx = df.to_dict(orient="records")
to_rm = []
lastindex = list(enumerate(xx))[-1][0]
lastindex = list(enumerate(xx))[-1][0] if xx else -1
for a, x in enumerate(xx):
if a != lastindex:
if xx[a].get("name") == xx[a + 1].get("name"):
rm_indx = index_to_remove_starts(xx[a], a, xx[a + 1], a + 1)
if rm_indx is not None:
to_rm.append(rm_indx)
filtereddf = df.drop(to_rm)
return filtereddf
if a != lastindex and xx[a].get("name") == xx[a + 1].get("name"):
rm_indx = index_to_remove_starts(xx[a], a, xx[a + 1], a + 1)
if rm_indx is not None:
to_rm.append(rm_indx)
return df.drop(to_rm)


def Find_NonOverlap(df):
Expand All @@ -141,23 +154,12 @@ def Find_NonOverlap(df):
firstindex = list(enumerate(dd))[0][0]
for x, v in enumerate(dd):
t_end = v.get("rightstart")
if x != firstindex:
s = dd[x - 1].get("rightstart")
s = dd[x - 1].get("rightstart") if x != firstindex else v.get("leftend")
end_override = dd[x + 1].get("leftend") if x != lastindex else None
primerstart = s
if end_override is not None and end_override in range(primerstart, t_end):
primerend = end_override
else:
s = v.get("leftend")
if x != lastindex:
end_override = dd[x + 1].get("leftend")
else:
end_override = None
if end_override is not None:
if end_override in range(s, t_end):
primerstart = s
primerend = end_override
else:
primerstart = s
primerend = t_end
else:
primerstart = s
primerend = t_end
startingpoint[primerstart] = v.get("name")
endingpoint[primerend] = v.get("name")
Expand Down Expand Up @@ -187,14 +189,11 @@ def Average_cov(primers, covs):
primd = primers.to_dict(orient="records")
averages = {}

for x, v in enumerate(primd):
localcov = []

for v in primd:
prstart = v.get("unique_start")
prend = v.get("unique_end")
pr_range = list(range(prstart, prend))
for i in pr_range:
localcov.append(covd[i].get("cov"))
localcov = [covd[i].get("cov") for i in pr_range]
averages[avg(localcov)] = v.get("name")

avgdf = (
Expand All @@ -207,30 +206,23 @@ def Average_cov(primers, covs):
return primers


def pad_name(name):
    """Zero-pad the last underscore-separated segment of *name* to width 3.

    e.g. "MeV_1" -> "MeV_001", "MeV_19" -> "MeV_019". A name without an
    underscore is padded as a whole (no-op once it is 3+ characters long).
    """
    parts = name.split("_")
    parts[-1] = parts[-1].zfill(3)
    return "_".join(parts)


if __name__ == "__main__":
covs = pd.read_csv(
flags.coverages, sep="\t", names=["position", "cov"], index_col="position"
)

try:
primer_df = pd.read_csv(
flags.primers,
sep="\t",
comment="#",
usecols=range(6),
header=None,
names=["ref", "start", "end", "name", "score", "strand"],
dtype=dict(
ref=str,
start="Int64",
end="Int64",
name=str,
score=str,
strand=str,
),
)
prims = primer_df[["name", "start", "end"]]
prims = prims.sort_values(by="start")
prims = pd.read_csv(
flags.primers, sep="\t", usecols=[1, 2, 3], names=["start", "end", "name"]
)[["name", "start", "end"]]
prims = prims.sort_values(by="start").reindex()

except Exception:
print("Error reading primers file")
with open(flags.output, "w") as f:
Expand All @@ -241,7 +233,7 @@ def Average_cov(primers, covs):
)
sys.exit()

if len(prims) <= 0:
if len(prims) <= 1:
print("Primers file is empty, writing output and exiting...")
with open(flags.output, "w") as f:
f.write(
Expand Down Expand Up @@ -285,6 +277,9 @@ def Average_cov(primers, covs):
]
).rename(columns={"avg_cov": flags.key})

    # Ensure the numeric suffix of values in the "name" column (e.g. "MeV_1",
    # "MeV_19") is zero-padded to three digits (e.g. "MeV_001", "MeV_019").
with_average["name"] = with_average["name"].apply(pad_name)

with_average = with_average.transpose()

with_average.to_csv(flags.output, sep=",", index=True, header=False)
6 changes: 5 additions & 1 deletion SARS2seq/workflow/scripts/concat_amplicon_covs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
)

args.add_argument(
"--output", type=str, metavar="File", help="Output file name", required=True,
"--output",
type=str,
metavar="File",
help="Output file name",
required=True,
)

flags = args.parse_args()
Expand Down
6 changes: 5 additions & 1 deletion SARS2seq/workflow/scripts/index_rawalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ def checkbam(fname):
args = argparse.ArgumentParser()

args.add_argument(
"-i", "--input", type=checkbam, required=True, help="Input bam file",
"-i",
"--input",
type=checkbam,
required=True,
help="Input bam file",
)
args.add_argument(
"-r", "--reference", type=str, required=True, help="Reference fasta file"
Expand Down
5 changes: 3 additions & 2 deletions SARS2seq/workflow/scripts/overlap_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@ def FindUniqueCoords(df):
.reset_index()
.rename(columns={0: "name", "index": "unique_end"})
)
df = pd.merge(df, startdf, on="name", how="inner")
df = pd.merge(df, enddf, on="name", how="inner")
if not startdf.empty and not enddf.empty:
df = pd.merge(df, startdf, on="name", how="inner")
df = pd.merge(df, enddf, on="name", how="inner")

return df

Expand Down
8 changes: 4 additions & 4 deletions SARS2seq/workflow/scripts/typingagg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

if len(pangolin.index) < 1:
pangolin = {
'version': [None],
'lineage': [None],
'scorpio_call': [None],
'qc_status': [None],
"version": [None],
"lineage": [None],
"scorpio_call": [None],
"qc_status": [None],
}

else:
Expand Down
9 changes: 4 additions & 5 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ channels:
- bioconda
- conda-forge
- intel
- anaconda
- defaults
- nodefaults
dependencies:
- python=3.7
- conda=4.11
- mamba
- python=3.8
# - conda=4.11
- mamba>1.0
- drmaa==0.7.9
- snakemake==7.12.1
- tabulate==0.8.10
Expand Down
Loading

0 comments on commit 1d8b5bf

Please sign in to comment.