-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_satc_format.py
47 lines (37 loc) · 1.3 KB
/
convert_satc_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
convert_satc_format.py
Daniel Cotter -- 02/16/24
Convert satc dump files from the old format (count, anchor+target, sample)
to the new format (sample, anchor, target, count)
"""
import argparse
import csv
# Parse commandline arguments
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line arguments of the converter.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads them from ``sys.argv``,
            which is what the script entry point relies on. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        A namespace with ``input`` and ``output`` (file paths, ``str``) and
        ``anchor`` (anchor length, ``int``, 27 unless ``-k``/``--anchor`` is
        given).
    """
    parser = argparse.ArgumentParser(
        description="Convert satc dump files to new format"
    )
    parser.add_argument("input", help="Input file", type=str)
    parser.add_argument("output", help="Output file", type=str)
    parser.add_argument(
        "-k", "--anchor", help="Anchor length", default=27, type=int
    )
    return parser.parse_args(argv)
# Process each line of the input file and write to the output file
def process_file(input_file: str, output_file: str, k: int) -> None:
    """Convert an old-format satc dump into the new tab-separated layout.

    Each input line is split on whitespace and read as ``count``,
    ``anchor+target`` and ``sample``; any further fields are ignored. The
    merged sequence is cut after its first ``k`` characters into anchor and
    target, and the row ``sample, anchor, target, count`` is written to the
    output. Blank lines are skipped.

    Args:
        input_file: Path of the old-format dump to read.
        output_file: Path of the file to write; opened in ``"w"`` mode, so
            existing content is replaced.
        k: Number of leading characters of the merged sequence that form
            the anchor; the remainder is the target.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    # newline="" hands line-ending control to the csv module, as its
    # documentation requires; otherwise platforms that translate "\n" would
    # turn the writer's "\r\n" terminator into "\r\r\n".
    with open(input_file, "r") as src, open(output_file, "w", newline="") as dst:
        writer = csv.writer(dst, delimiter="\t")
        for raw_line in src:
            fields = raw_line.split()
            if not fields:
                # A blank line (e.g. a trailing newline) carries no record.
                continue
            count, merged, sample = fields[0], fields[1], fields[2]
            writer.writerow([sample, merged[:k], merged[k:], count])
# Main function
def main():
    """Script entry point: read the command-line options and convert the file."""
    options = parse_args()
    process_file(
        input_file=options.input,
        output_file=options.output,
        k=options.anchor,
    )
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()