-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSubjectIDExcelConversion
89 lines (71 loc) · 2.56 KB
/
SubjectIDExcelConversion
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import sys
import re
import xlrd
import openpyxl
if len(sys.argv) <5:
print("Usage: python %s requires input excel workbooks, end cell #, and sheet name from data book" % (sys.argv[0]))
sys.exit() #Prints reminder to user if parameters are missing
print(sys.argv[1]) #Data workbook with IDs to be converted:sys.argv[1]
print(sys.argv[2]) #Conversion workbook:sys.argv[2]
print(sys.argv[3]) #Last cell with ID to be converted from data workbook: sys.argv[3]
print(sys.argv[4]) #Sheet name containing ID's to be pulled from data workbook: sys.argv[4]
print(sys.argv[5]) #Name of column in conversion worksheet with subject IDs
data_book=xlrd.open_workbook(sys.argv[1])
convert_book=xlrd.open_workbook(sys.argv[2])
to_convert=[] #list of CRASHids needing conversion to subject ids
subj_ids=[] #subject ids (values in dictionary)
common_ids=[] #common CRASHids used for conversion (in both excel books, keys of dictionary)
ordered_subjIDs=[] #final list of subjIDs to be rewritten into data excel workbook
#PULL IDs FROM DATA NEEDING CONVERSION
sheet1=data_book.sheet_by_name(sys.argv[4])
r=sheet1.row_values(0) #
for value in r:
#print(value) testing values
pattern=re.compile("(\w{1}\d{3})\w{2}") #
match_obj=pattern.search(value)
if match_obj:
stripped_id=match_obj.group(1)
#print(stripped_id)
to_convert.append(stripped_id)
print(to_convert)
#PULL subj IDs and COMMON IDs FROM CONVERSION TABLE
sheet2=convert_book.sheets()[0]
c=sheet2.col_values(0)
c2=sheet2.col_values(1)
for value in c:
if value == sys.argv[5]:
print("Removed column header:" + sys.argv[5])
else:
subj_ids.append(int(value))
#print(subj_ids)
for value in c2:
#print(value)
pattern=re.compile("(\w{1}\d{3}).\w{1}.")
match_obj=pattern.search(value)
if match_obj:
common_id=match_obj.group(1)
common_ids.append(common_id)
#print(common_ids)
converter_dict=dict(zip(common_ids,subj_ids))
#print(converter_dict.keys())
#print(converter_dict)
#Converting CrashIDs to subjIDs
for i in to_convert:
if key == i:
# print(converter_dict[key])
ordered_subjIDs.append(converter_dict[key])
#ordered_subjIDs.insert(0, 'Gene')
print(ordered_subjIDs)
#REWRITING DATAWORKBOOK
xfile = openpyxl.load_workbook(sys.argv[1])
sheet = xfile.get_sheet_by_name(sys.argv[4])
i=1
while i<len(ordered_subjIDs):
for j in ordered_subjIDs:
sheet.cell(row=1,column=i).value=j
i+=1
filepath=(str(sys.argv[1])).split('.')[0]
#print(filepath)
xfile.save(filepath +'_out.xlsx')
print(filepath+'_out.xlsx')