SubjectIDExcelConversion

import os
import sys
import re
import xlrd
import openpyxl


if len(sys.argv) <5:
    print("Usage: python %s requires input excel workbooks, end cell #, and sheet name from data book" % (sys.argv[0]))
    sys.exit() #Prints reminder to user if parameters are missing

print(sys.argv[1]) #Data workbook with IDs to be converted:sys.argv[1]
print(sys.argv[2]) #Conversion workbook:sys.argv[2]
print(sys.argv[3]) #Last cell with ID to be converted from data workbook: sys.argv[3]
print(sys.argv[4]) #Sheet name containing ID's to be pulled from data workbook: sys.argv[4]
print(sys.argv[5]) #Name of column in conversion worksheet with subject IDs
data_book=xlrd.open_workbook(sys.argv[1])
convert_book=xlrd.open_workbook(sys.argv[2])

to_convert=[] #list of CRASHids needing conversion to subject ids
subj_ids=[] #subject ids (values in dictionary)
common_ids=[] #common CRASHids used for conversion (in both excel books, keys of dictionary)
ordered_subjIDs=[] #final list of subjIDs to be rewritten into data excel workbook


#PULL IDs FROM DATA NEEDING CONVERSION
sheet1=data_book.sheet_by_name(sys.argv[4])
r=sheet1.row_values(0) #

for value in r:
    #print(value) testing values
    pattern=re.compile("(\w{1}\d{3})\w{2}")  #
    match_obj=pattern.search(value)
    if match_obj:
        stripped_id=match_obj.group(1)
        #print(stripped_id)
        to_convert.append(stripped_id)
print(to_convert)

#PULL subj IDs and COMMON IDs FROM CONVERSION TABLE
sheet2=convert_book.sheets()[0]
c=sheet2.col_values(0)
c2=sheet2.col_values(1)

for value in c:
   if value == sys.argv[5]:
       print("Removed column header:" + sys.argv[5])
   else:
       subj_ids.append(int(value))
#print(subj_ids)

for value in c2:
    #print(value)
    pattern=re.compile("(\w{1}\d{3}).\w{1}.")
    match_obj=pattern.search(value)
    if match_obj:
        common_id=match_obj.group(1)
        common_ids.append(common_id)
#print(common_ids)


converter_dict=dict(zip(common_ids,subj_ids))
#print(converter_dict.keys())
#print(converter_dict)

#Converting CrashIDs to subjIDs
for i in to_convert:
        if key == i:
            # print(converter_dict[key])
            ordered_subjIDs.append(converter_dict[key])
#ordered_subjIDs.insert(0, 'Gene')
print(ordered_subjIDs)

#REWRITING DATAWORKBOOK
xfile = openpyxl.load_workbook(sys.argv[1])
sheet = xfile.get_sheet_by_name(sys.argv[4])
i=1
while i<len(ordered_subjIDs):
    for j in ordered_subjIDs:
        sheet.cell(row=1,column=i).value=j
        i+=1

filepath=(str(sys.argv[1])).split('.')[0]

#print(filepath)

xfile.save(filepath +'_out.xlsx')
print(filepath+'_out.xlsx')