# AWSUtils.py

import sys
import os
import shutil
import re
import logging
import datetime
from subprocess import Popen, PIPE

import boto3
import botocore
from Bio import SeqIO

MainTempDir = "/bigdisk/tmp"
s3_resource = boto3.resource('s3')
s3_client = boto3.client('s3')

class BasicFileUtils:
    """File-staging helpers for moving data between local disk and S3."""

    @staticmethod
    def precopyDir(src, tmpdest):
        # Copy an input directory (local path or s3:// URI) into a local
        # temp directory and return that directory.
        tmpdir = MainTempDir
        if len(tmpdest) > 0:
            tmpdir = tmpdest
        if not src.startswith('s3:'):
            if not os.path.isdir(src):
                print("Input dir {} does not exist. Exiting ...".format(src))
                logging.error("Input dir {} does not exist".format(src))
                sys.exit(2)
            shutil.copytree(src, tmpdir)
        else:
            BasicFileUtils.processCommand("aws s3 sync " + src + "/ " + tmpdir + " --quiet")
        return tmpdir

    @staticmethod
    def fileExist(src):
        # Check that an s3://bucket/key object exists; exit if it does not.
        s = re.split('/', src)
        bucket = s[2]
        key = "/".join(s[3:])
        try:
            s3_resource.Object(bucket, key).load()
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                logging.error("{} does not exist".format(key))
                sys.exit(2)
            else:
                print("Connection to s3 failed")
                raise

    @staticmethod
    def precopyFile(src, tmpdest, dest):
        # Copy a single file (local path or s3:// URI) into a local temp
        # directory and return the local path.
        tmpdir = MainTempDir
        if len(tmpdest) > 0:
            tmpdir = tmpdest
        head, tail = os.path.split(src)
        otmpfile = tmpdir + "/" + tail
        if not src.startswith('s3:'):
            if not os.path.isfile(src):
                print("{} file {} does not exist. Exiting ...".format(dest, src))
                logging.error("{} file {} does not exist".format(dest, src))
                sys.exit(2)
            shutil.copyfile(src, otmpfile)
        else:
            s = re.split('/', src)
            bucket = s[2]
            key = "/".join(s[3:])
            try:
                s3_resource.Bucket(bucket).download_file(key, otmpfile)
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] == "404":
                    # A missing required input is fatal; other objects are tolerated.
                    if dest == 'input':
                        print("The object does not exist.")
                        logging.error("The object does not exist")
                        sys.exit(2)
                else:
                    raise
        return otmpfile
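
    # A hedged usage sketch (the S3 path and temp directory below are
    # hypothetical placeholders, not values defined in this module):
    #   local = BasicFileUtils.precopyFile("s3://example-bucket/inputs/genome.fa",
    #                                      "/bigdisk/tmp/run1", "input")
    # This downloads the object into the temp directory and returns the local path.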

    @staticmethod
    def postcopy(srcdir, destdir):
        # Copy results from the local work directory to the final destination
        # (local path or s3:// URI), then flush and close the log handlers.
        if not destdir.startswith("s3:"):
            if not os.path.isdir(destdir):
                os.mkdir(destdir)
            # dirs_exist_ok (Python 3.8+) lets copytree write into the
            # directory created above instead of failing.
            shutil.copytree(srcdir, destdir, symlinks=False, ignore=None, dirs_exist_ok=True)
        else:
            BasicFileUtils.processCommand("aws s3 sync {} {} --quiet".format(srcdir, destdir))
        logging.shutdown()

    @staticmethod
    def processCommand(command):
        # Run a shell command; on any failure, log the output and exit.
        try:
            p = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
            output, error = p.communicate()
            if p.returncode != 0:
                print("{} -> Failed".format(command))
                logging.error("{} \nfailed {} -> {}".format(command, output, error))
                sys.exit(2)
        except Exception as e:
            print("{} -> Failed".format(command))
            logging.error("{} \nFailed {}".format(command, e))
            sys.exit(2)
        # logging.info("Successfully ran: {}".format(command))

    @staticmethod
    def uploadDirectory(srcpath, bucketname, dstkey):
        # Recursively upload a local directory tree to s3://bucketname/dstkey/.
        for root, dirs, files in os.walk(srcpath):
            for d0 in dirs:
                BasicFileUtils.uploadDirectory(root + '/' + d0, bucketname, dstkey + '/' + d0)
            for f0 in files:
                srcname = "{}/{}".format(root, f0)
                dstname = "{}/{}".format(dstkey, f0)
                print("Copying {} to {}".format(srcname, dstname))
                s3_client.upload_file(srcname, bucketname, dstname)
            # Subdirectories are handled by the recursive calls above, so stop
            # os.walk here to avoid uploading their files a second time.
            break
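
    # A hedged usage sketch (bucket name and key prefix are hypothetical):
    #   BasicFileUtils.uploadDirectory("/bigdisk/tmp/run1/out", "example-results-bucket", "gRNAs/run1")
    # Every file under the source directory is uploaded under the given key prefix.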

    @staticmethod
    def clean(clist):
        # Remove the listed files and directories if they exist.
        for f in clist:
            if os.path.exists(f):
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.remove(f)

    # Clear FASTA files from the temp directory.
    @staticmethod
    def clearFa(src):
        for f in os.listdir(src):
            if f.endswith('.fa'):
                BasicFileUtils.processCommand("rm {}/{}".format(src, f))

    # Initialize the run log file.
    @staticmethod
    def create_log(path, curDate, var):
        log_file = "{}/gRNAs_{}_step1_{}.log".format(path, var, curDate)
        logging.basicConfig(filename=log_file,
                            level=logging.INFO,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S')
        logging.info("gRNAs Production for {} Started!".format(var))
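

# A minimal end-to-end sketch of how these helpers are typically combined.
# The bucket, paths, and sample name below are hypothetical placeholders,
# not values defined elsewhere in this module.
if __name__ == "__main__":
    curDate = datetime.datetime.now().strftime("%Y%m%d")
    workdir = MainTempDir + "/example_run"
    os.makedirs(workdir, exist_ok=True)

    # Start a run-specific log file inside the work directory.
    BasicFileUtils.create_log(workdir, curDate, "exampleSample")

    # Stage a (hypothetical) input file from S3 into the work directory.
    BasicFileUtils.fileExist("s3://example-bucket/inputs/genome.fa")
    genome = BasicFileUtils.precopyFile("s3://example-bucket/inputs/genome.fa", workdir, "input")

    # ... run the actual analysis on `genome` here ...

    # Push results back to S3, then remove the local temp data.
    BasicFileUtils.postcopy(workdir, "s3://example-bucket/results/example_run")
    BasicFileUtils.clean([workdir])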