# downlodProjS3.py
# version 1.00
import boto3
import os
import argparse
# Command-line interface: a single option selects which project's latest run
# is transferred.
parser = argparse.ArgumentParser(
    description='Transfer latest run from S3 bucket'
)
parser.add_argument(
    "-p",
    "--project",
    type=str,
    nargs="?",
    # A bare "-p" (flag given without a value) falls back to the default
    # project instead of producing None.
    const="virus",
    default="virus",
    # Reject unsupported names here, with a usage message, instead of letting
    # them fail later inside the per-project helpers.
    choices=("virus", "ICMC"),
    help="project name, virus or ICMC",
)
# Parse the command line; argparse exits with a usage error on invalid input.
args = parser.parse_args()
project = args.project
# S3
s3 = boto3.resource('s3')
# Read the object 'download.me' from the 'transfer-files-emory' bucket and
# decode its body as UTF-8 text.
obj = s3.Object('transfer-files-emory', 'download.me')
# Strip surrounding whitespace so that a trailing newline in the object body
# cannot end up inside the last path component.
file = obj.get()['Body'].read().decode('utf-8').strip()
# Split the S3 path into its '/'-separated components.
file_split = file.split('/')
def splitPath(project, path_parts=None):
    """Return the project name and run name taken from the split S3 path.

    Args:
        project: Either 'virus' or 'ICMC'. It decides which path components
            are used: components 3 and 4 for 'virus', 4 and 5 for 'ICMC'.
        path_parts: Optional sequence of path components. When omitted, the
            module-level ``file_split`` list is used.

    Returns:
        A ``(project_name, run_name)`` tuple.

    Raises:
        ValueError: If ``project`` is not one of the supported names.
        IndexError: If the path has fewer components than required.
    """
    if path_parts is None:
        path_parts = file_split

    # Position of the project-name component for each supported project; the
    # run name is always the component right after it.
    name_index = {'virus': 3, 'ICMC': 4}
    if project not in name_index:
        raise ValueError(
            "unknown project %r; expected 'virus' or 'ICMC'" % (project,)
        )

    start = name_index[project]
    project_name = path_parts[start]
    run_namae = path_parts[start + 1]
    return project_name, run_namae
# Extract the project and run names for the selected project, then echo both,
# one per line.
project_name, run_namae = splitPath(project=project)
print(project_name, run_namae, sep='\n')
# select output directory where to put files
def project_out_dir(project):
    """Return the local directory under which downloaded files are placed.

    Args:
        project: Either 'virus' or 'ICMC'.

    Returns:
        The output directory path for that project, as a string ending in '/'.

    Raises:
        ValueError: If ``project`` is not one of the supported names.
    """
    out_dirs = {
        'virus': 'C:/Users/Administrator/OneDrive - Emory University/virus/output/',
        'ICMC': 'C:/Users/Administrator/OneDrive - Emory University/',
    }
    if project not in out_dirs:
        # Fail with a clear message instead of an unbound-local error.
        raise ValueError(
            "unknown project %r; expected 'virus' or 'ICMC'" % (project,)
        )
    return out_dirs[project]
# Local root directory for the selected project; echoed so the destination is
# visible in the console output.
run_path = project_out_dir(project=project)
print(run_path)
# create directory and download
def downloadDirectoryFroms3(bucketName, remoteDirectoryName):
    """Download every object under a key prefix into the local run directory.

    Each object's key is appended to the module-level ``run_path`` to form the
    local file path. Missing parent directories are created, and each local
    path is printed before it is processed.

    Args:
        bucketName: Name of the S3 bucket to read from.
        remoteDirectoryName: Key prefix selecting the objects to download.
    """
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket(bucketName)
    for obj in bucket.objects.filter(Prefix=remoteDirectoryName):
        out_path = run_path + obj.key
        print(out_path)
        # exist_ok=True replaces the separate exists() check, which could
        # race with another process creating the same directory.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        if obj.key.endswith('/'):
            # A key ending in '/' names a directory rather than a file (such
            # keys are commonly folder placeholders). The directory was just
            # created above, so there is nothing to download.
            continue
        bucket.download_file(obj.key, out_path)
def getRemoteDirName(project):
    """Return the S3 key prefix to download for the given project.

    Reads the module-level ``project_name`` and ``run_namae`` values.

    Args:
        project: Either 'ICMC' or 'virus'.

    Returns:
        For 'ICMC', the string 'ICMC/<project_name>/<run_namae>/custom_output';
        for 'virus', ``project_name`` itself.

    Raises:
        ValueError: If ``project`` is not one of the supported names.
    """
    if project == 'ICMC':
        return 'ICMC/%s/%s/custom_output' % (project_name, run_namae)
    if project == 'virus':
        return project_name
    # Fail with a clear message instead of an unbound-local error.
    raise ValueError(
        "unknown project %r; expected 'virus' or 'ICMC'" % (project,)
    )
# Work out which key prefix to fetch for the selected project.
remDir = getRemoteDirName(project=project)
# Download everything under that prefix from the transfer bucket.
downloadDirectoryFroms3(
    bucketName='transfer-files-emory',
    remoteDirectoryName=remDir,
)