-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglacier.py
190 lines (169 loc) · 7.06 KB
/
glacier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import sys
import boto3
import hashlib
from binascii import hexlify
import os
import datetime
from math import floor, ceil
import compress
import json
import pickle
import dateutil.parser
from inventory import Inventory, File
class Glacier:
    """Convenience wrapper around one Amazon Glacier vault.

    Bundles a boto3 Glacier client and resource with the name of the vault
    being worked on, a locally cached ``Inventory`` (may be ``None`` until
    loaded), and the ids of the inventory-retrieval jobs started by this
    instance.
    """

    # Tree hashes are built from SHA-256 digests of 1 MB chunks.
    _TREE_CHUNK_SIZE = 1024 * 1024

    def __init__(self, _vaultName=None):
        """Create the boto3 handles and bind to a vault.

        Args:
            _vaultName: Name of the vault to use. When ``None`` the last
                vault yielded by ``vaults.all()`` is used.

        Raises:
            ValueError: No name was given and no vault could be listed.
        """
        self.glacier = boto3.client("glacier")
        self.res = boto3.resource("glacier")
        if _vaultName is None:
            # Get Last Vault Name
            lastName = None
            for vault in self.res.vaults.all():
                lastName = vault.name
            if lastName is None:
                # Previously this surfaced later as an AttributeError on
                # self.vaultName; fail here with an explicit reason instead.
                raise ValueError(
                    "No vault name given and no vault exists to fall back on"
                )
            self.vaultName = lastName
        else:
            self.vaultName = _vaultName
        self.vault = self.res.Vault("-", self.vaultName)
        self.inventory = None
        self.files = None
        self.active_jobs = []

    def uploadFileMultiPart(self, filename):
        """Upload ``filename`` to the vault as a multipart archive.

        The file is sent in 1 GB parts (the last part may be smaller), a
        progress percentage is printed after each part, and the upload is
        completed with the tree hash of the whole file.

        Args:
            filename: Path of the file to upload; also used as the archive
                description.
        """
        # The part size must be a megabyte (1024 KB) multiplied by a power
        # of 2, for example 1048576 (1 MB), 2097152 (2 MB), 4194304 (4 MB),
        # 8388608 (8 MB), and so on. The minimum allowable part size is 1 MB,
        # and the maximum is 4 GB (4096 MB).
        size = 1024 * 1024 * pow(2, 10)  # 2^10 = 1024 --> 1GB per part
        multipartDict = self.glacier.initiate_multipart_upload(
            vaultName=self.vaultName,
            archiveDescription=filename,
            partSize=str(size),
        )
        print("Requesting Multipart Job")
        multipart = self.res.MultipartUpload(
            "-", self.vaultName, multipartDict["uploadId"]
        )
        print("Job Id Received, Initializing Multipart Upload")
        total = os.path.getsize(filename)
        last = 0
        with open(filename, "rb") as f:
            data = f.read(size)
            while data:
                # Inclusive byte range of this part, e.g. 'bytes 0-4194303/*'.
                # len(data) rather than size so the short final part is right.
                partRange = "bytes {0}-{1}/*".format(last, last + len(data) - 1)
                # No per-part checksum is passed (it never was), so none is
                # computed here; hashing 1 GB per part only to discard the
                # result was wasted work.
                multipart.upload_part(
                    vaultName=self.vaultName, range=partRange, body=data
                )
                last += len(data)
                print("Progress:", floor(100 * last / total), "%")
                data = f.read(size)
            # TODO: compare checksum
            print("All Files Uploaded")
            print("Verifying Checksum...")
            sha256 = self.sha256tree(f)
        archive = multipart.complete(archiveSize=str(last), checksum=sha256)
        print("Upload Completed:", archive)

    def uploadDirectory(self, f):
        """Compress directory ``f`` into a dated ``.tar.gz`` and upload it.

        The archive is written to the current working directory as
        ``<basename with dots replaced by underscores>-YYYY_MM_DD.tar.gz``
        and is deliberately left on disk after the upload.

        Args:
            f: Path of the directory to upload.
        """
        name = os.path.basename(f).replace(".", "_")
        n = datetime.datetime.now()
        name = name + n.strftime("-%Y_%m_%d")
        name = name + ".tar.gz"
        print("Compressing Directory into a Temporary File")
        compress.compressDir(name, f)
        self.uploadFileMultiPart(name)
        # The temporary archive is intentionally kept (removal is disabled).
        print("File not removed:", name)
        print("Directory Uploaded Successfully")

    def uploadFile(self, filename):
        """Upload ``filename`` in a single request and record it locally.

        Args:
            filename: Path of the file to upload; also used as the archive
                description.

        Returns:
            A tuple ``(archive_id, status_text, remote_checksum,
            local_tree_hash)``. The two checksums are returned for the
            caller to compare; they are not compared here.
        """
        # Read File
        with open(filename, "rb") as f:
            data = f.read()
            comcheck = self.sha256tree(f)
        # Upload File
        t = self.glacier.upload_archive(
            vaultName=self.vaultName, archiveDescription=filename, body=data
        )
        status = t["ResponseMetadata"]["HTTPStatusCode"]
        aid = t["archiveId"]
        checksum = t["checksum"]
        txtStatus = "File Uploaded Successfully!"
        if status != 201:
            txtStatus = "Something went wrong: " + str(status)
        # Only touch the cached inventory when one is loaded. Without this
        # guard a successful upload crashed before its archive id could be
        # returned.
        if self.inventory is not None:
            newFile = File(
                {
                    "Size": os.path.getsize(filename),
                    "CreationDate": datetime.datetime.utcnow().isoformat(),
                    "ArchiveDescription": filename,
                    "ArchiveId": aid,
                    "SHA256TreeHash": checksum,
                }
            )
            newFile.isNew = True
            self.inventory.files.append(newFile)
        # TODO: Do checksum comparison
        return aid, txtStatus, checksum, comcheck

    def deleteFile(self, ffile):
        """Request deletion of an archive and flag the entry as deleted.

        Args:
            ffile: Inventory entry exposing ``aid`` (the archive id); its
                ``deleted`` attribute is set to ``True``.
        """
        print("Deleting....", ffile.aid)
        self.glacier.delete_archive(vaultName=self.vaultName, archiveId=ffile.aid)
        ffile.deleted = True
        print("Requested File to be Deleted")

    def initListFiles(self):
        """Start an inventory-retrieval job and remember its job id."""
        a = self.vault.initiate_inventory_retrieval()
        # TODO: give some kind of feedback
        self.active_jobs.append(a.job_id)

    def listJobs(self):
        """List the vault's jobs and refresh the inventory from finished ones.

        For every succeeded ``InventoryRetrieval`` job that is newer than the
        cached inventory (or when nothing is cached), the job output is
        downloaded and replaces ``self.inventory``.

        Returns:
            The ``JobList`` entry of the ``list_jobs`` response.
        """
        j = self.glacier.list_jobs(vaultName=self.vaultName)
        print(j)
        for job in j["JobList"]:
            if job["StatusCode"] != "Succeeded":
                continue
            if job["Action"] != "InventoryRetrieval":
                continue
            if (
                self.inventory is None
                or self.inventory.date < dateutil.parser.parse(job["CreationDate"])
            ):
                a = self.res.Job("-", self.vaultName, job["JobId"])
                print(job)
                print(a)
                data = a.get_output()["body"]
                # TODO: Only update inventory if needed. Dont want to lose
                # new/deleted info
                self.inventory = Inventory(json.loads(data.read().decode("utf-8")))
        return j["JobList"]

    def loadDefault(self):
        """Load the inventory from ``inventory.pkl`` or, failing that, AWS."""
        if os.path.isfile("inventory.pkl"):
            # NOTE: pickle executes arbitrary code on load; inventory.pkl
            # must only ever be a file written by closeDefault().
            with open("inventory.pkl", "rb") as fInv:
                self.inventory = pickle.load(fInv)
        else:
            self.listJobs()

    def closeDefault(self):
        """Persist the cached inventory to ``inventory.pkl`` if there is one."""
        if self.inventory is not None:
            with open("inventory.pkl", "wb") as fInv:
                pickle.dump(self.inventory, fInv)

    @staticmethod
    def _combineTreeHash(leaves):
        """Fold a list of raw SHA-256 leaf digests into a hex tree hash.

        Adjacent digests are concatenated and hashed pairwise, level by
        level; an unpaired trailing digest is carried up unchanged. An empty
        list yields the SHA-256 of the empty byte string instead of raising.
        """
        if not leaves:
            return hashlib.sha256(b"").hexdigest()
        level = leaves
        while len(level) > 1:
            parents = [
                hashlib.sha256(level[i] + level[i + 1]).digest()
                for i in range(0, len(level) - 1, 2)
            ]
            if len(level) % 2:
                parents.append(level[-1])
            level = parents
        return hexlify(level[0]).decode("ascii")

    def sha256tree(self, f):
        """Return the hex SHA-256 tree hash of a binary file object.

        The file is rewound to its start first and read in 1 MB chunks.

        Args:
            f: Seekable file object opened in binary mode.
        """
        f.seek(0, 0)
        size = self._TREE_CHUNK_SIZE  # 1 MB
        leaves = []
        data = f.read(size)
        while data:
            leaves.append(hashlib.sha256(data).digest())
            data = f.read(size)
        return self._combineTreeHash(leaves)

    def sha256treePartial(self, full_data):
        """Return the hex SHA-256 tree hash of an in-memory bytes object.

        Args:
            full_data: The bytes to hash, split into 1 MB chunks.
        """
        size = self._TREE_CHUNK_SIZE  # 1 MB
        leaves = [
            hashlib.sha256(full_data[start:start + size]).digest()
            for start in range(0, len(full_data), size)
        ]
        return self._combineTreeHash(leaves)