Skip to content

Commit 3afce4e

Browse files
committed
Updates per comments on PR
1 parent fecb31f commit 3afce4e

6 files changed

Lines changed: 71 additions & 34 deletions

File tree

examples/download_asset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ def benchmark(asset_id):
66
token = os.getenv("FRAMEIO_TOKEN")
77
client = FrameioClient(token)
88
asset_info = client.get_asset(asset_id)
9-
accelerated_filename = client.download(asset_info, "downloads", prefix="accelerated_", acceleration=True, concurrency=20)
9+
accelerated_filename = client.download(asset_info, "downloads", prefix="accelerated_", multi_part=True, concurrency=20)
1010

1111
# print("Normal speed: {}, Accelerated speed: {}".format(normal_speed, accelerated_speed))
1212

frameioclient/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def upload(self, asset, file):
401401
uploader = FrameioUploader(asset, file)
402402
uploader.upload()
403403

404-
def download(self, asset, download_folder, prefix=None, acceleration=False, concurrency=5):
404+
def download(self, asset, download_folder, prefix=None, multi_part=False, concurrency=5):
405405
"""
406406
Download an asset. The method will exit once the file is downloaded.
407407
@@ -413,7 +413,7 @@ def download(self, asset, download_folder, prefix=None, acceleration=False, conc
413413
414414
client.download(asset, "~./Downloads")
415415
"""
416-
downloader = FrameioDownloader(asset, download_folder, prefix, acceleration, concurrency)
416+
downloader = FrameioDownloader(asset, download_folder, prefix, multi_part, concurrency)
417417
return downloader.download_handler()
418418

419419
def get_comment(self, comment_id, **kwargs):

frameioclient/download.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,36 @@
66
import threading
77
import concurrent.futures
88

9-
from .utils import format_bytes
10-
from .exceptions import DownloadException, WatermarkIDDownloadException
9+
from .utils import format_bytes, normalize_filename
10+
from .exceptions import DownloadException, WatermarkIDDownloadException, AssetNotFullyUploaded
1111

1212
thread_local = threading.local()
1313

1414
class FrameioDownloader(object):
15-
def __init__(self, asset, download_folder, prefix, acceleration=False, concurrency=5):
16-
self.acceleration = acceleration
15+
def __init__(self, asset, download_folder, prefix, multi_part=False, concurrency=5):
16+
self.multi_part = multi_part
1717
self.asset = asset
18+
self.asset_type = None
1819
self.download_folder = download_folder
1920
self.resolution_map = dict()
2021
self.destination = None
2122
self.watermarked = False
22-
self.file_size = asset['filesize']
23+
self.file_size = asset["filesize"]
2324
self.concurrency = concurrency
2425
self.futures = list()
25-
self.chunk_size = (52428800 / 2) # 25 MB chunk size or so
26-
self.chunks = math.floor(self.file_size/self.chunk_size)
26+
self.chunk_size = (25 * 1024 * 1024) # 25 MB chunk size
27+
self.chunks = math.ceil(self.file_size/self.chunk_size)
2728
self.prefix = prefix
28-
self.filename = asset['name']
29+
self.filename = normalize_filename(asset["name"])
30+
31+
self._evaluate_asset()
32+
33+
def _evaluate_asset(self):
34+
if self.asset.get("_type") != "file":
35+
raise DownloadException(message="Unsupport Asset type: {}".format(self.asset.get("_type")))
36+
37+
if self.asset.get("upload_completed_at") == None:
38+
raise AssetNotFullyUploaded
2939

3040
def _get_session(self):
3141
if not hasattr(thread_local, "session"):
@@ -35,11 +45,11 @@ def _get_session(self):
3545
def _create_file_stub(self):
3646
try:
3747
fp = open(self.destination, "wb")
38-
fp.write(b'\0' * self.file_size)
48+
fp.write(b"\0" * self.file_size)
3949
fp.close()
40-
except Exception as e:
50+
except FileExistsError as e:
4151
print(e)
42-
return False
52+
raise e
4353
return True
4454

4555
def get_download_key(self):
@@ -88,49 +98,48 @@ def download_handler(self):
8898
if self.watermarked == True:
8999
return self.download(url)
90100
else:
91-
if self.acceleration == True:
92-
return self.accelerated_download(url)
101+
if self.multi_part == True:
102+
return self.multi_part_download(url)
93103
else:
94104
return self.download(url)
95105

96106
def download(self, url):
97107
start_time = time.time()
98-
print("Beginning download -- {} -- {}".format(self.asset['name'], format_bytes(self.file_size, type="size")))
108+
print("Beginning download -- {} -- {}".format(self.asset["name"], format_bytes(self.file_size, type="size")))
99109

100110
# Downloading
101111
r = requests.get(url)
102-
open(self.destination, 'wb').write(r.content)
112+
open(self.destination, "wb").write(r.content)
103113

104114
download_time = time.time() - start_time
105115
download_speed = format_bytes(math.ceil(self.file_size/(download_time)))
106-
print("Downloaded {} at {}".format(self.file_size, download_speed))
116+
print("Downloaded {} at {}".format(format_bytes(self.file_size), download_speed))
107117

108118
return self.destination, download_speed
109119

110-
def accelerated_download(self, url):
120+
def multi_part_download(self, url):
111121
start_time = time.time()
112122

113123
# Generate stub
114124
try:
115125
self._create_file_stub()
116126

117127
except Exception as e:
118-
raise DownloadException
119-
print("Aborting", e)
128+
raise DownloadException(message=e)
120129

121130
offset = math.ceil(self.file_size / self.chunks)
122131
in_byte = 0 # Set initially here, but then override
123132

124-
print("Accelerated download -- {} -- {}".format(self.asset['name'], format_bytes(self.file_size, type="size")))
133+
print("Multi-part download -- {} -- {}".format(self.asset["name"], format_bytes(self.file_size, type="size")))
125134

126135
# Queue up threads
127136
with concurrent.futures.ThreadPoolExecutor(max_workers=self.concurrency) as executor:
128137
for i in range(int(self.chunks)):
129-
out_byte = offset * (i+1) # Advance by one byte to get proper offset
138+
out_byte = offset * (i+1) # Increment by the iterable + 1 so we don't mutiply by zero
130139
task = (url, in_byte, out_byte, i)
131140

132141
self.futures.append(executor.submit(self.download_chunk, task))
133-
in_byte = out_byte + 1 # Reset new in byte
142+
in_byte = out_byte # Reset new in byte equal to last out byte
134143

135144
# Wait on threads to finish
136145
for future in concurrent.futures.as_completed(self.futures):
@@ -143,14 +152,13 @@ def accelerated_download(self, url):
143152
# Calculate and print stats
144153
download_time = time.time() - start_time
145154
download_speed = format_bytes(math.ceil(self.file_size/(download_time)))
146-
print("Downloaded {} at {}".format(self.file_size, download_speed))
155+
print("Downloaded {} at {}".format(format_bytes(self.file_size), download_speed))
147156

148157
return self.destination
149158

150-
151159
def download_chunk(self, task):
152160
# Download a particular chunk
153-
# Called by the threadpool execuor
161+
# Called by the threadpool executor
154162

155163
url = task[0]
156164
start_byte = task[1]
@@ -161,7 +169,7 @@ def download_chunk(self, task):
161169
print("Getting chunk {}/{}".format(chunk_number + 1, self.chunks))
162170

163171
# Specify the starting and ending of the file
164-
headers = {'Range': 'bytes=%d-%d' % (start_byte, end_byte)}
172+
headers = {"Range": "bytes=%d-%d" % (start_byte, end_byte)}
165173

166174
# Grab the data as a stream
167175
r = session.get(url, headers=headers, stream=True)
@@ -171,6 +179,4 @@ def download_chunk(self, task):
171179
fp.write(r.content) # Write the data
172180
print("Done writing chunk {}/{}".format(chunk_number + 1, self.chunks))
173181

174-
print("Completed chunk {}/{}".format(chunk_number + 1, self.chunks))
175-
176182
return "Complete!"

frameioclient/exceptions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,17 @@ class DownloadException(Exception):
2727
"""
2828
def __init__(
2929
self,
30-
message="Unable to download for some reason."
30+
message="Generic Dowload exception."
31+
):
32+
self.message = message
33+
super().__init__(self.message)
34+
35+
class AssetNotFullyUploaded(Exception):
36+
"""Exception raised when trying to download a file that isn't yet fully upload.
37+
"""
38+
def __init__(
39+
self,
40+
message="Unable to download this asset because it not yet fully uploaded."
3141
):
3242
self.message = message
3343
super().__init__(self.message)

frameioclient/utils.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import xxhash
22
import sys
3+
import re
34

45
KB = 1024
56
MB = KB * KB
@@ -79,4 +80,24 @@ def compare_items(dict1, dict2):
7980
if comparison == False:
8081
print("File mismatch between upload and download")
8182

82-
return comparison
83+
return comparison
84+
85+
def get_valid_filename(s):
86+
"""
87+
Strip out invalid characters from a filename using regex
88+
"""
89+
s = str(s).strip().replace(' ', '_')
90+
return re.sub(r'(?u)[^-\w.]', '', s)
91+
92+
def normalize_filename(fn):
93+
"""
94+
Normalize filename using pure python
95+
"""
96+
validchars = "-_.() "
97+
out = ""
98+
for c in fn:
99+
if str.isalpha(c) or str.isdigit(c) or (c in validchars):
100+
out += c
101+
else:
102+
out += "_"
103+
return out

tests/integration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def test_download(client, override=False):
107107
start_time = time.time()
108108
print("{}/{} Beginning to download: {}".format(count, len(asset_list), asset['name']))
109109

110-
client.download(asset, 'downloads', acceleration=True, concurrency=20)
110+
client.download(asset, 'downloads', multi_part=True, concurrency=20)
111111

112112
download_time = time.time() - start_time
113113
download_speed = format_bytes(ceil(asset['filesize']/(download_time)))

0 commit comments

Comments
 (0)