Skip to content

Commit 14a0182

Browse files
Merge pull request #2168 from FedML-AI/bhargav191098/removing_archive
Removing archive from upload for files.
2 parents bec28a6 + 5fb5ed4 commit 14a0182

2 files changed

Lines changed: 58 additions & 20 deletions

File tree

python/fedml/api/modules/storage.py

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import requests
66
import math
7+
from enum import Enum, unique
78

89
import requests.exceptions
910
import tqdm
@@ -26,6 +27,10 @@ def __init__(self, data: dict):
2627
self.tag_list = data.get("tags", None)
2728
self.download_url = data.get("fileUrl", None)
2829

30+
@unique
class DataType(Enum):
    """Classification of a local path handed to the storage upload flow.

    ``@unique`` makes the class definition fail with ``ValueError`` if two
    members are ever given the same value, so every string value maps to
    exactly one member.
    """

    # The path is a regular file (os.path.isfile is true).
    FILE = "file"
    # The path is a directory (os.path.isdir is true).
    DIRECTORY = "directory"
    # The path is neither an existing file nor an existing directory.
    INVALID = "invalid"
2934

3035
# Todo (alaydshah): Store service name in metadata
3136
# Todo (alaydshah): If data already exists, don't upload again. Instead suggest to use update command
@@ -38,27 +43,45 @@ def upload(data_path, api_key, name, description, tag_list, service, show_progre
3843

3944
if user_id is None:
4045
return FedMLResponse(code=ResponseCode.FAILURE, message=message)
46+
47+
data_type = _get_data_type(data_path)
4148

42-
if(not _check_data_path(data_path)):
49+
if(data_type == DataType.INVALID):
4350
return FedMLResponse(code=ResponseCode.FAILURE,message="Invalid data path")
4451

45-
archive_path, message = _archive_data(data_path)
46-
if not archive_path:
52+
if(data_type == DataType.DIRECTORY):
53+
to_upload_path, message = _archive_data(data_path)
54+
name = os.path.splitext(os.path.basename(to_upload_path))[0] if name is None else name
55+
file_name = name + ".zip"
56+
else:
57+
to_upload_path = data_path
58+
base_name = os.path.basename(to_upload_path)
59+
file_extension = os.path.splitext(base_name)[1]
60+
given_extension = None
61+
if name is not None:
62+
given_extension = os.path.splitext(name)[1]
63+
if given_extension is None or given_extension == "":
64+
name = name + file_extension
65+
else:
66+
name = base_name
67+
68+
file_name = name
69+
70+
if not to_upload_path:
4771
return FedMLResponse(code=ResponseCode.FAILURE, message=message)
4872

49-
name = os.path.splitext(os.path.basename(archive_path))[0] if name is None else name
50-
file_name = name + ".zip"
73+
#TODO(bhargav191098) - Better done on the backend. Remove and pass file_name once completed on backend.
5174
dest_path = os.path.join(user_id, file_name)
52-
file_size = os.path.getsize(archive_path)
75+
file_size = os.path.getsize(to_upload_path)
5376

54-
file_uploaded_url, message = _upload_multipart(api_key, file_name, archive_path, show_progress,
77+
file_uploaded_url, message = _upload_multipart(api_key, dest_path, to_upload_path, show_progress,
5578
out_progress_to_err,
5679
progress_desc, metadata)
5780

58-
59-
os.remove(archive_path)
81+
if(data_type == "dir"):
82+
os.remove(to_upload_path)
6083
if not file_uploaded_url:
61-
return FedMLResponse(code=ResponseCode.FAILURE, message=f"Failed to upload file: {archive_path}")
84+
return FedMLResponse(code=ResponseCode.FAILURE, message=f"Failed to upload file: {to_upload_path}")
6285

6386
json_data = {
6487
"datasetName": name,
@@ -95,13 +118,26 @@ def download(data_name, api_key, service, dest_path, show_progress=True) -> FedM
95118
logging.error(error_message)
96119
return FedMLResponse(code=ResponseCode.FAILURE, message=error_message)
97120
download_url = metadata.download_url
98-
zip_file_name = data_name + ".zip"
99-
path_local = os.path.abspath(zip_file_name)
121+
given_extension = os.path.splitext(data_name)[1]
122+
is_file = True
123+
if(given_extension is None or given_extension ==""):
124+
is_file = False
125+
126+
if not is_file:
127+
download_file_name = data_name + ".zip"
128+
else:
129+
download_file_name = data_name
130+
path_local = os.path.abspath(download_file_name)
100131
dest_path = os.path.abspath(dest_path) if dest_path else data_name
101-
if _download_using_presigned_url(download_url, zip_file_name, show_progress=show_progress):
132+
if _download_using_presigned_url(download_url, download_file_name, show_progress=show_progress):
102133
try:
103-
shutil.unpack_archive(path_local, dest_path)
104-
os.remove(path_local)
134+
if not is_file:
135+
shutil.unpack_archive(path_local, dest_path)
136+
os.remove(path_local)
137+
else:
138+
if not os.path.exists(dest_path):
139+
os.makedirs(dest_path)
140+
shutil.move(path_local,dest_path)
105141
abs_dest_path = os.path.abspath(dest_path)
106142
return FedMLResponse(code=ResponseCode.SUCCESS, message=f"Successfully downloaded and unzipped data at "
107143
f"{abs_dest_path}", data=abs_dest_path)
@@ -438,10 +474,12 @@ def _get_storage_service(service):
438474
else:
439475
raise NotImplementedError(f"Service {service} not implemented")
440476

441-
def _check_data_path(data_path):
442-
if os.path.isdir(data_path) or os.path.isfile(data_path):
443-
return True
444-
return False
477+
def _get_data_type(data_path):
    """Classify ``data_path`` as a directory, a regular file, or invalid.

    Returns:
        DataType.DIRECTORY if ``os.path.isdir(data_path)`` is true,
        DataType.FILE if ``os.path.isfile(data_path)`` is true,
        DataType.INVALID otherwise.
    """
    # Checked in order: the directory test runs first, then the file test.
    probes = (
        (os.path.isdir, DataType.DIRECTORY),
        (os.path.isfile, DataType.FILE),
    )
    for matches, kind in probes:
        if matches(data_path):
            return kind
    return DataType.INVALID
445483

446484

447485
def _archive_data(data_path: str) -> (str, str):

python/fedml/cli/modules/storage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def validate_argument(ctx, param, value):
4747
@click.help_option("--help", "-h")
4848
@click.argument("data_path", nargs=1, callback=validate_argument)
4949
@click.option("--name", "-n", type=str, help="Name your data to store. If not provided, the name will be the same as "
50-
"the data file or directory name.")
50+
"the data file or directory name. For files, extension need not be mentioned!")
5151
@click.option("--description", "-d", type=str, help="Add description to your data to store. If not provided, "
5252
"the description will be empty.")
5353
@click.option("--user_metadata", "-um", type=str, help="User-defined metadata in the form of a dictionary, for instance, "

0 commit comments

Comments
 (0)