Skip to content

Commit 31b7ae0

Browse files
committed
[Deploy] Hotfix: job runner context lost on logout.
1 parent 4cc39fb commit 31b7ae0

File tree

2 files changed

+9
-2
lines changed

2 files changed

+9
-2
lines changed

python/fedml/computing/scheduler/model_scheduler/master_job_runner_manager.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,18 @@ def send_deployment_stages(
4242
message_center=message_center
4343
)
4444

45-
def send_deployment_delete_request_to_edges(self, end_point_id, payload, model_msg_object, message_center=None):
45+
def send_deployment_delete_request_to_edges(self, end_point_id, payload, model_msg_object, message_center=None,
46+
args=None):
4647
run_id_str = str(end_point_id)
4748
if self.job_runners.get(run_id_str, None) is not None:
4849
self.job_runners[run_id_str].send_deployment_delete_request_to_edges(
4950
payload, model_msg_object, message_center=message_center)
51+
else:
52+
# Hotfix: re-instantiate the job runner
53+
# TODO(Alay, Raphael): Investigate whether re-instantiating the job runner is necessary
54+
self.job_runners[run_id_str] = self._generate_job_runner_instance(args)
55+
self.job_runners[run_id_str].send_deployment_delete_request_to_edges(
56+
payload, model_msg_object, message_center=message_center)
5057

5158
def stop_device_inference_monitor(self, run_id, end_point_name, model_id, model_name, model_version):
5259
run_id_str = str(run_id)

python/fedml/computing/scheduler/model_scheduler/master_protocol_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def callback_delete_deployment(self, topic, payload):
132132

133133
# Send delete deployment request to the edge devices
134134
FedMLDeployJobRunnerManager.get_instance().send_deployment_delete_request_to_edges(
135-
model_msg_object.run_id, payload, model_msg_object, message_center=self.message_center)
135+
model_msg_object.run_id, payload, model_msg_object, message_center=self.message_center, args=self.args)
136136

137137
# Stop processes on master
138138
FedMLDeployJobRunnerManager.get_instance().stop_job_runner(model_msg_object.run_id)

0 commit comments

Comments
 (0)