Skip to content

Commit e263f74

Browse files
authored
Merge pull request #2105 from FedML-AI/alexleung/dev_branch_latest
Alexleung/dev branch latest
2 parents 41b76eb + 14429ad commit e263f74

4 files changed

Lines changed: 44 additions & 49 deletions

File tree

python/examples/launch/serve_mnist/fedml_model_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
workspace: "./"
22
entry_point: "mnist_serve_main.py"
33

4+
auto_detect_public_ip: true
5+
46
data_cache_dir: ""
57
bootstrap: ""
68

python/fedml/computing/scheduler/comm_utils/job_monitor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ def monitor_replicas_number():
223223
curr_version = fedml.get_env_version()
224224
num_replica_url_path = "fedmlModelServer/api/v1/endpoint/replica-info"
225225
mlops_prefix = fedml._get_backend_service()
226-
url = f"{mlops_prefix}{num_replica_url_path}"
226+
url = f"{mlops_prefix}/{num_replica_url_path}"
227227

228228
cached_token = FedMLModelCache.get_instance().get_end_point_token_with_eid(endpoint_id)
229229
if cached_token is None:

python/fedml/computing/scheduler/model_scheduler/device_model_inference.py

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -26,35 +26,35 @@
2626
pass
2727

2828

29-
# class Settings(BaseSettings):
30-
# redis_addr: str
31-
# redis_port: str
32-
# redis_password: str
33-
# end_point_name: str
34-
# model_name: str
35-
# model_version: str
36-
# model_infer_url: str
37-
# version: str
38-
# use_mqtt_inference: bool
39-
# use_worker_gateway: bool
40-
# ext_info: str
29+
class Settings(BaseSettings):
30+
redis_addr: str
31+
redis_port: str
32+
redis_password: str
33+
end_point_name: str
34+
model_name: str
35+
model_version: str
36+
model_infer_url: str
37+
version: str
38+
use_mqtt_inference: bool
39+
use_worker_gateway: bool
40+
ext_info: str
41+
42+
43+
settings = Settings()
44+
45+
# class settings:
46+
# redis_addr = "127.0.0.1"
47+
# redis_port = 6379
48+
# redis_password = "fedml_default"
49+
# end_point_name = ""
50+
# model_name = ""
51+
# model_version = ""
52+
# model_infer_url = "127.0.0.1"
53+
# version = "dev"
54+
# use_mqtt_inference = False
55+
# use_worker_gateway = False
56+
# ext_info = "2b34303961245c4f175f2236282d7a272c040b0904747579087f6a760112030109010c215d54505707140005190a051c347f365c4a430c020a7d39120e26032a78730f797f7c031f0901657e75"
4157
#
42-
#
43-
# settings = Settings()
44-
45-
class settings:
46-
redis_addr = "127.0.0.1"
47-
redis_port = 6379
48-
redis_password = "fedml_default"
49-
end_point_name = ""
50-
model_name = ""
51-
model_version = ""
52-
model_infer_url = "127.0.0.1"
53-
version = "dev"
54-
use_mqtt_inference = False
55-
use_worker_gateway = False
56-
ext_info = "2b34303961245c4f175f2236282d7a272c040b0904747579087f6a760112030109010c215d54505707140005190a051c347f365c4a430c020a7d39120e26032a78730f797f7c031f0901657e75"
57-
5858

5959
api = FastAPI()
6060

python/fedml/computing/scheduler/model_scheduler/master_job_runner.py

Lines changed: 13 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,7 @@ def run_impl(
115115

116116
# start unified inference server
117117
self.start_device_inference_gateway(
118-
run_id, end_point_name, model_id, model_name, model_version,
119-
agent_config=self.agent_config, inference_port=inference_port)
118+
inference_port=inference_port, agent_config=self.agent_config)
120119

121120
# start inference monitor server
122121
self.stop_device_inference_monitor(
@@ -464,20 +463,16 @@ def process_deployment_result_message(self, topic=None, payload=None):
464463

465464
@staticmethod
466465
def start_device_inference_gateway(
467-
run_id, end_point_name, model_id,
468-
model_name, model_version, inference_port=ServerConstants.MODEL_INFERENCE_DEFAULT_PORT,
466+
inference_port=ServerConstants.MODEL_INFERENCE_DEFAULT_PORT,
469467
agent_config=None, redis_addr="localhost", redis_port=6379, redis_password="fedml_default"
470468
):
471469
# start unified inference server
472-
running_model_name = ServerConstants.get_running_model_name(end_point_name,
473-
model_name, model_version, run_id, model_id)
474470
python_program = get_python_program()
475471
master_port = os.getenv("FEDML_MASTER_PORT", None)
476472
if master_port is not None:
477473
inference_port = int(master_port)
478474
if not ServerConstants.is_running_on_k8s():
479-
logging.info(f"start the model inference gateway, end point {run_id}, "
480-
f"model name {model_name} at port {inference_port}...")
475+
logging.info(f"start the model inference gateway...")
481476
use_mqtt_inference = os.getenv("FEDML_USE_MQTT_INFERENCE", "False")
482477
use_mqtt_inference = True if use_mqtt_inference.lower() == 'true' else False
483478
use_worker_gateway = os.getenv("FEDML_USE_WORKER_GATEWAY", "False")
@@ -501,8 +496,8 @@ def start_device_inference_gateway(
501496
"USE_MQTT_INFERENCE={} USE_WORKER_GATEWAY={} EXT_INFO={} "
502497
"{} -m uvicorn {} --host 0.0.0.0 --port {} --reload --reload-delay 3 --reload-dir {} "
503498
"--log-level critical".format(
504-
redis_addr, redis_port, redis_password, end_point_name,
505-
model_name, model_version, "", fedml.get_env_version(), use_mqtt_inference,
499+
redis_addr, str(redis_port), redis_password, "",
500+
"", "", "", fedml.get_env_version(), use_mqtt_inference,
506501
use_worker_gateway, ext_info, python_program, inference_gw_cmd, str(inference_port),
507502
fedml_base_dir),
508503
should_capture_stdout=False, should_capture_stderr=False)
@@ -545,6 +540,14 @@ def stop_device_inference_monitor(run_id, end_point_name, model_id, model_name,
545540
def recover_inference_and_monitor():
546541
# noinspection PyBroadException
547542
try:
543+
agent_config = dict()
544+
try:
545+
agent_config["mqtt_config"], _, _, _ = MLOpsConfigs.fetch_all_configs()
546+
except Exception as e:
547+
pass
548+
549+
FedMLDeployMasterJobRunner.start_device_inference_gateway(agent_config=agent_config)
550+
548551
history_jobs = FedMLServerDataInterface.get_instance().get_history_jobs()
549552
for job in history_jobs.job_list:
550553
if job.running_json is None:
@@ -563,16 +566,6 @@ def recover_inference_and_monitor():
563566
if not is_activated:
564567
continue
565568

566-
agent_config = dict()
567-
try:
568-
agent_config["mqtt_config"], _, _, _ = MLOpsConfigs.fetch_all_configs()
569-
except Exception as e:
570-
pass
571-
572-
FedMLDeployMasterJobRunner.start_device_inference_gateway(
573-
run_id, end_point_name, model_id, model_name, model_version, inference_port=inference_port,
574-
agent_config=agent_config)
575-
576569
FedMLDeployMasterJobRunner.stop_device_inference_monitor(
577570
run_id, end_point_name, model_id, model_name, model_version)
578571
FedMLDeployMasterJobRunner.start_device_inference_monitor(

0 commit comments

Comments
 (0)