Skip to content

Commit c29cf1d

Browse files
committed
[Deploy] Unified timeout key.
1 parent c151831 commit c29cf1d

3 files changed

Lines changed: 7 additions & 3 deletions

File tree

python/fedml/computing/scheduler/model_scheduler/device_model_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def set_user_setting_replica_num(self, end_point_id,
139139
"target_queries_per_replica": target_queries_per_replica,
140140
"aggregation_window_size_seconds": aggregation_window_size_seconds,
141141
"scale_down_delay_seconds": scale_down_delay_seconds,
142-
"request_timeout_sec": timeout_s
142+
ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY: timeout_s
143143
}
144144
try:
145145
self.redis_connection.set(self.get_user_setting_replica_num_key(end_point_id), json.dumps(replica_num_dict))

python/fedml/computing/scheduler/model_scheduler/device_model_inference.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ async def auth_middleware(request: Request, call_next):
6666

6767
# Get the request timeout from the endpoint settings.
6868
request_timeout_s = FEDML_MODEL_CACHE.get_endpoint_settings(end_point_id) \
69-
.get("request_timeout_s", ClientConstants.INFERENCE_REQUEST_TIMEOUT)
69+
.get(ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY, ServerConstants.INFERENCE_REQUEST_TIMEOUT_DEFAULT)
7070

7171
# Only proceed if the past k metrics collection is not empty.
7272
if pask_k_metrics:
@@ -76,7 +76,8 @@ async def auth_middleware(request: Request, call_next):
7676
mean_latency = sum(past_k_latencies_sec) / len(past_k_latencies_sec)
7777

7878
# If timeout threshold is exceeded then cancel and return time out error.
79-
if (mean_latency * pending_requests_num) > request_timeout_s:
79+
should_block = (mean_latency * pending_requests_num) > request_timeout_s
80+
if should_block:
8081
return JSONResponse(
8182
{"error": True, "message": "Request timed out."},
8283
status_code=status.HTTP_504_GATEWAY_TIMEOUT)

python/fedml/computing/scheduler/model_scheduler/device_server_constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ class ServerConstants(object):
104104
AUTO_DETECT_PUBLIC_IP = "auto_detect_public_ip"
105105
MODEL_INFERENCE_DEFAULT_PORT = 2203
106106
MODEL_CACHE_KEY_EXPIRE_TIME = 1 * 10
107+
108+
INFERENCE_REQUEST_TIMEOUT_KEY = "request_timeout_sec"
109+
INFERENCE_REQUEST_TIMEOUT_DEFAULT = 30
107110
# -----End-----
108111

109112
MODEL_DEPLOYMENT_STAGE1 = {"index": 1, "text": "ReceivedRequest"}

0 commit comments

Comments
 (0)