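[Deploy] Unified timeout key: the endpoint-settings writer (`set_user_setting_replica_num`) and the reader in `auth_middleware` now both use `ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY` ("request_timeout_sec") instead of the mismatched literals "request_timeout_sec" / "request_timeout_s"; the reader's fallback is now `ServerConstants.INFERENCE_REQUEST_TIMEOUT_DEFAULT` (30).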

Raphael-Jin · Raphael-Jin · commit c29cf1d6e6be · 2024-06-10T19:58:00.000Z
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_model_cache.py b/python/fedml/computing/scheduler/model_scheduler/device_model_cache.py
@@ -139,7 +139,7 @@ def set_user_setting_replica_num(self, end_point_id,
             "target_queries_per_replica": target_queries_per_replica,
             "aggregation_window_size_seconds": aggregation_window_size_seconds,
             "scale_down_delay_seconds": scale_down_delay_seconds,
-            "request_timeout_sec": timeout_s
+            ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY: timeout_s
         }
         try:
             self.redis_connection.set(self.get_user_setting_replica_num_key(end_point_id), json.dumps(replica_num_dict))
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_model_inference.py b/python/fedml/computing/scheduler/model_scheduler/device_model_inference.py
@@ -66,7 +66,7 @@ async def auth_middleware(request: Request, call_next):
 
             # Get the request timeout from the endpoint settings.
             request_timeout_s = FEDML_MODEL_CACHE.get_endpoint_settings(end_point_id) \
-                .get("request_timeout_s", ClientConstants.INFERENCE_REQUEST_TIMEOUT)
+                .get(ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY, ServerConstants.INFERENCE_REQUEST_TIMEOUT_DEFAULT)
 
             # Only proceed if the past k metrics collection is not empty.
             if pask_k_metrics:
@@ -76,7 +76,8 @@ async def auth_middleware(request: Request, call_next):
                 mean_latency = sum(past_k_latencies_sec) / len(past_k_latencies_sec)
 
                 # If timeout threshold is exceeded then cancel and return time out error.
-                if (mean_latency * pending_requests_num) > request_timeout_s:
+                should_block = (mean_latency * pending_requests_num) > request_timeout_s
+                if should_block:
                     return JSONResponse(
                         {"error": True, "message": "Request timed out."},
                         status_code=status.HTTP_504_GATEWAY_TIMEOUT)
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py b/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py
@@ -104,6 +104,9 @@ class ServerConstants(object):
     AUTO_DETECT_PUBLIC_IP = "auto_detect_public_ip"
     MODEL_INFERENCE_DEFAULT_PORT = 2203
     MODEL_CACHE_KEY_EXPIRE_TIME = 1 * 10
+
+    INFERENCE_REQUEST_TIMEOUT_KEY = "request_timeout_sec"
+    INFERENCE_REQUEST_TIMEOUT_DEFAULT = 30
     # -----End-----
 
     MODEL_DEPLOYMENT_STAGE1 = {"index": 1, "text": "ReceivedRequest"}

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ def set_user_setting_replica_num(self, end_point_id,`
`139`	`139`	`"target_queries_per_replica": target_queries_per_replica,`
`140`	`140`	`"aggregation_window_size_seconds": aggregation_window_size_seconds,`
`141`	`141`	`"scale_down_delay_seconds": scale_down_delay_seconds,`
`142`		`- "request_timeout_sec": timeout_s`
	`142`	`+ ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY: timeout_s`
`143`	`143`	`}`
`144`	`144`	`try:`
`145`	`145`	`self.redis_connection.set(self.get_user_setting_replica_num_key(end_point_id), json.dumps(replica_num_dict))`