@@ -55,18 +55,18 @@ async def auth_middleware(request: Request, call_next):
5555 {"error" : True , "message" : "Invalid JSON." },
5656 status_code = status .HTTP_400_BAD_REQUEST )
5757
58- # Get total pending requests.
59- pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter ()
58+ # Get endpoint's total pending requests.
59+ end_point_id = request_json .get ("end_point_id" , None )
60+ pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter (end_point_id )
6061 if pending_requests_num :
61- end_point_id = request_json .get ("end_point_id" , None )
6262 # Fetch metrics of the past k=3 requests.
6363 pask_k_metrics = FEDML_MODEL_CACHE .get_endpoint_metrics (
6464 end_point_id = end_point_id ,
6565 k_recent = 3 )
6666
6767 # Get the request timeout from the endpoint settings.
6868 request_timeout_s = FEDML_MODEL_CACHE .get_endpoint_settings (end_point_id ) \
69- .get ("request_timeout_s" , ClientConstants . INFERENCE_REQUEST_TIMEOUT )
69+ .get (ServerConstants . INFERENCE_REQUEST_TIMEOUT_KEY , ServerConstants . INFERENCE_REQUEST_TIMEOUT_DEFAULT )
7070
7171 # Only proceed if the past k metrics collection is not empty.
7272 if pask_k_metrics :
@@ -76,7 +76,8 @@ async def auth_middleware(request: Request, call_next):
7676 mean_latency = sum (past_k_latencies_sec ) / len (past_k_latencies_sec )
7777
7878 # If the timeout threshold is exceeded, cancel the request and return a timeout error.
79- if (mean_latency * pending_requests_num ) > request_timeout_s :
79+ should_block = (mean_latency * pending_requests_num ) > request_timeout_s
80+ if should_block :
8081 return JSONResponse (
8182 {"error" : True , "message" : "Request timed out." },
8283 status_code = status .HTTP_504_GATEWAY_TIMEOUT )
@@ -173,7 +174,7 @@ async def _predict(
173174 header = None
174175) -> Union [MutableMapping [str , Any ], Response , StreamingResponse ]:
175176 # Always increase the pending requests counter on a new incoming request.
176- FEDML_MODEL_CACHE .update_pending_requests_counter (increase = True )
177+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , increase = True )
177178 inference_response = {}
178179
179180 try :
@@ -205,14 +206,14 @@ async def _predict(
205206 if not is_endpoint_activated (in_end_point_id ):
206207 inference_response = {"error" : True , "message" : "endpoint is not activated." }
207208 logging_inference_request (input_json , inference_response )
208- FEDML_MODEL_CACHE .update_pending_requests_counter (decrease = True )
209+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , decrease = True )
209210 return inference_response
210211
211212 # Find an idle inference device.
212213 idle_device , end_point_id , model_id , model_name , model_version , inference_host , inference_output_url = \
213214 found_idle_inference_device (in_end_point_id , in_end_point_name , in_model_name , in_model_version )
214215 if idle_device is None or idle_device == "" :
215- FEDML_MODEL_CACHE .update_pending_requests_counter (decrease = True )
216+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , decrease = True )
216217 return {"error" : True , "error_code" : status .HTTP_404_NOT_FOUND ,
217218 "message" : "can not found active inference worker for this endpoint." }
218219
@@ -252,18 +253,18 @@ async def _predict(
252253 pass
253254
254255 logging_inference_request (input_json , inference_response )
255- FEDML_MODEL_CACHE .update_pending_requests_counter (decrease = True )
256+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , decrease = True )
256257 return inference_response
257258 else :
258259 inference_response = {"error" : True , "message" : "token is not valid." }
259260 logging_inference_request (input_json , inference_response )
260- FEDML_MODEL_CACHE .update_pending_requests_counter (decrease = True )
261+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , decrease = True )
261262 return inference_response
262263
263264 except Exception as e :
264265 logging .error ("Inference Exception: {}" .format (traceback .format_exc ()))
265266 # Always decrease the pending requests counter, whichever exception was raised.
266- FEDML_MODEL_CACHE .update_pending_requests_counter (decrease = True )
267+ FEDML_MODEL_CACHE .update_pending_requests_counter (end_point_id , decrease = True )
267268
268269
269270def retrieve_info_by_endpoint_id (end_point_id , in_end_point_name = None , in_model_name = None ,
0 commit comments