@@ -55,10 +55,10 @@ async def auth_middleware(request: Request, call_next):
5555 {"error" : True , "message" : "Invalid JSON." },
5656 status_code = status .HTTP_400_BAD_REQUEST )
5757
58- # Get total pending requests.
59- pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter ()
58+ # Get endpoint's total pending requests.
59+ end_point_id = request_json .get ("end_point_id" , None )
60+ pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter (end_point_id )
6061 if pending_requests_num :
61- end_point_id = request_json .get ("end_point_id" , None )
6262 # Fetch metrics of the past k=3 requests.
6363 pask_k_metrics = FEDML_MODEL_CACHE .get_endpoint_metrics (
6464 end_point_id = end_point_id ,
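The counter lookup is now keyed by endpoint rather than being a single global value, which is why end_point_id is extracted before the backlog check. A minimal sketch of what the per-endpoint getter might look like, assuming FEDML_MODEL_CACHE is Redis-backed; the key layout and the redis_connection attribute are hypothetical, not the cache's actual internals:

def get_pending_requests_counter(self, end_point_id=None) -> int:
    # Hypothetical per-endpoint key: one counter per endpoint id.
    if end_point_id is None:
        return 0
    try:
        count = self.redis_connection.get(f"FEDML_PENDING_REQUESTS_COUNTER-{end_point_id}")
        return int(count) if count is not None else 0
    except Exception:
        # Treat a cache outage as "no backlog" rather than failing the request.
        return 0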
@@ -173,7 +173,7 @@ async def _predict(
         header=None
 ) -> Union[MutableMapping[str, Any], Response, StreamingResponse]:
     # Always increase the pending requests counter on a new incoming request.
-    FEDML_MODEL_CACHE.update_pending_requests_counter(increase=True)
+    FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, increase=True)
     inference_response = {}
 
     try:
@@ -205,14 +205,14 @@ async def _predict(
         if not is_endpoint_activated(in_end_point_id):
             inference_response = {"error": True, "message": "endpoint is not activated."}
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
 
         # Found idle inference device
         idle_device, end_point_id, model_id, model_name, model_version, inference_host, inference_output_url = \
             found_idle_inference_device(in_end_point_id, in_end_point_name, in_model_name, in_model_version)
         if idle_device is None or idle_device == "":
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return {"error": True, "error_code": status.HTTP_404_NOT_FOUND,
                     "message": "can not found active inference worker for this endpoint."}
 
@@ -252,18 +252,18 @@ async def _predict(
                 pass
 
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
         else:
             inference_response = {"error": True, "message": "token is not valid."}
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
 
     except Exception as e:
         logging.error("Inference Exception: {}".format(traceback.format_exc()))
         # Need to reduce the pending requests counter in whatever exception that may be raised.
-        FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+        FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
 
 
 def retrieve_info_by_endpoint_id(end_point_id, in_end_point_name=None, in_model_name=None,
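Every exit path of _predict, including the exception handler, now decrements the same per-endpoint counter, keeping the gateway's backlog estimate consistent with the increment at the top of the function. A matching updater sketch under the same Redis assumptions as above; incr/decr keep the counter updates atomic across gateway workers, while the floor-at-zero guard is best-effort:

def update_pending_requests_counter(self, end_point_id=None, increase=False, decrease=False):
    # Hypothetical implementation; mirrors the getter's assumed key layout.
    if end_point_id is None:
        return
    key = f"FEDML_PENDING_REQUESTS_COUNTER-{end_point_id}"
    try:
        if increase:
            self.redis_connection.incr(key)
        if decrease and int(self.redis_connection.get(key) or 0) > 0:
            # Never let the counter go negative on a stray double decrement.
            self.redis_connection.decr(key)
    except Exception:
        pass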