@@ -55,10 +55,10 @@ async def auth_middleware(request: Request, call_next):
5555 {"error" : True , "message" : "Invalid JSON." },
5656 status_code = status .HTTP_400_BAD_REQUEST )
5757
58- # Get total pending requests.
59- pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter ()
58+ # Get endpoint's total pending requests.
59+ end_point_id = request_json .get ("end_point_id" , None )
60+ pending_requests_num = FEDML_MODEL_CACHE .get_pending_requests_counter (end_point_id )
6061 if pending_requests_num :
61- end_point_id = request_json .get ("end_point_id" , None )
6262 # Fetch metrics of the past k=3 requests.
6363 pask_k_metrics = FEDML_MODEL_CACHE .get_endpoint_metrics (
6464 end_point_id = end_point_id ,
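The counter lookup is now keyed by endpoint rather than being a single global value, which is why end_point_id is extracted before the backlog check. A minimal sketch of what the per-endpoint getter might look like, assuming FEDML_MODEL_CACHE is Redis-backed; the key layout and the redis_connection attribute are hypothetical, not the cache's actual internals:

def get_pending_requests_counter(self, end_point_id=None) -> int:
    # Hypothetical per-endpoint key: one counter per endpoint id.
    if end_point_id is None:
        return 0
    try:
        count = self.redis_connection.get(f"FEDML_PENDING_REQUESTS_COUNTER-{end_point_id}")
        return int(count) if count is not None else 0
    except Exception:
        # Treat a cache outage as "no backlog" rather than failing the request.
        return 0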
@@ -173,7 +173,7 @@ async def _predict(
         header=None
 ) -> Union[MutableMapping[str, Any], Response, StreamingResponse]:
     # Always increase the pending requests counter on a new incoming request.
-    FEDML_MODEL_CACHE.update_pending_requests_counter(increase=True)
+    FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, increase=True)
     inference_response = {}
 
     try:
@@ -205,14 +205,14 @@ async def _predict(
         if not is_endpoint_activated(in_end_point_id):
             inference_response = {"error": True, "message": "endpoint is not activated."}
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
 
         # Found idle inference device
         idle_device, end_point_id, model_id, model_name, model_version, inference_host, inference_output_url = \
             found_idle_inference_device(in_end_point_id, in_end_point_name, in_model_name, in_model_version)
         if idle_device is None or idle_device == "":
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return {"error": True, "error_code": status.HTTP_404_NOT_FOUND,
                     "message": "can not found active inference worker for this endpoint."}
 
@@ -252,18 +252,18 @@ async def _predict(
                 pass
 
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
         else:
             inference_response = {"error": True, "message": "token is not valid."}
             logging_inference_request(input_json, inference_response)
-            FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+            FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
             return inference_response
 
     except Exception as e:
         logging.error("Inference Exception: {}".format(traceback.format_exc()))
         # Need to reduce the pending requests counter in whatever exception that may be raised.
-        FEDML_MODEL_CACHE.update_pending_requests_counter(decrease=True)
+        FEDML_MODEL_CACHE.update_pending_requests_counter(end_point_id, decrease=True)
 
 
 def retrieve_info_by_endpoint_id(end_point_id, in_end_point_name=None, in_model_name=None,
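Every exit path of _predict, including the exception handler, now decrements the same per-endpoint counter, keeping the gateway's backlog estimate consistent with the increment at the top of the function. A matching updater sketch under the same Redis assumptions as above; incr/decr keep the counter updates atomic across gateway workers, while the floor-at-zero guard is best-effort:

def update_pending_requests_counter(self, end_point_id=None, increase=False, decrease=False):
    # Hypothetical implementation; mirrors the getter's assumed key layout.
    if end_point_id is None:
        return
    key = f"FEDML_PENDING_REQUESTS_COUNTER-{end_point_id}"
    try:
        if increase:
            self.redis_connection.incr(key)
        if decrease and int(self.redis_connection.get(key) or 0) > 0:
            # Never let the counter go negative on a stray double decrement.
            self.redis_connection.decr(key)
    except Exception:
        pass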