|
2 | 2 | import logging |
3 | 3 |
|
4 | 4 | from collections import namedtuple |
5 | | -from fedml.computing.scheduler.model_scheduler.autoscaler.autoscaler import Autoscaler, ReactivePolicy |
| 5 | +from fedml.computing.scheduler.model_scheduler.autoscaler.autoscaler import Autoscaler |
6 | 6 | from fedml.core.mlops.mlops_runtime_log import MLOpsRuntimeLog |
7 | 7 | from fedml.computing.scheduler.model_scheduler.device_model_cache import FedMLModelCache |
| 8 | +from fedml.computing.scheduler.model_scheduler.autoscaler.policies import ConcurrentQueryPolicy |
8 | 9 |
|
9 | 10 |
|
10 | 11 | if __name__ == "__main__": |
|
18 | 19 | parser.add_argument('--redis_addr', default="local") |
19 | 20 | parser.add_argument('--redis_port', default=6379) |
20 | 21 | parser.add_argument('--redis_password', default="fedml_default") |
21 | | - parser.add_argument('--metric', |
22 | | - default="latency", |
23 | | - help="Either latency or qps") |
24 | 22 | args = parser.parse_args() |
25 | 23 |
|
26 | 24 | fedml_model_cache = FedMLModelCache.get_instance() |
|
32 | 30 | # Init the autoscaler |
33 | 31 | autoscaler = Autoscaler(args.redis_addr, args.redis_port, args.redis_password) |
34 | 32 |
|
35 | | - latency_reactive_policy_default = { |
36 | | - "metric": "latency", |
37 | | - "ewm_mins": 15, |
38 | | - "ewm_alpha": 0.5, |
39 | | - "ub_threshold": 0.5, |
40 | | - "lb_threshold": 0.99, |
41 | | - "triggering_value": 1.6561916828471053 |
| 33 | + autoscaling_policy_config = { |
| 34 | + "current_replicas": 1, |
| 35 | + "min_replicas": 1, |
| 36 | + "max_replicas": 3, |
| 37 | + "queries_per_replica": 2, |
| 38 | + "window_size_secs": 60, |
| 39 | + "scaledown_delay_secs": 120, |
42 | 40 | } |
43 | | - qps_reactive_policy_default = { |
44 | | - "metric": "qps", |
45 | | - "ewm_mins": 15, |
46 | | - "ewm_alpha": 0.5, |
47 | | - "ub_threshold": 2, |
48 | | - "lb_threshold": 0.5 |
49 | | - } |
50 | | - policy_config = latency_reactive_policy_default \ |
51 | | - if args.metric == "latency" else qps_reactive_policy_default |
52 | | - autoscaling_policy = ReactivePolicy(**policy_config) |
53 | | - |
54 | | - for endpoint_settings in endpoints_settings_list: |
55 | | - endpoint_state = endpoint_settings["state"] |
56 | | - if endpoint_state == "DEPLOYED" and endpoint_settings["enable_auto_scaling"]: |
57 | | - |
58 | | - e_id, e_name, model_name = \ |
59 | | - endpoint_settings["endpoint_id"], \ |
60 | | - endpoint_settings["endpoint_name"], \ |
61 | | - endpoint_settings["model_name"] |
62 | | - logging.info(f"Querying the autoscaler for endpoint {e_id} with user settings {endpoint_settings}.") |
63 | | - |
64 | | - # For every endpoint we just update the policy configuration. |
65 | | - autoscaling_policy.min_replicas = endpoint_settings["scale_min"] |
66 | | - autoscaling_policy.max_replicas = endpoint_settings["scale_max"] |
67 | | - # We retrieve a list of replicas for every endpoint. The number |
68 | | - # of running replicas is the length of that list. |
69 | | - current_replicas = len(fedml_model_cache.get_endpoint_replicas_results(e_id)) |
70 | | - autoscaling_policy.current_replicas = current_replicas |
71 | | - logging.info(f"Endpoint {e_id} autoscaling policy: {autoscaling_policy}.") |
72 | | - |
73 | | - scale_op = autoscaler.scale_operation_endpoint( |
74 | | - autoscaling_policy, |
75 | | - str(e_id)) |
76 | | - |
77 | | - new_replicas = current_replicas + scale_op.value |
| 41 | + autoscaling_policy = ConcurrentQueryPolicy(**autoscaling_policy_config) |
78 | 42 |
|
79 | | - logging.info(f"Scaling operation {scale_op.value} for endpoint {e_id} .") |
80 | | - logging.info(f"New Replicas {new_replicas} for endpoint {e_id} .") |
81 | | - logging.info(f"Current Replicas {current_replicas} for endpoint {e_id} .") |
| 43 | + e_id = 1821952311 |
| 44 | + scale_op = autoscaler.scale_operation_endpoint( |
| 45 | + autoscaling_policy, |
| 46 | + str(e_id)) |
| 47 | + logging.info(f"Scaling operation {scale_op.value} for endpoint {e_id} .") |