Skip to content

Commit e53f518

Browse files
authored
PD deployment support without router (#7412)
1 parent a498720 commit e53f518

6 files changed

Lines changed: 472 additions & 126 deletions

File tree

examples/splitwise/start_v0_tp1.sh

Lines changed: 0 additions & 113 deletions
This file was deleted.

fastdeploy/config.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,13 +2010,13 @@ def __init__(
20102010
and self.router_config
20112011
and self.router_config.router
20122012
):
2013-
# For RL scenario: version.yaml will be required for models in future releases.
2013+
# For RL scenario, version.yaml is required for models
20142014
# Temporarily require the router to be enabled.
20152015
self.model_config.read_model_version()
20162016

20172017
self.read_from_config()
20182018
self.postprocess()
2019-
self.init_cache_info()
2019+
self.init_pd_info()
20202020
if test_mode:
20212021
return
20222022
self.check()
@@ -2371,18 +2371,17 @@ def print(self):
23712371
logger.info("{:<20}:{:<6}{}".format(k, "", v))
23722372
logger.info("=============================================================")
23732373

2374-
def init_cache_info(self):
2374+
def init_pd_info(self):
23752375
"""
2376-
initialize cache info
2376+
Initialize the info for PD deployment.
23772377
"""
2378-
# TODO: group the splitiwse params
23792378
# There are two methods for splitwise deployment:
23802379
# 1. v0 splitwise_scheduler or dp_scheduler
2381-
# 2. v1 local_scheduler + router
2380+
# 2. v1 local_scheduler + router (optional)
23822381
self.splitwise_version = None
23832382
if self.scheduler_config.name in ("splitwise", "dp"):
23842383
self.splitwise_version = "v0"
2385-
elif self.scheduler_config.name == "local" and self.router_config and self.router_config.router:
2384+
elif self.scheduler_config.name == "local":
23862385
self.splitwise_version = "v1"
23872386

23882387
# The information used to register this server with the router or the splitwise_scheduler

fastdeploy/engine/args_utils.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,10 +600,15 @@ def __post_init__(self):
600600
raise NotImplementedError("Only ENABLE_V1_KVCACHE_SCHEDULER=1 support max_logprobs=-1")
601601

602602
if self.splitwise_role != "mixed":
603-
if self.scheduler_name == "local" and self.router is None:
603+
if self.scheduler_name == "splitwise":
604604
raise ValueError(
605-
f"When using {self.splitwise_role} role and the {self.scheduler_name} "
606-
f"scheduler, please provide --router argument."
605+
"Setting scheduler_name as splitwise is not supported in pd deployment, "
606+
"please use router as scheduler."
607+
)
608+
if self.scheduler_name == "local" and self.router is None:
609+
console_logger.warning(
610+
f"Running {self.splitwise_role} role with {self.scheduler_name} "
611+
f"scheduler without --router. Router registration and request routing will be disabled."
607612
)
608613

609614
if not (

fastdeploy/engine/expert_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def start(self, ipc_signal_suffix, local_data_parallel_id):
109109
if envs.FD_ENABLE_RETURN_TEXT:
110110
self.engine.create_data_processor()
111111
if self.cfg.scheduler_config.name == "dp":
112-
self.cfg.init_cache_info()
112+
self.cfg.init_pd_info()
113113
self.engine.scheduler.start(local_data_parallel_id)
114114

115115
if ipc_signal_suffix is not None:
@@ -122,7 +122,7 @@ def start(self, ipc_signal_suffix, local_data_parallel_id):
122122
self.llm_logger.info(f"start expert service {local_data_parallel_id}")
123123

124124
if self.cfg.scheduler_config.name == "splitwise":
125-
self.cfg.init_cache_info()
125+
self.cfg.init_pd_info()
126126
role = self.cfg.scheduler_config.splitwise_role
127127
host_ip = self.cfg.host_ip
128128
self.engine.scheduler.start(role, host_ip, self.cfg.register_info)

0 commit comments

Comments
 (0)