Skip to content

Commit 0ddb6e4

Browse files
authored
[Optimization] 移除 num_blocks 上限限制 (#7241)
1 parent e83d458 commit 0ddb6e4

5 files changed

Lines changed: 3 additions & 12 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ custom_ops/tmp*
173173
build
174174

175175
.ccls-cache
176+
.claude
176177

177178
third_party
178179

fastdeploy/worker/iluvatar_worker.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,6 @@ def initialize_kv_cache(self) -> None:
126126
# 2. Calculate the appropriate number of blocks
127127
model_block_memory_used = self.worker.cal_theortical_kvcache()
128128
num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
129-
# NOTE(liuzichang): Too many block will lead to illegal memory access
130-
# We will develop dynamic limits in future.
131-
if num_blocks_local > 40000:
132-
logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
133-
num_blocks_local = min(40000, num_blocks_local)
134129
logger.info(f"------- model_block_memory_used:{model_block_memory_used} --------")
135130
logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
136131

fastdeploy/worker/worker_process.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -666,11 +666,6 @@ def initialize_kv_cache(self) -> None:
666666
# 2. Calculate the appropriate number of blocks
667667
model_block_memory_used = self.worker.cal_theortical_kvcache()
668668
num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
669-
# NOTE(liuzichang): Too many block will lead to illegal memory access
670-
# We will develop dynamic limits in future.
671-
if num_blocks_local > 40000:
672-
logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
673-
num_blocks_local = min(40000, num_blocks_local)
674669
logger.info(f"------- model_block_memory_used:{model_block_memory_used / 1024**3} GB --------")
675670
logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
676671

tests/e2e/4cards_cases/test_Qwen3_30b_tp4.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def test_non_thinking_prompt(api_url, headers):
281281
def test_profile_reset_block_num():
282282
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
283283
log_file = "./log/config.log"
284-
baseline = 40000
284+
baseline = 74000
285285

286286
if not os.path.exists(log_file):
287287
pytest.fail(f"Log file not found: {log_file}")

tests/e2e/test_EB_VL_Lite_serving.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ def test_profile_reset_block_num():
736736
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
737737
log_dir = os.getenv("FD_LOG_DIR", "log")
738738
log_file = os.path.join(log_dir, "config.log")
739-
baseline = 40000
739+
baseline = 65400
740740

741741
if not os.path.exists(log_file):
742742
pytest.fail(f"Log file not found: {log_file}")

0 commit comments

Comments
 (0)