Skip to content

Commit f0e1eb4

Browse files
committed
Add TE NVFP4 quantization config without RHT
1 parent b314c5a commit f0e1eb4

2 files changed

Lines changed: 2 additions & 0 deletions

File tree

src/MaxText/configs/types.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ class QuantizationType(str, Enum):
8282
TE_FP8_CS = "te_fp8_currentscaling"
8383
TE_MXFP8 = "te_mxfp8"
8484
TE_NVFP4 = "te_nvfp4"
85+
TE_NVFP4_NO_RHT = "te_nvfp4_no_rht"
8586

8687

8788
class KvQuantAxis(str, Enum):

src/MaxText/layers/quantizations.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -752,6 +752,7 @@ def _get_recipe(recipe_name: str):
752752
"te_fp8_currentscaling": recipe.Float8CurrentScaling,
753753
"te_mxfp8": recipe.MXFP8BlockScaling,
754754
"te_nvfp4": recipe.NVFP4BlockScaling, # pytype: disable=module-attr
755+
"te_nvfp4_no_rht": functools.partial(recipe.NVFP4BlockScaling, disable_rht=True), # pytype: disable=module-attr
755756
}
756757
if recipe_name not in RECIPES:
757758
raise ValueError(f"Invalid TransformerEngine recipe: {recipe_name}")

0 commit comments

Comments
 (0)