Add support for int4 quantization.

Google-ML-Automation · Google-ML-Automation · commit f11f5507c987 · 2026-03-09T13:02:43.000-07:00
PiperOrigin-RevId: 880998732
diff --git a/src/maxtext/configs/types.py b/src/maxtext/configs/types.py
@@ -81,6 +81,7 @@ class QuantizationType(str, Enum):
   """Supported quantization schemes."""
 
   NONE = ""
+  INT4 = "int4"
   INT8 = "int8"
   INTMP = "intmp"
   FP8 = "fp8"
diff --git a/src/maxtext/layers/quantizations.py b/src/maxtext/layers/quantizations.py
@@ -655,6 +655,15 @@ def get_fp8_full_qwix_rule(config: Config):
 
 def get_quantization_rule(config: Config):
   match config.quantization:
+    case "int4":
+      return qwix.QtRule(
+          module_path="decoder/.*layers.*",
+          weight_qtype=jnp.int4,
+          act_qtype=jnp.int4,
+          bwd_qtype=jnp.int4,
+          bwd_weight_grad_tile_size=1 / config.quantization_local_shard_count,
+          op_names=("dot_general",),
+      )
     case "int8":
       return qwix.QtRule(
           module_path="decoder/.*layers.*",
@@ -702,6 +711,8 @@ def get_qt_provider(config):
   match config.quantization:
     case "int8":
       return qwix.QtProvider([get_quantization_rule(config)])
+    case "int4":
+      return qwix.QtProvider([get_quantization_rule(config)])
     case "fp8":
       return qwix.QtProvider([get_quantization_rule(config)])
     case "fp8_full":