@@ -24,6 +24,9 @@
 from einops import rearrange
 from .. import common_types, max_logging
 
+from . import quantizations
+
+
 Array = common_types.Array
 Mesh = common_types.Mesh
 DType = common_types.DType
@@ -36,6 +39,11 @@
 HEAD = common_types.HEAD
 D_KV = common_types.D_KV
 EMBED = common_types.EMBED
+Quant = quantizations.AqtQuantization
+
+
+def _maybe_aqt_einsum(quant: Quant):
+  return jnp.einsum if quant is None else quant.einsum()
 
 
 class AttentionOp(nn.Module):
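
The `_maybe_aqt_einsum` helper selects the einsum implementation once, so call sites stay uniform whether or not quantization is enabled. A minimal sketch of an assumed call site (not shown in this diff; the subscripts and variable names are illustrative):

```python
# Assumed call site: with a quant config, the query/key contraction runs
# through AQT's quantized einsum; with quant=None it is plain jnp.einsum.
einsum = _maybe_aqt_einsum(self.quant)
attn_weights = einsum("btnh,bsnh->bnts", query, key)
```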
@@ -51,6 +59,7 @@ class AttentionOp(nn.Module):
   flash_min_seq_length: int = 4096
   flash_block_sizes: BlockSizes = None
   dtype: DType = jnp.float32
+  quant: Quant = None
 
   def setup(self):
     if self.attention_kernel == "cudnn_flash_te":
@@ -585,6 +594,7 @@ class FlaxAttention(nn.Module):
           jax mesh is required if attention is set to flash.
       dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
           Parameters `dtype`
+      quant (`AqtQuantization`, *optional*, defaults to None)
 
   """
 
@@ -605,6 +615,7 @@ class FlaxAttention(nn.Module):
   value_axis_names: AxisNames = (BATCH, LENGTH, HEAD)
   out_axis_names: AxisNames = (BATCH, LENGTH, HEAD)
   precision: jax.lax.Precision = None
+  quant: Quant = None
 
   def setup(self):
 
@@ -624,10 +635,13 @@ def setup(self):
         split_head_dim=self.split_head_dim,
         flash_block_sizes=self.flash_block_sizes,
         dtype=self.dtype,
+        quant=self.quant,
     )
 
     qkv_init_kernel = nn.with_logical_partitioning(nn.initializers.lecun_normal(), ("embed", "heads"))
-
+    dot_general_cls = None
+    if self.quant:
+      dot_general_cls = self.quant.dot_general_cls()
     self.query = nn.Dense(
         inner_dim,
         kernel_init=qkv_init_kernel,
@@ -636,6 +650,7 @@ def setup(self):
         param_dtype=self.weights_dtype,
         name="to_q",
         precision=self.precision,
+        dot_general_cls=dot_general_cls,
     )
 
     self.key = nn.Dense(
@@ -646,6 +661,7 @@ def setup(self):
         param_dtype=self.weights_dtype,
         name="to_k",
         precision=self.precision,
+        dot_general_cls=dot_general_cls,
     )
 
     self.value = nn.Dense(
@@ -656,6 +672,7 @@ def setup(self):
         param_dtype=self.weights_dtype,
         name="to_v",
         precision=self.precision,
+        dot_general_cls=dot_general_cls,
     )
 
     self.proj_attn = nn.Dense(
@@ -665,6 +682,7 @@ def setup(self):
         param_dtype=self.weights_dtype,
         name="to_out_0",
         precision=self.precision,
+        dot_general_cls=dot_general_cls,
     )
     self.dropout_layer = nn.Dropout(rate=self.dropout)
 
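
Flax's `nn.Dense` accepts a `dot_general_cls` and calls it in place of `lax.dot_general`, which is how AQT slips quantization under the q/k/v/out projections above. A standalone sketch, assuming AQT's `aqt.jax.v2` Flax API (`AqtDotGeneral`, `config.fully_quantized`); it is independent of this diff:

```python
import functools

import flax.linen as nn
import jax
import jax.numpy as jnp
from aqt.jax.v2 import config as aqt_config
from aqt.jax.v2.flax import aqt_flax

# int8 forward/backward quantization config from the AQT library.
cfg = aqt_config.fully_quantized(fwd_bits=8, bwd_bits=8)

# nn.Dense instantiates dot_general_cls() and uses it for its matmul.
dense = nn.Dense(
    features=64,
    dot_general_cls=functools.partial(aqt_flax.AqtDotGeneral, cfg),
)

x = jnp.ones((4, 32))
params = dense.init(jax.random.PRNGKey(0), x)
y = dense.apply(params, x)  # projection now runs through AqtDotGeneral
```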
@@ -717,6 +735,7 @@ class FlaxBasicTransformerBlock(nn.Module):
           Overrides default block sizes for flash attention.
       mesh (`jax.sharding.mesh`, *optional*, defaults to `None`):
           jax mesh is required if attention is set to flash.
+      quant (`AqtQuantization`, *optional*, defaults to None)
   """
 
   dim: int
@@ -733,6 +752,7 @@ class FlaxBasicTransformerBlock(nn.Module):
   flash_block_sizes: BlockSizes = None
   mesh: jax.sharding.Mesh = None
   precision: jax.lax.Precision = None
+  quant: Quant = None
 
   def setup(self):
     # self attention (or cross_attention if only_cross_attention is True)
@@ -750,6 +770,7 @@ def setup(self):
         dtype=self.dtype,
         weights_dtype=self.weights_dtype,
         precision=self.precision,
+        quant=self.quant,
     )
     # cross attention
     self.attn2 = FlaxAttention(
@@ -766,6 +787,7 @@ def setup(self):
         dtype=self.dtype,
         weights_dtype=self.weights_dtype,
         precision=self.precision,
+        quant=self.quant,
     )
     self.ff = FlaxFeedForward(
         dim=self.dim, dropout=self.dropout, dtype=self.dtype, weights_dtype=self.weights_dtype, precision=self.precision
@@ -838,6 +860,8 @@ class FlaxTransformer2DModel(nn.Module):
           Overrides default block sizes for flash attention.
       mesh (`jax.sharding.mesh`, *optional*, defaults to `None`):
           jax mesh is required if attention is set to flash.
+      quant (`AqtQuantization`, *optional*, defaults to None):
+          Configures AQT quantization (github.com/google/aqt).
   """
 
   in_channels: int
@@ -858,6 +882,7 @@ class FlaxTransformer2DModel(nn.Module):
   norm_num_groups: int = 32
   precision: jax.lax.Precision = None
   hidden_state_axis_names: AxisNames = (BATCH, LENGTH, D_KV)
+  quant: Quant = None
 
   def setup(self):
     self.norm = nn.GroupNorm(num_groups=self.norm_num_groups, epsilon=1e-5, dtype=self.dtype, param_dtype=self.weights_dtype)
@@ -903,6 +928,7 @@ def setup(self):
             flash_block_sizes=self.flash_block_sizes,
             mesh=self.mesh,
             precision=self.precision,
+            quant=self.quant,
         )
         for _ in range(self.depth)
     ]
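
End to end, a single `quant` object is threaded through the module tree: `FlaxTransformer2DModel` passes it to each `FlaxBasicTransformerBlock`, which passes it to both `FlaxAttention` instances, which forward it to `AttentionOp` and use it to pick `dot_general_cls`. A hedged usage sketch: the `AqtQuantization` constructor arguments and import paths below are assumptions modeled on MaxText's quantizations module (only `dot_general_cls()` and `einsum()` appear in this diff), and the model arguments follow the diffusers-style signature:

```python
from aqt.jax.v2 import config as aqt_config
from maxdiffusion.models import quantizations
from maxdiffusion.models.attention_flax import FlaxTransformer2DModel

# Assumed constructor: AqtQuantization wrapping an int8 AQT DotGeneral config.
quant = quantizations.AqtQuantization(
    quant_dg=aqt_config.fully_quantized(fwd_bits=8, bwd_bits=None),
)

model = FlaxTransformer2DModel(
    in_channels=320,
    n_heads=8,
    d_head=40,
    quant=quant,  # quant=None keeps the unquantized path everywhere
)
```

Because every `quant` field defaults to `None`, the feature is strictly opt-in: existing configs keep the plain `jnp.einsum` / `lax.dot_general` path untouched.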