We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 21e3372 commit a784f87 (Copy full SHA for a784f87)
1 file changed
tests/integration/train_tests.py
@@ -505,6 +505,7 @@ def test_tpu_zero1_gradient_accumulation(self):
505
@pytest.mark.integration_test
506
@pytest.mark.gpu_only
507
@pytest.mark.scheduled_only
508
+ @pytest.mark.skip(reason="b/489133823. Previously transient in b/462548581.")
509
def test_gpu_zero1_gradient_accumulation(self):
510
os.environ["NVTE_FUSED_ATTN"] = "1" # Enable fused attention
511
zero1_ga = [ # tests Zero-1 optimizer sharding with gradient accumulation
0 commit comments