Skip to content

Commit 9189224

Browse files
committed
refactor: Added logging options to CombinedDataset and its factory method
1 parent b1c0145 commit 9189224

2 files changed

Lines changed: 10 additions & 2 deletions

File tree

src/modalities/dataloader/dataset.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ def __init__(self, datasets: list[Dataset], log_chunk_switch: bool = False, log_
451451
452452
Args:
453453
datasets (list[Dataset]): A list of datasets to combine.
454+
log_chunk_switch (bool, optional): Whether to log when switching between dataset chunks. Defaults to False.
455+
log_initial_pos (bool, optional): Whether to log the initial position at the beginning of a training
456+
or warmstart. Defaults to False.
454457
"""
455458
self.log_chunk_switch = log_chunk_switch
456459
self.log_initial_pos = log_initial_pos

src/modalities/dataloader/dataset_factory.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,18 @@ def get_packed_mem_map_dataset_megatron(
116116
return dataset
117117

118118
@staticmethod
119-
def get_combined_dataset(datasets: list[Dataset]) -> Dataset:
119+
def get_combined_dataset(
120+
datasets: list[Dataset], log_chunk_switch: bool = False, log_initial_pos: bool = False
121+
) -> Dataset:
120122
"""Factory method for creating a combined dataset.
121123
122124
Args:
123125
datasets (list[Dataset]): List of datasets to combine.
126+
log_chunk_switch (bool, optional): Whether to log when switching between dataset chunks. Defaults to False.
127+
log_initial_pos (bool, optional): Whether to log the initial position at the beginning of a
128+
training or warmstart. Defaults to False.
124129
125130
Returns:
126131
Dataset: CombinedDataset object.
127132
"""
128-
return CombinedDataset(datasets=datasets)
133+
return CombinedDataset(datasets=datasets, log_chunk_switch=log_chunk_switch, log_initial_pos=log_initial_pos)

0 commit comments

Comments
 (0)