instruct model setup

This commit is contained in:
Your Name
2025-08-28 17:57:59 +00:00
parent 77c563f358
commit d49b4ff2d5
55 changed files with 27760 additions and 326 deletions
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -136,9 +136,10 @@ class UnslothAlignPropConfig(AlignPropConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
def __init__(
self,
exp_name = 'inference',
exp_name = 'train',
run_name = '',
seed = 3407,
log_with = None,
@@ -169,6 +170,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
push_to_hub = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
**kwargs,
):
@@ -204,6 +206,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
push_to_hub = push_to_hub,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
pass
class _UnslothAlignPropTrainer(PyTorchModelHubMixin):
@@ -663,3 +666,13 @@ class UnslothAlignPropTrainer(_UnslothAlignPropTrainer):
image_samples_hook = image_samples_hook,**kwargs)
pass
if hasattr(logger, "addFilter"):
import logging
class HideLoggingMessage(logging.Filter):
    """Logging filter that drops every record whose message contains a given text."""

    def __init__(self, text):
        """Remember the substring to suppress.

        Args:
            text: Substring searched for in each formatted log message.
        """
        # Initialise the base Filter so the attributes it defines are present
        # on this instance too, instead of silently skipping its constructor.
        super().__init__()
        self.text = text

    def filter(self, x):
        """Decide whether log record `x` should be emitted.

        Returns:
            False when the formatted message contains `self.text` (record is
            dropped), True otherwise.
        """
        try:
            message = x.getMessage()
        except Exception:
            # getMessage() applies `msg % args`, which raises on a malformed
            # logging call. Keep the record rather than letting that error
            # escape from the filter into the code that issued the log call.
            return True
        return self.text not in message
pass
logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+41 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -116,6 +116,10 @@ class UnslothBCOConfig(BCOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -265,6 +269,7 @@ class UnslothBCOConfig(BCOConfig):
max_density_ratio = 10.0,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -274,7 +279,7 @@ class UnslothBCOConfig(BCOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -424,6 +429,7 @@ class UnslothBCOConfig(BCOConfig):
max_density_ratio = max_density_ratio,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothBCOTrainer(Trainer):
@@ -1722,7 +1728,7 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1788,9 +1794,17 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1798,9 +1812,17 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
@@ -1839,3 +1861,13 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
pass
pass
if hasattr(logger, "addFilter"):
import logging
class HideLoggingMessage(logging.Filter):
    """Logging filter that drops every record whose message contains a given text."""

    def __init__(self, text):
        """Remember the substring to suppress.

        Args:
            text: Substring searched for in each formatted log message.
        """
        # Initialise the base Filter so the attributes it defines are present
        # on this instance too, instead of silently skipping its constructor.
        super().__init__()
        self.text = text

    def filter(self, x):
        """Decide whether log record `x` should be emitted.

        Returns:
            False when the formatted message contains `self.text` (record is
            dropped), True otherwise.
        """
        try:
            message = x.getMessage()
        except Exception:
            # getMessage() applies `msg % args`, which raises on a malformed
            # logging call. Keep the record rather than letting that error
            # escape from the filter into the code that issued the log call.
            return True
        return self.text not in message
pass
logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -119,6 +119,10 @@ class UnslothCPOConfig(CPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -267,6 +271,7 @@ class UnslothCPOConfig(CPOConfig):
dataset_num_proc = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -276,7 +281,7 @@ class UnslothCPOConfig(CPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -425,6 +430,7 @@ class UnslothCPOConfig(CPOConfig):
dataset_num_proc = dataset_num_proc,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothCPOTrainer(Trainer):
@@ -1498,7 +1504,7 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1564,9 +1570,17 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1574,9 +1588,17 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+17 -4
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -152,9 +152,10 @@ class UnslothDDPOConfig(DDPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
def __init__(
self,
exp_name = 'inference',
exp_name = 'train',
run_name = '',
seed = 3407,
log_with = None,
@@ -194,6 +195,7 @@ class UnslothDDPOConfig(DDPOConfig):
push_to_hub = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
**kwargs,
):
@@ -238,6 +240,7 @@ class UnslothDDPOConfig(DDPOConfig):
push_to_hub = push_to_hub,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
pass
class _UnslothDDPOTrainer(PyTorchModelHubMixin):
@@ -899,3 +902,13 @@ class UnslothDDPOTrainer(_UnslothDDPOTrainer):
image_samples_hook = image_samples_hook,**kwargs)
pass
if hasattr(logger, "addFilter"):
import logging
class HideLoggingMessage(logging.Filter):
    """Logging filter that drops every record whose message contains a given text."""

    def __init__(self, text):
        """Remember the substring to suppress.

        Args:
            text: Substring searched for in each formatted log message.
        """
        # Initialise the base Filter so the attributes it defines are present
        # on this instance too, instead of silently skipping its constructor.
        super().__init__()
        self.text = text

    def filter(self, x):
        """Decide whether log record `x` should be emitted.

        Returns:
            False when the formatted message contains `self.text` (record is
            dropped), True otherwise.
        """
        try:
            message = x.getMessage()
        except Exception:
            # getMessage() applies `msg % args`, which raises on a malformed
            # logging call. Keep the record rather than letting that error
            # escape from the filter into the code that issued the log call.
            return True
        return self.text not in message
pass
logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -215,6 +215,10 @@ class UnslothDPOConfig(DPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -380,6 +384,7 @@ class UnslothDPOConfig(DPOConfig):
generate_during_eval = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -389,7 +394,7 @@ class UnslothDPOConfig(DPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -555,6 +560,7 @@ class UnslothDPOConfig(DPOConfig):
generate_during_eval = generate_during_eval,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothDPOTrainer(Trainer):
@@ -2397,7 +2403,7 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -2463,9 +2469,17 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -2473,9 +2487,17 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+37 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -89,6 +89,10 @@ class UnslothGKDConfig(GKDConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -245,6 +249,7 @@ class UnslothGKDConfig(GKDConfig):
seq_kd = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -254,7 +259,13 @@ class UnslothGKDConfig(GKDConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -416,6 +427,7 @@ class UnslothGKDConfig(GKDConfig):
seq_kd = seq_kd,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothGKDTrainer(SFTTrainer):
@@ -759,7 +771,7 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -825,9 +837,17 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -835,9 +855,17 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+7 -4
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -727,6 +727,7 @@ class UnslothGRPOConfig(GRPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
def __init__(
self,
output_dir = None,
@@ -904,6 +905,7 @@ class UnslothGRPOConfig(GRPOConfig):
wandb_log_unique_prompts = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -1116,6 +1118,7 @@ class UnslothGRPOConfig(GRPOConfig):
wandb_log_unique_prompts = wandb_log_unique_prompts,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
pass
class _UnslothGRPOTrainer(Trainer):
@@ -2814,7 +2817,7 @@ class UnslothGRPOTrainer(_UnslothGRPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -87,6 +87,10 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -223,6 +227,7 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
optimize_device_cache = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -366,6 +371,7 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
optimize_device_cache = optimize_device_cache,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothIterativeSFTTrainer(Trainer):
@@ -825,7 +831,7 @@ class UnslothIterativeSFTTrainer(_UnslothIterativeSFTTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -126,6 +126,10 @@ class UnslothKTOConfig(KTOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -277,6 +281,7 @@ class UnslothKTOConfig(KTOConfig):
base_model_attribute_name = 'model',
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -286,7 +291,7 @@ class UnslothKTOConfig(KTOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -438,6 +443,7 @@ class UnslothKTOConfig(KTOConfig):
base_model_attribute_name = base_model_attribute_name,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothKTOTrainer(Trainer):
@@ -1934,7 +1940,7 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -2000,9 +2006,17 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -2010,9 +2024,17 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -71,6 +71,10 @@ class UnslothNashMDConfig(NashMDConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -217,6 +221,7 @@ class UnslothNashMDConfig(NashMDConfig):
model_init_kwargs = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -226,7 +231,7 @@ class UnslothNashMDConfig(NashMDConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -378,6 +383,7 @@ class UnslothNashMDConfig(NashMDConfig):
model_init_kwargs = model_init_kwargs,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothNashMDTrainer(OnlineDPOTrainer):
@@ -897,7 +903,7 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -963,9 +969,17 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -973,9 +987,17 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -104,6 +104,10 @@ class UnslothORPOConfig(ORPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -248,6 +252,7 @@ class UnslothORPOConfig(ORPOConfig):
dataset_num_proc = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -257,7 +262,7 @@ class UnslothORPOConfig(ORPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -402,6 +407,7 @@ class UnslothORPOConfig(ORPOConfig):
dataset_num_proc = dataset_num_proc,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothORPOTrainer(Trainer):
@@ -1454,7 +1460,7 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1520,9 +1526,17 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1530,9 +1544,17 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -124,6 +124,10 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -270,6 +274,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
model_init_kwargs = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -279,7 +284,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -431,6 +436,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
model_init_kwargs = model_init_kwargs,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothOnlineDPOTrainer(Trainer):
@@ -1204,7 +1210,7 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1270,9 +1276,17 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1280,9 +1294,17 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+29 -10
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -106,6 +106,7 @@ class UnslothPPOConfig(PPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
def __init__(
self,
output_dir = None,
@@ -270,6 +271,7 @@ class UnslothPPOConfig(PPOConfig):
ds3_gather_for_generation = True,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -279,7 +281,7 @@ class UnslothPPOConfig(PPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -449,6 +451,7 @@ class UnslothPPOConfig(PPOConfig):
ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
pass
class _UnslothPPOTrainer(Trainer):
@@ -944,7 +947,7 @@ class _UnslothPPOTrainer(Trainer):
pg_clipfrac = masked_mean(
(pg_losses2 > pg_losses).float(), ~padding_mask[micro_batch_inds]
)
prob_dist = torch.nn.functional.softmax(logits, dim=-1)
prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
approxkl = 0.5 * (logprobs_diff**2).mean()
approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
@@ -1202,7 +1205,7 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1268,9 +1271,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1278,9 +1289,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -87,6 +87,10 @@ class UnslothPRMConfig(PRMConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -227,6 +231,7 @@ class UnslothPRMConfig(PRMConfig):
dataset_num_proc = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -236,7 +241,7 @@ class UnslothPRMConfig(PRMConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -376,6 +381,7 @@ class UnslothPRMConfig(PRMConfig):
dataset_num_proc = dataset_num_proc,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothPRMTrainer(Trainer):
@@ -728,7 +734,7 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -794,9 +800,17 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -804,9 +818,17 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+28 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -99,6 +99,7 @@ class UnslothRLOOConfig(RLOOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
def __init__(
self,
output_dir = None,
@@ -261,6 +262,7 @@ class UnslothRLOOConfig(RLOOConfig):
ds3_gather_for_generation = True,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -270,7 +272,7 @@ class UnslothRLOOConfig(RLOOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -438,6 +440,7 @@ class UnslothRLOOConfig(RLOOConfig):
ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
pass
class _UnslothRLOOTrainer(Trainer):
@@ -865,7 +868,7 @@ class _UnslothRLOOTrainer(Trainer):
with torch.no_grad():
pg_clipfrac = (pg_losses2 > pg_losses).float().mean()
prob_dist = torch.nn.functional.softmax(logits, dim=-1)
prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
approxkl = 0.5 * (logprobs_diff**2).mean()
approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
@@ -1125,9 +1128,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1135,9 +1146,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -86,6 +86,10 @@ class UnslothRewardConfig(RewardConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -222,6 +226,7 @@ class UnslothRewardConfig(RewardConfig):
center_rewards_coefficient = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -231,7 +236,7 @@ class UnslothRewardConfig(RewardConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
super().__init__(
output_dir = output_dir,
@@ -368,6 +373,7 @@ class UnslothRewardConfig(RewardConfig):
center_rewards_coefficient = center_rewards_coefficient,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothRewardTrainer(Trainer):
@@ -746,7 +752,7 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -812,9 +818,17 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -822,9 +836,17 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+54 -20
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -10,7 +10,7 @@ import torch
import torch.nn as nn
from torch.nn import functional as F
from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, Path, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Union, clone_chat_template, contextlib, dataclass, dataclasses, defaultdict, generate_model_card, get_act_offloading_ctx_manager, get_comet_experiment_url, get_peft_model, is_conversational, is_peft_available, is_wandb_available, nn, os, pad, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, version, warnings, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, IterableDataset, Optional, Union, os, pad, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, os, peft, torch, os)
from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, Path, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Union, clone_chat_template, contextlib, dataclass, dataclasses, defaultdict, generate_model_card, get_act_offloading_ctx_manager, get_comet_experiment_url, get_peft_model, is_conversational, is_peft_available, is_wandb_available, nn, os, pack_dataset, pad, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, version, warnings, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, IterableDataset, Optional, Union, os, pack_dataset, pad, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, os, peft, torch, os)
import os
@@ -132,6 +132,10 @@ class UnslothSFTConfig(SFTConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -280,6 +284,7 @@ class UnslothSFTConfig(SFTConfig):
activation_offloading = False,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -289,7 +294,13 @@ class UnslothSFTConfig(SFTConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE
super().__init__(
output_dir = output_dir,
@@ -438,6 +449,7 @@ class UnslothSFTConfig(SFTConfig):
activation_offloading = activation_offloading,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothSFTTrainer(Trainer):
@@ -868,7 +880,11 @@ class _UnslothSFTTrainer(Trainer):
pass
if not isinstance(dataset, IterableDataset):
map_kwargs["num_proc"] = getattr(args, "dataset_num_proc", 2)
dataset_num_proc = getattr(args, "dataset_num_proc", None)
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = max(cpu_count()+4, 2)
map_kwargs["num_proc"] = dataset_num_proc
else:
map_kwargs["batch_size"] = dataset._ex_iterable.batch_size
@@ -882,18 +898,22 @@ class _UnslothSFTTrainer(Trainer):
pass
pass
if packing:
print("Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!")
return dataset
# Try using new packing which works in TRL
try:
pack_dataset
except:
print("Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!")
return dataset
if max_seq_length == 0:
raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
if use_desc: map_kwargs["desc"] = f"Unsloth: Packing {dataset_name} dataset"
dataset = dataset.select_columns(used_column_names).map(
pack_examples,
batched = True,
fn_kwargs = {"seq_length": max_seq_length,},
**map_kwargs,
dataset = pack_dataset(
dataset.select_columns(used_column_names),
max_seq_length,
getattr(args, "packing_strategy", "bfd"),
map_kwargs,
)
pass
return dataset
@@ -1101,7 +1121,7 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1166,9 +1186,7 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
max_length = args.max_length
else:
model_max_length = getattr(model, 'max_seq_length', None)
# print(model_max_length, 'mml1')
if model_max_length is None: model_max_length = getattr(model, 'max_length', None)
# print(model_max_length, 'mml2')
if model_max_length is not None:
args.max_length = model_max_length
max_length = args.max_length
@@ -1189,9 +1207,17 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1199,9 +1225,17 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics
+31 -9
View File
@@ -1,7 +1,7 @@
"""
2025.8.4
2025.8.5
4.55.1
2025.8.9
2025.8.10
4.55.4
0.21.0
__UNSLOTH_VERSIONING__
"""
@@ -70,6 +70,10 @@ class UnslothXPOConfig(XPOConfig):
default = -1,
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
)
max_seq_length : Optional[int] = field(
default = None,
metadata = {'help': 'Maximum sequence length to truncate to.'},
)
def __init__(
self,
output_dir = None,
@@ -216,6 +220,7 @@ class UnslothXPOConfig(XPOConfig):
model_init_kwargs = None,
vllm_sampling_params = None,
unsloth_num_chunks = -1,
max_seq_length = None,
**kwargs,
):
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -225,7 +230,7 @@ class UnslothXPOConfig(XPOConfig):
save_strategy = 'no'
if dataset_num_proc is None:
from multiprocessing import cpu_count
dataset_num_proc = min(cpu_count()*2, 2)
dataset_num_proc = max(cpu_count()+4, 2)
if temperature <= 0:
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
elif temperature >= 10:
@@ -377,6 +382,7 @@ class UnslothXPOConfig(XPOConfig):
model_init_kwargs = model_init_kwargs,**kwargs)
self.vllm_sampling_params = vllm_sampling_params
self.unsloth_num_chunks = unsloth_num_chunks
self.max_seq_length = max_seq_length
pass
class _UnslothXPOTrainer(OnlineDPOTrainer):
@@ -940,7 +946,7 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
print('Unsloth: Switching to float32 training since model cannot work with float16')
force_float32 = True
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
dtype = getattr(model.config, 'torch_dtype', None)
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
if dtype is None: dtype = model.get_input_embeddings().dtype
from unsloth_zoo.utils import _get_dtype
dtype = _get_dtype(dtype)
@@ -1006,9 +1012,17 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
if not isinstance(data_collator, UnslothVisionDataCollator):
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
data_collator = DataCollatorForSeq2Seq(__tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1016,9 +1030,17 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
if not isinstance(data_collator, UnslothVisionDataCollator):
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
if isinstance(data_collator, DataCollatorForSeq2Seq):
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
data_collator = DataCollatorForSeq2Seq(
__tokenizer.tokenizer,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
else:
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
data_collator = TransformersDataCollatorForLanguageModeling(
__tokenizer.tokenizer,
mlm = False,
mlm_probability = 0.0,
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
)
other_metrics = []
from unsloth_zoo.logging_utils import PatchRLStatistics