instruct model setup
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -136,9 +136,10 @@ class UnslothAlignPropConfig(AlignPropConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
exp_name = 'inference',
|
||||
exp_name = 'train',
|
||||
run_name = '',
|
||||
seed = 3407,
|
||||
log_with = None,
|
||||
@@ -169,6 +170,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
|
||||
push_to_hub = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
@@ -204,6 +206,7 @@ class UnslothAlignPropConfig(AlignPropConfig):
|
||||
push_to_hub = push_to_hub,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothAlignPropTrainer(PyTorchModelHubMixin):
|
||||
@@ -663,3 +666,13 @@ class UnslothAlignPropTrainer(_UnslothAlignPropTrainer):
|
||||
image_samples_hook = image_samples_hook,**kwargs)
|
||||
|
||||
pass
|
||||
|
||||
|
||||
if hasattr(logger, "addFilter"):
    # Only install the filter when `logger` exposes the `addFilter` API.
    import logging

    class HideLoggingMessage(logging.Filter):
        """Logging filter that drops records whose message contains a substring.

        Args:
            text: Substring to look for in each record's formatted message.
        """

        def __init__(self, text):
            # Initialise the base Filter so the instance is fully constructed
            # before the custom attribute is attached.
            super().__init__()
            self.text = text

        def filter(self, x):
            """Return False (suppress the record) when `self.text` occurs in its message."""
            return self.text not in x.getMessage()

    # Hide log lines that mention `use_cache=True`.
    logger.addFilter(HideLoggingMessage("`use_cache=True`"))
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -116,6 +116,10 @@ class UnslothBCOConfig(BCOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -265,6 +269,7 @@ class UnslothBCOConfig(BCOConfig):
|
||||
max_density_ratio = 10.0,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -274,7 +279,7 @@ class UnslothBCOConfig(BCOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -424,6 +429,7 @@ class UnslothBCOConfig(BCOConfig):
|
||||
max_density_ratio = max_density_ratio,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothBCOTrainer(Trainer):
|
||||
@@ -1722,7 +1728,7 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1788,9 +1794,17 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1798,9 +1812,17 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
@@ -1839,3 +1861,13 @@ class UnslothBCOTrainer(_UnslothBCOTrainer):
|
||||
pass
|
||||
|
||||
pass
|
||||
|
||||
|
||||
if hasattr(logger, "addFilter"):
    # Only install the filter when `logger` exposes the `addFilter` API.
    import logging

    class HideLoggingMessage(logging.Filter):
        """Logging filter that drops records whose message contains a substring.

        Args:
            text: Substring to look for in each record's formatted message.
        """

        def __init__(self, text):
            # Initialise the base Filter so the instance is fully constructed
            # before the custom attribute is attached.
            super().__init__()
            self.text = text

        def filter(self, x):
            """Return False (suppress the record) when `self.text` occurs in its message."""
            return self.text not in x.getMessage()

    # Hide log lines that mention `use_cache=True`.
    logger.addFilter(HideLoggingMessage("`use_cache=True`"))
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -119,6 +119,10 @@ class UnslothCPOConfig(CPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -267,6 +271,7 @@ class UnslothCPOConfig(CPOConfig):
|
||||
dataset_num_proc = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -276,7 +281,7 @@ class UnslothCPOConfig(CPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -425,6 +430,7 @@ class UnslothCPOConfig(CPOConfig):
|
||||
dataset_num_proc = dataset_num_proc,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothCPOTrainer(Trainer):
|
||||
@@ -1498,7 +1504,7 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1564,9 +1570,17 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1574,9 +1588,17 @@ class UnslothCPOTrainer(_UnslothCPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -152,9 +152,10 @@ class UnslothDDPOConfig(DDPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
exp_name = 'inference',
|
||||
exp_name = 'train',
|
||||
run_name = '',
|
||||
seed = 3407,
|
||||
log_with = None,
|
||||
@@ -194,6 +195,7 @@ class UnslothDDPOConfig(DDPOConfig):
|
||||
push_to_hub = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
@@ -238,6 +240,7 @@ class UnslothDDPOConfig(DDPOConfig):
|
||||
push_to_hub = push_to_hub,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothDDPOTrainer(PyTorchModelHubMixin):
|
||||
@@ -899,3 +902,13 @@ class UnslothDDPOTrainer(_UnslothDDPOTrainer):
|
||||
image_samples_hook = image_samples_hook,**kwargs)
|
||||
|
||||
pass
|
||||
|
||||
|
||||
if hasattr(logger, "addFilter"):
    # Only install the filter when `logger` exposes the `addFilter` API.
    import logging

    class HideLoggingMessage(logging.Filter):
        """Logging filter that drops records whose message contains a substring.

        Args:
            text: Substring to look for in each record's formatted message.
        """

        def __init__(self, text):
            # Initialise the base Filter so the instance is fully constructed
            # before the custom attribute is attached.
            super().__init__()
            self.text = text

        def filter(self, x):
            """Return False (suppress the record) when `self.text` occurs in its message."""
            return self.text not in x.getMessage()

    # Hide log lines that mention `use_cache=True`.
    logger.addFilter(HideLoggingMessage("`use_cache=True`"))
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -215,6 +215,10 @@ class UnslothDPOConfig(DPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -380,6 +384,7 @@ class UnslothDPOConfig(DPOConfig):
|
||||
generate_during_eval = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -389,7 +394,7 @@ class UnslothDPOConfig(DPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -555,6 +560,7 @@ class UnslothDPOConfig(DPOConfig):
|
||||
generate_during_eval = generate_during_eval,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothDPOTrainer(Trainer):
|
||||
@@ -2397,7 +2403,7 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -2463,9 +2469,17 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -2473,9 +2487,17 @@ class UnslothDPOTrainer(_UnslothDPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -89,6 +89,10 @@ class UnslothGKDConfig(GKDConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -245,6 +249,7 @@ class UnslothGKDConfig(GKDConfig):
|
||||
seq_kd = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -254,7 +259,13 @@ class UnslothGKDConfig(GKDConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
|
||||
from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
|
||||
if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
|
||||
from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
|
||||
pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE
|
||||
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -416,6 +427,7 @@ class UnslothGKDConfig(GKDConfig):
|
||||
seq_kd = seq_kd,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothGKDTrainer(SFTTrainer):
|
||||
@@ -759,7 +771,7 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -825,9 +837,17 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -835,9 +855,17 @@ class UnslothGKDTrainer(_UnslothGKDTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -727,6 +727,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -904,6 +905,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
wandb_log_unique_prompts = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -1116,6 +1118,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
wandb_log_unique_prompts = wandb_log_unique_prompts,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothGRPOTrainer(Trainer):
|
||||
@@ -2814,7 +2817,7 @@ class UnslothGRPOTrainer(_UnslothGRPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -87,6 +87,10 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -223,6 +227,7 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
|
||||
optimize_device_cache = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -366,6 +371,7 @@ class UnslothIterativeSFTConfig(IterativeSFTConfig):
|
||||
optimize_device_cache = optimize_device_cache,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothIterativeSFTTrainer(Trainer):
|
||||
@@ -825,7 +831,7 @@ class UnslothIterativeSFTTrainer(_UnslothIterativeSFTTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -126,6 +126,10 @@ class UnslothKTOConfig(KTOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -277,6 +281,7 @@ class UnslothKTOConfig(KTOConfig):
|
||||
base_model_attribute_name = 'model',
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -286,7 +291,7 @@ class UnslothKTOConfig(KTOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -438,6 +443,7 @@ class UnslothKTOConfig(KTOConfig):
|
||||
base_model_attribute_name = base_model_attribute_name,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothKTOTrainer(Trainer):
|
||||
@@ -1934,7 +1940,7 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -2000,9 +2006,17 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -2010,9 +2024,17 @@ class UnslothKTOTrainer(_UnslothKTOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -71,6 +71,10 @@ class UnslothNashMDConfig(NashMDConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -217,6 +221,7 @@ class UnslothNashMDConfig(NashMDConfig):
|
||||
model_init_kwargs = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -226,7 +231,7 @@ class UnslothNashMDConfig(NashMDConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -378,6 +383,7 @@ class UnslothNashMDConfig(NashMDConfig):
|
||||
model_init_kwargs = model_init_kwargs,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothNashMDTrainer(OnlineDPOTrainer):
|
||||
@@ -897,7 +903,7 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -963,9 +969,17 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -973,9 +987,17 @@ class UnslothNashMDTrainer(_UnslothNashMDTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -104,6 +104,10 @@ class UnslothORPOConfig(ORPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -248,6 +252,7 @@ class UnslothORPOConfig(ORPOConfig):
|
||||
dataset_num_proc = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -257,7 +262,7 @@ class UnslothORPOConfig(ORPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -402,6 +407,7 @@ class UnslothORPOConfig(ORPOConfig):
|
||||
dataset_num_proc = dataset_num_proc,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothORPOTrainer(Trainer):
|
||||
@@ -1454,7 +1460,7 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1520,9 +1526,17 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1530,9 +1544,17 @@ class UnslothORPOTrainer(_UnslothORPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -124,6 +124,10 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -270,6 +274,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
|
||||
model_init_kwargs = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -279,7 +284,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -431,6 +436,7 @@ class UnslothOnlineDPOConfig(OnlineDPOConfig):
|
||||
model_init_kwargs = model_init_kwargs,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothOnlineDPOTrainer(Trainer):
|
||||
@@ -1204,7 +1210,7 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1270,9 +1276,17 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1280,9 +1294,17 @@ class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -106,6 +106,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -270,6 +271,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
ds3_gather_for_generation = True,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -279,7 +281,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -449,6 +451,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothPPOTrainer(Trainer):
|
||||
@@ -944,7 +947,7 @@ class _UnslothPPOTrainer(Trainer):
|
||||
pg_clipfrac = masked_mean(
|
||||
(pg_losses2 > pg_losses).float(), ~padding_mask[micro_batch_inds]
|
||||
)
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1)
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
|
||||
entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
|
||||
approxkl = 0.5 * (logprobs_diff**2).mean()
|
||||
approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
|
||||
@@ -1202,7 +1205,7 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1268,9 +1271,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1278,9 +1289,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -87,6 +87,10 @@ class UnslothPRMConfig(PRMConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -227,6 +231,7 @@ class UnslothPRMConfig(PRMConfig):
|
||||
dataset_num_proc = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -236,7 +241,7 @@ class UnslothPRMConfig(PRMConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -376,6 +381,7 @@ class UnslothPRMConfig(PRMConfig):
|
||||
dataset_num_proc = dataset_num_proc,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothPRMTrainer(Trainer):
|
||||
@@ -728,7 +734,7 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -794,9 +800,17 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -804,9 +818,17 @@ class UnslothPRMTrainer(_UnslothPRMTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -99,6 +99,7 @@ class UnslothRLOOConfig(RLOOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -261,6 +262,7 @@ class UnslothRLOOConfig(RLOOConfig):
|
||||
ds3_gather_for_generation = True,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -270,7 +272,7 @@ class UnslothRLOOConfig(RLOOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -438,6 +440,7 @@ class UnslothRLOOConfig(RLOOConfig):
|
||||
ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothRLOOTrainer(Trainer):
|
||||
@@ -865,7 +868,7 @@ class _UnslothRLOOTrainer(Trainer):
|
||||
|
||||
with torch.no_grad():
|
||||
pg_clipfrac = (pg_losses2 > pg_losses).float().mean()
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1)
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
|
||||
entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
|
||||
approxkl = 0.5 * (logprobs_diff**2).mean()
|
||||
approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
|
||||
@@ -1125,9 +1128,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1135,9 +1146,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -86,6 +86,10 @@ class UnslothRewardConfig(RewardConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -222,6 +226,7 @@ class UnslothRewardConfig(RewardConfig):
|
||||
center_rewards_coefficient = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -231,7 +236,7 @@ class UnslothRewardConfig(RewardConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -368,6 +373,7 @@ class UnslothRewardConfig(RewardConfig):
|
||||
center_rewards_coefficient = center_rewards_coefficient,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothRewardTrainer(Trainer):
|
||||
@@ -746,7 +752,7 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -812,9 +818,17 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -822,9 +836,17 @@ class UnslothRewardTrainer(_UnslothRewardTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -10,7 +10,7 @@ import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
|
||||
from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, Path, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Union, clone_chat_template, contextlib, dataclass, dataclasses, defaultdict, generate_model_card, get_act_offloading_ctx_manager, get_comet_experiment_url, get_peft_model, is_conversational, is_peft_available, is_wandb_available, nn, os, pad, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, version, warnings, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, IterableDataset, Optional, Union, os, pad, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, os, peft, torch, os)
|
||||
from trl.trainer.sft_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, EvalPrediction, FeatureExtractionMixin, IterableDataset, Optional, Path, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Union, clone_chat_template, contextlib, dataclass, dataclasses, defaultdict, generate_model_card, get_act_offloading_ctx_manager, get_comet_experiment_url, get_peft_model, is_conversational, is_peft_available, is_wandb_available, nn, os, pack_dataset, pad, peft, peft_module_casting_to_bf16, prepare_model_for_kbit_training, torch, version, warnings, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, IterableDataset, Optional, Union, os, pack_dataset, pad, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, os, peft, torch, os)
|
||||
|
||||
|
||||
import os
|
||||
@@ -132,6 +132,10 @@ class UnslothSFTConfig(SFTConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -280,6 +284,7 @@ class UnslothSFTConfig(SFTConfig):
|
||||
activation_offloading = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -289,7 +294,13 @@ class UnslothSFTConfig(SFTConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
|
||||
from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
|
||||
if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
|
||||
from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
|
||||
pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE
|
||||
|
||||
|
||||
super().__init__(
|
||||
output_dir = output_dir,
|
||||
@@ -438,6 +449,7 @@ class UnslothSFTConfig(SFTConfig):
|
||||
activation_offloading = activation_offloading,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothSFTTrainer(Trainer):
|
||||
@@ -868,7 +880,11 @@ class _UnslothSFTTrainer(Trainer):
|
||||
pass
|
||||
|
||||
if not isinstance(dataset, IterableDataset):
|
||||
map_kwargs["num_proc"] = getattr(args, "dataset_num_proc", 2)
|
||||
dataset_num_proc = getattr(args, "dataset_num_proc", None)
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
map_kwargs["num_proc"] = dataset_num_proc
|
||||
else:
|
||||
map_kwargs["batch_size"] = dataset._ex_iterable.batch_size
|
||||
|
||||
@@ -882,18 +898,22 @@ class _UnslothSFTTrainer(Trainer):
|
||||
pass
|
||||
pass
|
||||
if packing:
|
||||
print("Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!")
|
||||
return dataset
|
||||
# Try using new packing which works in TRL
|
||||
try:
|
||||
pack_dataset
|
||||
except:
|
||||
print("Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!")
|
||||
return dataset
|
||||
|
||||
if max_seq_length == 0:
|
||||
raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.")
|
||||
|
||||
if use_desc: map_kwargs["desc"] = f"Unsloth: Packing {dataset_name} dataset"
|
||||
dataset = dataset.select_columns(used_column_names).map(
|
||||
pack_examples,
|
||||
batched = True,
|
||||
fn_kwargs = {"seq_length": max_seq_length,},
|
||||
**map_kwargs,
|
||||
dataset = pack_dataset(
|
||||
dataset.select_columns(used_column_names),
|
||||
max_seq_length,
|
||||
getattr(args, "packing_strategy", "bfd"),
|
||||
map_kwargs,
|
||||
)
|
||||
pass
|
||||
return dataset
|
||||
@@ -1101,7 +1121,7 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1166,9 +1186,7 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
|
||||
max_length = args.max_length
|
||||
else:
|
||||
model_max_length = getattr(model, 'max_seq_length', None)
|
||||
# print(model_max_length, 'mml1')
|
||||
if model_max_length is None: model_max_length = getattr(model, 'max_length', None)
|
||||
# print(model_max_length, 'mml2')
|
||||
if model_max_length is not None:
|
||||
args.max_length = model_max_length
|
||||
max_length = args.max_length
|
||||
@@ -1189,9 +1207,17 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1199,9 +1225,17 @@ class UnslothSFTTrainer(_UnslothSFTTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -70,6 +70,10 @@ class UnslothXPOConfig(XPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
max_seq_length : Optional[int] = field(
|
||||
default = None,
|
||||
metadata = {'help': 'Maximum sequence length to truncate to.'},
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -216,6 +220,7 @@ class UnslothXPOConfig(XPOConfig):
|
||||
model_init_kwargs = None,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
max_seq_length = None,
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -225,7 +230,7 @@ class UnslothXPOConfig(XPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -377,6 +382,7 @@ class UnslothXPOConfig(XPOConfig):
|
||||
model_init_kwargs = model_init_kwargs,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
self.max_seq_length = max_seq_length
|
||||
pass
|
||||
|
||||
class _UnslothXPOTrainer(OnlineDPOTrainer):
|
||||
@@ -940,7 +946,7 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1006,9 +1012,17 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1016,9 +1030,17 @@ class UnslothXPOTrainer(_UnslothXPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user