instruct model setup
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -106,6 +106,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -270,6 +271,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
ds3_gather_for_generation = True,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -279,7 +281,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
save_strategy = 'no'
|
||||
if dataset_num_proc is None:
|
||||
from multiprocessing import cpu_count
|
||||
dataset_num_proc = min(cpu_count()*2, 2)
|
||||
dataset_num_proc = max(cpu_count()+4, 2)
|
||||
if temperature <= 0:
|
||||
raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
|
||||
elif temperature >= 10:
|
||||
@@ -449,6 +451,7 @@ class UnslothPPOConfig(PPOConfig):
|
||||
ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothPPOTrainer(Trainer):
|
||||
@@ -944,7 +947,7 @@ class _UnslothPPOTrainer(Trainer):
|
||||
pg_clipfrac = masked_mean(
|
||||
(pg_losses2 > pg_losses).float(), ~padding_mask[micro_batch_inds]
|
||||
)
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1)
|
||||
prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
|
||||
entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
|
||||
approxkl = 0.5 * (logprobs_diff**2).mean()
|
||||
approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
|
||||
@@ -1202,7 +1205,7 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
@@ -1268,9 +1271,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
from unsloth_zoo.vision_utils import UnslothVisionDataCollator
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
|
||||
if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
|
||||
@@ -1278,9 +1289,17 @@ class UnslothPPOTrainer(_UnslothPPOTrainer):
|
||||
if not isinstance(data_collator, UnslothVisionDataCollator):
|
||||
if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
|
||||
if isinstance(data_collator, DataCollatorForSeq2Seq):
|
||||
data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
|
||||
data_collator = DataCollatorForSeq2Seq(
|
||||
__tokenizer.tokenizer,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
else:
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
|
||||
data_collator = TransformersDataCollatorForLanguageModeling(
|
||||
__tokenizer.tokenizer,
|
||||
mlm = False,
|
||||
mlm_probability = 0.0,
|
||||
pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
|
||||
)
|
||||
other_metrics = []
|
||||
|
||||
from unsloth_zoo.logging_utils import PatchRLStatistics
|
||||
|
||||
Reference in New Issue
Block a user