instruct model setup

2025-08-28 17:57:59 +00:00
parent 77c563f358
commit d49b4ff2d5
55 changed files with 27760 additions and 326 deletions
@@ -1,7 +1,7 @@
 """
-2025.8.4
-2025.8.5
-4.55.1
+2025.8.9
+2025.8.10
+4.55.4
 0.21.0
 __UNSLOTH_VERSIONING__
 """
@@ -99,6 +99,7 @@ class UnslothRLOOConfig(RLOOConfig):
        default = -1,
        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
    )
+    
    def __init__(
        self,
        output_dir = None,
@@ -261,6 +262,7 @@ class UnslothRLOOConfig(RLOOConfig):
        ds3_gather_for_generation = True,
        vllm_sampling_params = None,
        unsloth_num_chunks = -1,
+        
        **kwargs,
    ):
        if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -270,7 +272,7 @@ class UnslothRLOOConfig(RLOOConfig):
            save_strategy = 'no'
        if dataset_num_proc is None:
            from multiprocessing import cpu_count
-            dataset_num_proc = min(cpu_count()*2, 2)
+            dataset_num_proc = max(cpu_count()+4, 2)
        if temperature <= 0:
            raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
        elif temperature >= 10:
@@ -438,6 +440,7 @@ class UnslothRLOOConfig(RLOOConfig):
            ds3_gather_for_generation = ds3_gather_for_generation,**kwargs)
        self.vllm_sampling_params = vllm_sampling_params
        self.unsloth_num_chunks = unsloth_num_chunks
+        
 pass

 class _UnslothRLOOTrainer(Trainer):
@@ -865,7 +868,7 @@ class _UnslothRLOOTrainer(Trainer):

                            with torch.no_grad():
                                pg_clipfrac = (pg_losses2 > pg_losses).float().mean()
-                                prob_dist = torch.nn.functional.softmax(logits, dim=-1)
+                                prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype)
                                entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
                                approxkl = 0.5 * (logprobs_diff**2).mean()
                                approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
@@ -1125,9 +1128,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
        from unsloth_zoo.vision_utils import UnslothVisionDataCollator
        if not isinstance(data_collator, UnslothVisionDataCollator):
            if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
-                data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer, mlm = False, mlm_probability = 0.0)
+                data_collator = TransformersDataCollatorForLanguageModeling(
+                    __tokenizer,
+                    mlm = False,
+                    mlm_probability = 0.0,
+                    pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                )
            elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
-                data_collator = DataCollatorForSeq2Seq(__tokenizer)
+                data_collator = DataCollatorForSeq2Seq(
+                    __tokenizer,
+                    pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                )
        else:
            if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
            if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
@@ -1135,9 +1146,17 @@ class UnslothRLOOTrainer(_UnslothRLOOTrainer):
        if not isinstance(data_collator, UnslothVisionDataCollator):
            if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
                if isinstance(data_collator, DataCollatorForSeq2Seq):
-                    data_collator = DataCollatorForSeq2Seq(__tokenizer.tokenizer)
+                    data_collator = DataCollatorForSeq2Seq(
+                        __tokenizer.tokenizer,
+                        pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                    )
                else:
-                    data_collator = TransformersDataCollatorForLanguageModeling(__tokenizer.tokenizer, mlm = False, mlm_probability = 0.0)
+                    data_collator = TransformersDataCollatorForLanguageModeling(
+                        __tokenizer.tokenizer,
+                        mlm = False,
+                        mlm_probability = 0.0,
+                        pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                    )
        other_metrics = []
        
        from unsloth_zoo.logging_utils import PatchRLStatistics